feat(core-api): add rapid order transcribe and parse endpoints
This commit is contained in:
@@ -45,6 +45,13 @@ function guessMimeTypeFromUri(fileUri) {
|
||||
if (path.endsWith('.jpg') || path.endsWith('.jpeg')) return 'image/jpeg';
|
||||
if (path.endsWith('.png')) return 'image/png';
|
||||
if (path.endsWith('.pdf')) return 'application/pdf';
|
||||
if (path.endsWith('.webm')) return 'audio/webm';
|
||||
if (path.endsWith('.mp3')) return 'audio/mpeg';
|
||||
if (path.endsWith('.wav')) return 'audio/wav';
|
||||
if (path.endsWith('.m4a')) return 'audio/m4a';
|
||||
if (path.endsWith('.aac')) return 'audio/aac';
|
||||
if (path.endsWith('.ogg')) return 'audio/ogg';
|
||||
if (path.endsWith('.flac')) return 'audio/flac';
|
||||
return 'application/octet-stream';
|
||||
}
|
||||
|
||||
|
||||
410
backend/core-api/src/services/rapid-order.js
Normal file
410
backend/core-api/src/services/rapid-order.js
Normal file
@@ -0,0 +1,410 @@
|
||||
import { z } from 'zod';
|
||||
import { AppError } from '../lib/errors.js';
|
||||
import { invokeVertexModel, invokeVertexMultimodalModel } from './llm.js';
|
||||
|
||||
/**
 * Zod schema used to validate the JSON result returned by the transcription
 * model call before it is handed back to callers.
 */
const rapidOrderTranscriptionSchema = z.object({
  // Trimmed transcript text, between 1 and 4000 characters.
  transcript: z
    .string()
    .trim()
    .min(1)
    .max(4000),
  // Score in [0, 1]; falls back to 0.7 when the field is absent.
  confidence: z
    .number()
    .min(0)
    .max(1)
    .default(0.7),
  // Language tag string (2-35 chars); falls back to 'en-US' when absent.
  language: z
    .string()
    .trim()
    .min(2)
    .max(35)
    .default('en-US'),
  // At most 10 non-empty warning strings of up to 200 characters each.
  warnings: z
    .array(z.string().trim().min(1).max(200))
    .max(10)
    .default([]),
});
|
||||
|
||||
/**
 * Zod schema for a single requested position: a role label plus a headcount.
 */
const rapidOrderPositionSchema = z.object({
  // Trimmed role name, 1-100 characters.
  role: z
    .string()
    .trim()
    .min(1)
    .max(100),
  // Whole number of workers, 1-200 inclusive.
  count: z
    .number()
    .int()
    .min(1)
    .max(200),
});
|
||||
|
||||
/**
 * Zod schema used to validate the JSON result returned by the order-parsing
 * model call. Built inside an IIFE so the small field factories stay private.
 */
const rapidOrderParseResultSchema = (() => {
  // Each factory returns a fresh schema instance, so no schema object is shared.
  const nullableIsoDatetime = () =>
    z.string().datetime({ offset: true }).nullable().default(null);
  const nullableText = (maxLength) =>
    z.string().trim().max(maxLength).nullable().default(null);
  const unitInterval = () => z.number().min(0).max(1);
  const shortStringList = (maxItemLength) =>
    z.array(z.string().trim().min(1).max(maxItemLength)).max(20).default([]);

  // The draft order extracted from the request text.
  const parsedOrder = z.object({
    orderType: z.literal('ONE_TIME'),
    isRapid: z.literal(true),
    positions: z.array(rapidOrderPositionSchema).max(20).default([]),
    startAt: nullableIsoDatetime(),
    endAt: nullableIsoDatetime(),
    durationMinutes: z.number().int().min(15).max(1440).nullable().default(null),
    locationHint: nullableText(200),
    notes: nullableText(1200),
    sourceText: z.string().trim().min(1).max(4000),
  });

  // Overall score plus an optional per-field score map, all within [0, 1].
  const confidence = z.object({
    overall: unitInterval(),
    fields: z.record(unitInterval()).default({}),
  });

  return z.object({
    parsed: parsedOrder,
    missingFields: shortStringList(60),
    warnings: shortStringList(200),
    confidence,
  });
})();
|
||||
|
||||
/**
 * JSON Schema passed as `responseJsonSchema` to the transcription model call.
 * Every declared property is required and no additional properties are allowed.
 */
const RAPID_ORDER_TRANSCRIPTION_JSON_SCHEMA = (() => {
  const properties = {
    transcript: { type: 'string' },
    confidence: { type: 'number', minimum: 0, maximum: 1 },
    language: { type: 'string' },
    warnings: { type: 'array', items: { type: 'string' } },
  };

  return {
    type: 'object',
    additionalProperties: false,
    // Same names, same order as the property declarations above.
    required: Object.keys(properties),
    properties,
  };
})();
|
||||
|
||||
/**
 * JSON Schema passed as `responseJsonSchema` to the order-parsing model call.
 * Built with small private helpers; the resulting object has the same keys,
 * values, and key order as a hand-written literal would.
 */
const RAPID_ORDER_PARSE_JSON_SCHEMA = (() => {
  // Closed object schema in which every declared property is required.
  const strictObject = (properties) => ({
    type: 'object',
    additionalProperties: false,
    required: Object.keys(properties),
    properties,
  });
  // Accepts either the given schema or JSON null.
  const nullable = (schema) => ({ anyOf: [schema, { type: 'null' }] });
  const stringArray = () => ({ type: 'array', items: { type: 'string' } });
  const unitInterval = () => ({ type: 'number', minimum: 0, maximum: 1 });

  const position = strictObject({
    role: { type: 'string' },
    count: { type: 'integer', minimum: 1, maximum: 200 },
  });

  const parsed = strictObject({
    orderType: { type: 'string', enum: ['ONE_TIME'] },
    isRapid: { type: 'boolean', enum: [true] },
    positions: { type: 'array', items: position },
    startAt: nullable({ type: 'string', format: 'date-time' }),
    endAt: nullable({ type: 'string', format: 'date-time' }),
    durationMinutes: nullable({ type: 'integer', minimum: 15, maximum: 1440 }),
    locationHint: nullable({ type: 'string' }),
    notes: nullable({ type: 'string' }),
    sourceText: { type: 'string' },
  });

  const confidence = strictObject({
    overall: unitInterval(),
    // Open map of field name -> score.
    fields: { type: 'object', additionalProperties: unitInterval() },
  });

  return strictObject({
    parsed,
    missingFields: stringArray(),
    warnings: stringArray(),
    confidence,
  });
})();
|
||||
|
||||
/**
 * Validates a raw model result against a schema exposing `safeParse`.
 *
 * @param {{ safeParse: Function }} schema - Validator whose `safeParse` returns
 *   `{ success, data }` on success or `{ success, error }` on failure.
 * @param {unknown} value - Raw value produced by the model call.
 * @param {string} errorMessage - Message used for the thrown error.
 * @returns {*} The validated data from the schema.
 * @throws {AppError} `MODEL_FAILED` (502) carrying the validation issues when
 *   the value does not satisfy the schema.
 */
function parseModelResult(schema, value, errorMessage) {
  const outcome = schema.safeParse(value);

  if (outcome.success) {
    return outcome.data;
  }

  const details = { issues: outcome.error.issues };
  throw new AppError('MODEL_FAILED', errorMessage, 502, details);
}
|
||||
|
||||
/**
 * Reports whether mock LLM responses should be used.
 *
 * Mocking is on unless the `LLM_MOCK` environment variable is exactly the
 * string 'false' (an unset variable therefore means "mock enabled").
 *
 * @returns {boolean} True when mock mode is active.
 */
function isMockLlmEnabled() {
  const { LLM_MOCK: mockFlag } = process.env;
  return mockFlag !== 'false';
}
|
||||
|
||||
/**
 * Ensures an optional timezone value is accepted by `Intl.DateTimeFormat`.
 *
 * Falsy values (undefined, null, empty string) are treated as "not provided"
 * and pass without any check.
 *
 * @param {string} [timezone] - Timezone identifier to validate.
 * @returns {void}
 * @throws {AppError} `VALIDATION_ERROR` (400) when the formatter rejects it.
 */
function validateTimezoneOrThrow(timezone) {
  if (timezone) {
    try {
      // Constructing/formatting with an unknown zone raises, which is the check.
      const formatter = new Intl.DateTimeFormat('en-US', { timeZone: timezone });
      formatter.format(new Date());
    } catch {
      const details = { timezone };
      throw new AppError('VALIDATION_ERROR', 'timezone must be a valid IANA timezone', 400, details);
    }
  }
}
|
||||
|
||||
/**
 * Picks a staffing role from free text using keyword matching.
 *
 * Patterns are checked in declaration order and the first hit wins, so text
 * mentioning several roles resolves to the earliest entry in the list.
 * Matching is case-insensitive and whole-word, covering singular and plural
 * forms.
 *
 * @param {string} text - Request text to inspect.
 * @returns {string} The matched role key, or 'general_staff' when no keyword
 *   is found.
 */
function detectRoleFromText(text) {
  const rolePatterns = [
    { role: 'server', regex: /\bserver(s)?\b/i },
    { role: 'bartender', regex: /\bbartender(s)?\b/i },
    { role: 'cook', regex: /\bcook(s)?\b/i },
    { role: 'chef', regex: /\bchef(s)?\b/i },
    { role: 'dishwasher', regex: /\bdishwasher(s)?\b/i },
    // Accepts host, hosts, hostess and hostesses; the earlier pattern missed
    // the plain plural "hosts" and fell through to the generic role.
    { role: 'host', regex: /\bhost(?:s|(?:ess)?(?:es)?)\b/i },
    { role: 'cashier', regex: /\bcashier(s)?\b/i },
    { role: 'barista', regex: /\bbarista(s)?\b/i },
  ];

  const hit = rolePatterns.find(({ regex }) => regex.test(text));
  return hit ? hit.role : 'general_staff';
}
|
||||
|
||||
/**
 * Extracts a headcount from free text.
 *
 * Uses the first standalone 1-3 digit number that is not part of a duration
 * ("4 hours", "30 mins"), a clock time ("5:30", "5 pm"), or a decimal
 * ("4.5"). Previously the first number of any kind was used, so
 * "Need servers for 4 hours" produced a headcount of 4.
 *
 * @param {string} text - Request text to inspect.
 * @returns {number} Headcount clamped to 1-200; 1 when no usable number is
 *   present or the number is below 1.
 */
function detectCountFromText(text) {
  // (?<![:.])  -> not the tail of "5:30" or "4.5"
  // (?![:.]\d) -> not the head of "5:30" or "4.5"
  // final lookahead -> not followed by a duration unit or am/pm marker
  const countPattern =
    /(?<![:.])\b(\d{1,3})\b(?![:.]\d)(?!\s*(?:hours?|hrs?|minutes?|mins?|am|pm)\b)/i;

  const match = text.match(countPattern);
  if (!match) {
    return 1;
  }

  const parsed = Number.parseInt(match[1], 10);
  if (Number.isNaN(parsed) || parsed < 1) {
    return 1;
  }

  return Math.min(parsed, 200);
}
|
||||
|
||||
/**
 * Extracts a shift duration in whole minutes from free text.
 *
 * Recognises hour phrases ("4 hours", "2hrs", "1.5 hours"), optionally
 * followed directly by a minutes part ("1 hour 30 minutes", "1 hr and 15
 * mins"), and stand-alone minute phrases ("45 minutes"). Earlier behaviour
 * read "4.5 hours" as 5 hours and dropped the minutes from "1 hour 30
 * minutes"; both are handled here.
 *
 * @param {string} text - Request text to inspect.
 * @returns {number|null} Duration in minutes, capped at 1440, or null when no
 *   duration of at least 15 minutes is found.
 */
function detectDurationMinutesFromText(text) {
  // Group 1: hours (optionally decimal). Group 2: minutes that immediately
  // follow the hour phrase. The lookbehind stops ".5 hours" matching alone.
  const hoursMatch = text.match(
    /(?<!\.)\b(\d{1,2}(?:\.\d+)?)\s*(?:hours?|hrs?)\b(?:\s*(?:and\s+)?(\d{1,2})\s*(?:minutes?|mins?)\b)?/i
  );
  if (hoursMatch) {
    const hours = Number.parseFloat(hoursMatch[1]);
    const trailingMinutes = hoursMatch[2] ? Number.parseInt(hoursMatch[2], 10) : 0;
    // Round so decimal hours still yield an integer minute count.
    const total = Math.round(hours * 60) + trailingMinutes;
    if (!Number.isNaN(hours) && hours > 0 && total >= 15) {
      return Math.min(total, 1440);
    }
  }

  const minutesMatch = text.match(/\b(\d{1,3})\s*(minute|minutes|min|mins)\b/i);
  if (minutesMatch) {
    const minutes = Number.parseInt(minutesMatch[1], 10);
    if (!Number.isNaN(minutes) && minutes >= 15) {
      return Math.min(minutes, 1440);
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Tells whether the text contains an urgency keyword (matched as whole words,
 * case-insensitively).
 *
 * @param {string} text - Request text to inspect.
 * @returns {boolean} True when any urgency keyword is present.
 */
function detectAsap(text) {
  const urgencyPattern = /\b(asap|right now|immediately|urgent|emergency|now)\b/i;
  return urgencyPattern.exec(text) !== null;
}
|
||||
|
||||
/**
 * Builds a deterministic, keyword-based parse result used when mock mode is
 * active, shaped like the real model output.
 *
 * @param {object} params
 * @param {string} params.text - Request text to analyse.
 * @param {string} [params.now] - Timestamp to use as the start time for
 *   urgent requests; defaults to the current time as an ISO string.
 * @returns {object} `{ parsed, missingFields, warnings, confidence }`.
 */
function buildMockParseResult({ text, now }) {
  const referenceTime = now || new Date().toISOString();
  const role = detectRoleFromText(text);
  const count = detectCountFromText(text);
  const durationMinutes = detectDurationMinutesFromText(text);
  // Only urgency keywords produce a start time; anything else stays unknown.
  const startAt = detectAsap(text) ? referenceTime : null;

  const missingFields = [];
  const warnings = [];

  if (!startAt) {
    missingFields.push('startAt');
    warnings.push('Missing explicit start time. Prompt user to confirm date and time.');
  }
  if (!durationMinutes) {
    missingFields.push('durationMinutes');
    warnings.push('Missing duration. Prompt user for shift length.');
  }
  // Defensive guard: the role detector falls back to a generic role string.
  if (!role) {
    missingFields.push('positions');
  }

  // Detected fields get a high score; undetected ones share a low 0.2.
  const scoreFor = (value, detectedScore) => (value ? detectedScore : 0.2);

  const parsed = {
    orderType: 'ONE_TIME',
    isRapid: true,
    positions: [{ role, count }],
    startAt,
    endAt: null,
    durationMinutes,
    locationHint: null,
    notes: null,
    sourceText: text,
  };

  const confidence = {
    overall: 0.72,
    fields: {
      positions: 0.86,
      startAt: scoreFor(startAt, 0.9),
      durationMinutes: scoreFor(durationMinutes, 0.88),
    },
  };

  return { parsed, missingFields, warnings, confidence };
}
|
||||
|
||||
/**
 * Post-processes a validated parse result: removes duplicate entries from
 * `missingFields` and `warnings`, and makes sure `missingFields` lists
 * 'positions', 'startAt' and 'durationMinutes' whenever the corresponding
 * parsed value is empty or falsy.
 *
 * @param {object} result - Parse result with `parsed`, `missingFields`,
 *   `warnings` and `confidence`.
 * @returns {object} A new result object; the input arrays are not mutated.
 */
function normalizeRapidOrderParseResult(result) {
  const { parsed, confidence } = result;

  const parsedCopy = {
    ...parsed,
    positions: parsed.positions,
    sourceText: parsed.sourceText,
  };

  // A Set both de-duplicates and keeps first-seen order; re-adding an
  // existing entry is a no-op, so appended names land at the end only once.
  const missing = new Set(result.missingFields);
  const uniqueWarnings = new Set(result.warnings);
  const confidenceCopy = {
    overall: confidence.overall,
    fields: confidence.fields,
  };

  if (parsedCopy.positions.length === 0) {
    missing.add('positions');
  }
  if (!parsedCopy.startAt) {
    missing.add('startAt');
  }
  if (!parsedCopy.durationMinutes) {
    missing.add('durationMinutes');
  }

  return {
    parsed: parsedCopy,
    missingFields: [...missing],
    warnings: [...uniqueWarnings],
    confidence: confidenceCopy,
  };
}
|
||||
|
||||
/**
 * Assembles the instruction prompt for the audio transcription model call.
 *
 * @param {object} params
 * @param {string} params.locale - Locale hint embedded in the prompt.
 * @param {string[]} params.promptHints - Domain hint terms; rendered as a
 *   comma-separated list, or "none" when the array is empty.
 * @returns {string} Newline-joined prompt text.
 */
function buildTranscriptionPrompt({ locale, promptHints }) {
  let hintsLine = 'Domain hints: none';
  if (promptHints.length > 0) {
    hintsLine = `Domain hints: ${promptHints.join(', ')}`;
  }

  const promptLines = [];
  promptLines.push('You transcribe urgent staffing request audio for a workforce scheduling app.');
  promptLines.push(`Locale hint: ${locale}`);
  promptLines.push(hintsLine);
  promptLines.push('Return only what was spoken in transcript form.');
  promptLines.push('Do not infer roles, counts, durations, dates, or locations that were not spoken.');
  promptLines.push('If audio quality is poor, still provide best-effort transcript and add warnings.');

  return promptLines.join('\n');
}
|
||||
|
||||
/**
 * Assembles the instruction prompt for the order-parsing model call.
 *
 * @param {object} params
 * @param {string} params.text - Request text appended as the model input.
 * @param {string} params.locale - Locale hint embedded in the prompt.
 * @param {string} [params.timezone] - Timezone hint; 'UTC' is used when falsy.
 * @param {string} params.now - Current time string embedded in the prompt.
 * @returns {string} Newline-joined prompt text.
 */
function buildParsePrompt({ text, locale, timezone, now }) {
  const zoneHint = timezone || 'UTC';

  const header = [
    'You parse urgent staffing request text into a strict one-time order draft.',
    `Locale hint: ${locale}`,
    `Timezone hint: ${zoneHint}`,
    `Current time (ISO): ${now}`,
  ];
  const rules = [
    'Interpret phrases like ASAP/today/tonight into ISO datetimes when confidence is high.',
    'Do not invent uncertain data. Put unknown required values into missingFields.',
    'Use warnings for ambiguities that need user confirmation.',
  ];
  const input = `Input text: ${text}`;

  return [...header, ...rules, input].join('\n');
}
|
||||
|
||||
/**
 * Transcribes a rapid-order audio recording.
 *
 * In mock mode a fixed sample transcript is returned and the audio file is
 * not read. Otherwise the multimodal model is invoked with the transcription
 * prompt and JSON schema, and its result is validated before being returned.
 *
 * @param {object} params
 * @param {string} params.audioFileUri - URI of the audio file to transcribe.
 * @param {string} [params.locale='en-US'] - Locale hint for the model; echoed
 *   as `language` in mock mode.
 * @param {string[]} [params.promptHints=[]] - Domain hint terms for the prompt.
 * @returns {Promise<object>} `{ transcript, confidence, language, warnings, model }`.
 * @throws {AppError} `MODEL_FAILED` (502) when the model result fails
 *   validation.
 */
export async function transcribeRapidOrderAudio({
  audioFileUri,
  locale = 'en-US',
  promptHints = [],
}) {
  if (isMockLlmEnabled()) {
    const mockModel = process.env.LLM_MODEL || 'vertexai/gemini-mock';
    return {
      transcript: 'Need 2 servers ASAP for 4 hours.',
      confidence: 0.87,
      language: locale,
      warnings: [],
      model: mockModel,
    };
  }

  const prompt = buildTranscriptionPrompt({ locale, promptHints });
  const llmResult = await invokeVertexMultimodalModel({
    prompt,
    responseJsonSchema: RAPID_ORDER_TRANSCRIPTION_JSON_SCHEMA,
    fileUris: [audioFileUri],
  });

  const transcription = parseModelResult(
    rapidOrderTranscriptionSchema,
    llmResult.result,
    'Rapid order transcription failed'
  );

  return { ...transcription, model: llmResult.model };
}
|
||||
|
||||
/**
 * Parses free-text rapid-order input into a structured order draft.
 *
 * The timezone is validated first. In mock mode the keyword-based mock parser
 * produces the result; otherwise the text model is invoked with the parse
 * prompt and JSON schema, and its result is validated and normalized.
 *
 * @param {object} params
 * @param {string} params.text - Request text to parse.
 * @param {string} [params.locale='en-US'] - Locale hint for the model.
 * @param {string} [params.timezone] - Optional timezone hint.
 * @param {string} [params.now] - Reference time; defaults to the current time
 *   as an ISO string.
 * @returns {Promise<object>} `{ parsed, missingFields, warnings, confidence, model }`.
 * @throws {AppError} `VALIDATION_ERROR` (400) for a rejected timezone, or
 *   `MODEL_FAILED` (502) when the model result fails validation.
 */
export async function parseRapidOrderText({
  text,
  locale = 'en-US',
  timezone,
  now,
}) {
  validateTimezoneOrThrow(timezone);

  const referenceTime = now || new Date().toISOString();

  if (isMockLlmEnabled()) {
    return {
      ...buildMockParseResult({ text, now: referenceTime }),
      model: process.env.LLM_MODEL || 'vertexai/gemini-mock',
    };
  }

  const prompt = buildParsePrompt({ text, locale, timezone, now: referenceTime });
  const llmResult = await invokeVertexModel({
    prompt,
    responseJsonSchema: RAPID_ORDER_PARSE_JSON_SCHEMA,
  });

  const validated = parseModelResult(
    rapidOrderParseResultSchema,
    llmResult.result,
    'Rapid order parsing failed'
  );
  const normalized = normalizeRapidOrderParseResult(validated);

  return { ...normalized, model: llmResult.model };
}
|
||||
Reference in New Issue
Block a user