feat(core-api): add rapid order transcribe and parse endpoints

This commit is contained in:
zouantchaw
2026-02-27 11:12:32 -05:00
parent feb38c81fa
commit 7740ad4d2d
7 changed files with 808 additions and 12 deletions

View File

@@ -45,6 +45,13 @@ function guessMimeTypeFromUri(fileUri) {
if (path.endsWith('.jpg') || path.endsWith('.jpeg')) return 'image/jpeg';
if (path.endsWith('.png')) return 'image/png';
if (path.endsWith('.pdf')) return 'application/pdf';
if (path.endsWith('.webm')) return 'audio/webm';
if (path.endsWith('.mp3')) return 'audio/mpeg';
if (path.endsWith('.wav')) return 'audio/wav';
if (path.endsWith('.m4a')) return 'audio/m4a';
if (path.endsWith('.aac')) return 'audio/aac';
if (path.endsWith('.ogg')) return 'audio/ogg';
if (path.endsWith('.flac')) return 'audio/flac';
return 'application/octet-stream';
}

View File

@@ -0,0 +1,410 @@
import { z } from 'zod';
import { AppError } from '../lib/errors.js';
import { invokeVertexModel, invokeVertexMultimodalModel } from './llm.js';
// Zod validator applied to the transcription model's JSON reply.
// Fields with a `.default(...)` are filled in when the reply omits them.
const rapidOrderTranscriptionSchema = z.object({
  // Spoken text, trimmed; must be 1-4000 characters.
  transcript: z
    .string()
    .trim()
    .min(1)
    .max(4000),
  // Score in [0, 1]; falls back to 0.7 when absent.
  confidence: z
    .number()
    .min(0)
    .max(1)
    .default(0.7),
  // Language tag string, 2-35 characters after trimming; falls back to 'en-US'.
  language: z
    .string()
    .trim()
    .min(2)
    .max(35)
    .default('en-US'),
  // At most 10 non-empty warning strings of up to 200 characters each.
  warnings: z
    .array(z.string().trim().min(1).max(200))
    .max(10)
    .default([]),
});
// Zod validator for one requested position: a role label plus a headcount.
const rapidOrderPositionSchema = z.object({
  // Role label, trimmed; 1-100 characters.
  role: z
    .string()
    .trim()
    .min(1)
    .max(100),
  // Whole number of workers, between 1 and 200 inclusive.
  count: z
    .number()
    .int()
    .min(1)
    .max(200),
});
// Zod validator applied to the parse model's JSON reply.
// Nullable fields default to null and list fields default to [] when omitted.
const rapidOrderParseResultSchema = z.object({
  // The structured order draft itself.
  parsed: z.object({
    // Only these two literal values are accepted.
    orderType: z.literal('ONE_TIME'),
    isRapid: z.literal(true),
    // Up to 20 role/count entries.
    positions: z
      .array(rapidOrderPositionSchema)
      .max(20)
      .default([]),
    // ISO datetime strings; a UTC offset is permitted.
    startAt: z
      .string()
      .datetime({ offset: true })
      .nullable()
      .default(null),
    endAt: z
      .string()
      .datetime({ offset: true })
      .nullable()
      .default(null),
    // Whole minutes between 15 and 1440 inclusive.
    durationMinutes: z
      .number()
      .int()
      .min(15)
      .max(1440)
      .nullable()
      .default(null),
    locationHint: z
      .string()
      .trim()
      .max(200)
      .nullable()
      .default(null),
    notes: z
      .string()
      .trim()
      .max(1200)
      .nullable()
      .default(null),
    // Required, non-empty after trimming, at most 4000 characters.
    sourceText: z
      .string()
      .trim()
      .min(1)
      .max(4000),
  }),
  // Up to 20 field names (each 1-60 characters).
  missingFields: z
    .array(z.string().trim().min(1).max(60))
    .max(20)
    .default([]),
  // Up to 20 warning strings (each 1-200 characters).
  warnings: z
    .array(z.string().trim().min(1).max(200))
    .max(20)
    .default([]),
  // Overall score plus an optional map of per-key scores, all within [0, 1].
  confidence: z.object({
    overall: z
      .number()
      .min(0)
      .max(1),
    fields: z
      .record(z.number().min(0).max(1))
      .default({}),
  }),
});
/**
 * JSON schema handed to the multimodal model call as `responseJsonSchema`
 * for audio transcription. Every declared property is required and no
 * additional properties are allowed.
 */
const RAPID_ORDER_TRANSCRIPTION_JSON_SCHEMA = (() => {
  const properties = {
    transcript: { type: 'string' },
    confidence: { type: 'number', minimum: 0, maximum: 1 },
    language: { type: 'string' },
    warnings: { type: 'array', items: { type: 'string' } },
  };

  return {
    type: 'object',
    additionalProperties: false,
    // Same names, same order as the property declarations above.
    required: Object.keys(properties),
    properties,
  };
})();
/**
 * JSON schema handed to the text model call as `responseJsonSchema` when
 * parsing rapid-order text. All object levels are closed (no additional
 * properties) and require every property they declare; optional values are
 * expressed as "value or null" rather than by omission.
 */
const RAPID_ORDER_PARSE_JSON_SCHEMA = (() => {
  // Closed object: every declared property is required, nothing else allowed.
  const closedObject = (properties) => ({
    type: 'object',
    additionalProperties: false,
    required: Object.keys(properties),
    properties,
  });
  // Either the given schema or an explicit null.
  const orNull = (schema) => ({ anyOf: [schema, { type: 'null' }] });
  const listOfStrings = () => ({ type: 'array', items: { type: 'string' } });
  const unitInterval = () => ({ type: 'number', minimum: 0, maximum: 1 });

  const position = closedObject({
    role: { type: 'string' },
    count: { type: 'integer', minimum: 1, maximum: 200 },
  });

  const parsed = closedObject({
    orderType: { type: 'string', enum: ['ONE_TIME'] },
    isRapid: { type: 'boolean', enum: [true] },
    positions: { type: 'array', items: position },
    startAt: orNull({ type: 'string', format: 'date-time' }),
    endAt: orNull({ type: 'string', format: 'date-time' }),
    durationMinutes: orNull({ type: 'integer', minimum: 15, maximum: 1440 }),
    locationHint: orNull({ type: 'string' }),
    notes: orNull({ type: 'string' }),
    sourceText: { type: 'string' },
  });

  const confidence = closedObject({
    overall: unitInterval(),
    // Open map: any key is allowed as long as its value is a score in [0, 1].
    fields: { type: 'object', additionalProperties: unitInterval() },
  });

  return closedObject({
    parsed,
    missingFields: listOfStrings(),
    warnings: listOfStrings(),
    confidence,
  });
})();
/**
 * Validates a raw model reply against a schema exposing `safeParse`.
 *
 * @param {{ safeParse: Function }} schema - Validator whose `safeParse(value)`
 *   returns `{ success, data }` or `{ success, error }`.
 * @param {unknown} value - Raw model output to validate.
 * @param {string} errorMessage - Message used for the thrown error.
 * @returns {*} The validated data (`safeParse(...).data`).
 * @throws {AppError} `MODEL_FAILED` with status 502 and the validation
 *   issues attached when validation fails.
 */
function parseModelResult(schema, value, errorMessage) {
  const outcome = schema.safeParse(value);
  if (outcome.success) {
    return outcome.data;
  }
  throw new AppError('MODEL_FAILED', errorMessage, 502, { issues: outcome.error.issues });
}
/**
 * Reports whether canned mock responses should be used instead of model calls.
 *
 * Mocking is the default: only the exact string 'false' in the `LLM_MOCK`
 * environment variable turns it off. An unset variable, or any other value,
 * leaves mocking on.
 *
 * @returns {boolean} True unless `process.env.LLM_MOCK === 'false'`.
 */
function isMockLlmEnabled() {
  const { LLM_MOCK: flag } = process.env;
  return !(flag === 'false');
}
/**
 * Checks a timezone identifier by asking `Intl.DateTimeFormat` to format the
 * current date with it. Falsy input (undefined, null, '') is treated as
 * "not provided" and accepted without checking.
 *
 * @param {string} [timezone] - Timezone identifier to check.
 * @returns {void}
 * @throws {AppError} `VALIDATION_ERROR` with status 400 when the runtime
 *   rejects the identifier.
 */
function validateTimezoneOrThrow(timezone) {
  if (timezone) {
    try {
      const probe = new Intl.DateTimeFormat('en-US', { timeZone: timezone });
      probe.format(new Date());
    } catch {
      throw new AppError('VALIDATION_ERROR', 'timezone must be a valid IANA timezone', 400, {
        timezone,
      });
    }
  }
}
/**
 * Picks a staffing role out of free text using whole-word, case-insensitive
 * keyword matching.
 *
 * Patterns are tried in the listed order and the first hit wins, so text that
 * mentions several roles resolves to the earliest one in the table.
 *
 * @param {string} text - Request text to scan.
 * @returns {string} The matched role key, or 'general_staff' when no keyword
 *   is present (this function never returns an empty value).
 */
function detectRoleFromText(text) {
  const rolePatterns = [
    { role: 'server', regex: /\bserver(s)?\b/i },
    { role: 'bartender', regex: /\bbartender(s)?\b/i },
    { role: 'cook', regex: /\bcook(s)?\b/i },
    { role: 'chef', regex: /\bchef(s)?\b/i },
    { role: 'dishwasher', regex: /\bdishwasher(s)?\b/i },
    // Covers host, hosts, hostess, hostesses (and the "hostes" misspelling).
    // The plain plural "hosts" must be listed explicitly: a pattern built only
    // from optional "ess"/"es" suffixes does not match it.
    { role: 'host', regex: /\bhost(s|es|ess|esses)?\b/i },
    { role: 'cashier', regex: /\bcashier(s)?\b/i },
    { role: 'barista', regex: /\bbarista(s)?\b/i },
  ];
  const hit = rolePatterns.find(({ regex }) => regex.test(text));
  return hit ? hit.role : 'general_staff';
}
/**
 * Extracts a headcount from free text.
 *
 * Uses the first standalone 1-3 digit number that is NOT immediately followed
 * by a duration unit (hour/hours/hr/hrs/minute/minutes/min/mins), so that a
 * phrase such as "servers for 4 hours" is not misread as a request for four
 * workers.
 *
 * @param {string} text - Request text to scan.
 * @returns {number} Headcount clamped to 1-200; 1 when no usable number is
 *   found or the number is below 1.
 */
function detectCountFromText(text) {
  // The negative lookahead rejects numbers that belong to a duration phrase.
  const match = text.match(/\b(\d{1,3})\b(?![\s-]*(?:hours?|hrs?|minutes?|mins?)\b)/i);
  if (!match) {
    return 1;
  }
  const parsed = Number.parseInt(match[1], 10);
  if (Number.isNaN(parsed) || parsed < 1) {
    return 1;
  }
  return Math.min(parsed, 200);
}
/**
 * Extracts a shift length, in minutes, from free text.
 *
 * An hours phrase ("4 hours", "2hrs") is tried first; a minutes phrase
 * ("45 min") is only considered when no usable hours phrase exists. Hours
 * must be at least 1 and minutes at least 15; the result never exceeds 1440.
 *
 * @param {string} text - Request text to scan.
 * @returns {number|null} Duration in minutes, or null when nothing usable
 *   was found.
 */
function detectDurationMinutesFromText(text) {
  // Tried in order; the first rule that yields an acceptable quantity wins.
  const rules = [
    { pattern: /\b(\d{1,2})\s*(hour|hours|hr|hrs)\b/i, minutesPerUnit: 60, smallestAccepted: 1 },
    { pattern: /\b(\d{1,3})\s*(minute|minutes|min|mins)\b/i, minutesPerUnit: 1, smallestAccepted: 15 },
  ];
  for (const { pattern, minutesPerUnit, smallestAccepted } of rules) {
    const found = text.match(pattern);
    if (!found) {
      continue;
    }
    const quantity = Number.parseInt(found[1], 10);
    if (Number.isNaN(quantity) || quantity < smallestAccepted) {
      continue;
    }
    return Math.min(quantity * minutesPerUnit, 1440);
  }
  return null;
}
/**
 * Tells whether the text contains an urgency keyword (whole word,
 * case-insensitive): asap, right now, immediately, urgent, emergency, now.
 *
 * @param {string} text - Request text to scan.
 * @returns {boolean} True when any urgency keyword is present.
 */
function detectAsap(text) {
  const urgencyPattern = /\b(asap|right now|immediately|urgent|emergency|now)\b/i;
  return urgencyPattern.exec(text) !== null;
}
/**
 * Builds a heuristic (non-model) parse result from request text, in the same
 * shape as the model-backed parser's output.
 *
 * A start time is only produced when the text signals urgency, in which case
 * it equals `now`. The result always carries exactly one position entry.
 *
 * @param {object} args
 * @param {string} args.text - Request text; echoed back as `sourceText`.
 * @param {string} [args.now] - Timestamp string used as the start time for
 *   urgent requests; defaults to the current time in ISO form.
 * @returns {object} `{ parsed, missingFields, warnings, confidence }`.
 */
function buildMockParseResult({ text, now }) {
  const normalizedNow = now || new Date().toISOString();
  const role = detectRoleFromText(text);
  const count = detectCountFromText(text);
  const durationMinutes = detectDurationMinutesFromText(text);
  const startAt = detectAsap(text) ? normalizedNow : null;

  const hasStart = Boolean(startAt);
  const hasDuration = Boolean(durationMinutes);

  // NOTE(review): detectRoleFromText, as defined in this file, always returns
  // a non-empty role (it falls back to 'general_staff'), so the 'positions'
  // entry below is never added in practice.
  const missingFields = [
    !hasStart && 'startAt',
    !hasDuration && 'durationMinutes',
    !role && 'positions',
  ].filter(Boolean);

  const warnings = [
    !hasStart && 'Missing explicit start time. Prompt user to confirm date and time.',
    !hasDuration && 'Missing duration. Prompt user for shift length.',
  ].filter(Boolean);

  const parsed = {
    orderType: 'ONE_TIME',
    isRapid: true,
    positions: [{ role, count }],
    startAt,
    endAt: null,
    durationMinutes,
    locationHint: null,
    notes: null,
    sourceText: text,
  };

  // Fixed scores: high for fields the heuristics found, low for those they did not.
  const confidence = {
    overall: 0.72,
    fields: {
      positions: 0.86,
      startAt: hasStart ? 0.9 : 0.2,
      durationMinutes: hasDuration ? 0.88 : 0.2,
    },
  };

  return { parsed, missingFields, warnings, confidence };
}
/**
 * Produces a cleaned copy of a validated parse result.
 *
 * - De-duplicates `missingFields` and `warnings`, keeping first-seen order.
 * - Ensures `missingFields` mentions 'positions' when the positions list is
 *   empty, and 'startAt' / 'durationMinutes' when those values are falsy.
 * - Keeps only `parsed`, `missingFields`, `warnings` and `confidence`
 *   (`overall` and `fields`); other top-level keys are dropped.
 *
 * The input object and its arrays are not modified.
 *
 * @param {object} result - Parse result with `parsed`, `missingFields`,
 *   `warnings` and `confidence`.
 * @returns {object} The normalized result.
 */
function normalizeRapidOrderParseResult(result) {
  const { parsed, confidence } = result;

  // A Set gives de-duplication and idempotent additions in insertion order.
  const missing = new Set(result.missingFields);
  if (parsed.positions.length === 0) {
    missing.add('positions');
  }
  if (!parsed.startAt) {
    missing.add('startAt');
  }
  if (!parsed.durationMinutes) {
    missing.add('durationMinutes');
  }

  return {
    parsed: {
      ...parsed,
      positions: parsed.positions,
      sourceText: parsed.sourceText,
    },
    missingFields: [...missing],
    warnings: [...new Set(result.warnings)],
    confidence: {
      overall: confidence.overall,
      fields: confidence.fields,
    },
  };
}
/**
 * Assembles the newline-separated instruction prompt for audio transcription.
 *
 * @param {object} args
 * @param {string} args.locale - Locale string inserted as a hint.
 * @param {string[]} args.promptHints - Domain vocabulary hints; joined with
 *   ', ', or rendered as 'none' when the list is empty.
 * @returns {string} The prompt text.
 */
function buildTranscriptionPrompt({ locale, promptHints }) {
  const hintSummary = promptHints.length > 0 ? promptHints.join(', ') : 'none';

  const context = [
    'You transcribe urgent staffing request audio for a workforce scheduling app.',
    `Locale hint: ${locale}`,
    `Domain hints: ${hintSummary}`,
  ];
  const rules = [
    'Return only what was spoken in transcript form.',
    'Do not infer roles, counts, durations, dates, or locations that were not spoken.',
    'If audio quality is poor, still provide best-effort transcript and add warnings.',
  ];

  return [...context, ...rules].join('\n');
}
/**
 * Assembles the newline-separated instruction prompt for turning request
 * text into an order draft.
 *
 * @param {object} args
 * @param {string} args.text - Request text, appended verbatim as the last line.
 * @param {string} args.locale - Locale string inserted as a hint.
 * @param {string} [args.timezone] - Timezone hint; 'UTC' is used when falsy.
 * @param {string} args.now - Current-time string inserted as-is.
 * @returns {string} The prompt text.
 */
function buildParsePrompt({ text, locale, timezone, now }) {
  const timezoneHint = timezone || 'UTC';

  const lines = [];
  lines.push('You parse urgent staffing request text into a strict one-time order draft.');
  lines.push(`Locale hint: ${locale}`);
  lines.push(`Timezone hint: ${timezoneHint}`);
  lines.push(`Current time (ISO): ${now}`);
  lines.push('Interpret phrases like ASAP/today/tonight into ISO datetimes when confidence is high.');
  lines.push('Do not invent uncertain data. Put unknown required values into missingFields.');
  lines.push('Use warnings for ambiguities that need user confirmation.');
  lines.push(`Input text: ${text}`);
  return lines.join('\n');
}
/**
 * Transcribes a rapid-order audio recording.
 *
 * When mock mode is enabled (see `isMockLlmEnabled`), a fixed canned
 * transcript is returned and no model call is made. Otherwise the audio is
 * sent to the multimodal model and its reply is validated against
 * `rapidOrderTranscriptionSchema`.
 *
 * @param {object} args
 * @param {string} args.audioFileUri - URI of the audio file, forwarded to the
 *   model call as the only entry of `fileUris`.
 * @param {string} [args.locale='en-US'] - Locale hint; also echoed as
 *   `language` in the mock response.
 * @param {string[]} [args.promptHints=[]] - Domain hints added to the prompt.
 * @returns {Promise<object>} `{ transcript, confidence, language, warnings, model }`.
 * @throws {AppError} `MODEL_FAILED` (502) when the model reply fails validation.
 */
export async function transcribeRapidOrderAudio({
  audioFileUri,
  locale = 'en-US',
  promptHints = [],
}) {
  if (!isMockLlmEnabled()) {
    const response = await invokeVertexMultimodalModel({
      prompt: buildTranscriptionPrompt({ locale, promptHints }),
      responseJsonSchema: RAPID_ORDER_TRANSCRIPTION_JSON_SCHEMA,
      fileUris: [audioFileUri],
    });
    const transcription = parseModelResult(
      rapidOrderTranscriptionSchema,
      response.result,
      'Rapid order transcription failed'
    );
    return { ...transcription, model: response.model };
  }

  // Mock mode: deterministic payload, labelled with the configured model name if any.
  return {
    transcript: 'Need 2 servers ASAP for 4 hours.',
    confidence: 0.87,
    language: locale,
    warnings: [],
    model: process.env.LLM_MODEL || 'vertexai/gemini-mock',
  };
}
/**
 * Parses rapid-order request text into a structured one-time order draft.
 *
 * The timezone is validated first in every mode. When mock mode is enabled
 * (see `isMockLlmEnabled`), the result comes from the local heuristics in
 * `buildMockParseResult` and is returned as-is. Otherwise the text model is
 * called, its reply is validated against `rapidOrderParseResultSchema`, and
 * the validated reply is passed through `normalizeRapidOrderParseResult`.
 *
 * @param {object} args
 * @param {string} args.text - Request text to parse.
 * @param {string} [args.locale='en-US'] - Locale hint for the prompt.
 * @param {string} [args.timezone] - Timezone identifier; validated when provided.
 * @param {string} [args.now] - Reference timestamp; defaults to the current
 *   time in ISO form.
 * @returns {Promise<object>} `{ parsed, missingFields, warnings, confidence, model }`.
 * @throws {AppError} `VALIDATION_ERROR` (400) for a rejected timezone, or
 *   `MODEL_FAILED` (502) when the model reply fails validation.
 */
export async function parseRapidOrderText({
  text,
  locale = 'en-US',
  timezone,
  now,
}) {
  validateTimezoneOrThrow(timezone);
  const normalizedNow = now || new Date().toISOString();

  if (!isMockLlmEnabled()) {
    const response = await invokeVertexModel({
      prompt: buildParsePrompt({ text, locale, timezone, now: normalizedNow }),
      responseJsonSchema: RAPID_ORDER_PARSE_JSON_SCHEMA,
    });
    const draft = parseModelResult(
      rapidOrderParseResultSchema,
      response.result,
      'Rapid order parsing failed'
    );
    return { ...normalizeRapidOrderParseResult(draft), model: response.model };
  }

  // Mock mode: heuristic result, labelled with the configured model name if any.
  const mockDraft = buildMockParseResult({ text, now: normalizedNow });
  return {
    ...mockDraft,
    model: process.env.LLM_MODEL || 'vertexai/gemini-mock',
  };
}