feat(core-api): add rapid order transcribe and parse endpoints

2026-02-27 11:12:32 -05:00
parent feb38c81fa
commit 7740ad4d2d
7 changed files with 808 additions and 12 deletions
--- a/backend/core-api/src/services/llm.js
+++ b/backend/core-api/src/services/llm.js
@@ -45,6 +45,13 @@ function guessMimeTypeFromUri(fileUri) {
  if (path.endsWith('.jpg') || path.endsWith('.jpeg')) return 'image/jpeg';
  if (path.endsWith('.png')) return 'image/png';
  if (path.endsWith('.pdf')) return 'application/pdf';
+  if (path.endsWith('.webm')) return 'audio/webm';
+  if (path.endsWith('.mp3')) return 'audio/mpeg';
+  if (path.endsWith('.wav')) return 'audio/wav';
+  if (path.endsWith('.m4a')) return 'audio/m4a';
+  if (path.endsWith('.aac')) return 'audio/aac';
+  if (path.endsWith('.ogg')) return 'audio/ogg';
+  if (path.endsWith('.flac')) return 'audio/flac';
  return 'application/octet-stream';
 }

--- a/backend/core-api/src/services/rapid-order.js
+++ b/backend/core-api/src/services/rapid-order.js
@@ -0,0 +1,410 @@
+import { z } from 'zod';
+import { AppError } from '../lib/errors.js';
+import { invokeVertexModel, invokeVertexMultimodalModel } from './llm.js';
+
+// Zod validator applied to the transcription payload that comes back from the model.
+// Strings are trimmed before their length bounds are checked; `confidence`,
+// `language`, and `warnings` fall back to the defaults below when omitted.
+const rapidOrderTranscriptionSchema = z.object({
+  transcript: z.string().trim().min(1).max(4000),
+  confidence: z.number().min(0).max(1).default(0.7),
+  language: z.string().trim().min(2).max(35).default('en-US'),
+  warnings: z.array(z.string().trim().min(1).max(200)).max(10).default([]),
+});
+
+// One requested staffing position: a non-empty role label (at most 100 chars)
+// and an integer head count between 1 and 200.
+const rapidOrderPositionSchema = z.object({
+  role: z.string().trim().min(1).max(100),
+  count: z.number().int().min(1).max(200),
+});
+
+// Zod validator applied to the structured order draft that comes back from the model.
+// `orderType` and `isRapid` are pinned to literals; the optional scheduling fields
+// are nullable and default to null, and the list/record fields default to empty.
+const rapidOrderParseResultSchema = z.object({
+  parsed: z.object({
+    orderType: z.literal('ONE_TIME'),
+    isRapid: z.literal(true),
+    positions: z.array(rapidOrderPositionSchema).max(20).default([]),
+    // Datetimes must be ISO strings; `offset: true` also admits explicit UTC offsets.
+    startAt: z.string().datetime({ offset: true }).nullable().default(null),
+    endAt: z.string().datetime({ offset: true }).nullable().default(null),
+    // 15 minutes up to 24 hours (1440 minutes).
+    durationMinutes: z.number().int().min(15).max(1440).nullable().default(null),
+    locationHint: z.string().trim().max(200).nullable().default(null),
+    notes: z.string().trim().max(1200).nullable().default(null),
+    sourceText: z.string().trim().min(1).max(4000),
+  }),
+  missingFields: z.array(z.string().trim().min(1).max(60)).max(20).default([]),
+  warnings: z.array(z.string().trim().min(1).max(200)).max(20).default([]),
+  confidence: z.object({
+    overall: z.number().min(0).max(1),
+    // Per-field confidence scores keyed by field name, each within [0, 1].
+    fields: z.record(z.number().min(0).max(1)).default({}),
+  }),
+});
+
+// JSON Schema passed as `responseJsonSchema` on the transcription model call.
+// Every declared property is required and no additional keys are permitted.
+const RAPID_ORDER_TRANSCRIPTION_JSON_SCHEMA = (() => {
+  const properties = {
+    transcript: { type: 'string' },
+    confidence: { type: 'number', minimum: 0, maximum: 1 },
+    language: { type: 'string' },
+    warnings: { type: 'array', items: { type: 'string' } },
+  };
+
+  return {
+    type: 'object',
+    additionalProperties: false,
+    // Derived from the property map so the list follows declaration order.
+    required: Object.keys(properties),
+    properties,
+  };
+})();
+
+// JSON Schema passed as `responseJsonSchema` on the parse model call.
+// It mirrors the shape validated by `rapidOrderParseResultSchema`: closed objects
+// whose every declared property is required, with nullable scheduling fields.
+const RAPID_ORDER_PARSE_JSON_SCHEMA = (() => {
+  // Wraps a schema so that an explicit null is also accepted.
+  const nullable = (schema) => ({ anyOf: [schema, { type: 'null' }] });
+
+  // Object schema with no extra keys where every declared property is required.
+  const closedObject = (properties) => ({
+    type: 'object',
+    additionalProperties: false,
+    required: Object.keys(properties),
+    properties,
+  });
+
+  // Factories return fresh objects so no sub-schema is shared by reference.
+  const stringList = () => ({ type: 'array', items: { type: 'string' } });
+  const unitInterval = () => ({ type: 'number', minimum: 0, maximum: 1 });
+  const isoDateTime = () => ({ type: 'string', format: 'date-time' });
+
+  const position = closedObject({
+    role: { type: 'string' },
+    count: { type: 'integer', minimum: 1, maximum: 200 },
+  });
+
+  const parsed = closedObject({
+    orderType: { type: 'string', enum: ['ONE_TIME'] },
+    isRapid: { type: 'boolean', enum: [true] },
+    positions: { type: 'array', items: position },
+    startAt: nullable(isoDateTime()),
+    endAt: nullable(isoDateTime()),
+    durationMinutes: nullable({ type: 'integer', minimum: 15, maximum: 1440 }),
+    locationHint: nullable({ type: 'string' }),
+    notes: nullable({ type: 'string' }),
+    sourceText: { type: 'string' },
+  });
+
+  const confidence = closedObject({
+    overall: unitInterval(),
+    // Open map: any key is allowed as long as its value is a score in [0, 1].
+    fields: { type: 'object', additionalProperties: unitInterval() },
+  });
+
+  return closedObject({
+    parsed,
+    missingFields: stringList(),
+    warnings: stringList(),
+    confidence,
+  });
+})();
+
+/**
+ * Validates a raw model payload against a schema exposing `safeParse`.
+ *
+ * @param {{ safeParse: Function }} schema - Validator used to check `value`.
+ * @param {unknown} value - Raw result returned by the model call.
+ * @param {string} errorMessage - Message attached to the error on failure.
+ * @returns {*} The validated data produced by the schema.
+ * @throws {AppError} `MODEL_FAILED` (502) carrying the validation issues.
+ */
+function parseModelResult(schema, value, errorMessage) {
+  const outcome = schema.safeParse(value);
+  if (outcome.success) {
+    return outcome.data;
+  }
+
+  const details = { issues: outcome.error.issues };
+  throw new AppError('MODEL_FAILED', errorMessage, 502, details);
+}
+
+/**
+ * Reports whether canned responses should be used instead of real model calls.
+ * Mocking is on unless the `LLM_MOCK` environment variable is exactly 'false'.
+ *
+ * @returns {boolean}
+ */
+function isMockLlmEnabled() {
+  const { LLM_MOCK: mockFlag } = process.env;
+  return mockFlag !== 'false';
+}
+
+/**
+ * Rejects timezone strings that `Intl.DateTimeFormat` cannot resolve.
+ * A missing or empty timezone is accepted without any check.
+ *
+ * @param {string} [timezone] - Timezone identifier supplied by the caller.
+ * @returns {void}
+ * @throws {AppError} `VALIDATION_ERROR` (400) when the timezone is not usable.
+ */
+function validateTimezoneOrThrow(timezone) {
+  if (timezone) {
+    try {
+      // Building and using the formatter throws for an unknown timeZone value.
+      const formatter = new Intl.DateTimeFormat('en-US', { timeZone: timezone });
+      formatter.format(new Date());
+    } catch {
+      throw new AppError('VALIDATION_ERROR', 'timezone must be a valid IANA timezone', 400, {
+        timezone,
+      });
+    }
+  }
+}
+
+/**
+ * Picks the first known staffing role mentioned in the text.
+ * Matchers are tried in table order, so earlier rows win when several match.
+ *
+ * @param {string} text - Free-form request text.
+ * @returns {string} The matched role, or 'general_staff' when none matches.
+ */
+function detectRoleFromText(text) {
+  const roleMatchers = [
+    ['server', /\bserver(s)?\b/i],
+    ['bartender', /\bbartender(s)?\b/i],
+    ['cook', /\bcook(s)?\b/i],
+    ['chef', /\bchef(s)?\b/i],
+    ['dishwasher', /\bdishwasher(s)?\b/i],
+    ['host', /\bhost(ess)?(es)?\b/i],
+    ['cashier', /\bcashier(s)?\b/i],
+    ['barista', /\bbarista(s)?\b/i],
+  ];
+
+  const hit = roleMatchers.find(([, pattern]) => pattern.test(text));
+  return hit ? hit[0] : 'general_staff';
+}
+
+/**
+ * Extracts the requested head count from free-form request text.
+ *
+ * The first standalone 1-3 digit number is used, but numbers that clearly mean
+ * something else are skipped: durations ("4 hours", "45 min"), clock times
+ * ("5 pm", "5:30") and fragments of decimals ("3.5"). Without this filter a
+ * request such as "Need servers for 4 hours" would be read as four servers.
+ *
+ * @param {string} text - Free-form request text.
+ * @returns {number} Head count clamped to [1, 200]; 1 when no usable number exists.
+ */
+function detectCountFromText(text) {
+  // Lookbehind rejects the tail of "5:30" / "3.5"; the lookaheads reject the head
+  // of those forms and any number followed by a duration unit or am/pm marker.
+  const countPattern =
+    /(?<![:.\d])\b(\d{1,3})\b(?![:.]\d)(?!\s*(?:hours?|hrs?|minutes?|mins?|a\.?m|p\.?m)\b)/i;
+
+  const match = text.match(countPattern);
+  if (!match) {
+    return 1;
+  }
+
+  const parsed = Number.parseInt(match[1], 10);
+  if (Number.isNaN(parsed) || parsed < 1) {
+    return 1;
+  }
+
+  return Math.min(parsed, 200);
+}
+
+/**
+ * Reads a shift length out of free-form text and returns it in minutes.
+ * An hour phrase takes precedence; a minute phrase is only considered when no
+ * positive hour value was found, and must be at least 15 minutes.
+ *
+ * @param {string} text - Free-form request text.
+ * @returns {number|null} Minutes capped at 1440, or null when nothing usable is found.
+ */
+function detectDurationMinutesFromText(text) {
+  const MAX_MINUTES = 1440;
+
+  const hourHit = text.match(/\b(\d{1,2})\s*(hour|hours|hr|hrs)\b/i);
+  const hourValue = hourHit ? Number.parseInt(hourHit[1], 10) : Number.NaN;
+  // NaN fails every comparison, so a missing match falls through here.
+  if (hourValue > 0) {
+    return Math.min(hourValue * 60, MAX_MINUTES);
+  }
+
+  const minuteHit = text.match(/\b(\d{1,3})\s*(minute|minutes|min|mins)\b/i);
+  const minuteValue = minuteHit ? Number.parseInt(minuteHit[1], 10) : Number.NaN;
+  if (minuteValue >= 15) {
+    return Math.min(minuteValue, MAX_MINUTES);
+  }
+
+  return null;
+}
+
+/**
+ * Tells whether the text contains an urgency keyword as a whole word or phrase.
+ *
+ * @param {string} text - Free-form request text.
+ * @returns {boolean}
+ */
+function detectAsap(text) {
+  const urgencyPattern = /\b(asap|right now|immediately|urgent|emergency|now)\b/i;
+  return urgencyPattern.test(text);
+}
+
+/**
+ * Produces a heuristic, model-free parse result from the request text.
+ * Role, count, and duration come from the regex helpers in this file; the start
+ * time is set to `now` only when an urgency keyword is present.
+ *
+ * @param {{ text: string, now?: string }} input - Request text and the reference
+ *   timestamp; the current time in ISO form is used when `now` is falsy.
+ * @returns {object} Result with `parsed`, `missingFields`, `warnings`, and `confidence`.
+ */
+function buildMockParseResult({ text, now }) {
+  const referenceTime = now || new Date().toISOString();
+  const role = detectRoleFromText(text);
+  const count = detectCountFromText(text);
+  const durationMinutes = detectDurationMinutesFromText(text);
+  const startAt = detectAsap(text) ? referenceTime : null;
+
+  const hasStart = Boolean(startAt);
+  const hasDuration = Boolean(durationMinutes);
+
+  // Each row pairs a "missing" condition with the field name to report.
+  // NOTE(review): detectRoleFromText falls back to 'general_staff', so the
+  // `!role` row looks unreachable today; it is kept as a defensive guard.
+  const missingFields = [
+    [!hasStart, 'startAt'],
+    [!hasDuration, 'durationMinutes'],
+    [!role, 'positions'],
+  ]
+    .filter(([isMissing]) => isMissing)
+    .map(([, field]) => field);
+
+  const warnings = [
+    [!hasStart, 'Missing explicit start time. Prompt user to confirm date and time.'],
+    [!hasDuration, 'Missing duration. Prompt user for shift length.'],
+  ]
+    .filter(([shouldWarn]) => shouldWarn)
+    .map(([, message]) => message);
+
+  const parsed = {
+    orderType: 'ONE_TIME',
+    isRapid: true,
+    positions: [{ role, count }],
+    startAt,
+    endAt: null,
+    durationMinutes,
+    locationHint: null,
+    notes: null,
+    sourceText: text,
+  };
+
+  const confidence = {
+    overall: 0.72,
+    fields: {
+      positions: 0.86,
+      startAt: hasStart ? 0.9 : 0.2,
+      durationMinutes: hasDuration ? 0.88 : 0.2,
+    },
+  };
+
+  return { parsed, missingFields, warnings, confidence };
+}
+
+/**
+ * Returns a cleaned copy of a validated parse result.
+ * Duplicate entries in `missingFields` and `warnings` are removed, and
+ * `positions`, `startAt`, and `durationMinutes` are appended to `missingFields`
+ * (in that order) whenever the parsed draft lacks them. The input is not mutated.
+ *
+ * @param {object} result - Parse result with `parsed`, `missingFields`,
+ *   `warnings`, and `confidence`.
+ * @returns {object} New result object with the same top-level shape.
+ */
+function normalizeRapidOrderParseResult(result) {
+  const { parsed, confidence } = result;
+  const { positions, sourceText } = parsed;
+
+  // A Set keeps first-seen order and ignores repeats, so adding a field that the
+  // model already reported leaves its position untouched.
+  const missing = new Set(result.missingFields);
+  if (positions.length === 0) {
+    missing.add('positions');
+  }
+  if (!parsed.startAt) {
+    missing.add('startAt');
+  }
+  if (!parsed.durationMinutes) {
+    missing.add('durationMinutes');
+  }
+
+  return {
+    parsed: { ...parsed, positions, sourceText },
+    missingFields: [...missing],
+    warnings: [...new Set(result.warnings)],
+    confidence: {
+      overall: confidence.overall,
+      fields: confidence.fields,
+    },
+  };
+}
+
+/**
+ * Assembles the instruction text sent with the audio for transcription.
+ *
+ * @param {{ locale: string, promptHints: string[] }} options - Locale hint and
+ *   domain vocabulary hints; an empty hint list is rendered as "none".
+ * @returns {string} Newline-separated prompt.
+ */
+function buildTranscriptionPrompt({ locale, promptHints }) {
+  const hintList = promptHints.length > 0 ? promptHints.join(', ') : 'none';
+
+  const lines = [
+    'You transcribe urgent staffing request audio for a workforce scheduling app.',
+    `Locale hint: ${locale}`,
+    `Domain hints: ${hintList}`,
+    'Return only what was spoken in transcript form.',
+    'Do not infer roles, counts, durations, dates, or locations that were not spoken.',
+    'If audio quality is poor, still provide best-effort transcript and add warnings.',
+  ];
+
+  return lines.join('\n');
+}
+
+/**
+ * Assembles the instruction text sent to the model for parsing request text.
+ *
+ * @param {{ text: string, locale: string, timezone?: string, now: string }} options -
+ *   Request text plus locale, timezone, and current-time hints; a falsy timezone
+ *   is rendered as "UTC".
+ * @returns {string} Newline-separated prompt ending with the input text.
+ */
+function buildParsePrompt({ text, locale, timezone, now }) {
+  const timezoneHint = timezone || 'UTC';
+
+  const contextLines = [
+    `Locale hint: ${locale}`,
+    `Timezone hint: ${timezoneHint}`,
+    `Current time (ISO): ${now}`,
+  ];
+  const ruleLines = [
+    'Interpret phrases like ASAP/today/tonight into ISO datetimes when confidence is high.',
+    'Do not invent uncertain data. Put unknown required values into missingFields.',
+    'Use warnings for ambiguities that need user confirmation.',
+  ];
+
+  return [
+    'You parse urgent staffing request text into a strict one-time order draft.',
+    ...contextLines,
+    ...ruleLines,
+    `Input text: ${text}`,
+  ].join('\n');
+}
+
+/**
+ * Transcribes a rapid-order audio recording into text.
+ *
+ * When mocking is enabled a fixed transcript is returned and neither the audio
+ * nor the prompt hints are used. Otherwise the audio URI is sent to the
+ * multimodal model and the response is validated before being returned.
+ *
+ * @param {object} params
+ * @param {string} params.audioFileUri - URI of the audio file handed to the model.
+ * @param {string} [params.locale='en-US'] - Locale hint; echoed as `language` in mock mode.
+ * @param {string[]} [params.promptHints=[]] - Domain vocabulary hints for the prompt.
+ * @returns {Promise<object>} Transcript, confidence, language, warnings, and model name.
+ * @throws {AppError} `MODEL_FAILED` (502) when the model output fails validation.
+ */
+export async function transcribeRapidOrderAudio({
+  audioFileUri,
+  locale = 'en-US',
+  promptHints = [],
+}) {
+  if (!isMockLlmEnabled()) {
+    const llmResult = await invokeVertexMultimodalModel({
+      prompt: buildTranscriptionPrompt({ locale, promptHints }),
+      responseJsonSchema: RAPID_ORDER_TRANSCRIPTION_JSON_SCHEMA,
+      fileUris: [audioFileUri],
+    });
+
+    const transcription = parseModelResult(
+      rapidOrderTranscriptionSchema,
+      llmResult.result,
+      'Rapid order transcription failed'
+    );
+
+    return { ...transcription, model: llmResult.model };
+  }
+
+  // Mock mode: canned transcript, no model call.
+  const mockModel = process.env.LLM_MODEL || 'vertexai/gemini-mock';
+  return {
+    transcript: 'Need 2 servers ASAP for 4 hours.',
+    confidence: 0.87,
+    language: locale,
+    warnings: [],
+    model: mockModel,
+  };
+}
+
+/**
+ * Turns rapid-order request text into a structured one-time order draft.
+ *
+ * The timezone is validated first. In mock mode the heuristic result from
+ * `buildMockParseResult` is returned as-is; otherwise the model is called, its
+ * output is validated, and the result is normalized before being returned.
+ *
+ * @param {object} params
+ * @param {string} params.text - Request text to parse.
+ * @param {string} [params.locale='en-US'] - Locale hint for the prompt.
+ * @param {string} [params.timezone] - Timezone hint; skipped in validation when falsy.
+ * @param {string} [params.now] - Reference timestamp; defaults to the current time in ISO form.
+ * @returns {Promise<object>} Parsed draft, missing fields, warnings, confidence, and model name.
+ * @throws {AppError} `VALIDATION_ERROR` (400) for an unusable timezone, or
+ *   `MODEL_FAILED` (502) when the model output fails validation.
+ */
+export async function parseRapidOrderText({
+  text,
+  locale = 'en-US',
+  timezone,
+  now,
+}) {
+  validateTimezoneOrThrow(timezone);
+
+  const referenceTime = now || new Date().toISOString();
+
+  if (!isMockLlmEnabled()) {
+    const llmResult = await invokeVertexModel({
+      prompt: buildParsePrompt({ text, locale, timezone, now: referenceTime }),
+      responseJsonSchema: RAPID_ORDER_PARSE_JSON_SCHEMA,
+    });
+
+    const validated = parseModelResult(
+      rapidOrderParseResultSchema,
+      llmResult.result,
+      'Rapid order parsing failed'
+    );
+
+    return {
+      ...normalizeRapidOrderParseResult(validated),
+      model: llmResult.model,
+    };
+  }
+
+  // Mock mode: regex-based draft, no model call.
+  const mockDraft = buildMockParseResult({ text, now: referenceTime });
+  return {
+    ...mockDraft,
+    model: process.env.LLM_MODEL || 'vertexai/gemini-mock',
+  };
+}