Merge pull request #560 from Oloodi/codex/rapid-order-transcribe-parse

feat(core-api): add rapid order transcribe and parse endpoints
This commit is contained in:
Wielfried Zouantcha
2026-02-27 11:13:33 -05:00
committed by GitHub
7 changed files with 808 additions and 12 deletions

View File

@@ -0,0 +1,17 @@
import { z } from 'zod';
// Shape check for a language tag: a 2-3 letter primary subtag followed by up to
// two hyphen-separated alphanumeric subtags of 2-8 characters (e.g. "en", "en-US").
const localePattern = /^[a-zA-Z]{2,3}(?:-[a-zA-Z0-9]{2,8}){0,2}$/;
// Request-body contract for parsing rapid order text.
// `.strict()` makes unknown keys a validation failure.
export const rapidOrderParseSchema = z
.object({
// Free-form order text: trimmed, 1-4000 characters.
text: z.string().trim().min(1).max(4000),
// Optional language tag; resolves to 'en-US' when omitted.
locale: z
.string()
.trim()
.regex(localePattern, 'locale must be a valid BCP-47 language tag')
.optional()
.default('en-US'),
// Optional timezone name; this schema only trims and length-checks it.
timezone: z.string().trim().min(1).max(80).optional(),
// Optional ISO 8601 datetime string; UTC offsets are accepted.
now: z.string().datetime({ offset: true }).optional(),
})
.strict();

View File

@@ -0,0 +1,23 @@
import { z } from 'zod';
// Shape check for a language tag: a 2-3 letter primary subtag followed by up to
// two hyphen-separated alphanumeric subtags of 2-8 characters (e.g. "en", "en-US").
const localePattern = /^[a-zA-Z]{2,3}(?:-[a-zA-Z0-9]{2,8}){0,2}$/;
// Request-body contract for transcribing rapid order audio.
// `.strict()` makes unknown keys a validation failure.
export const rapidOrderTranscribeSchema = z
.object({
// Storage URI of the audio file; must begin with gs:// and be at most 2048 characters.
audioFileUri: z
.string()
.startsWith('gs://', 'audioFileUri must start with gs://')
.max(2048),
// Optional language tag; resolves to 'en-US' when omitted.
locale: z
.string()
.trim()
.regex(localePattern, 'locale must be a valid BCP-47 language tag')
.optional()
.default('en-US'),
// Optional list of up to 20 short hint strings (1-80 characters each); defaults to [].
promptHints: z
.array(z.string().trim().min(1).max(80))
.max(20)
.optional()
.default([]),
})
.strict();

View File

@@ -6,9 +6,12 @@ import { requireAuth, requirePolicy } from '../middleware/auth.js';
import { createSignedUrlSchema } from '../contracts/core/create-signed-url.js';
import { createVerificationSchema } from '../contracts/core/create-verification.js';
import { invokeLlmSchema } from '../contracts/core/invoke-llm.js';
import { rapidOrderParseSchema } from '../contracts/core/rapid-order-parse.js';
import { rapidOrderTranscribeSchema } from '../contracts/core/rapid-order-transcribe.js';
import { reviewVerificationSchema } from '../contracts/core/review-verification.js';
import { invokeVertexModel } from '../services/llm.js';
import { checkLlmRateLimit } from '../services/llm-rate-limit.js';
import { parseRapidOrderText, transcribeRapidOrderAudio } from '../services/rapid-order.js';
import {
ensureFileExistsForActor,
generateReadSignedUrl,
@@ -24,7 +27,22 @@ import {
const DEFAULT_MAX_FILE_BYTES = 10 * 1024 * 1024;
const DEFAULT_MAX_SIGNED_URL_SECONDS = 900;
const ALLOWED_FILE_TYPES = new Set(['application/pdf', 'image/jpeg', 'image/jpg', 'image/png']);
// Set of MIME type strings covering PDF, common image formats and audio formats.
// NOTE(review): presumably consulted by the upload handler — its usage is not visible in this view.
const ALLOWED_FILE_TYPES = new Set([
'application/pdf',
'image/jpeg',
'image/jpg',
'image/png',
'audio/webm',
'audio/wav',
'audio/x-wav',
'audio/mpeg',
'audio/mp3',
'audio/mp4',
'audio/m4a',
'audio/aac',
'audio/ogg',
'audio/flac',
]);
const upload = multer({
storage: multer.memoryStorage(),
@@ -59,6 +77,10 @@ function requireVerificationFileExists() {
return process.env.VERIFICATION_REQUIRE_FILE_EXISTS !== 'false';
}
/**
 * Reports whether rapid-order audio files must be verified to exist.
 * The check is on unless RAPID_AUDIO_REQUIRE_FILE_EXISTS is exactly 'false'.
 *
 * @returns {boolean} false only when the variable equals the string 'false'.
 */
function requireRapidAudioFileExists() {
  const flag = process.env.RAPID_AUDIO_REQUIRE_FILE_EXISTS;
  const explicitlyDisabled = flag === 'false';
  return !explicitlyDisabled;
}
function parseBody(schema, body) {
const parsed = schema.safeParse(body);
if (!parsed.success) {
@@ -69,6 +91,15 @@ function parseBody(schema, body) {
return parsed.data;
}
/**
 * Asks the rate limiter about the given user and raises when the call is not allowed.
 *
 * @param {*} uid - identifier of the caller, forwarded to checkLlmRateLimit.
 * @throws {AppError} RATE_LIMITED (429) carrying retryAfterSeconds when the limiter denies the call.
 */
function enforceLlmRateLimit(uid) {
  const verdict = checkLlmRateLimit({ uid });
  if (verdict.allowed) {
    return;
  }
  const details = { retryAfterSeconds: verdict.retryAfterSeconds };
  throw new AppError('RATE_LIMITED', 'Too many model requests. Please retry shortly.', 429, details);
}
async function handleUploadFile(req, res, next) {
try {
const file = req.file;
@@ -158,12 +189,7 @@ async function handleCreateSignedUrl(req, res, next) {
async function handleInvokeLlm(req, res, next) {
try {
const payload = parseBody(invokeLlmSchema, req.body || {});
const rateLimit = checkLlmRateLimit({ uid: req.actor.uid });
if (!rateLimit.allowed) {
throw new AppError('RATE_LIMITED', 'Too many model requests. Please retry shortly.', 429, {
retryAfterSeconds: rateLimit.retryAfterSeconds,
});
}
enforceLlmRateLimit(req.actor.uid);
const startedAt = Date.now();
if (process.env.LLM_MOCK === 'false') {
@@ -194,6 +220,63 @@ async function handleInvokeLlm(req, res, next) {
}
}
/**
 * Express handler: validates the transcribe request, checks the caller may use the
 * audio file, applies the model rate limit, then responds with the transcription.
 * Any thrown error is forwarded to `next`.
 *
 * @param {object} req - request; reads body, actor.uid and requestId.
 * @param {object} res - response; receives a 200 JSON payload on success.
 * @param {Function} next - error continuation.
 */
async function handleRapidOrderTranscribe(req, res, next) {
  try {
    const { audioFileUri, locale, promptHints } = parseBody(rapidOrderTranscribeSchema, req.body || {});

    // Access check runs first so a URI the caller may not use never reaches storage or the model.
    validateFileUriAccess({ fileUri: audioFileUri, actorUid: req.actor.uid });

    const mustConfirmFile = requireRapidAudioFileExists() && !useMockUpload();
    if (mustConfirmFile) {
      await ensureFileExistsForActor({ fileUri: audioFileUri, actorUid: req.actor.uid });
    }

    enforceLlmRateLimit(req.actor.uid);

    const startedAt = Date.now();
    const transcription = await transcribeRapidOrderAudio({ audioFileUri, locale, promptHints });

    return res.status(200).json({
      ...transcription,
      latencyMs: Date.now() - startedAt,
      requestId: req.requestId,
    });
  } catch (error) {
    return next(error);
  }
}
/**
 * Express handler: validates the parse request, applies the model rate limit and
 * responds with the structured order draft. Any thrown error is forwarded to `next`.
 *
 * @param {object} req - request; reads body, actor.uid and requestId.
 * @param {object} res - response; receives a 200 JSON payload on success.
 * @param {Function} next - error continuation.
 */
async function handleRapidOrderParse(req, res, next) {
  try {
    const { text, locale, timezone, now } = parseBody(rapidOrderParseSchema, req.body || {});
    enforceLlmRateLimit(req.actor.uid);

    const startedAt = Date.now();
    const draft = await parseRapidOrderText({ text, locale, timezone, now });

    return res.status(200).json({
      ...draft,
      latencyMs: Date.now() - startedAt,
      requestId: req.requestId,
    });
  } catch (error) {
    return next(error);
  }
}
async function handleCreateVerification(req, res, next) {
try {
const payload = parseBody(createVerificationSchema, req.body || {});
@@ -268,6 +351,8 @@ export function createCoreRouter() {
router.post('/upload-file', requireAuth, requirePolicy('core.upload', 'file'), upload.single('file'), handleUploadFile);
router.post('/create-signed-url', requireAuth, requirePolicy('core.sign-url', 'file'), handleCreateSignedUrl);
router.post('/invoke-llm', requireAuth, requirePolicy('core.invoke-llm', 'model'), handleInvokeLlm);
router.post('/rapid-orders/transcribe', requireAuth, requirePolicy('core.rapid-order.transcribe', 'model'), handleRapidOrderTranscribe);
router.post('/rapid-orders/parse', requireAuth, requirePolicy('core.rapid-order.parse', 'model'), handleRapidOrderParse);
router.post('/verifications', requireAuth, requirePolicy('core.verification.create', 'verification'), handleCreateVerification);
router.get('/verifications/:verificationId', requireAuth, requirePolicy('core.verification.read', 'verification'), handleGetVerification);
router.post('/verifications/:verificationId/review', requireAuth, requirePolicy('core.verification.review', 'verification'), handleReviewVerification);

View File

@@ -45,6 +45,13 @@ function guessMimeTypeFromUri(fileUri) {
if (path.endsWith('.jpg') || path.endsWith('.jpeg')) return 'image/jpeg';
if (path.endsWith('.png')) return 'image/png';
if (path.endsWith('.pdf')) return 'application/pdf';
if (path.endsWith('.webm')) return 'audio/webm';
if (path.endsWith('.mp3')) return 'audio/mpeg';
if (path.endsWith('.wav')) return 'audio/wav';
if (path.endsWith('.m4a')) return 'audio/m4a';
if (path.endsWith('.aac')) return 'audio/aac';
if (path.endsWith('.ogg')) return 'audio/ogg';
if (path.endsWith('.flac')) return 'audio/flac';
return 'application/octet-stream';
}

View File

@@ -0,0 +1,410 @@
import { z } from 'zod';
import { AppError } from '../lib/errors.js';
import { invokeVertexModel, invokeVertexMultimodalModel } from './llm.js';
// Validates the model's transcription output before it is returned to callers.
// Missing confidence, language and warnings fall back to 0.7, 'en-US' and [].
const rapidOrderTranscriptionSchema = z.object({
transcript: z.string().trim().min(1).max(4000),
confidence: z.number().min(0).max(1).default(0.7),
language: z.string().trim().min(2).max(35).default('en-US'),
warnings: z.array(z.string().trim().min(1).max(200)).max(10).default([]),
});
// One requested position: a role label plus a whole-number headcount between 1 and 200.
const rapidOrderPositionSchema = z.object({
role: z.string().trim().min(1).max(100),
count: z.number().int().min(1).max(200),
});
// Validates the model's parse output: the order draft, the list of fields still
// missing, warnings, and confidence scores in the 0-1 range.
const rapidOrderParseResultSchema = z.object({
parsed: z.object({
// Only one-time rapid orders are accepted by this schema.
orderType: z.literal('ONE_TIME'),
isRapid: z.literal(true),
positions: z.array(rapidOrderPositionSchema).max(20).default([]),
// Nullable draft fields default to null when the model omits them.
startAt: z.string().datetime({ offset: true }).nullable().default(null),
endAt: z.string().datetime({ offset: true }).nullable().default(null),
durationMinutes: z.number().int().min(15).max(1440).nullable().default(null),
locationHint: z.string().trim().max(200).nullable().default(null),
notes: z.string().trim().max(1200).nullable().default(null),
sourceText: z.string().trim().min(1).max(4000),
}),
missingFields: z.array(z.string().trim().min(1).max(60)).max(20).default([]),
warnings: z.array(z.string().trim().min(1).max(200)).max(20).default([]),
confidence: z.object({
overall: z.number().min(0).max(1),
// Per-field scores keyed by field name.
fields: z.record(z.number().min(0).max(1)).default({}),
}),
});
// JSON Schema handed to the multimodal model call as `responseJsonSchema` for transcription.
// It lists the same four keys as rapidOrderTranscriptionSchema but carries no length limits.
const RAPID_ORDER_TRANSCRIPTION_JSON_SCHEMA = {
type: 'object',
additionalProperties: false,
required: ['transcript', 'confidence', 'language', 'warnings'],
properties: {
transcript: { type: 'string' },
confidence: { type: 'number', minimum: 0, maximum: 1 },
language: { type: 'string' },
warnings: {
type: 'array',
items: { type: 'string' },
},
},
};
// JSON Schema handed to the model call as `responseJsonSchema` for order parsing.
// It mirrors the structure of rapidOrderParseResultSchema; string and array size
// limits are not expressed here and are only enforced by the zod schema.
const RAPID_ORDER_PARSE_JSON_SCHEMA = {
type: 'object',
additionalProperties: false,
required: ['parsed', 'missingFields', 'warnings', 'confidence'],
properties: {
parsed: {
type: 'object',
additionalProperties: false,
required: [
'orderType',
'isRapid',
'positions',
'startAt',
'endAt',
'durationMinutes',
'locationHint',
'notes',
'sourceText',
],
properties: {
// Single-value enums pin these two fields to constants.
orderType: { type: 'string', enum: ['ONE_TIME'] },
isRapid: { type: 'boolean', enum: [true] },
positions: {
type: 'array',
items: {
type: 'object',
additionalProperties: false,
required: ['role', 'count'],
properties: {
role: { type: 'string' },
count: { type: 'integer', minimum: 1, maximum: 200 },
},
},
},
// The fields below are required keys whose value may be null.
startAt: {
anyOf: [
{ type: 'string', format: 'date-time' },
{ type: 'null' },
],
},
endAt: {
anyOf: [
{ type: 'string', format: 'date-time' },
{ type: 'null' },
],
},
durationMinutes: {
anyOf: [
{ type: 'integer', minimum: 15, maximum: 1440 },
{ type: 'null' },
],
},
locationHint: {
anyOf: [
{ type: 'string' },
{ type: 'null' },
],
},
notes: {
anyOf: [
{ type: 'string' },
{ type: 'null' },
],
},
sourceText: { type: 'string' },
},
},
missingFields: {
type: 'array',
items: { type: 'string' },
},
warnings: {
type: 'array',
items: { type: 'string' },
},
confidence: {
type: 'object',
additionalProperties: false,
required: ['overall', 'fields'],
properties: {
overall: { type: 'number', minimum: 0, maximum: 1 },
// Open-ended map of field name to a score in the 0-1 range.
fields: {
type: 'object',
additionalProperties: { type: 'number', minimum: 0, maximum: 1 },
},
},
},
},
};
/**
 * Validates a model response against a schema and returns the parsed data.
 *
 * @param {{ safeParse: Function }} schema - validator exposing safeParse.
 * @param {*} value - raw model output.
 * @param {string} errorMessage - message used when validation fails.
 * @returns {*} the validated data produced by the schema.
 * @throws {AppError} MODEL_FAILED (502) with the validation issues when parsing fails.
 */
function parseModelResult(schema, value, errorMessage) {
  const outcome = schema.safeParse(value);
  if (outcome.success) {
    return outcome.data;
  }
  const details = { issues: outcome.error.issues };
  throw new AppError('MODEL_FAILED', errorMessage, 502, details);
}
/**
 * Reports whether model calls should be mocked.
 * Mocking is on unless LLM_MOCK is exactly the string 'false'.
 *
 * @returns {boolean}
 */
function isMockLlmEnabled() {
  const { LLM_MOCK: mockFlag } = process.env;
  if (mockFlag === 'false') {
    return false;
  }
  return true;
}
/**
 * Rejects timezone names the runtime's Intl implementation does not accept.
 * A falsy timezone is treated as "not provided" and passes.
 *
 * @param {string} [timezone] - timezone name to check.
 * @throws {AppError} VALIDATION_ERROR (400) when Intl.DateTimeFormat refuses the value.
 */
function validateTimezoneOrThrow(timezone) {
  if (timezone) {
    let accepted = true;
    try {
      // Formatting a date with the zone is the probe; an unknown zone makes Intl throw.
      new Intl.DateTimeFormat('en-US', { timeZone: timezone }).format(new Date());
    } catch {
      accepted = false;
    }
    if (!accepted) {
      throw new AppError('VALIDATION_ERROR', 'timezone must be a valid IANA timezone', 400, {
        timezone,
      });
    }
  }
}
/**
 * Picks the first known staffing role mentioned in free-form text.
 *
 * Matching is case-insensitive and word-bounded, and accepts singular and plural
 * forms. The previous host pattern missed the plain plural "hosts"; it is now
 * matched alongside host, hostess and hostesses.
 *
 * @param {string} text - order text to scan.
 * @returns {string} the detected role, or 'general_staff' when none matches.
 */
function detectRoleFromText(text) {
  // Order matters: when several roles are mentioned, the earliest entry in this list wins.
  const rolePatterns = [
    { role: 'server', regex: /\bserver(s)?\b/i },
    { role: 'bartender', regex: /\bbartender(s)?\b/i },
    { role: 'cook', regex: /\bcook(s)?\b/i },
    { role: 'chef', regex: /\bchef(s)?\b/i },
    { role: 'dishwasher', regex: /\bdishwasher(s)?\b/i },
    // "s" is tried first so "hosts" matches; the second alternative keeps every
    // form the earlier pattern accepted (host, hostess, hostesses).
    { role: 'host', regex: /\bhost(?:s|(?:ess)?(?:es)?)\b/i },
    { role: 'cashier', regex: /\bcashier(s)?\b/i },
    { role: 'barista', regex: /\bbarista(s)?\b/i },
  ];
  const found = rolePatterns.find(({ regex }) => regex.test(text));
  return found ? found.role : 'general_staff';
}
/**
 * Extracts a headcount from free-form order text.
 *
 * Uses the first standalone 1-3 digit number that is not a duration: numbers
 * directly followed by an hour/minute unit ("4 hours", "90 min") and the pieces
 * of a decimal ("1.5") are skipped, so "Need servers for 4 hours" no longer
 * reports a headcount of 4.
 *
 * @param {string} text - order text to scan.
 * @returns {number} headcount clamped to 1..200; 1 when no usable number is found.
 */
function detectCountFromText(text) {
  // (?<![.\d]) and (?!\.\d) reject either side of a decimal number;
  // the final lookahead rejects a number that is really a duration.
  const headcountPattern = /(?<![.\d])\b(\d{1,3})\b(?!\.\d)(?!\s*(?:hours?|hrs?|minutes?|mins?)\b)/i;
  const match = text.match(headcountPattern);
  if (!match) {
    return 1;
  }
  const parsed = Number.parseInt(match[1], 10);
  if (Number.isNaN(parsed) || parsed < 1) {
    return 1;
  }
  return Math.min(parsed, 200);
}
/**
 * Extracts a shift duration, in minutes, from free-form order text.
 *
 * An hours expression is tried first, then a minutes expression. Decimal hours
 * are understood: "1.5 hours" yields 90, where it was previously read as
 * "5 hours" and returned 300. Results under 15 minutes are ignored and results
 * above 1440 are capped. Hours and minutes are not added together.
 *
 * @param {string} text - order text to scan.
 * @returns {number|null} duration in minutes (15..1440), or null when none is found.
 */
function detectDurationMinutesFromText(text) {
  // The lookbehind stops the match from starting on the fractional part of a decimal.
  const hoursMatch = text.match(/(?<![.\d])\b(\d{1,2}(?:\.\d+)?)\s*(?:hour|hours|hr|hrs)\b/i);
  if (hoursMatch) {
    const minutesFromHours = Math.round(Number.parseFloat(hoursMatch[1]) * 60);
    // Whole hours are always >= 60; the lower bound only filters tiny fractions.
    if (!Number.isNaN(minutesFromHours) && minutesFromHours >= 15) {
      return Math.min(minutesFromHours, 1440);
    }
  }
  const minutesMatch = text.match(/\b(\d{1,3})\s*(minute|minutes|min|mins)\b/i);
  if (minutesMatch) {
    const minutes = Number.parseInt(minutesMatch[1], 10);
    if (!Number.isNaN(minutes) && minutes >= 15) {
      return Math.min(minutes, 1440);
    }
  }
  return null;
}
/**
 * Tells whether the text contains an urgency keyword (asap, right now,
 * immediately, urgent, emergency, now) as a whole word, ignoring case.
 *
 * @param {string} text - order text to scan.
 * @returns {boolean}
 */
function detectAsap(text) {
  const urgencyPattern = /\b(asap|right now|immediately|urgent|emergency|now)\b/i;
  const mentionsUrgency = urgencyPattern.test(text);
  return mentionsUrgency;
}
/**
 * Builds a heuristic (non-model) parse result for mock mode.
 *
 * The role lookup always returns a string and falls back to 'general_staff',
 * so the old `!role` check could never fire. The fallback is now treated as
 * "no role detected": the draft keeps the placeholder position, but 'positions'
 * is listed in missingFields, a warning is added, and the positions confidence
 * drops to 0.2 like the other undetected fields.
 *
 * @param {{ text: string, now?: string }} input - order text and an optional
 *   ISO timestamp used as the start time for ASAP requests.
 * @returns {object} draft with parsed, missingFields, warnings and confidence.
 */
function buildMockParseResult({ text, now }) {
  // Value the role detector returns when nothing matched.
  const fallbackRole = 'general_staff';

  const normalizedNow = now || new Date().toISOString();
  const role = detectRoleFromText(text);
  const count = detectCountFromText(text);
  const durationMinutes = detectDurationMinutesFromText(text);
  const startAt = detectAsap(text) ? normalizedNow : null;
  const roleDetected = Boolean(role) && role !== fallbackRole;

  const missingFields = [];
  const warnings = [];
  if (!startAt) {
    missingFields.push('startAt');
    warnings.push('Missing explicit start time. Prompt user to confirm date and time.');
  }
  if (!durationMinutes) {
    missingFields.push('durationMinutes');
    warnings.push('Missing duration. Prompt user for shift length.');
  }
  if (!roleDetected) {
    missingFields.push('positions');
    warnings.push('No recognizable role found. Prompt user to confirm the position.');
  }

  return {
    parsed: {
      orderType: 'ONE_TIME',
      isRapid: true,
      positions: [
        {
          role,
          count,
        },
      ],
      startAt,
      endAt: null,
      durationMinutes,
      locationHint: null,
      notes: null,
      sourceText: text,
    },
    missingFields,
    warnings,
    confidence: {
      overall: 0.72,
      fields: {
        positions: roleDetected ? 0.86 : 0.2,
        startAt: startAt ? 0.9 : 0.2,
        durationMinutes: durationMinutes ? 0.88 : 0.2,
      },
    },
  };
}
/**
 * Produces a cleaned copy of a validated parse result: duplicate missingFields
 * and warnings are removed, and 'positions', 'startAt' and 'durationMinutes'
 * are appended to missingFields whenever the draft lacks them.
 *
 * @param {object} result - validated parse result (parsed, missingFields, warnings, confidence).
 * @returns {object} a new object; the input is not modified.
 */
function normalizeRapidOrderParseResult(result) {
  const draft = {
    ...result.parsed,
    positions: result.parsed.positions,
    sourceText: result.parsed.sourceText,
  };

  // A Set keeps first-seen order and ignores repeats, so adding a field that is
  // already present is a no-op.
  const missing = new Set(result.missingFields);
  const uniqueWarnings = [...new Set(result.warnings)];
  const { overall, fields } = result.confidence;

  if (draft.positions.length === 0) {
    missing.add('positions');
  }
  if (!draft.startAt) {
    missing.add('startAt');
  }
  if (!draft.durationMinutes) {
    missing.add('durationMinutes');
  }

  return {
    parsed: draft,
    missingFields: [...missing],
    warnings: uniqueWarnings,
    confidence: { overall, fields },
  };
}
/**
 * Assembles the newline-separated instruction prompt for audio transcription.
 *
 * @param {{ locale: string, promptHints: string[] }} options - locale hint and
 *   domain vocabulary hints; an empty hint list is rendered as "none".
 * @returns {string} the prompt text.
 */
function buildTranscriptionPrompt({ locale, promptHints }) {
  let hintsLine;
  if (promptHints.length > 0) {
    hintsLine = `Domain hints: ${promptHints.join(', ')}`;
  } else {
    hintsLine = 'Domain hints: none';
  }

  const sections = [];
  sections.push('You transcribe urgent staffing request audio for a workforce scheduling app.');
  sections.push(`Locale hint: ${locale}`);
  sections.push(hintsLine);
  sections.push('Return only what was spoken in transcript form.');
  sections.push('Do not infer roles, counts, durations, dates, or locations that were not spoken.');
  sections.push('If audio quality is poor, still provide best-effort transcript and add warnings.');
  return sections.join('\n');
}
/**
 * Assembles the newline-separated instruction prompt for parsing order text.
 *
 * @param {{ text: string, locale: string, timezone?: string, now: string }} options -
 *   the order text plus locale, timezone (rendered as UTC when falsy) and current time hints.
 * @returns {string} the prompt text.
 */
function buildParsePrompt({ text, locale, timezone, now }) {
  const zoneHint = timezone || 'UTC';
  const header = [
    'You parse urgent staffing request text into a strict one-time order draft.',
    `Locale hint: ${locale}`,
    `Timezone hint: ${zoneHint}`,
    `Current time (ISO): ${now}`,
  ];
  const rules = [
    'Interpret phrases like ASAP/today/tonight into ISO datetimes when confidence is high.',
    'Do not invent uncertain data. Put unknown required values into missingFields.',
    'Use warnings for ambiguities that need user confirmation.',
  ];
  return [...header, ...rules, `Input text: ${text}`].join('\n');
}
/**
 * Transcribes a rapid-order audio file.
 *
 * In mock mode a fixed transcript is returned without calling the model.
 * Otherwise the multimodal model is invoked with the audio URI and its output
 * is validated before being returned.
 *
 * @param {{ audioFileUri: string, locale?: string, promptHints?: string[] }} options
 * @returns {Promise<object>} transcript, confidence, language, warnings and model name.
 * @throws {AppError} MODEL_FAILED when the model output does not match the expected shape.
 */
export async function transcribeRapidOrderAudio({
  audioFileUri,
  locale = 'en-US',
  promptHints = [],
}) {
  if (!isMockLlmEnabled()) {
    const request = {
      prompt: buildTranscriptionPrompt({ locale, promptHints }),
      responseJsonSchema: RAPID_ORDER_TRANSCRIPTION_JSON_SCHEMA,
      fileUris: [audioFileUri],
    };
    const response = await invokeVertexMultimodalModel(request);
    const transcription = parseModelResult(
      rapidOrderTranscriptionSchema,
      response.result,
      'Rapid order transcription failed'
    );
    return { ...transcription, model: response.model };
  }

  // Mock mode: canned transcript that echoes the requested locale.
  return {
    transcript: 'Need 2 servers ASAP for 4 hours.',
    confidence: 0.87,
    language: locale,
    warnings: [],
    model: process.env.LLM_MODEL || 'vertexai/gemini-mock',
  };
}
/**
 * Turns rapid-order text into a structured order draft.
 *
 * The timezone is validated first. In mock mode the draft comes from the local
 * heuristic builder; otherwise the model is invoked and its output is validated
 * and normalized.
 *
 * @param {{ text: string, locale?: string, timezone?: string, now?: string }} options -
 *   `now` defaults to the current time as an ISO string.
 * @returns {Promise<object>} the draft plus the model name.
 * @throws {AppError} VALIDATION_ERROR for a rejected timezone, MODEL_FAILED for malformed model output.
 */
export async function parseRapidOrderText({
  text,
  locale = 'en-US',
  timezone,
  now,
}) {
  validateTimezoneOrThrow(timezone);
  const referenceTime = now || new Date().toISOString();

  if (!isMockLlmEnabled()) {
    const request = {
      prompt: buildParsePrompt({ text, locale, timezone, now: referenceTime }),
      responseJsonSchema: RAPID_ORDER_PARSE_JSON_SCHEMA,
    };
    const response = await invokeVertexModel(request);
    const draft = parseModelResult(
      rapidOrderParseResultSchema,
      response.result,
      'Rapid order parsing failed'
    );
    return { ...normalizeRapidOrderParseResult(draft), model: response.model };
  }

  const mockDraft = buildMockParseResult({ text, now: referenceTime });
  return {
    ...mockDraft,
    model: process.env.LLM_MODEL || 'vertexai/gemini-mock',
  };
}

View File

@@ -150,6 +150,130 @@ test('POST /core/invoke-llm enforces per-user rate limit', async () => {
assert.equal(typeof second.headers['retry-after'], 'string');
});
// Upload endpoint: an audio/webm attachment is expected to be accepted and its content type echoed back.
test('POST /core/upload-file accepts audio/webm for rapid transcription', async () => {
const app = createApp();
const res = await request(app)
.post('/core/upload-file')
.set('Authorization', 'Bearer test-token')
.field('visibility', 'private')
.attach('file', Buffer.from('fake-audio-data'), {
filename: 'rapid-request.webm',
contentType: 'audio/webm',
});
assert.equal(res.status, 200);
assert.equal(res.body.contentType, 'audio/webm');
assert.equal(typeof res.body.fileUri, 'string');
});
// Transcribe endpoint, happy path: only the shape of the response is asserted.
// NOTE(review): presumably relies on mock mode being active in the test environment — confirm in the harness setup.
test('POST /core/rapid-orders/transcribe returns transcript in mock mode', async () => {
const app = createApp();
const res = await request(app)
.post('/core/rapid-orders/transcribe')
.set('Authorization', 'Bearer test-token')
.send({
audioFileUri: 'gs://krow-workforce-dev-private/uploads/test-user/request.webm',
locale: 'en-US',
promptHints: ['server', 'urgent'],
});
assert.equal(res.status, 200);
assert.equal(typeof res.body.transcript, 'string');
assert.ok(res.body.transcript.length > 0);
assert.equal(typeof res.body.confidence, 'number');
assert.equal(typeof res.body.model, 'string');
assert.equal(typeof res.body.requestId, 'string');
});
// Transcribe endpoint: a URI under another user's upload path is expected to be refused with 403 FORBIDDEN.
test('POST /core/rapid-orders/transcribe rejects non-owned file URI', async () => {
const app = createApp();
const res = await request(app)
.post('/core/rapid-orders/transcribe')
.set('Authorization', 'Bearer test-token')
.send({
audioFileUri: 'gs://krow-workforce-dev-private/uploads/other-user/request.webm',
locale: 'en-US',
});
assert.equal(res.status, 403);
assert.equal(res.body.code, 'FORBIDDEN');
});
// Parse endpoint, happy path: role, count and duration are checked against the input sentence.
test('POST /core/rapid-orders/parse returns structured rapid order draft', async () => {
const app = createApp();
const res = await request(app)
.post('/core/rapid-orders/parse')
.set('Authorization', 'Bearer test-token')
.send({
text: 'Need 2 servers ASAP for 4 hours',
locale: 'en-US',
timezone: 'America/New_York',
now: '2026-02-27T12:00:00.000Z',
});
assert.equal(res.status, 200);
assert.equal(res.body.parsed.orderType, 'ONE_TIME');
assert.equal(res.body.parsed.isRapid, true);
assert.equal(Array.isArray(res.body.parsed.positions), true);
assert.equal(res.body.parsed.positions[0].role, 'server');
assert.equal(res.body.parsed.positions[0].count, 2);
assert.equal(res.body.parsed.durationMinutes, 240);
assert.equal(typeof res.body.confidence.overall, 'number');
assert.equal(typeof res.body.requestId, 'string');
});
// Parse endpoint: an unknown timezone name is expected to produce 400 VALIDATION_ERROR.
test('POST /core/rapid-orders/parse validates timezone', async () => {
const app = createApp();
const res = await request(app)
.post('/core/rapid-orders/parse')
.set('Authorization', 'Bearer test-token')
.send({
text: 'Need 2 servers ASAP',
timezone: 'Mars/OlympusMons',
});
assert.equal(res.status, 400);
assert.equal(res.body.code, 'VALIDATION_ERROR');
});
// Parse endpoint: extra body keys are expected to produce 400 VALIDATION_ERROR.
test('POST /core/rapid-orders/parse rejects unknown fields', async () => {
const app = createApp();
const res = await request(app)
.post('/core/rapid-orders/parse')
.set('Authorization', 'Bearer test-token')
.send({
text: 'Need 2 servers ASAP',
unexpected: 'not-allowed',
});
assert.equal(res.status, 400);
assert.equal(res.body.code, 'VALIDATION_ERROR');
});
// Parse endpoint: with the per-minute limit set to '1', the second request is expected to get 429.
// NOTE(review): LLM_RATE_LIMIT_PER_MINUTE is not restored inside this test — confirm a shared hook resets it.
test('POST /core/rapid-orders/parse enforces per-user model rate limit', async () => {
process.env.LLM_RATE_LIMIT_PER_MINUTE = '1';
const app = createApp();
const first = await request(app)
.post('/core/rapid-orders/parse')
.set('Authorization', 'Bearer test-token')
.send({
text: 'Need 2 servers ASAP for 4 hours',
});
const second = await request(app)
.post('/core/rapid-orders/parse')
.set('Authorization', 'Bearer test-token')
.send({
text: 'Need 3 bartenders tonight',
});
assert.equal(first.status, 200);
assert.equal(second.status, 429);
assert.equal(second.body.code, 'RATE_LIMITED');
assert.equal(typeof second.headers['retry-after'], 'string');
});
test('POST /core/verifications creates async job and GET returns status', async () => {
const app = createApp();
const created = await request(app)