initial project setup with README and ignore

This commit is contained in:
2026-04-08 15:13:42 +05:30
commit 2d5688cb35
47 changed files with 7929 additions and 0 deletions

58
.dockerignore Normal file
View File

@@ -0,0 +1,58 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
*.egg-info/
dist/
build/
# Virtual environments
venv/
env/
ENV/
# IDE
.vscode/
.idea/
*.swp
*.swo
# Testing
.pytest_cache/
.coverage
htmlcov/
# Documentation
*.md
!README.md
# Environment
.env
.env.local
# Logs
*.log
# OS
.DS_Store
Thumbs.db
# Git
.git/
.gitignore
# Docker
Dockerfile
docker-compose.yml
.dockerignore
# Test files
test_*.py
*_test.py
# Temporary files
*.tmp
*.bak

12
.gitignore vendored Normal file
View File

@@ -0,0 +1,12 @@
.env
__pycache__/
*.py[cod]
*$py.class
*.pkl
ml_data/
output.json
route.json
ml_params_output.txt
idea.txt
.idea/
.vscode/

25
Dockerfile Normal file
View File

@@ -0,0 +1,25 @@
# syntax=docker/dockerfile:1
FROM python:3.11-slim

# No .pyc files, unbuffered logs, and no pip cache — keeps the image small.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

WORKDIR /app

# Install dependencies first (layer caching: requirements change less often than code)
COPY requirements.txt ./
RUN pip install --upgrade pip \
    && pip install -r requirements.txt

# Copy application code
COPY app ./app
COPY start.py ./start.py
COPY docker-entrypoint.sh ./docker-entrypoint.sh

# Make entrypoint executable
RUN chmod +x docker-entrypoint.sh

# Port must match the uvicorn port configured in app/main.py (8002).
EXPOSE 8002
ENTRYPOINT ["./docker-entrypoint.sh"]

15
README.md Normal file
View File

@@ -0,0 +1,15 @@
# Route Rider API
Centralized Routing Engine for Rider Assignments.
## Setup
1. Install dependencies:
```bash
pip install -r requirements.txt
```
2. Run the application:
```bash
python start.py
```

1
app/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Delivery Route Optimization API

1
app/config/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Configuration package for mobile delivery optimization."""

View File

@@ -0,0 +1,204 @@
"""
Dynamic Configuration - rider-api
Replaces all hardcoded hyperparameters with DB-backed values.
The ML hypertuner writes optimal values here; services read from here.
Fallback: If DB is unavailable or no tuned values exist, defaults are used.
This means zero risk - the system works day 1 with no data.
"""
import json
import logging
import os
import sqlite3
from datetime import datetime
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)

# --- DB Path ------------------------------------------------------------------
# Overridable via ML_DB_PATH so tests / containers can relocate the store.
_DB_PATH = os.getenv("ML_DB_PATH", "ml_data/ml_store.db")

# --- Hard Defaults (What the system used before ML) ---------------------------
# Keys here are the complete set of tunable parameters; the hypertuner writes
# overrides for these same keys into the dynamic_config table.
DEFAULTS: Dict[str, Any] = {
    # System Strategy / Prompt
    "ml_strategy": "balanced",
    # AssignmentService
    "max_pickup_distance_km": 10.0,
    "max_kitchen_distance_km": 3.0,
    "max_orders_per_rider": 12,
    "ideal_load": 6,
    "workload_balance_threshold": 0.7,
    "workload_penalty_weight": 100.0,
    "distance_penalty_weight": 2.0,
    # Negative values are bonuses (they reduce assignment cost).
    "preference_bonus": -15.0,
    "home_zone_bonus_4km": -3.0,
    "home_zone_bonus_2km": -5.0,
    "emergency_load_penalty": 3.0,  # km penalty per order in emergency assign
    # RouteOptimizer
    "search_time_limit_seconds": 5,
    "avg_speed_kmh": 18.0,
    "road_factor": 1.3,
    # ClusteringService
    "cluster_radius_km": 3.0,
    # KalmanFilter
    "kalman_process_noise": 1e-4,
    "kalman_measurement_noise": 0.01,
    # RealisticETACalculator
    "eta_pickup_time_min": 3.0,
    "eta_delivery_time_min": 4.0,
    "eta_navigation_buffer_min": 1.5,
    "eta_short_trip_factor": 0.8,  # speed multiplier for dist < 2km
    "eta_long_trip_factor": 1.1,   # speed multiplier for dist > 8km
}
class DynamicConfig:
    """
    Thread-safe, DB-backed configuration store.

    ML-tuned values live in the SQLite ``dynamic_config`` table and shadow
    the hardcoded ``DEFAULTS``; any DB failure degrades to defaults.

    Usage:
        cfg = DynamicConfig()
        max_dist = cfg.get("max_pickup_distance_km")
        all_params = cfg.get_all()
    """

    # Process-wide singleton instance.
    # NOTE(review): __new__ is not lock-protected; presumably the first
    # instantiation happens at import time before threads start — confirm.
    _instance: Optional["DynamicConfig"] = None

    def __new__(cls) -> "DynamicConfig":
        """Singleton - one config per process."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # Flag so __init__ runs its body only once (Python re-invokes
            # __init__ on every DynamicConfig() call, even for a reused instance).
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return
        self._initialized = True
        # key -> ML-tuned value loaded from SQLite; overrides DEFAULTS.
        self._cache: Dict[str, Any] = {}
        # Timestamp of last successful DB load; drives periodic refresh.
        self._last_loaded: Optional[datetime] = None
        self._ensure_db()
        self._load()

    # --------------------------------------------------------------------------
    # Public API
    # --------------------------------------------------------------------------
    def get(self, key: str, default: Any = None) -> Any:
        """Get a config value. Returns ML-tuned value if available, else default."""
        self._maybe_reload()
        val = self._cache.get(key)
        # NOTE(review): a tuned value stored as JSON null is treated as
        # "unset" here and falls through to the default.
        if val is not None:
            return val
        # Caller-supplied default wins over the module DEFAULTS entry.
        fallback = default if default is not None else DEFAULTS.get(key)
        return fallback

    def get_all(self) -> Dict[str, Any]:
        """Return all current config values (ML-tuned + defaults for missing keys)."""
        self._maybe_reload()
        # Start from defaults, then layer tuned overrides on top.
        result = dict(DEFAULTS)
        result.update(self._cache)
        return result

    def set(self, key: str, value: Any, source: str = "manual") -> None:
        """Write a config value to DB (used by hypertuner).

        Value is JSON-encoded; the row is upserted keyed on ``key``.
        Failures are logged, never raised.
        """
        try:
            os.makedirs(os.path.dirname(_DB_PATH) or ".", exist_ok=True)
            conn = sqlite3.connect(_DB_PATH)
            conn.execute("""
                INSERT INTO dynamic_config (key, value, source, updated_at)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(key) DO UPDATE SET
                    value=excluded.value,
                    source=excluded.source,
                    updated_at=excluded.updated_at
            """, (key, json.dumps(value), source, datetime.utcnow().isoformat()))
            conn.commit()
            conn.close()
            # Update the in-memory cache immediately so readers see it now.
            self._cache[key] = value
            logger.info(f"[DynamicConfig] Set {key}={value} (source={source})")
        except Exception as e:
            logger.error(f"[DynamicConfig] Failed to set {key}: {e}")

    def set_bulk(self, params: Dict[str, Any], source: str = "ml_hypertuner") -> None:
        """Write multiple config values at once (called after each Optuna study).

        Implemented as per-key set() calls, so each key opens its own DB
        connection; acceptable for the small parameter counts used here.
        """
        for key, value in params.items():
            self.set(key, value, source=source)
        logger.info(f"[DynamicConfig] Bulk update: {len(params)} params from {source}")

    def reset_to_defaults(self) -> None:
        """Wipe all ML-tuned values, revert to hardcoded defaults."""
        try:
            conn = sqlite3.connect(_DB_PATH)
            conn.execute("DELETE FROM dynamic_config")
            conn.commit()
            conn.close()
            self._cache.clear()
            logger.warning("[DynamicConfig] Reset to factory defaults.")
        except Exception as e:
            logger.error(f"[DynamicConfig] Reset failed: {e}")

    # --------------------------------------------------------------------------
    # Internal
    # --------------------------------------------------------------------------
    def _ensure_db(self) -> None:
        # Create the parent directory and table if missing; failure is
        # non-fatal (reads will then fall back to DEFAULTS).
        try:
            os.makedirs(os.path.dirname(_DB_PATH) or ".", exist_ok=True)
            conn = sqlite3.connect(_DB_PATH)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS dynamic_config (
                    key TEXT PRIMARY KEY,
                    value TEXT NOT NULL,
                    source TEXT DEFAULT 'manual',
                    updated_at TEXT
                )
            """)
            conn.commit()
            conn.close()
        except Exception as e:
            logger.error(f"[DynamicConfig] DB init failed: {e}")

    def _load(self) -> None:
        # Replace the whole cache with the current DB contents.
        try:
            conn = sqlite3.connect(_DB_PATH)
            rows = conn.execute("SELECT key, value FROM dynamic_config").fetchall()
            conn.close()
            self._cache = {}
            for key, raw in rows:
                try:
                    self._cache[key] = json.loads(raw)
                except Exception:
                    # Legacy/hand-written rows may hold plain strings.
                    self._cache[key] = raw
            self._last_loaded = datetime.utcnow()
            if self._cache:
                logger.info(f"[DynamicConfig] Loaded {len(self._cache)} ML-tuned params from DB")
        except Exception as e:
            logger.warning(f"[DynamicConfig] Could not load from DB (using defaults): {e}")
            self._cache = {}

    def _maybe_reload(self, interval_seconds: int = 300) -> None:
        """Reload from DB every 5 minutes - picks up new tuned params without restart."""
        if self._last_loaded is None:
            self._load()
            return
        delta = (datetime.utcnow() - self._last_loaded).total_seconds()
        if delta > interval_seconds:
            self._load()
# --- Module-level convenience singleton ---------------------------------------
# Constructed at import time; DynamicConfig.__new__ guarantees any later
# DynamicConfig() call returns this same object.
_cfg = DynamicConfig()


def get_config() -> DynamicConfig:
    """Get the global DynamicConfig singleton."""
    return _cfg

View File

@@ -0,0 +1,33 @@
"""Mobile-specific configuration for delivery route optimization."""
# Mobile optimization settings
MOBILE_CONFIG = {
"default_algorithm": "greedy",
"max_deliveries": 100,
"timeout_seconds": 5,
"response_compression": True,
"performance_monitoring": True,
"mobile_headers": True
}
# Performance targets for mobile
PERFORMANCE_TARGETS = {
"greedy_algorithm": {
"max_response_time": 0.1, # 100ms
"max_deliveries": 50,
"description": "Ultra-fast for real-time mobile apps"
},
"tsp_algorithm": {
"max_response_time": 3.0, # 3 seconds
"max_deliveries": 30,
"description": "Optimal but slower, good for planning"
}
}
# Mobile app recommendations
MOBILE_RECOMMENDATIONS = {
"real_time_delivery": "greedy",
"route_planning": "tsp",
"large_batches": "greedy",
"cost_optimization": "tsp"
}

View File

@@ -0,0 +1,50 @@
"""
Rider Preferred Kitchens Configuration
Mapping of Rider ID (int) to list of preferred Kitchen names (str).
Updated based on Deployment Plan.
"""
# Rider ID -> ordered list of preferred kitchen display names.
# Kitchen name strings must match the provider payload exactly — do not edit.
RIDER_PREFERRED_KITCHENS = {
    # 1. VIVEK ANANDAN - LOCAL, RS PURAM TO SELVAPURAM
    1116: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],
    # 2. NARAYANASAMY - VENGATAPURAM, VADAVALI, TADAGAM ROAD
    1096: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],
    # 3. VARUN EDWARD - GN MILLS, KAVUNDAMPALAYAM, THUDIYALUR
    897: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],
    # 4. JAYASABESH - GANAPATHY
    950: ["Daily grubs nandhini", "Vidhya kitchen"],
    # 5. TAMILAZHAGAN - GANDHIMA NAGAR
    1114: ["Daily grubs nandhini", "Vidhya kitchen"],
    # 6. RAJAN - PEELAMEDU
    883: ["Daily grubs nandhini", "Vidhya kitchen"],
    # 7. MUTHURAJA - RAMANATHAPURAM TO SAIBABA COLONY
    1272: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen", "Daily grubs nandhini", "Vidhya kitchen"],
    # 8. MANIKANDAN - SINGANALLUR
    753: ["Daily grubs nandhini", "Vidhya kitchen"],
    # 9. THATCHINAMOORTHI - KOVAI PUDUR TO KAVUNDAMPALAYAM
    1271: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],
    1133: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],  # Active ID
}

# Anchor Coordinates for Riders (Based on Area Name)
# Used as fallback if GPS is missing, or to bias assignment to their Home Zone.
# NOTE(review): rider 1133 has kitchen preferences above but no home location
# here — confirm whether a fallback coordinate is needed for that ID.
RIDER_HOME_LOCATIONS = {
    1116: (11.0067, 76.9558),  # VIVEK ANANDAN: RS PURAM
    1096: (11.0450, 76.9000),  # NARAYANASAMY: VADAVALI
    897: (11.0430, 76.9380),   # VARUN EDWARD: KAVUNDAMPALAYAM
    950: (11.0330, 76.9800),   # JAYASABESH: GANAPATHY
    1114: (11.0450, 77.0000),  # TAMILAZHAGAN: GANDHIMA NAGAR
    883: (11.0200, 77.0000),   # RAJAN: PEELAMEDU
    1272: (10.9950, 77.0000),  # MUTHURAJA: RAMANATHAPURAM
    753: (11.0000, 77.0300),   # MANIKANDAN: SINGANALLUR
    1271: (10.9500, 76.9600),  # THATCHINAMOORTHI: KOVAI PUDUR
}

View File

@@ -0,0 +1,5 @@
"""Controllers package."""
from .route_controller import RouteController
__all__ = ["RouteController"]

View File

@@ -0,0 +1,87 @@
"""Controller for provider payload optimization and forwarding."""
import logging
import hashlib
import json
from typing import Dict, Any
import httpx
from fastapi import HTTPException
from app.core.exceptions import ValidationError, APIException
from app.services.routing.route_optimizer import RouteOptimizer
from app.services import cache
logger = logging.getLogger(__name__)
class RouteController:
    """Controller for optimizing provider payloads and forwarding upstream.

    Forwarding is currently paused: the optimized payload is returned
    directly to the caller instead of being POSTed to ``forward_url``.
    """

    def __init__(self):
        # Single optimizer instance reused for every request handled here.
        self.route_optimizer = RouteOptimizer()

    def _hash_key(self, prefix: str, payload: Dict[str, Any]) -> str:
        """Create a stable cache key from a dict payload."""
        # ensure deterministic json by sorting keys
        serialized = json.dumps(payload, sort_keys=True, separators=(",", ":"))
        digest = hashlib.sha256(serialized.encode("utf-8")).hexdigest()
        return f"routes:{prefix}:{digest}"

    async def optimize_and_forward_provider_payload(self, orders: list[dict], forward_url: str) -> dict:
        """Optimize provider payload and return it (forwarding paused).

        - Input: list of provider orders (dicts)
        - Output: {code, details, message, status} where details is the optimized array

        Note: ``forward_url`` is accepted but unused while forwarding is paused.
        """
        try:
            if not isinstance(orders, list) or not orders:
                raise ValidationError("Orders array is required", field="body")
            optimized = await self.route_optimizer.optimize_provider_payload(orders)
            # Debug sample of optimized payload (first 3 items, select keys).
            # Wrapped in its own try so logging problems never fail the request.
            try:
                sample = [
                    {
                        k: item.get(k)
                        for k in ("orderheaderid", "orderid", "deliverycustomerid", "step", "previouskms", "cumulativekms", "eta")
                    }
                    for item in optimized[:3]
                ]
                logger.debug(f"Optimized payload sample: {sample}")
                trace = [
                    {
                        "orderid": item.get("orderid"),
                        "step": item.get("step"),
                        "prev": item.get("previouskms"),
                        "cum": item.get("cumulativekms"),
                    }
                    for item in optimized
                ]
                logger.debug(f"Optimized order trace: {trace}")
            except Exception:
                logger.debug("Optimized payload sample logging failed")
            # Forwarding paused: return optimized payload directly
            return {
                "code": 200,
                "details": optimized,
                "message": "Success",
                "status": True,
            }
        except ValidationError:
            # Re-raise as-is so the validation handler formats it.
            raise
        except httpx.HTTPStatusError as e:
            # Only reachable once upstream forwarding is re-enabled.
            status_code = e.response.status_code
            body_text = e.response.text
            logger.error(f"Forwarding failed: {status_code} - {body_text}")
            # Surface upstream details to the client for faster debugging
            raise APIException(
                status_code=502,
                message=f"Upstream service error (status {status_code}): {body_text}",
                code="UPSTREAM_ERROR"
            )
        except Exception as e:
            logger.error(f"Error optimizing/forwarding provider payload: {e}", exc_info=True)
            raise APIException(status_code=500, message="Internal server error", code="INTERNAL_ERROR")

# Batch routes removed - use single-route optimization for each pickup location

2
app/core/__init__.py Normal file
View File

@@ -0,0 +1,2 @@
"""Core application components."""

63
app/core/arrow_utils.py Normal file
View File

@@ -0,0 +1,63 @@
"""
High-performance utilities using Apache Arrow and NumPy for geographic data.
Provides vectorized operations for distances and coordinate processing.
"""
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
import logging
from typing import List, Dict, Any, Tuple
logger = logging.getLogger(__name__)
def calculate_haversine_matrix_vectorized(lats: np.ndarray, lons: np.ndarray) -> np.ndarray:
    """Return the N x N pairwise great-circle distance matrix in kilometers.

    Uses the Haversine formula, fully vectorized via NumPy broadcasting:
    a column vector of radians broadcast against a row vector yields the
    full N x N grid of pairwise differences in one shot.
    """
    earth_radius_km = 6371.0

    # Work entirely in radians.
    phi = np.radians(lats)
    lam = np.radians(lons)

    # Column (N,1) minus row (1,N) broadcasts to an (N,N) delta grid.
    phi_col = phi.reshape(-1, 1)
    phi_row = phi.reshape(1, -1)
    delta_phi = phi_col - phi_row
    delta_lam = lam.reshape(-1, 1) - lam.reshape(1, -1)

    # Haversine: a = sin^2(dphi/2) + cos(phi1) * cos(phi2) * sin^2(dlam/2)
    half_chord = (
        np.sin(delta_phi / 2) ** 2
        + np.cos(phi_col) * np.cos(phi_row) * np.sin(delta_lam / 2) ** 2
    )
    central_angle = 2 * np.arctan2(np.sqrt(half_chord), np.sqrt(1 - half_chord))
    return earth_radius_km * central_angle
def orders_to_arrow_table(orders: List[Dict[str, Any]]) -> pa.Table:
    """
    Convert a list of order dictionaries to an Apache Arrow Table.
    This enables zero-copy operations and efficient columnar storage.

    Arrow infers the schema from the dict keys/values; all dicts should
    share the same keys for a clean columnar layout.
    """
    return pa.Table.from_pylist(orders)
def save_optimized_route_parquet(orders: List[Dict[str, Any]], filename: str):
    """
    Save optimized route data to a Parquet file for high-speed analysis.
    Useful for logging and historical simulation replays.

    Args:
        orders: Order dicts to persist (one row each).
        filename: Destination path for the Parquet file.

    Errors are logged, never raised — persistence is best-effort.
    """
    try:
        table = orders_to_arrow_table(orders)
        pq.write_table(table, filename)
        # Fix: log the actual destination path (previously printed a
        # literal placeholder instead of the filename).
        logger.info(f" Saved route data to Parquet: {filename}")
    except Exception as e:
        logger.error(f" Failed to save Parquet: {e}")
def load_route_parquet(filename: str) -> List[Dict[str, Any]]:
    """
    Load route data from a Parquet file and return as a list of dicts.

    Inverse of save_optimized_route_parquet(); raises if the file is
    missing or unreadable (no best-effort guard here, unlike the writer).
    """
    table = pq.read_table(filename)
    return table.to_pylist()

26
app/core/constants.py Normal file
View File

@@ -0,0 +1,26 @@
"""API constants and configuration."""
# API Configuration
API_VERSION = "2.0.0"
API_TITLE = "Route Optimization API"
API_DESCRIPTION = "Professional API for delivery route optimization"
# Route Optimization Limits
MAX_DELIVERIES = 50
MIN_DELIVERIES = 1
# Coordinate Validation
MIN_LATITUDE = -90
MAX_LATITUDE = 90
MIN_LONGITUDE = -180
MAX_LONGITUDE = 180
# Algorithm Types
ALGORITHM_GREEDY = "greedy"
ALGORITHM_TSP = "tsp"
# Response Messages
MESSAGE_SUCCESS = "Route optimized successfully"
MESSAGE_VALIDATION_ERROR = "Request validation failed"
MESSAGE_INTERNAL_ERROR = "An unexpected error occurred"

View File

@@ -0,0 +1,112 @@
"""Professional exception handlers for the API."""
import logging
from fastapi import Request, status
from fastapi.responses import JSONResponse
from fastapi.exceptions import RequestValidationError
from starlette.exceptions import HTTPException as StarletteHTTPException
from app.core.exceptions import APIException
from app.models.errors import ErrorResponse, ErrorDetail
logger = logging.getLogger(__name__)
async def api_exception_handler(request: Request, exc: APIException) -> JSONResponse:
    """Translate a custom APIException into the standard error envelope."""
    rid = getattr(request.state, "request_id", None)
    detail = ErrorDetail(
        field=exc.field,
        message=exc.message,
        code=exc.code,
    )
    envelope = ErrorResponse(
        success=False,
        error=detail,
        path=request.url.path,
        request_id=rid,
    )
    logger.warning(f"API Exception: {exc.code} - {exc.message} (Request ID: {rid})")
    # exclude_none keeps the payload minimal (no field/request_id keys when unset).
    return JSONResponse(
        status_code=exc.status_code,
        content=envelope.model_dump(exclude_none=True),
    )
async def http_exception_handler(request: Request, exc: StarletteHTTPException) -> JSONResponse:
    """Render framework HTTP exceptions using the shared error envelope."""
    rid = getattr(request.state, "request_id", None)
    body = ErrorResponse(
        success=False,
        error=ErrorDetail(message=exc.detail, code="HTTP_ERROR"),
        path=request.url.path,
        request_id=rid,
    ).model_dump(exclude_none=True)
    logger.warning(f"HTTP Exception: {exc.status_code} - {exc.detail} (Request ID: {rid})")
    # Preserve the original status code from the raised exception.
    return JSONResponse(status_code=exc.status_code, content=body)
async def validation_exception_handler(request: Request, exc: RequestValidationError) -> JSONResponse:
    """Report the first pydantic validation failure with its dotted field path."""
    rid = getattr(request.state, "request_id", None)

    # Only the first error is surfaced to keep responses compact.
    field, message = None, "Validation error"
    issues = exc.errors()
    if issues:
        head = issues[0]
        field = ".".join(str(part) for part in head.get("loc", []))
        message = head.get("msg", "Validation error")

    envelope = ErrorResponse(
        success=False,
        error=ErrorDetail(field=field, message=message, code="VALIDATION_ERROR"),
        path=request.url.path,
        request_id=rid,
    )
    logger.warning(f"Validation Error: {message} (Field: {field}, Request ID: {rid})")
    return JSONResponse(
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        content=envelope.model_dump(exclude_none=True),
    )
async def general_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Last-resort handler: log the traceback, return a generic 500 envelope."""
    rid = getattr(request.state, "request_id", None)
    body = ErrorResponse(
        success=False,
        error=ErrorDetail(
            message="An unexpected error occurred. Please try again later.",
            code="INTERNAL_SERVER_ERROR",
        ),
        path=request.url.path,
        request_id=rid,
    ).model_dump(exclude_none=True)
    # exc_info=True captures the full traceback for debugging;
    # the client only ever sees the generic message above.
    logger.error(f"Unexpected Error: {str(exc)} (Request ID: {rid})", exc_info=True)
    return JSONResponse(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        content=body,
    )

70
app/core/exceptions.py Normal file
View File

@@ -0,0 +1,70 @@
"""Custom exceptions for the API."""
from fastapi import HTTPException, status
class APIException(HTTPException):
    """Base API exception carrying a structured error payload.

    Attributes:
        message: Human-readable error description.
        field:   Optional name of the offending field.
        code:    Machine-readable error code; derived from the status
                 code when not supplied explicitly.
    """

    def __init__(
        self,
        status_code: int,
        message: str,
        field: str = None,
        code: str = None,
        detail: str = None
    ):
        self.message = message
        self.field = field
        self.code = code if code else self._get_default_code(status_code)
        # HTTPException wants a detail string; fall back to the message.
        super().__init__(status_code=status_code, detail=detail or message)

    def _get_default_code(self, status_code: int) -> str:
        """Map an HTTP status code to its conventional error-code string."""
        status_to_code = {
            400: "BAD_REQUEST",
            401: "UNAUTHORIZED",
            403: "FORBIDDEN",
            404: "NOT_FOUND",
            409: "CONFLICT",
            422: "VALIDATION_ERROR",
            429: "RATE_LIMIT_EXCEEDED",
            500: "INTERNAL_SERVER_ERROR",
            503: "SERVICE_UNAVAILABLE",
        }
        return status_to_code.get(status_code, "UNKNOWN_ERROR")
class ValidationError(APIException):
    """Raised when request payload validation fails (HTTP 422)."""

    def __init__(self, message: str, field: str = None):
        super().__init__(
            message=message,
            field=field,
            code="VALIDATION_ERROR",
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        )
class NotFoundError(APIException):
    """Raised when the requested resource does not exist (HTTP 404)."""

    def __init__(self, message: str = "Resource not found"):
        super().__init__(
            message=message,
            code="NOT_FOUND",
            status_code=status.HTTP_404_NOT_FOUND,
        )
class RateLimitError(APIException):
    """Raised when a client exceeds its request quota (HTTP 429)."""

    def __init__(self, message: str = "Rate limit exceeded"):
        super().__init__(
            message=message,
            code="RATE_LIMIT_EXCEEDED",
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
        )

263
app/main.py Normal file
View File

@@ -0,0 +1,263 @@
"""Professional FastAPI application for delivery route optimization."""
import logging
import os
import sys
import time
import threading
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, status
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.exceptions import RequestValidationError
from starlette.exceptions import HTTPException as StarletteHTTPException
from app.routes import optimization_router, health_router, cache_router, ml_router, ml_web_router
from app.middleware.request_id import RequestIDMiddleware
from app.core.exceptions import APIException
from app.core.exception_handlers import (
api_exception_handler,
http_exception_handler,
validation_exception_handler,
general_exception_handler
)
# Configure professional logging with env control
_log_level_name = os.getenv("LOG_LEVEL", "INFO").upper()
# Unknown LOG_LEVEL names silently fall back to INFO.
_log_level = getattr(logging, _log_level_name, logging.INFO)
logging.basicConfig(
    level=_log_level,
    format="%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

# Ensure root and key libraries honor desired level
logging.getLogger().setLevel(_log_level)
logging.getLogger("httpx").setLevel(_log_level)
logging.getLogger("uvicorn").setLevel(_log_level)
logging.getLogger("uvicorn.error").setLevel(_log_level)
logging.getLogger("uvicorn.access").setLevel(_log_level)

# --- Smart Post-Call ML Trainer ----------------------------------------------------------
#
# Trains in a BACKGROUND THREAD after every N /riderassign calls.
# - The API response is NEVER blocked - training is fully async.
# - Cooldown prevents overlapping runs (won't train if one is already running).
# - MIN_RECORDS guard: won't attempt if DB doesn't have enough data yet.
#
# Config:
#   TRAIN_EVERY_N_CALLS : retrain after this many calls (default: 10)
#   MIN_RECORDS_TO_TRAIN: minimum DB rows before first train (default: 30)
#   COOLDOWN_SECONDS    : min gap between two training runs (default: 120s)
# -------------------------------------------------------------------
TRAIN_EVERY_N_CALLS = int(os.getenv("ML_TRAIN_EVERY_N", "10"))
MIN_RECORDS_TO_TRAIN = int(os.getenv("ML_MIN_RECORDS", "30"))
COOLDOWN_SECONDS = int(os.getenv("ML_COOLDOWN_SEC", "120"))

# Mutable module state guarded by the locks below.
_call_counter = 0
_counter_lock = threading.Lock()    # protects _call_counter
_training_lock = threading.Lock()   # ensures only one training run at a time
_last_trained_at = 0.0  # epoch seconds
def _run_training_background():
    """
    The actual training job - runs in a daemon thread.
    Fully safe to call while the API is serving requests.

    Skips silently when another run holds the training lock or when the
    ML DB has fewer than MIN_RECORDS_TO_TRAIN rows. Updates the module
    _last_trained_at only on a successful ("ok") hypertuner run.
    """
    global _last_trained_at

    # Acquire lock - only ONE training run at a time
    if not _training_lock.acquire(blocking=False):
        logger.info("[MLTrigger] Training already running - skipping this trigger.")
        return

    try:
        # Imported lazily so app startup never pays the ML import cost.
        from app.services.ml.ml_hypertuner import get_hypertuner
        from app.services.ml.ml_data_collector import get_collector

        count = get_collector().count_records()
        if count < MIN_RECORDS_TO_TRAIN:
            logger.info(f"[MLTrigger] Only {count} records - need >={MIN_RECORDS_TO_TRAIN}. Skipping.")
            return

        logger.info(f"[MLTrigger] [ML] Background hypertuning started ({count} records)...")
        result = get_hypertuner().run(n_trials=100)
        if result.get("status") == "ok":
            _last_trained_at = time.time()
            logger.info(
                f"[MLTrigger] [OK] Hypertuning done - "
                f"quality={result.get('best_predicted_quality', '?')}/100 "
                f"| {result.get('training_rows', '?')} rows "
                f"| {result.get('trials_run', '?')} trials"
            )
        else:
            logger.info(f"[MLTrigger] Hypertuning skipped: {result.get('message', '')}")
    except Exception as e:
        logger.error(f"[MLTrigger] Background training error: {e}", exc_info=True)
    finally:
        _training_lock.release()
def trigger_training_if_due():
    """
    Called after every /riderassign call.
    Increments counter - fires background thread every TRAIN_EVERY_N_CALLS.
    Non-blocking: returns immediately regardless.
    """
    global _call_counter, _last_trained_at

    # Counter increment and the "due" decision happen under the lock;
    # the cooldown check below runs outside it (a near-simultaneous second
    # trigger is still caught by the training lock in the worker).
    with _counter_lock:
        _call_counter += 1
        should_train = (_call_counter % TRAIN_EVERY_N_CALLS == 0)

    if not should_train:
        return

    # Cooldown check - don't train if we just trained recently
    elapsed = time.time() - _last_trained_at
    if elapsed < COOLDOWN_SECONDS:
        logger.info(
            f"[MLTrigger] Cooldown active - "
            f"{int(COOLDOWN_SECONDS - elapsed)}s remaining. Skipping."
        )
        return

    # Fire background thread - does NOT block the API response
    t = threading.Thread(target=_run_training_background, daemon=True, name="ml-hypertuner")
    t.start()
    logger.info(f"[MLTrigger] [START] Background training thread launched (call #{_call_counter})")
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan events.

    Startup: if the ML DB already holds enough records, kick off a
    background hypertuning run immediately; otherwise wait for the
    per-call trigger. All ML failures here are non-fatal.
    """
    logger.info("[START] Starting Route Optimization API...")

    # -- On startup: if enough data exists, train immediately in background --
    try:
        from app.services.ml.ml_data_collector import get_collector
        count = get_collector().count_records()
        if count >= MIN_RECORDS_TO_TRAIN:
            logger.info(f"[Startup] {count} records found -> launching startup hypertuning...")
            t = threading.Thread(target=_run_training_background, daemon=True, name="ml-startup")
            t.start()
        else:
            logger.info(
                f"[Startup] {count}/{MIN_RECORDS_TO_TRAIN} records in ML DB - "
                f"will auto-train after every {TRAIN_EVERY_N_CALLS} /riderassign calls."
            )
    except Exception as e:
        logger.warning(f"[Startup] ML status check failed (non-fatal): {e}")

    logger.info(
        f"[OK] Application initialized - "
        f"ML trains every {TRAIN_EVERY_N_CALLS} calls "
        f"(cooldown {COOLDOWN_SECONDS}s, min {MIN_RECORDS_TO_TRAIN} records)"
    )
    # Application serves requests between startup (above) and shutdown (below).
    yield
    logger.info(" Shutting down Route Optimization API...")
# Create FastAPI application with professional configuration
app = FastAPI(
    title="Route Optimization API",
    version="2.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/api/v1/openapi.json",
    lifespan=lifespan
)

# Add Request ID middleware (must be first)
app.add_middleware(RequestIDMiddleware)

# Add CORS middleware
# NOTE(review): allow_origins=["*"] with allow_credentials=True is wide open -
# configure specific domains in production (see comment below).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure specific domains in production
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["*"],
    expose_headers=["X-Request-ID", "X-Process-Time"]
)

# Add GZIP compression (only for responses >= 1000 bytes)
app.add_middleware(GZipMiddleware, minimum_size=1000)

# Add request timing middleware
@app.middleware("http")
async def add_process_time_header(request: Request, call_next):
    """Add performance monitoring headers."""
    start_time = time.time()
    response = await call_next(request)
    # Wall-clock duration of the whole downstream chain, rounded to 0.1ms.
    process_time = time.time() - start_time
    response.headers["X-Process-Time"] = str(round(process_time, 4))
    response.headers["X-API-Version"] = "2.0.0"
    return response

# Register exception handlers (most specific first)
app.add_exception_handler(APIException, api_exception_handler)
app.add_exception_handler(StarletteHTTPException, http_exception_handler)
app.add_exception_handler(RequestValidationError, validation_exception_handler)
app.add_exception_handler(Exception, general_exception_handler)

# Include routers
app.include_router(optimization_router)
app.include_router(health_router)
app.include_router(cache_router)
app.include_router(ml_router)
app.include_router(ml_web_router)
@app.get("/", tags=["Root"])
async def root(request: Request):
"""
API root endpoint with service information.
Returns API metadata, available endpoints, and usage information.
"""
request_id = getattr(request.state, "request_id", None)
return {
"service": "Route Optimization API",
"version": "2.0.0",
"status": "operational",
"documentation": {
"swagger": "/docs",
"redoc": "/redoc",
"openapi": "/api/v1/openapi.json"
},
"endpoints": {
"createdeliveries": {
"url": "/api/v1/optimization/createdeliveries",
"method": "POST",
"description": "Accept provider array, optimize order, add step/previouskms/cumulativekms, forward upstream"
},
"health": {
"url": "/api/v1/health",
"method": "GET",
"description": "Health check endpoint"
}
},
"features": {
"algorithm": "Greedy Nearest-Neighbor",
"optimization": "Provider array reordering with distance metrics",
"added_fields": ["step", "previouskms", "cumulativekms", "actualkms"]
},
"request_id": request_id
}
if __name__ == "__main__":
import uvicorn
uvicorn.run("app.main:app", host="0.0.0.0", port=8002, reload=True)

View File

@@ -0,0 +1,2 @@
"""Middleware components."""

View File

@@ -0,0 +1,26 @@
"""Request ID middleware for request tracing."""
import uuid
from fastapi import Request
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import Response
class RequestIDMiddleware(BaseHTTPMiddleware):
    """Attach a unique X-Request-ID to every request/response pair."""

    async def dispatch(self, request: Request, call_next):
        # Honour a caller-supplied ID; otherwise mint a fresh UUID4.
        incoming = request.headers.get("X-Request-ID")
        request_id = incoming if incoming else str(uuid.uuid4())

        # Expose it to downstream handlers via request.state ...
        request.state.request_id = request_id
        response = await call_next(request)

        # ... and echo it back so clients can correlate logs.
        response.headers["X-Request-ID"] = request_id
        return response

21
app/models/__init__.py Normal file
View File

@@ -0,0 +1,21 @@
"""Models package."""
from .schemas import (
Location,
Delivery,
RouteOptimizationRequest,
RouteStep,
OptimizedRoute,
PickupLocation,
DeliveryLocation
)
__all__ = [
"Location",
"Delivery",
"RouteOptimizationRequest",
"RouteStep",
"OptimizedRoute",
"PickupLocation",
"DeliveryLocation"
]

45
app/models/errors.py Normal file
View File

@@ -0,0 +1,45 @@
"""Professional error response models for API."""
from typing import Optional, Any, Dict
from pydantic import BaseModel, Field
from datetime import datetime
class ErrorDetail(BaseModel):
    """Detailed error information (nested inside ErrorResponse.error)."""
    field: Optional[str] = Field(None, description="Field name that caused the error")
    message: str = Field(..., description="Error message")
    code: Optional[str] = Field(None, description="Error code")
class ErrorResponse(BaseModel):
    """Standardized error response model.

    Emitted by every exception handler; optional fields are dropped from
    the payload via model_dump(exclude_none=True) at the call sites.
    """
    success: bool = Field(False, description="Request success status")
    error: ErrorDetail = Field(..., description="Error details")
    # NOTE(review): datetime.utcnow() is naive UTC (no offset suffix) and
    # deprecated in Python 3.12 — consider datetime.now(timezone.utc);
    # confirm consumers before changing the timestamp format.
    timestamp: str = Field(default_factory=lambda: datetime.utcnow().isoformat(), description="Error timestamp")
    path: Optional[str] = Field(None, description="Request path")
    request_id: Optional[str] = Field(None, description="Request ID for tracing")

    class Config:
        # Example rendered in the OpenAPI schema.
        json_schema_extra = {
            "example": {
                "success": False,
                "error": {
                    "field": "pickup_location",
                    "message": "Pickup location is required",
                    "code": "VALIDATION_ERROR"
                },
                "timestamp": "2024-01-15T10:30:00.000Z",
                "path": "/api/v1/optimization/single-route",
                "request_id": "req-123456"
            }
        }
class SuccessResponse(BaseModel):
    """Standardized success response wrapper (mirror of ErrorResponse)."""
    success: bool = Field(True, description="Request success status")
    data: Any = Field(..., description="Response data")
    timestamp: str = Field(default_factory=lambda: datetime.utcnow().isoformat(), description="Response timestamp")
    request_id: Optional[str] = Field(None, description="Request ID for tracing")

167
app/models/schemas.py Normal file
View File

@@ -0,0 +1,167 @@
"""Professional Pydantic models for request/response validation."""
from typing import List, Optional
from pydantic import BaseModel, Field, field_validator
from datetime import datetime
class Location(BaseModel):
    """Generic location model with latitude and longitude.

    NOTE(review): unlike PickupLocation/DeliveryLocation, this model has no
    ge/le range constraints on its coordinates — confirm whether that is
    intentional for its callers.
    """
    lat: float = Field(..., description="Latitude")
    lng: float = Field(..., description="Longitude")
class PickupLocation(BaseModel):
    """Pickup (warehouse/store) location with range-validated coordinates."""
    pickuplat: float = Field(
        ...,
        description="Pickup latitude",
        ge=-90,
        le=90,
        examples=[11.0050534]
    )
    pickuplon: float = Field(
        ...,
        description="Pickup longitude",
        ge=-180,
        le=180,
        examples=[76.9508991]
    )
    @field_validator("pickuplat", "pickuplon")
    @classmethod
    def validate_coordinates(cls, v):
        """Coerce coordinates to float.

        NOTE(review): the None check appears unreachable — Pydantic rejects
        None for a required float field before this validator runs.
        """
        if v is None:
            raise ValueError("Coordinate cannot be None")
        return float(v)
class DeliveryLocation(BaseModel):
    """Delivery destination with range-validated coordinates.

    Field names intentionally differ from PickupLocation (deliverylat /
    deliverylong) to match the provider payload schema.
    """
    deliverylat: float = Field(
        ...,
        description="Delivery latitude",
        ge=-90,
        le=90,
        examples=[11.0309723]
    )
    deliverylong: float = Field(
        ...,
        description="Delivery longitude",
        ge=-180,
        le=180,
        examples=[77.0004574]
    )
    @field_validator("deliverylat", "deliverylong")
    @classmethod
    def validate_coordinates(cls, v):
        """Coerce coordinates to float (None check mirrors PickupLocation)."""
        if v is None:
            raise ValueError("Coordinate cannot be None")
        return float(v)
class Delivery(BaseModel):
    """A single delivery order: id, customer, and destination coordinates."""
    deliveryid: str = Field(..., description="Unique delivery identifier")
    deliverycustomerid: int = Field(..., description="Customer ID for this delivery")
    location: DeliveryLocation = Field(..., description="Delivery location coordinates")
class RouteOptimizationRequest(BaseModel):
    """
    Request model for route optimization.

    Optimizes delivery routes starting from a pickup location (warehouse/store)
    to multiple delivery locations. Uses a greedy nearest-neighbor algorithm
    for fast, efficient route calculation.
    """
    pickup_location: PickupLocation = Field(
        ...,
        description="Pickup location (warehouse/store) coordinates - starting point for optimization"
    )
    pickup_location_id: Optional[int] = Field(
        None,
        description="Optional pickup location ID for tracking purposes"
    )
    # FIX: min_items/max_items are Pydantic v1 names (deprecated/removed in v2);
    # the file already uses v2 APIs (field_validator), so use the v2 names
    # min_length/max_length. Validation behavior (1-50 items) is unchanged.
    deliveries: List[Delivery] = Field(
        ...,
        min_length=1,
        max_length=50,
        description="List of delivery locations to optimize (1-50 deliveries supported)"
    )
    class Config:
        # OpenAPI example shown in the generated docs.
        json_schema_extra = {
            "example": {
                "pickup_location": {
                    "pickuplat": 11.0050534,
                    "pickuplon": 76.9508991
                },
                "pickup_location_id": 1,
                "deliveries": [
                    {
                        "deliveryid": "90465",
                        "deliverycustomerid": 1,
                        "location": {
                            "deliverylat": 11.0309723,
                            "deliverylong": 77.0004574
                        }
                    }
                ]
            }
        }
class RouteStep(BaseModel):
    """Single stop in the optimized route, with running distance totals."""
    # 1-based position in the optimized sequence.
    step_number: int = Field(..., description="Step number in the route")
    delivery_id: str = Field(..., description="Delivery ID for this step")
    delivery_customer_id: int = Field(..., description="Customer ID for this delivery")
    location: DeliveryLocation = Field(..., description="Delivery location coordinates")
    distance_from_previous_km: float = Field(..., description="Distance from previous step in kilometers")
    cumulative_distance_km: float = Field(..., description="Total distance traveled so far in kilometers")
class OptimizedRoute(BaseModel):
    """
    Optimized route response with step-by-step delivery sequence.

    Contains the optimized route starting from pickup location, with each step
    showing the delivery order, distance from the previous step, and the
    cumulative distance traveled.
    """
    route_id: str = Field(..., description="Unique route identifier (UUID)")
    pickup_location_id: Optional[int] = Field(None, description="Pickup location ID")
    pickup_location: PickupLocation = Field(..., description="Pickup location (warehouse/store) coordinates")
    total_distance_km: float = Field(
        ...,
        ge=0,
        description="Total route distance in kilometers",
        examples=[12.45]
    )
    total_deliveries: int = Field(
        ...,
        ge=1,
        description="Total number of deliveries in the route",
        examples=[5]
    )
    # Defaults to "greedy" — the only algorithm currently implemented.
    optimization_algorithm: str = Field(
        "greedy",
        description="Algorithm used for optimization",
        examples=["greedy"]
    )
    steps: List[RouteStep] = Field(
        ...,
        description="Ordered list of route steps (Step 1 = nearest from pickup, Step 2 = nearest from Step 1, etc.)"
    )
    # Naive UTC ISO timestamp; utcnow() is deprecated in Python 3.12+ — see ErrorResponse.
    created_at: str = Field(
        default_factory=lambda: datetime.utcnow().isoformat(),
        description="Route creation timestamp (ISO 8601)"
    )
# Batch optimization removed - no rider support needed
# Use single-route optimization for each pickup location

8
app/routes/__init__.py Normal file
View File

@@ -0,0 +1,8 @@
"""Routes package."""
from .optimization import router as optimization_router
from .health import router as health_router
from .cache import router as cache_router
from .ml_admin import router as ml_router, web_router as ml_web_router
__all__ = ["optimization_router", "health_router", "cache_router", "ml_router", "ml_web_router"]

79
app/routes/cache.py Normal file
View File

@@ -0,0 +1,79 @@
"""Cache management API endpoints."""
import logging
from fastapi import APIRouter, HTTPException
from typing import Dict, Any
from app.services import cache
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/cache", tags=["Cache Management"])
@router.get("/stats", response_model=Dict[str, Any])
async def get_cache_stats():
    """
    Return cache counters plus a derived hit rate.

    Response fields:
    - hits / misses / sets: raw counters from the cache layer
    - total_keys: current number of cached route keys
    - enabled: whether Redis cache is enabled
    - hit_rate: hits as a percentage of all lookups (0.0 when no lookups yet)
    """
    try:
        stats = cache.get_stats()
        hit_count = stats.get("hits", 0)
        lookups = hit_count + stats.get("misses", 0)
        # Guard against division by zero before any traffic has been served.
        stats["hit_rate"] = round(hit_count / lookups * 100, 2) if lookups > 0 else 0.0
        return stats
    except Exception as e:
        logger.error(f"Error getting cache stats: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/keys")
async def list_cache_keys(pattern: str = "routes:*"):
    """
    List cache keys matching a Redis glob pattern.

    - **pattern**: Redis key pattern (default: "routes:*")

    Returns the pattern, the total match count, and at most the first
    100 keys (truncated to bound response size).
    """
    try:
        keys = cache.get_keys(pattern)
        return {
            "pattern": pattern,
            "count": len(keys),
            "keys": keys[:100]  # Limit to first 100 for response size
        }
    except Exception as e:
        logger.error(f"Error listing cache keys: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.delete("/clear")
async def clear_cache(pattern: str = "routes:*"):
    """
    Delete cache keys matching a Redis glob pattern.

    - **pattern**: Redis key pattern to delete (default: "routes:*")

    [WARN] **Warning**: This will delete cached route optimizations!
    Returns the pattern and how many keys were removed.
    """
    try:
        deleted_count = cache.delete(pattern)
        logger.info(f"Cleared {deleted_count} cache keys matching pattern: {pattern}")
        return {
            "pattern": pattern,
            "deleted_count": deleted_count,
            "message": f"Cleared {deleted_count} cache keys"
        }
    except Exception as e:
        logger.error(f"Error clearing cache: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")

98
app/routes/health.py Normal file
View File

@@ -0,0 +1,98 @@
"""Professional health check endpoints."""
import time
import logging
import sys
from typing import Optional
from datetime import datetime
from fastapi import APIRouter, Request
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/health", tags=["Health"])
start_time = time.time()
class HealthResponse(BaseModel):
    """Health check response model returned by GET /api/v1/health/."""
    # "healthy" or "unhealthy" (see health_check below).
    status: str = Field(..., description="Service status")
    uptime_seconds: float = Field(..., description="Service uptime in seconds")
    version: str = Field("2.0.0", description="API version")
    timestamp: str = Field(..., description="Health check timestamp (ISO 8601)")
    request_id: Optional[str] = Field(None, description="Request ID for tracing")
@router.get("/", response_model=HealthResponse)
async def health_check(request: Request):
    """
    Health check endpoint.

    Returns the current health status of the API service including:
    - Service status (healthy/unhealthy)
    - Uptime in seconds (measured from module import time)
    - API version
    - Timestamp
    """
    try:
        # Uptime is relative to the module-level start_time captured at import.
        uptime = time.time() - start_time
        request_id = getattr(request.state, "request_id", None)
        return HealthResponse(
            status="healthy",
            uptime_seconds=round(uptime, 2),
            version="2.0.0",
            timestamp=datetime.utcnow().isoformat() + "Z",
            request_id=request_id
        )
    except Exception as e:
        # Still return 200 with status="unhealthy" rather than raising —
        # keeps the probe response shape stable for monitors.
        logger.error(f"Health check failed: {e}", exc_info=True)
        request_id = getattr(request.state, "request_id", None)
        return HealthResponse(
            status="unhealthy",
            uptime_seconds=0.0,
            version="2.0.0",
            timestamp=datetime.utcnow().isoformat() + "Z",
            request_id=request_id
        )
@router.get("/ready")
async def readiness_check(request: Request):
    """
    Readiness probe for load balancers.

    Reports "ready" when the service can accept requests; "not_ready" if the
    (currently empty) dependency checks raise. Always returns HTTP 200.
    """
    req_id = getattr(request.state, "request_id", None)
    try:
        # Check if critical services are available
        # Add your service health checks here
        probe_status = "ready"
    except Exception as e:
        logger.error(f"Readiness check failed: {e}")
        probe_status = "not_ready"
    return {
        "status": probe_status,
        "timestamp": datetime.utcnow().isoformat() + "Z",
        "request_id": req_id
    }
@router.get("/live")
async def liveness_check(request: Request):
    """
    Liveness probe for container orchestration.

    Unconditionally reports the process as alive (HTTP 200).
    """
    payload = {
        "status": "alive",
        "timestamp": datetime.utcnow().isoformat() + "Z",
        "request_id": getattr(request.state, "request_id", None)
    }
    return payload

286
app/routes/ml_admin.py Normal file
View File

@@ -0,0 +1,286 @@
"""
ML Admin API - rider-api
Endpoints:
GET /api/v1/ml/status - DB record count, quality trend, model info
GET /api/v1/ml/config - Current active hyperparameters (ML-tuned + defaults)
POST /api/v1/ml/train - Trigger hypertuning immediately
POST /api/v1/ml/reset - Reset config to factory defaults
GET /api/v1/ml/reports - List past tuning reports
"""
import logging
import os
import json
from fastapi import APIRouter, HTTPException, Body, Request
from fastapi.responses import FileResponse, PlainTextResponse
from typing import Optional
logger = logging.getLogger(__name__)
router = APIRouter(
prefix="/api/v1/ml",
tags=["ML Hypertuner"],
responses={
500: {"description": "Internal server error"}
}
)
web_router = APIRouter(
tags=["ML Monitor Web Dashboard"]
)
# -----------------------------------------------------------------------------
# GET /ml-ops
# -----------------------------------------------------------------------------
@web_router.get("/ml-ops", summary="Visual ML monitoring dashboard")
def ml_dashboard():
    """Serve the static HTML dashboard for visualizing ML progress.

    Resolves the template relative to the process working directory, so the
    app must be launched from the project root; 404s if the file is missing.
    """
    path = os.path.join(os.getcwd(), "app/templates/ml_dashboard.html")
    if not os.path.isfile(path):
        raise HTTPException(status_code=404, detail=f"Dashboard template not found at {path}")
    return FileResponse(path)
# -----------------------------------------------------------------------------
# GET /status
# -----------------------------------------------------------------------------
@router.get("/status", summary="ML system status & quality trend")
def ml_status():
    """
    Returns:
    - How many assignment events are logged
    - Recent quality score trend (avg / min / max over the last 50 calls)
    - Whether the model has been trained
    - Current hyperparameter source (ml_tuned vs defaults)

    Imports are deferred to call time to avoid loading the ML stack at
    module import.
    """
    from app.services.ml.ml_data_collector import get_collector
    from app.services.ml.ml_hypertuner import get_hypertuner
    try:
        collector = get_collector()
        tuner = get_hypertuner()
        record_count = collector.count_records()
        quality_trend = collector.get_recent_quality_trend(last_n=50)
        model_info = tuner.get_model_info()
        from app.services.ml.behavior_analyzer import get_analyzer
        b_analyzer = get_analyzer()
        from app.config.dynamic_config import get_config
        cfg = get_config()
        return {
            "status": "ok",
            "db_records": record_count,
            # 30 records is the training threshold (matches ml_train's default min_records).
            "ready_to_train": record_count >= 30,
            "quality_trend": quality_trend,
            "hourly_stats": collector.get_hourly_stats(),
            "quality_histogram": collector.get_quality_histogram(),
            "strategy_comparison": collector.get_strategy_comparison(),
            "zone_stats": collector.get_zone_stats(),
            # hasattr guard: older analyzer versions may lack get_info().
            "behavior": b_analyzer.get_info() if hasattr(b_analyzer, 'get_info') else {},
            "config": cfg.get_all(),
            "model": model_info,
            # Human-readable summary of the training pipeline state.
            "message": (
                f"Collecting data - need {max(0, 30 - record_count)} more records to train."
                if record_count < 30
                else "Ready to train! Call POST /api/v1/ml/train"
                if not model_info["model_trained"]
                else "Model trained and active."
            )
        }
    except Exception as e:
        logger.error(f"[ML API] Status failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
# -----------------------------------------------------------------------------
# GET /config
# -----------------------------------------------------------------------------
@router.get("/config", summary="Current active hyperparameter values")
def ml_config():
    """
    Returns every hyperparameter currently in use by the system.

    Values marked 'ml_tuned' were set by the ML model.
    Values marked 'default' are factory defaults (not yet tuned).
    """
    from app.config.dynamic_config import get_config, DEFAULTS
    try:
        cfg = get_config()
        all_values = cfg.get_all()
        # NOTE(review): reads the config object's private _cache to decide
        # provenance — consider exposing a public accessor on DynamicConfig.
        cached_keys = set(cfg._cache.keys())
        annotated = {}
        for k, v in all_values.items():
            annotated[k] = {
                "value": v,
                # A key present in the cache was written by the tuner/admin.
                "source": "ml_tuned" if k in cached_keys else "default",
            }
        return {
            "status": "ok",
            "hyperparameters": annotated,
            "total_params": len(annotated),
            "ml_tuned_count": sum(1 for x in annotated.values() if x["source"] == "ml_tuned"),
        }
    except Exception as e:
        logger.error(f"[ML API] Config fetch failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.patch("/config", summary="Update specific ML configuration defaults")
def ml_config_patch(payload: dict = Body(...)):
    """Apply JSON overrides to active parameters. e.g. \{ \"ml_strategy\": \"balanced\" \}

    The payload is passed through as-is; validation of keys/values is
    delegated to DynamicConfig.set_bulk.
    """
    from app.config.dynamic_config import get_config
    try:
        cfg = get_config()
        # Tag writes with source="ml_admin" so provenance is auditable.
        cfg.set_bulk(payload, source="ml_admin")
        return {"status": "ok"}
    except Exception as e:
        logger.error(f"[ML API] Config patch failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
# -----------------------------------------------------------------------------
# POST /train
# -----------------------------------------------------------------------------
@router.post("/train", summary="Trigger XGBoost training + Optuna hyperparameter search")
def ml_train(
    n_trials: int = Body(default=100, embed=True, ge=10, le=500,
                         description="Number of Optuna trials (10500)"),
    min_records: int = Body(default=30, embed=True, ge=10,
                            description="Minimum DB records required")
):
    """
    Runs the full hypertuning pipeline synchronously (blocks until done):
    1. Load logged assignment data from DB
    2. Train XGBoost surrogate model
    3. Run Optuna TPE search (n_trials trials)
    4. Write optimal params to DynamicConfig

    The AssignmentService picks up new params within 5 minutes (auto-reload).
    Returns whatever the hypertuner's run() reports.
    """
    from app.services.ml.ml_hypertuner import get_hypertuner
    try:
        logger.info(f"[ML API] Hypertuning triggered: n_trials={n_trials}, min_records={min_records}")
        tuner = get_hypertuner()
        result = tuner.run(n_trials=n_trials, min_training_records=min_records)
        return result
    except Exception as e:
        logger.error(f"[ML API] Training failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
# -----------------------------------------------------------------------------
# POST /reset
# -----------------------------------------------------------------------------
@router.post("/reset", summary="Reset all hyperparameters to factory defaults")
def ml_reset():
    """
    Wipe every ML-tuned config value and revert all parameters to the
    original hardcoded defaults. Use this if the model produced bad results.
    """
    from app.config.dynamic_config import get_config
    try:
        cfg = get_config()
        cfg.reset_to_defaults()
    except Exception as e:
        logger.error(f"[ML API] Reset failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
    return {
        "status": "ok",
        "message": "All hyperparameters reset to factory defaults.",
    }
# -----------------------------------------------------------------------------
# POST /strategy
# -----------------------------------------------------------------------------
@router.post("/strategy", summary="Change the AI Optimization Prompt/Strategy")
def ml_strategy(strategy: str = Body(default="balanced", embed=True)):
    """
    Changes the mathematical objective of the AI.

    Choices: 'balanced', 'fuel_saver', 'aggressive_speed', 'zone_strict'.
    Historical data is NOT wiped. Instead, the AI dynamically recalculates
    the quality score of all past events using the new strategy rules.

    Raises 400 for an unknown strategy name.
    """
    from app.config.dynamic_config import get_config
    # FIX: removed unused `import sqlite3` — nothing in this handler touches
    # the database directly; the config layer handles persistence.
    valid = ["balanced", "fuel_saver", "aggressive_speed", "zone_strict"]
    if strategy not in valid:
        raise HTTPException(400, f"Invalid strategy. Choose from {valid}")
    try:
        get_config().set("ml_strategy", strategy)
        return {
            "status": "ok",
            "message": f"Strategy changed to '{strategy}'. Historical AI data will be mathematically repurposed to train towards this new goal.",
            "strategy": strategy
        }
    except Exception as e:
        logger.error(f"[ML API] Strategy change failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
# -----------------------------------------------------------------------------
# GET /reports
# -----------------------------------------------------------------------------
@router.get("/reports", summary="List past hypertuning reports")
def ml_reports():
    """Return the last 10 tuning reports (JSON files in ml_data/reports/).

    Files are listed newest-first by filename; unreadable/corrupt files are
    skipped with a logged warning rather than failing the whole request.
    """
    try:
        report_dir = "ml_data/reports"
        if not os.path.isdir(report_dir):
            return {"status": "ok", "reports": [], "message": "No reports yet."}
        files = sorted(
            [f for f in os.listdir(report_dir) if f.endswith(".json")],
            reverse=True
        )[:10]
        reports = []
        for fname in files:
            path = os.path.join(report_dir, fname)
            try:
                with open(path) as f:
                    reports.append(json.load(f))
            except Exception as exc:
                # FIX: was a silent `pass` — log which report was skipped so
                # corrupt/partial report files are visible in the logs.
                logger.warning(f"[ML API] Skipping unreadable report {fname}: {exc}")
        return {"status": "ok", "reports": reports, "count": len(reports)}
    except Exception as e:
        logger.error(f"[ML API] Reports fetch failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
# -----------------------------------------------------------------------------
# GET /export
# -----------------------------------------------------------------------------
@router.get("/export", summary="Export all records as CSV")
def ml_export():
    """Generate a CSV download of all rows in the assignment_ml_log table.

    The whole CSV is built in memory by the collector, then returned with a
    Content-Disposition header so browsers download it as ml_export.csv.
    """
    try:
        from app.services.ml.ml_data_collector import get_collector
        csv_data = get_collector().export_csv()
        response = PlainTextResponse(content=csv_data, media_type="text/csv")
        response.headers["Content-Disposition"] = 'attachment; filename="ml_export.csv"'
        return response
    except Exception as e:
        logger.error(f"[ML API] Export failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))

364
app/routes/optimization.py Normal file
View File

@@ -0,0 +1,364 @@
"""Provider payload optimization endpoints."""
import logging
import time
from fastapi import APIRouter, Request, Depends, status, HTTPException, Query
from app.controllers.route_controller import RouteController
from app.core.exceptions import APIException
from app.core.arrow_utils import save_optimized_route_parquet
import os
logger = logging.getLogger(__name__)
router = APIRouter(
prefix="/api/v1/optimization",
tags=["Route Optimization"],
responses={
400: {"description": "Bad request - Invalid input parameters"},
422: {"description": "Validation error - Request validation failed"},
500: {"description": "Internal server error"}
}
)
def get_route_controller() -> RouteController:
    """FastAPI dependency: provide a fresh RouteController per request."""
    return RouteController()
# Legacy single-route endpoint removed; provider flow only.
@router.post(
    "/createdeliveries",
    status_code=status.HTTP_200_OK,
    summary="Optimize provider payload (forwarding paused)",
    description="""
    Accepts the provider's orders array, reorders it using greedy nearest-neighbor, adds only:
    - step (1..N)
    - previouskms (distance from previous stop in km)
    - cumulativekms (total distance so far in km)
    - actualkms (direct pickup-to-delivery distance)
    Forwarding is temporarily paused: returns the optimized array in the response.
    """,
    responses={
        200: {
            "description": "Upstream response",
            "content": {
                "application/json": {
                    "example": {"code": 200, "details": [], "message": "Success", "status": True}
                }
            }
        }
    }
)
async def provider_optimize_forward(
    body: list[dict],
    controller: RouteController = Depends(get_route_controller)
):
    """
    Accept provider JSON array, reorder by greedy nearest-neighbor, annotate each item with:
    - step (1..N)
    - previouskms (km from previous point)
    - cumulativekms (km so far)
    - actualkms (pickup to delivery distance)
    Then forward the optimized array to the external API and return only its response.
    Snapshot failures are logged and never fail the request.
    """
    try:
        url = "https://jupiter.nearle.app/live/api/v1/deliveries/createdeliveries"
        result = await controller.optimize_and_forward_provider_payload(body, url)
        # Performance Logging: Save a Parquet Snapshot (Async-friendly backup)
        # NOTE(review): this snapshots the raw request `body`, not the optimized
        # `result` — confirm save_optimized_route_parquet re-optimizes internally,
        # or whether the optimized output was intended here.
        try:
            os.makedirs("data/snapshots", exist_ok=True)
            snapshot_path = f"data/snapshots/route_{int(time.time())}.parquet"
            save_optimized_route_parquet(body, snapshot_path)
            logger.info(f"Apache Arrow: Snapshot saved to {snapshot_path}")
        except Exception as e:
            logger.warning(f"Could not save Arrow snapshot: {e}")
        return result
    except APIException:
        # Domain errors carry their own status/detail — propagate untouched.
        raise
    except Exception as e:
        logger.error(f"Unexpected error in provider_optimize_forward: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get(
    "/createdeliveries",
    summary="Usage info for provider optimize forward"
)
async def provider_optimize_forward_info():
    """Return usage info; this endpoint accepts POST only for processing.

    Serves as a friendly hint for anyone probing the endpoint with GET.
    """
    return {
        "message": "Use POST with a JSON array of orders to optimize and forward.",
        "method": "POST",
        # FIX: the advertised path previously pointed at a non-existent route
        # ("/provider-optimize-forward"); the POST handler is registered at
        # /api/v1/optimization/createdeliveries (router prefix + route path).
        "path": "/api/v1/optimization/createdeliveries"
    }
@router.post(
    "/riderassign",
    status_code=status.HTTP_200_OK,
    summary="Assign created orders to active riders",
    description="""
    Assigns orders to riders based on kitchen preferences, proximity, and load.
    - If a payload of orders is provided, processes those.
    - If payload is empty, fetches all 'created' orders from the external API.
    - Fetches active riders and matches them.
    """,
    responses={
        200: {
            "description": "Assignment Result",
            "content": {
                "application/json": {
                    "example": {"code": 200, "details": {"1234": [{"orderid": "..."}]}, "message": "Success", "status": True}
                }
            }
        }
    }
)
async def assign_orders_to_riders(
    request: Request,
    body: list[dict] = None,
    # Three spellings of the same flag are accepted for caller typo tolerance.
    resuffle: bool = Query(False),
    reshuffle: bool = Query(False),
    rehuffle: bool = Query(False),
    hypertuning_params: str = None
):
    """
    Smart assignment of orders to riders.

    Pipeline: fetch riders + pricing in parallel -> resolve order source
    (request payload, else external 'created' orders) -> optional per-request
    ML strategy override -> AssignmentService matching -> per-rider route
    optimization (parallel) -> per-order ETA computation -> zone grouping.
    A ML retraining trigger fires in `finally` and never affects the response.
    """
    from app.services.rider.get_active_riders import fetch_active_riders, fetch_created_orders, fetch_rider_pricing
    from app.services.core.assignment_service import AssignmentService
    from app.services.routing.route_optimizer import RouteOptimizer
    from app.services.routing.realistic_eta_calculator import RealisticETACalculator
    from datetime import datetime, timedelta
    from dateutil.parser import parse as parse_date
    import asyncio
    eta_calculator = RealisticETACalculator()
    try:
        # Check if any variant is present in query params (flag-style) or explicitly true
        q_params = request.query_params
        do_reshuffle = any(k in q_params for k in ["reshuffle", "resuffle", "rehuffle"]) or \
            resuffle or reshuffle or rehuffle
        # 1. Fetch Riders and Pricing (concurrently — both are network calls)
        riders_task = fetch_active_riders()
        pricing_task = fetch_rider_pricing()
        riders, pricing = await asyncio.gather(riders_task, pricing_task)
        # Determine pricing (Default: 30 base + 2.5/km)
        fuel_charge = 2.5
        base_pay = 30.0
        if pricing:
            # Only shift 1 pricing is consulted; other shifts fall back to defaults.
            shift_1 = next((p for p in pricing if p.get("shiftid") == 1), None)
            if shift_1:
                fuel_charge = float(shift_1.get("fuelcharge", 2.5))
                base_pay = float(shift_1.get("basepay") or shift_1.get("base_pay") or 30.0)
        # 2. Determine Orders Source
        orders = body
        if not orders:
            logger.info("No payload provided, fetching created orders from external API.")
            orders = await fetch_created_orders()
        else:
            logger.info(f"Processing {len(orders)} orders from payload.")
        if not orders:
            # Nothing to assign — still a success response for idempotent polling.
            return {
                "code": 200,
                "details": {},
                "message": "No orders found to assign.",
                "status": True,
                "meta": {
                    "active_riders_count": len(riders)
                }
            }
        # 3. Run Assignment (AssignmentService)
        # -- Per-request strategy override --
        from app.config.dynamic_config import get_config
        _cfg = get_config()
        _original_strategy = None
        valid_strategies = ["balanced", "fuel_saver", "aggressive_speed", "zone_strict"]
        if hypertuning_params and hypertuning_params in valid_strategies:
            _original_strategy = _cfg.get("ml_strategy", "balanced")
            # NOTE(review): writes directly to the config's private _cache so the
            # override is process-local and temporary; not safe under concurrent
            # requests mutating the same key — confirm acceptable.
            _cfg._cache["ml_strategy"] = hypertuning_params
            logger.info(f"[HYPERTUNE] Per-request strategy override: {hypertuning_params}")
        service = AssignmentService()
        assignments, unassigned_orders = await service.assign_orders(
            riders=riders,
            orders=orders,
            fuel_charge=fuel_charge,
            base_pay=base_pay,
            reshuffle=do_reshuffle
        )
        # Restore original strategy after this call
        if _original_strategy is not None:
            _cfg._cache["ml_strategy"] = _original_strategy
        if do_reshuffle:
            logger.info("[RESHUFFLE] Retry mode active - exploring alternative rider assignments.")
        # 4. Optimize Routes for Each Rider and Flatten Response
        optimizer = RouteOptimizer()
        flat_orders_list = []
        # Prepare tasks for parallel execution
        # We need to store context (rider_id) to map results back
        optimization_tasks = []
        task_contexts = []
        for rider_id, rider_orders in assignments.items():
            if not rider_orders:
                continue
            # Align with createdeliveries model: Always optimize from the Pickup/Kitchen location.
            # This prevents route reversal if the rider is on the "far" side of the deliveries.
            # The rider's current location (rlat/rlon) is ignored for sequence optimization
            # to ensure the logical flow (Kitchen -> Stop 1 -> Stop 2 -> Stop 3) is followed.
            start_coords = None
            # Add to task list
            optimization_tasks.append(
                optimizer.optimize_provider_payload(rider_orders, start_coords=start_coords)
            )
            task_contexts.append(rider_id)
        total_assigned = 0
        # Execute all optimizations in parallel
        # This dramatically reduces time from Sum(RiderTimes) to Max(RiderTime)
        if optimization_tasks:
            results = await asyncio.gather(*optimization_tasks)
            # Create a lookup for rider details
            rider_info_map = {}
            for r in riders:
                # Use string conversion for robust ID matching
                r_id = str(r.get("userid") or r.get("_id", ""))
                if r_id:
                    rider_info_map[r_id] = {
                        "name": r.get("username", ""),
                        "contactno": r.get("contactno", "")
                    }
            # Process results matching them back to riders
            # (zip order matches because tasks and contexts were appended in lockstep)
            for stored_rider_id, optimized_route in zip(task_contexts, results):
                r_id_str = str(stored_rider_id)
                r_info = rider_info_map.get(r_id_str, {})
                rider_name = r_info.get("name", "")
                rider_contact = r_info.get("contactno", "")
                # Calculate total distance for this rider
                total_rider_kms = 0
                if optimized_route:
                    # Usually the last order has the max cumulative kms if steps are 1..N
                    # NOTE(review): bare except below — any failure (e.g. bad
                    # cumulativekms value) silently falls back to summing per-order kms.
                    try:
                        total_rider_kms = max([float(o.get("cumulativekms", 0)) for o in optimized_route])
                    except:
                        total_rider_kms = sum([float(o.get("actualkms", o.get("kms", 0))) for o in optimized_route])
                for order in optimized_route:
                    # Mutates each order dict in place with rider attribution fields.
                    order["userid"] = stored_rider_id
                    order["username"] = rider_name
                    # Populate the specific fields requested by the user
                    order["rider"] = rider_name
                    order["ridercontactno"] = rider_contact
                    order["riderkms"] = str(round(total_rider_kms, 2))
                    # --- DYNAMIC ETA COMPUTATION -----------------------------
                    # Try various cases and names for pickup slot
                    pickup_slot_str = (
                        order.get("pickupSlot") or
                        order.get("pickupslot") or
                        order.get("pickup_slot") or
                        order.get("pickuptime")
                    )
                    if pickup_slot_str:
                        # Find the actual travel distance for THIS specific order
                        # cumulativekms represents distance from pickup to this delivery stop
                        dist_km = float(order.get("cumulativekms") or order.get("actualkms", order.get("kms", 0)))
                        step = int(order.get("step", 1))
                        order_type = order.get("ordertype", "Economy")
                        try:
                            # Robust date parsing (handles almost any format magically)
                            pickup_time = parse_date(str(pickup_slot_str))
                            eta_mins = eta_calculator.calculate_eta(
                                distance_km=dist_km,
                                is_first_order=(step == 1),
                                order_type=order_type,
                                time_of_day="normal"
                            )
                            expected_time = pickup_time + timedelta(minutes=eta_mins)
                            # Format output as requested: "2026-03-24 08:25 AM"
                            order["expectedDeliveryTime"] = expected_time.strftime("%Y-%m-%d %I:%M %p")
                            order["transitMinutes"] = eta_mins
                            order["calculationDistanceKm"] = round(dist_km, 2)
                        except Exception as e:
                            # ETA is best-effort; the order still ships without one.
                            logger.warning(f"Could not calculate ETA from pickupSlot '{pickup_slot_str}': {e}")
                    # ---------------------------------------------------------
                    flat_orders_list.append(order)
                total_assigned += len(optimized_route)
        # 5. Zone Processing
        from app.services.routing.zone_service import ZoneService
        zone_service = ZoneService()
        zone_data = zone_service.group_by_zones(flat_orders_list, unassigned_orders, fuel_charge=fuel_charge, base_pay=base_pay)
        zones_structure = zone_data["detailed_zones"]
        zone_analysis = zone_data["zone_analysis"]
        return {
            "code": 200,
            "zone_summary": zone_analysis,  # High-level zone metrics
            "zones": zones_structure,  # Detailed data
            "details": flat_orders_list,  # Flat list
            "message": "Success",
            "status": True,
            "meta": {
                "total_orders": len(orders),
                "utilized_riders": len([rid for rid, rl in assignments.items() if rl]),
                "active_riders_pool": len(riders),
                "assigned_orders": total_assigned,
                "unassigned_orders": len(unassigned_orders),
                "total_profit": round(sum(z["total_profit"] for z in zone_analysis), 2),
                "fuel_charge_base": fuel_charge,
                "unassigned_details": [
                    {
                        "orderid": o.get("orderid") or o.get("_id"),
                        "reason": o.get("unassigned_reason", "Unknown capacity/proximity issue")
                    } for o in unassigned_orders
                ],
                "distribution_summary": {rid: len(rl) for rid, rl in assignments.items() if rl},
                "resuffle_mode": do_reshuffle,
                "hypertuning_params": hypertuning_params or "default"
            }
        }
    except Exception as e:
        logger.error(f"Error in rider assignment: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error during assignment")
    finally:
        # -- Fire ML training trigger (non-blocking) -----------------------
        # Runs AFTER response is ready. Every 10th call kicks off a
        # background thread that retrains the model. API is never blocked.
        try:
            from app.main import trigger_training_if_due
            trigger_training_if_due()
        except Exception:
            pass  # Never crash the endpoint due to ML trigger

124
app/services/__init__.py Normal file
View File

@@ -0,0 +1,124 @@
"""Services package."""
from __future__ import annotations
import json
import os
import logging
from typing import Any, Optional, Dict
try:
import redis # type: ignore
except Exception: # pragma: no cover
redis = None # type: ignore
logger = logging.getLogger(__name__)
class RedisCache:
"""Lightweight Redis cache wrapper with graceful fallback."""
    def __init__(self, url_env: str = "REDIS_URL", default_ttl_seconds: Optional[int] = None) -> None:
        """Connect to Redis if configured; otherwise run as a disabled no-op cache.

        Args:
            url_env: Name of the env var holding the Redis URL.
            default_ttl_seconds: Default key TTL; falls back to the
                REDIS_CACHE_TTL_SECONDS env var, then 300s.
        """
        # Allow TTL to be configurable via env var (default 300s = 5 min, or 86400 = 24h)
        ttl_env = os.getenv("REDIS_CACHE_TTL_SECONDS")
        if default_ttl_seconds is None:
            default_ttl_seconds = int(ttl_env) if ttl_env else 300
        self.default_ttl_seconds = default_ttl_seconds
        self._enabled = False
        self._client = None
        # Counters surfaced via get_stats().
        self._stats = {"hits": 0, "misses": 0, "sets": 0}
        url = os.getenv(url_env)
        # Missing URL or missing redis package both degrade to disabled mode.
        if not url or redis is None:
            logger.warning("Redis not configured or client unavailable; caching disabled")
            return
        try:
            self._client = redis.Redis.from_url(url, decode_responses=True)
            # Fail fast: verify connectivity now rather than on first use.
            self._client.ping()
            self._enabled = True
            logger.info(f"Redis cache connected (TTL: {self.default_ttl_seconds}s)")
        except Exception as exc:
            logger.warning(f"Redis connection failed: {exc}; caching disabled")
            self._enabled = False
            self._client = None
    @property
    def enabled(self) -> bool:
        """True when a live Redis client is attached; gates every cache op."""
        return self._enabled and self._client is not None
def get_json(self, key: str) -> Optional[Any]:
if not self.enabled:
self._stats["misses"] += 1
return None
try:
raw = self._client.get(key) # type: ignore[union-attr]
if raw:
self._stats["hits"] += 1
return json.loads(raw)
else:
self._stats["misses"] += 1
return None
except Exception as exc:
logger.debug(f"Redis get_json error for key={key}: {exc}")
self._stats["misses"] += 1
return None
def set_json(self, key: str, value: Any, ttl_seconds: Optional[int] = None) -> None:
if not self.enabled:
return
try:
payload = json.dumps(value, default=lambda o: getattr(o, "model_dump", lambda: o)())
ttl = ttl_seconds if ttl_seconds is not None else self.default_ttl_seconds
# Use -1 for no expiration, otherwise use setex
if ttl > 0:
self._client.setex(key, ttl, payload) # type: ignore[union-attr]
else:
self._client.set(key, payload) # type: ignore[union-attr]
self._stats["sets"] += 1
except Exception as exc:
logger.debug(f"Redis set_json error for key={key}: {exc}")
def delete(self, pattern: str) -> int:
"""Delete keys matching pattern (e.g., 'routes:*'). Returns count deleted."""
if not self.enabled:
return 0
try:
keys = list(self._client.scan_iter(match=pattern)) # type: ignore[union-attr]
if keys:
return self._client.delete(*keys) # type: ignore[union-attr]
return 0
except Exception as exc:
logger.error(f"Redis delete error for pattern={pattern}: {exc}")
return 0
def get_stats(self) -> Dict[str, Any]:
"""Get cache statistics."""
stats = self._stats.copy()
if self.enabled:
try:
# Count cache keys
route_keys = list(self._client.scan_iter(match="routes:*")) # type: ignore[union-attr]
stats["total_keys"] = len(route_keys)
stats["enabled"] = True
except Exception:
stats["total_keys"] = 0
stats["enabled"] = True
else:
stats["total_keys"] = 0
stats["enabled"] = False
return stats
def get_keys(self, pattern: str = "routes:*") -> list[str]:
"""Get list of cache keys matching pattern."""
if not self.enabled:
return []
try:
return list(self._client.scan_iter(match=pattern)) # type: ignore[union-attr]
except Exception as exc:
logger.error(f"Redis get_keys error for pattern={pattern}: {exc}")
return []
# Singleton cache instance for app
cache = RedisCache()

View File

@@ -0,0 +1,515 @@
import logging
import random
import time
from math import radians, cos, sin, asin, sqrt
from typing import List, Dict, Any, Optional
from collections import defaultdict
from app.config.rider_preferences import RIDER_PREFERRED_KITCHENS
from app.services.routing.kalman_filter import smooth_rider_locations, smooth_order_coordinates
from app.config.dynamic_config import get_config
from app.services.ml.ml_data_collector import get_collector
logger = logging.getLogger(__name__)
class AssignmentService:
def __init__(self):
    """Create the service with static preferences and a live config handle."""
    # Handle to the DB-backed DynamicConfig; values are read lazily by
    # _load_config() at the start of every assignment run.
    self._cfg = get_config()
    # Static rider -> preferred-kitchen mapping from configuration.
    self.rider_preferences = RIDER_PREFERRED_KITCHENS
    # Mean Earth radius in kilometres, used by haversine().
    self.earth_radius_km = 6371
def _load_config(self):
"""Load ML-tuned hyperparams fresh on every assignment call."""
cfg = self._cfg
self.MAX_PICKUP_DISTANCE_KM = cfg.get("max_pickup_distance_km")
self.MAX_KITCHEN_DISTANCE_KM = cfg.get("max_kitchen_distance_km")
self.MAX_ORDERS_PER_RIDER = int(cfg.get("max_orders_per_rider"))
self.IDEAL_LOAD = int(cfg.get("ideal_load"))
self.WORKLOAD_BALANCE_THRESHOLD = cfg.get("workload_balance_threshold")
self.WORKLOAD_PENALTY_WEIGHT = cfg.get("workload_penalty_weight")
self.DISTANCE_PENALTY_WEIGHT = cfg.get("distance_penalty_weight")
self.PREFERENCE_BONUS = cfg.get("preference_bonus")
self.HOME_ZONE_BONUS_4KM = cfg.get("home_zone_bonus_4km")
self.HOME_ZONE_BONUS_2KM = cfg.get("home_zone_bonus_2km")
self.EMERGENCY_LOAD_PENALTY = cfg.get("emergency_load_penalty")
def haversine(self, lat1, lon1, lat2, lon2):
"""Calculate the great circle distance between two points."""
lon1, lat1, lon2, lat2 = map(radians, [float(lon1), float(lat1), float(lon2), float(lat2)])
dlon = lon2 - lon1
dlat = lat2 - lat1
a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
c = 2 * asin(min(1.0, sqrt(a))) # Clamp to 1.0 to avoid domain errors
return c * self.earth_radius_km
def get_lat_lon(self, obj: Dict[str, Any], prefix: str = "") -> tuple[float, float]:
"""Generic helper to extract lat/lon from diversely named keys."""
# Try specific prefixes first
candidates = [
(f"{prefix}lat", f"{prefix}lon"),
(f"{prefix}lat", f"{prefix}long"),
(f"{prefix}latitude", f"{prefix}longitude"),
]
# Also try standard keys if prefix fails
candidates.extend([
("lat", "lon"), ("latitude", "longitude"),
("pickuplat", "pickuplon"), ("pickuplat", "pickuplong"),
("deliverylat", "deliverylong"), ("droplat", "droplon")
])
for lat_key, lon_key in candidates:
if lat_key in obj and lon_key in obj and obj[lat_key] and obj[lon_key]:
try:
return float(obj[lat_key]), float(obj[lon_key])
except: pass
# Special case: nested 'pickup_location'
if "pickup_location" in obj:
return self.get_lat_lon(obj["pickup_location"])
return 0.0, 0.0
def get_order_kitchen(self, order: Dict[str, Any]) -> str:
possible_keys = ['storename', 'restaurantname', 'kitchenname', 'partnername', 'store_name']
for key in possible_keys:
if key in order and order[key]:
return str(order[key]).strip()
return "Unknown"
def assign_orders(self, orders: List[Dict[str, Any]], riders: List[Dict[str, Any]], reshuffle: bool = False) -> tuple[Dict[int, List[Dict[str, Any]]], List[Dict[str, Any]]]:
    """
    ENHANCED: Cluster-Based Load-Balanced Assignment.
    Strategy:
    1. Cluster orders by kitchen proximity
    2. Calculate rider workload (current capacity usage)
    3. Assign clusters to best-fit riders (proximity + workload balance)
    4. Rebalance if needed
    If reshuffle=True, controlled randomness is injected into rider scoring
    so that retrying the same input can explore alternative assignments.

    Returns:
        (assignments, unassigned_orders) where assignments maps rider id ->
        list of order dicts, and every order left unassigned carries an
        'unassigned_reason' annotation.
    """
    # Local imports avoid circular dependencies between service modules.
    # NOTE(review): RiderHistoryService is imported here but only used via
    # its own import inside _post_process -- confirm it can be dropped.
    from app.services.rider.rider_history_service import RiderHistoryService
    from app.services.rider.rider_state_manager import RiderStateManager
    from app.services.routing.clustering_service import ClusteringService
    # -- Load ML-tuned hyperparameters (or defaults on first run) ------
    self._load_config()
    _call_start = time.time()
    # 0. Prep
    assignments: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
    unassigned_orders: List[Dict[str, Any]] = []
    rider_states = {}  # Track live load per rider id (lat/lon/kitchens/count)
    # 0a. KALMAN FILTER - Smooth rider GPS locations before scoring
    riders = smooth_rider_locations(list(riders))
    # 0b. KALMAN FILTER - Smooth order delivery coordinates before clustering
    orders = smooth_order_coordinates(list(orders))
    # 1. Parse and Filter Riders
    valid_riders = []
    BLOCKED_RIDERS = [1242, 1266, 1245, 1232, 1240, 1007]  # Test/Blocked IDs
    # Load Existing State (Persistence)
    state_mgr = RiderStateManager()
    for r in riders:
        # Robust ID Extraction
        rid_raw = r.get("userid") or r.get("riderid") or r.get("id") or r.get("_id")
        try:
            rid = int(rid_raw)
        except (ValueError, TypeError):
            continue
        if rid in BLOCKED_RIDERS: continue
        # Robust Status Check
        # Keep if: onduty (1, "1", True) OR status is active/idle/online
        is_onduty = str(r.get("onduty")) in ["1", "True"] or r.get("onduty") is True
        is_active = r.get("status") in ["active", "idle", "online"]
        if not (is_onduty or is_active):
            continue
        # Location
        lat, lon = self.get_lat_lon(r)
        # Fetch previous state to know if they are already busy
        p_state = state_mgr.get_rider_state(rid)
        # If rider has valid GPS, use it. If not, fallback to Last Drop or Home.
        if lat == 0 or lon == 0:
            if p_state['last_drop_lat']:
                lat, lon = p_state['last_drop_lat'], p_state['last_drop_lon']
            else:
                # Home Location Fallback
                from app.config.rider_preferences import RIDER_HOME_LOCATIONS
                lat, lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))
        valid_riders.append({
            "id": rid,
            "lat": lat,
            "lon": lon,
            "obj": r
        })
        # Initialize rider state with existing workload
        existing_load = p_state.get('minutes_remaining', 0) / 15  # Convert minutes to order estimate
        rider_states[rid] = {
            'lat': lat,
            'lon': lon,
            'kitchens': set(),
            'count': int(existing_load),  # Start with existing workload
            'workload_score': existing_load  # For prioritization
        }
    if not valid_riders:
        logger.warning("No riders passed on-duty filter. Retrying with all available riders as emergency rescue...")
        # If no on-duty riders, we take ANY rider provided by the API to ensure assignment
        for r in riders:
            # NOTE(review): this rescue path only reads 'userid' (not the
            # aliases used above) and int() on a bad value would raise here.
            rid = int(r.get("userid", 0))
            if rid in BLOCKED_RIDERS: continue
            lat, lon = self.get_lat_lon(r)
            if lat == 0 or lon == 0:
                from app.config.rider_preferences import RIDER_HOME_LOCATIONS
                lat, lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))
            if lat != 0:
                valid_riders.append({"id": rid, "lat": lat, "lon": lon, "obj": r})
                rider_states[rid] = {
                    'lat': lat, 'lon': lon, 'kitchens': set(),
                    'count': 0, 'workload_score': 0
                }
    if not valid_riders:
        logger.error("DANGER: Absolutely no riders available for assignment.")
        # Mark all as unassigned
        for o in orders:
            o["unassigned_reason"] = "No riders found (check partner online status)."
            unassigned_orders.append(o)
        return assignments, unassigned_orders
    logger.info(f"Found {len(valid_riders)} active riders")
    # 2. CLUSTER ORDERS BY KITCHEN PROXIMITY
    clustering_service = ClusteringService()
    clusters = clustering_service.cluster_orders_by_kitchen(orders, max_cluster_radius_km=self.MAX_KITCHEN_DISTANCE_KM)  # radius from ML
    logger.info(f"Created {len(clusters)} order clusters")
    # 3. ASSIGN CLUSTERS TO RIDERS (Load-Balanced)
    for cluster_idx, cluster in enumerate(clusters):
        centroid_lat, centroid_lon = cluster['centroid']
        cluster_orders = cluster['orders']
        cluster_size = len(cluster_orders)
        logger.info(f"Assigning cluster {cluster_idx+1}/{len(clusters)}: {cluster_size} orders at ({centroid_lat:.4f}, {centroid_lon:.4f})")
        # Find best riders for this cluster
        candidate_riders = []
        for r in valid_riders:
            rid = r["id"]
            r_state = rider_states[rid]
            # Calculate distance to cluster centroid
            dist = self.haversine(r_state['lat'], r_state['lon'], centroid_lat, centroid_lon)
            # Preference bonus & Distance Bypass
            prefs = self.rider_preferences.get(rid, [])
            has_preference = False
            for k_name in cluster['kitchen_names']:
                # Case-insensitive substring match in either direction.
                if any(p.lower() in k_name.lower() or k_name.lower() in p.lower() for p in prefs):
                    has_preference = True
                    break
            # Dynamic Limit: 6km default, 10km for preferred kitchens
            allowed_dist = self.MAX_PICKUP_DISTANCE_KM
            if has_preference:
                allowed_dist = max(allowed_dist, 10.0)
            # Skip if too far
            if dist > allowed_dist:
                continue
            # Calculate workload utilization (0.0 to 1.0)
            utilization = r_state['count'] / self.MAX_ORDERS_PER_RIDER
            # Calculate score (lower is better) - weights from DynamicConfig
            workload_penalty = utilization * self.WORKLOAD_PENALTY_WEIGHT
            distance_penalty = dist * self.DISTANCE_PENALTY_WEIGHT
            # Preference bonus (ML-tuned; presumably negative so it lowers the
            # score -- TODO confirm sign convention in DynamicConfig)
            preference_bonus = self.PREFERENCE_BONUS if has_preference else 0
            # Home zone bonus (ML-tuned)
            from app.config.rider_preferences import RIDER_HOME_LOCATIONS
            h_lat, h_lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))
            home_bonus = 0
            if h_lat != 0:
                home_dist = self.haversine(h_lat, h_lon, centroid_lat, centroid_lon)
                if home_dist <= 4.0:
                    home_bonus = self.HOME_ZONE_BONUS_4KM
                if home_dist <= 2.0:
                    home_bonus = self.HOME_ZONE_BONUS_2KM
            score = workload_penalty + distance_penalty + preference_bonus + home_bonus
            # RESHUFFLE: Add controlled noise so retries explore different riders
            if reshuffle:
                noise = random.uniform(-15.0, 15.0)
                score += noise
            candidate_riders.append({
                'id': rid,
                'score': score,
                'distance': dist,
                'utilization': utilization,
                'current_load': r_state['count']
            })
        if not candidate_riders:
            logger.warning(f"No riders available for cluster {cluster_idx+1}")
            for o in cluster_orders:
                o["unassigned_reason"] = f"No riders within {self.MAX_PICKUP_DISTANCE_KM}km radius of kitchen."
                unassigned_orders.append(o)
            continue
        # Sort by score (best first)
        candidate_riders.sort(key=lambda x: x['score'])
        # SMART DISTRIBUTION: Split cluster if needed
        remaining_orders = cluster_orders[:]
        while remaining_orders and candidate_riders:
            best_rider = candidate_riders[0]
            rid = best_rider['id']
            r_state = rider_states[rid]
            # How many orders can this rider take?
            available_capacity = self.MAX_ORDERS_PER_RIDER - r_state['count']
            if available_capacity <= 0:
                # Rider is full, remove from candidates
                candidate_riders.pop(0)
                continue
            # Decide batch size
            # If rider is underutilized and cluster is small, give all
            # If rider is busy or cluster is large, split it
            if best_rider['utilization'] < self.WORKLOAD_BALANCE_THRESHOLD:
                # Rider has capacity, can take more
                batch_size = min(available_capacity, len(remaining_orders))
            else:
                # Rider is getting busy, be conservative (IDEAL_LOAD from ML)
                batch_size = min(self.IDEAL_LOAD - r_state['count'], len(remaining_orders), available_capacity)
            batch_size = max(1, batch_size)  # At least 1 order
            # Assign batch
            batch = remaining_orders[:batch_size]
            remaining_orders = remaining_orders[batch_size:]
            assignments[rid].extend(batch)
            # Update rider state: rider is assumed to end up at the cluster centroid.
            r_state['count'] += len(batch)
            r_state['lat'] = centroid_lat
            r_state['lon'] = centroid_lon
            r_state['kitchens'].update(cluster['kitchen_names'])
            r_state['workload_score'] = r_state['count'] / self.MAX_ORDERS_PER_RIDER
            logger.info(f" -> Assigned {len(batch)} orders to Rider {rid} (load: {r_state['count']}/{self.MAX_ORDERS_PER_RIDER})")
            # Re-sort candidates by updated scores
            for candidate in candidate_riders:
                if candidate['id'] == rid:
                    candidate['utilization'] = r_state['count'] / self.MAX_ORDERS_PER_RIDER
                    candidate['current_load'] = r_state['count']
                    # Recalculate score
                    # NOTE(review): re-scoring uses hardcoded weights (100 / 2)
                    # instead of WORKLOAD_PENALTY_WEIGHT / DISTANCE_PENALTY_WEIGHT
                    # used above, and drops the preference/home bonuses --
                    # confirm whether this divergence is intended.
                    workload_penalty = candidate['utilization'] * 100
                    distance_penalty = candidate['distance'] * 2
                    candidate['score'] = workload_penalty + distance_penalty
            candidate_riders.sort(key=lambda x: x['score'])
        # If any orders left in the cluster after exhaustion of candidates
        if remaining_orders:
            # Instead of giving up, keep them in a pool for mandatory assignment
            unassigned_orders.extend(remaining_orders)
    # 4. EMERGENCY MANDATORY ASSIGNMENT (Ensures 0 unassigned if riders exist)
    if unassigned_orders and valid_riders:
        logger.info(f"[ALERT] Starting Emergency Mandatory Assignment for {len(unassigned_orders)} orders...")
        force_pool = unassigned_orders[:]
        unassigned_orders.clear()
        for o in force_pool:
            # Determine pickup location
            o_lat, o_lon = self.get_lat_lon(o, prefix="pickup")
            if o_lat == 0:
                o["unassigned_reason"] = "Could not geolocate order (0,0)."
                unassigned_orders.append(o)
                continue
            # Find the 'least bad' rider (Closest + Balanced Load)
            best_emergency_rider = None
            best_emergency_score = float('inf')
            for r in valid_riders:
                rid = r["id"]
                r_state = rider_states[rid]
                dist = self.haversine(r_state['lat'], r_state['lon'], o_lat, o_lon)
                # For emergency: Distance is important, but load prevents one rider taking EVERYTHING
                # Score = distance + ML-tuned penalty per existing order
                e_score = dist + (r_state['count'] * self.EMERGENCY_LOAD_PENALTY)
                if e_score < best_emergency_score:
                    best_emergency_score = e_score
                    best_emergency_rider = rid
            if best_emergency_rider:
                assignments[best_emergency_rider].append(o)
                rider_states[best_emergency_rider]['count'] += 1
                logger.info(f" Force-Assigned order {o.get('orderid')} to Rider {best_emergency_rider} (Score: {best_emergency_score:.2f})")
            else:
                unassigned_orders.append(o)
    # 5. FINAL REBALANCING (Optional)
    # Check if any rider is overloaded while others are idle
    self._rebalance_workload(assignments, rider_states, valid_riders)
    # 6. Commit State and History
    self._post_process(assignments, rider_states)
    # 7. -- ML DATA COLLECTION -----------------------------------------
    try:
        elapsed_ms = (time.time() - _call_start) * 1000
        get_collector().log_assignment_event(
            num_orders=len(orders),
            num_riders=len(riders),
            hyperparams=self._cfg.get_all(),
            assignments=assignments,
            unassigned_count=len(unassigned_orders),
            elapsed_ms=elapsed_ms,
        )
    except Exception as _ml_err:
        logger.debug(f"ML logging skipped: {_ml_err}")
    # Log final distribution
    logger.info("=" * 50)
    logger.info("FINAL ASSIGNMENT DISTRIBUTION:")
    # NOTE(review): this loop rebinds the 'orders' parameter; harmless here
    # because the parameter is not used again afterwards.
    for rid, orders in sorted(assignments.items()):
        logger.info(f" Rider {rid}: {len(orders)} orders")
    if unassigned_orders:
        logger.warning(f" [ALERT] STILL UNASSIGNED: {len(unassigned_orders)} (Reason: No riders online or invalid coords)")
    else:
        logger.info(" [OK] ALL ORDERS ASSIGNED SUCCESSFULLY")
    logger.info("=" * 50)
    return assignments, unassigned_orders
def _rebalance_workload(self, assignments: Dict[int, List], rider_states: Dict, valid_riders: List):
    """
    Rebalance if workload is heavily skewed.
    Move orders from overloaded riders to idle ones if possible.
    Mutates `assignments` and `rider_states` in place; returns None.
    """
    if not assignments:
        return
    # Calculate average load
    total_orders = sum(len(orders) for orders in assignments.values())
    avg_load = total_orders / len(valid_riders) if valid_riders else 0
    # Find overloaded and underutilized riders
    overloaded = []
    underutilized = []
    for r in valid_riders:
        rid = r['id']
        load = rider_states[rid]['count']
        if load > avg_load * 1.5 and load > self.IDEAL_LOAD:  # 50% above average
            overloaded.append(rid)
        elif load < avg_load * 0.5:  # 50% below average
            underutilized.append(rid)
    if not overloaded or not underutilized:
        return
    logger.info(f"Rebalancing: {len(overloaded)} overloaded, {len(underutilized)} underutilized riders")
    # Try to move orders from overloaded to underutilized
    for over_rid in overloaded:
        over_orders = assignments[over_rid]
        over_state = rider_states[over_rid]
        # Try to offload some orders
        for under_rid in underutilized:
            under_state = rider_states[under_rid]
            under_capacity = self.MAX_ORDERS_PER_RIDER - under_state['count']
            if under_capacity <= 0:
                continue
            # Find orders that are closer to underutilized rider
            transferable = []
            for order in over_orders:
                o_lat, o_lon = self.get_lat_lon(order, prefix="pickup")
                if o_lat == 0:
                    continue
                dist_to_under = self.haversine(under_state['lat'], under_state['lon'], o_lat, o_lon)
                dist_to_over = self.haversine(over_state['lat'], over_state['lon'], o_lat, o_lon)
                # Transfer if underutilized rider is closer or similar distance
                # (within 20% of the overloaded rider's distance).
                if dist_to_under <= self.MAX_PICKUP_DISTANCE_KM and dist_to_under <= dist_to_over * 1.2:
                    transferable.append(order)
            if transferable:
                # Transfer up to capacity; never drop the donor below IDEAL_LOAD.
                # NOTE(review): after earlier transfers this min() can reach 0,
                # yielding an empty batch and a "Rebalanced: 0 orders" log line.
                transfer_count = min(len(transferable), under_capacity, over_state['count'] - self.IDEAL_LOAD)
                transfer_batch = transferable[:transfer_count]
                # Move orders
                for order in transfer_batch:
                    over_orders.remove(order)
                    assignments[under_rid].append(order)
                # Update states
                over_state['count'] -= len(transfer_batch)
                under_state['count'] += len(transfer_batch)
                logger.info(f" Rebalanced: {len(transfer_batch)} orders from Rider {over_rid} -> {under_rid}")
def _post_process(self, assignments, rider_states):
    """Update History and Persistence.

    Records per-rider stats and writes each rider's projected state
    (remaining minutes, last drop position, active kitchens) to disk
    so the next assignment run starts from it.
    """
    from app.services.rider.rider_history_service import RiderHistoryService
    from app.services.rider.rider_state_manager import RiderStateManager
    history_service = RiderHistoryService()
    state_mgr = RiderStateManager()
    import time
    ts = time.time()
    for rid, orders in assignments.items():
        if not orders: continue
        # 5.0 appears to be a flat per-batch distance estimate (km?) --
        # TODO confirm against RiderHistoryService.update_rider_stats.
        history_service.update_rider_stats(rid, 5.0, len(orders))
        st = rider_states[rid]
        state_mgr.states[rid] = {
            # 15 minutes estimated per order.
            'minutes_remaining': len(orders) * 15,
            'last_drop_lat': st['lat'],
            'last_drop_lon': st['lon'],
            'active_kitchens': st['kitchens'],
            'last_updated_ts': ts
        }
    state_mgr._save_states()

View File

@@ -0,0 +1,311 @@
"""
Behavior Analyzer - Production Grade
======================================
Analyzes historical assignment data using the ID3 decision tree to classify
assignment outcomes as 'SUCCESS' or 'RISK'.
Key fixes and upgrades over the original
------------------------------------------
1. BUG FIX: distance_band now uses `total_distance_km` (not `num_orders`).
2. BUG FIX: time_band input is always normalized to uppercase before predict.
3. Rich feature set: distance_band, time_band, load_band, order_density_band.
4. Returns (label, confidence) from the classifier - exposes uncertainty.
5. Trend analysis: tracks rolling success rate over recent N windows.
6. Tree persistence: saves/loads trained tree as JSON to survive restarts.
7. Feature importance proxy: logs which features drove the split.
8. Thread-safe lazy training via a simple lock.
"""
import json
import logging
import os
import sqlite3
import threading
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple
from app.services.ml.id3_classifier import ID3Classifier, get_behavior_model
logger = logging.getLogger(__name__)
_DB_PATH = os.getenv("ML_DB_PATH", "ml_data/ml_store.db")
_TREE_PATH = os.getenv("ML_TREE_PATH", "ml_data/behavior_tree.json")
# ---------------------------------------------------------------------------
# Band encoders (discrete labels for ID3)
# ---------------------------------------------------------------------------
def distance_band(km: float) -> str:
    """Total route distance -> discrete band."""
    # Ascending upper bounds; first match wins.
    for upper_km, label in ((5.0, "SHORT"), (15.0, "MID"), (30.0, "LONG")):
        if km <= upper_km:
            return label
    return "VERY_LONG"
def time_band(ts_str: str) -> str:
    """ISO timestamp -> time-of-day band ('UNKNOWN' if unparseable)."""
    try:
        hour = datetime.fromisoformat(ts_str).hour
    except Exception:
        return "UNKNOWN"
    # Half-open [lo, hi) hour ranges; 23:00-05:59 falls through to LATE_NIGHT.
    day_bands = (
        (6, 10, "MORNING_RUSH"),
        (10, 12, "LATE_MORNING"),
        (12, 14, "LUNCH_RUSH"),
        (14, 17, "AFTERNOON"),
        (17, 20, "EVENING_RUSH"),
        (20, 23, "NIGHT"),
    )
    for lo, hi, label in day_bands:
        if lo <= hour < hi:
            return label
    return "LATE_NIGHT"
def load_band(avg_load: float) -> str:
    """Average orders-per-rider -> load band."""
    # Check from heaviest to lightest.
    if avg_load > 8.0:
        return "OVERLOADED"
    if avg_load > 5.0:
        return "HEAVY"
    if avg_load > 2.0:
        return "MODERATE"
    return "LIGHT"
def order_density_band(num_orders: int, num_riders: int) -> str:
    """Orders per available rider -> density band ('NO_RIDERS' if none)."""
    if num_riders == 0:
        return "NO_RIDERS"
    per_rider = num_orders / num_riders
    # Check from densest to sparsest.
    if per_rider > 9.0:
        return "OVERLOADED"
    if per_rider > 5.0:
        return "DENSE"
    if per_rider > 2.0:
        return "NORMAL"
    return "SPARSE"
# ---------------------------------------------------------------------------
# Behavior Analyzer
# ---------------------------------------------------------------------------
class BehaviorAnalyzer:
    """
    Trains an ID3 tree on historical assignment logs and predicts whether
    a new assignment context is likely to SUCCEED or be at RISK.
    Features used
    -------------
    - distance_band : total route distance bucket
    - time_band : time-of-day bucket
    - load_band : average load per rider bucket
    - order_density_band : orders-per-rider ratio bucket
    Target
    ------
    - is_success: "SUCCESS" if unassigned_count == 0, else "RISK"
    """
    TARGET = "is_success"
    FEATURES = ["distance_band", "time_band", "load_band", "order_density_band"]
    def __init__(self):
        # Paths are env-overridable (ML_DB_PATH / ML_TREE_PATH).
        self._db_path = _DB_PATH
        self._tree_path = _TREE_PATH
        self.model: ID3Classifier = get_behavior_model(max_depth=5)
        self.is_trained: bool = False
        # Serializes train_on_history() across threads.
        self._lock = threading.Lock()
        self._training_size: int = 0
        self._success_rate: float = 0.0
        self._rules: List[str] = []
        self._recent_trend: List[float] = []
        # Restore a previously persisted tree so predictions survive restarts.
        self._load_tree()
    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------
    def train_on_history(self, limit: int = 2000) -> Dict[str, Any]:
        """Fetch the most recent rows from SQLite and rebuild the tree.

        Returns a status dict: 'ok' with training metrics, or one of
        'insufficient_data' / 'preprocess_failed' / 'error'.
        """
        with self._lock:
            try:
                rows = self._fetch_rows(limit)
                # Refuse to fit a tree on fewer than 10 rows.
                if len(rows) < 10:
                    logger.warning(f"ID3 BehaviorAnalyzer: only {len(rows)} rows - need >=10.")
                    return {"status": "insufficient_data", "rows": len(rows)}
                training_data, successes = self._preprocess(rows)
                if not training_data:
                    return {"status": "preprocess_failed", "rows": len(rows)}
                self.model.train(
                    data=training_data,
                    target=self.TARGET,
                    features=self.FEATURES,
                )
                self.is_trained = True
                self._training_size = len(training_data)
                self._success_rate = successes / len(training_data)
                self._rules = self.model.get_tree_rules()
                self._compute_trend(rows)
                # Persist so a restart can skip retraining.
                self._save_tree()
                summary = {
                    "status": "ok",
                    "training_rows": self._training_size,
                    "success_rate": round(self._success_rate, 4),
                    "n_rules": len(self._rules),
                    "classes": self.model.classes,
                    "feature_values": self.model.feature_values,
                }
                logger.info(
                    f"ID3 BehaviorAnalyzer trained - rows={self._training_size}, "
                    f"success_rate={self._success_rate:.1%}, rules={len(self._rules)}"
                )
                return summary
            except Exception as e:
                logger.error(f"ID3 BehaviorAnalyzer training failed: {e}", exc_info=True)
                return {"status": "error", "message": str(e)}
    # ------------------------------------------------------------------
    # Prediction
    # ------------------------------------------------------------------
    def predict(self, distance_km: float, timestamp_or_band: str,
                avg_load: float = 4.0, num_orders: int = 5,
                num_riders: int = 2) -> Dict[str, Any]:
        """Predict whether an assignment context will SUCCEED or be at RISK.

        *timestamp_or_band* may be either an ISO timestamp or an
        already-computed time band name (matched case-insensitively).
        """
        # Untrained model: return an optimistic low-confidence default
        # rather than failing.
        if not self.is_trained:
            return {
                "label": "SUCCESS",
                "confidence": 0.5,
                "features_used": {},
                "model_trained": False,
            }
        KNOWN_BANDS = {
            "MORNING_RUSH", "LATE_MORNING", "LUNCH_RUSH",
            "AFTERNOON", "EVENING_RUSH", "NIGHT", "LATE_NIGHT", "UNKNOWN"
        }
        # Accept a band name directly (normalized to uppercase) or derive
        # the band from the timestamp string.
        t_band = (
            timestamp_or_band.upper()
            if timestamp_or_band.upper() in KNOWN_BANDS
            else time_band(timestamp_or_band)
        )
        features_used = {
            "distance_band": distance_band(distance_km),
            "time_band": t_band,
            "load_band": load_band(avg_load),
            "order_density_band": order_density_band(num_orders, num_riders),
        }
        label, confidence = self.model.predict(features_used)
        return {
            "label": label,
            "confidence": round(confidence, 4),
            "features_used": features_used,
            "model_trained": True,
        }
    # ------------------------------------------------------------------
    # Info / Diagnostics
    # ------------------------------------------------------------------
    def get_info(self) -> Dict[str, Any]:
        """Diagnostic snapshot for dashboards (rule list capped at 20)."""
        return {
            "is_trained": self.is_trained,
            "training_rows": self._training_size,
            "success_rate": round(self._success_rate, 4),
            "n_rules": len(self._rules),
            "rules": self._rules[:20],
            "recent_trend": self._recent_trend,
            "feature_names": self.FEATURES,
            "feature_values": self.model.feature_values if self.is_trained else {},
            "classes": self.model.classes if self.is_trained else [],
        }
    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _fetch_rows(self, limit: int) -> List[Dict]:
        """Return up to *limit* most recent assignment-log rows as dicts."""
        # NOTE(review): the connection is not closed if the query raises;
        # a try/finally or context manager would be safer.
        conn = sqlite3.connect(self._db_path)
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT * FROM assignment_ml_log ORDER BY id DESC LIMIT ?", (limit,)
        ).fetchall()
        conn.close()
        return [dict(r) for r in rows]
    def _preprocess(self, rows: List[Dict]) -> Tuple[List[Dict], int]:
        """Convert raw DB rows into banded training rows; count SUCCESS labels."""
        training_data: List[Dict] = []
        successes = 0
        for r in rows:
            try:
                dist_km = float(r.get("total_distance_km") or 0.0)
                ts = str(r.get("timestamp") or "")
                avg_ld = float(r.get("avg_load") or 0.0)
                n_orders = int(r.get("num_orders") or 0)
                n_riders = int(r.get("num_riders") or 1)
                unassigned = int(r.get("unassigned_count") or 0)
                # Binary target: SUCCESS only when nothing was left unassigned.
                label = "SUCCESS" if unassigned == 0 else "RISK"
                if label == "SUCCESS":
                    successes += 1
                training_data.append({
                    "distance_band": distance_band(dist_km),
                    "time_band": time_band(ts),
                    "load_band": load_band(avg_ld),
                    "order_density_band": order_density_band(n_orders, n_riders),
                    self.TARGET: label,
                })
            except Exception:
                # Skip malformed rows rather than aborting training.
                continue
        return training_data, successes
    def _compute_trend(self, rows: List[Dict], window: int = 50) -> None:
        """Rolling success rate per *window* rows; keeps the last 20 windows."""
        trend = []
        for i in range(0, len(rows), window):
            chunk = rows[i:i + window]
            if not chunk:
                break
            # NOTE(review): default of 1 counts rows missing unassigned_count
            # as failures -- confirm that is intended.
            rate = sum(1 for r in chunk if int(r.get("unassigned_count", 1)) == 0) / len(chunk)
            trend.append(round(rate, 4))
        self._recent_trend = trend[-20:]
    def _save_tree(self) -> None:
        """Persist the trained tree as JSON (best-effort; failure only warns)."""
        try:
            os.makedirs(os.path.dirname(self._tree_path) or ".", exist_ok=True)
            with open(self._tree_path, "w") as f:
                f.write(self.model.to_json())
            logger.info(f"ID3 tree persisted -> {self._tree_path}")
        except Exception as e:
            logger.warning(f"ID3 tree save failed: {e}")
    def _load_tree(self) -> None:
        """Restore a persisted tree if present; otherwise stay untrained."""
        try:
            if not os.path.exists(self._tree_path):
                return
            with open(self._tree_path) as f:
                self.model = ID3Classifier.from_json(f.read())
            self.is_trained = True
            self._rules = self.model.get_tree_rules()
            logger.info(f"ID3 tree restored - rules={len(self._rules)}")
        except Exception as e:
            logger.warning(f"ID3 tree load failed (will retrain): {e}")
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------
_analyzer: Optional[BehaviorAnalyzer] = None
_analyzer_lock = threading.Lock()
def get_analyzer() -> BehaviorAnalyzer:
    """Return the process-wide BehaviorAnalyzer, creating it lazily.

    Creation and the first training attempt are guarded by a lock so
    concurrent callers cannot build two analyzers.
    """
    global _analyzer
    with _analyzer_lock:
        if _analyzer is None:
            _analyzer = BehaviorAnalyzer()
            # If no persisted tree was restored, attempt an initial training
            # pass from history.
            # NOTE(review): source indentation was flattened; this nesting
            # (train once at creation) is the assumed reading -- confirm
            # against the original file.
            if not _analyzer.is_trained:
                _analyzer.train_on_history()
        return _analyzer

View File

@@ -0,0 +1,400 @@
"""
ID3 Classifier - Production Grade
Improvements over v1:
- Chi-squared pruning to prevent overfitting on sparse branches
- Confidence scores on every prediction (Laplace smoothed)
- Gain-ratio variant for high-cardinality features
- Serialization (to_dict / from_dict / to_json / from_json)
- Per-feature importance scores
- Full prediction audit trail via explain()
- min_samples_split and min_info_gain stopping criteria
"""
import math
import json
import logging
from collections import Counter
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
class ID3Classifier:
"""
ID3 decision tree (entropy / information-gain splitting).
All predict* methods work even if the model has never been trained -
they return safe defaults rather than raising.
"""
def __init__(
    self,
    max_depth: int = 6,
    min_samples_split: int = 5,
    min_info_gain: float = 0.001,
    use_gain_ratio: bool = False,
    chi2_pruning: bool = True,
):
    """Configure stopping and pruning criteria; the model starts untrained.

    Args:
        max_depth: Maximum tree depth before forcing a leaf.
        min_samples_split: Minimum samples required to split a node.
        min_info_gain: Minimum information gain required to split.
        use_gain_ratio: Use gain ratio instead of raw information gain
            (intended for high-cardinality features).
        chi2_pruning: Apply chi-squared pruning after building the tree.
    """
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_info_gain = min_info_gain
    self.use_gain_ratio = use_gain_ratio
    self.chi2_pruning = chi2_pruning
    # Learned state -- populated by train().
    self.tree: Any = None
    self.features: List[str] = []
    self.target: str = ""
    self.classes_: List[str] = []
    self.feature_importances_: Dict[str, float] = {}
    self.feature_values: Dict[str, List[str]] = {}  # unique values seen per feature
    self._n_samples: int = 0
    self._total_gain: Dict[str, float] = {}  # cumulative info gain per feature
# ------------------------------------------------------------------ train
def train(self, data: List[Dict[str, Any]], target: str, features: List[str]) -> None:
    """Build the decision tree from labelled rows.

    Args:
        data: List of dict rows; each maps feature name -> discrete value
            and contains the *target* key.
        target: Name of the label key in each row.
        features: Feature names considered for splitting.

    No-op (with a warning) when *data* is empty.
    """
    if not data:
        logger.warning("ID3: train() called with empty data.")
        return
    self.target = target
    self.features = list(features)
    self.classes_ = sorted({str(row.get(target)) for row in data})
    self._total_gain = {f: 0.0 for f in features}
    self._n_samples = len(data)
    # Collect unique values per feature for dashboard display
    self.feature_values = {
        f: sorted({str(row.get(f)) for row in data if row.get(f) is not None})
        for f in features
    }
    self.tree = self._build_tree(data, list(features), target, depth=0)
    if self.chi2_pruning:
        self.tree = self._prune(self.tree, data, target)
    # Normalize accumulated gain into relative importances; the `or 1.0`
    # guards against division by zero when no split produced gain.
    total_gain = sum(self._total_gain.values()) or 1.0
    self.feature_importances_ = {
        f: round(v / total_gain, 4) for f, v in self._total_gain.items()
    }
    logger.info(
        f"ID3: trained on {len(data)} samples | "
        f"classes={self.classes_} | importances={self.feature_importances_}"
    )
# ----------------------------------------------------------- predict API
def predict(self, sample: Dict[str, Any]) -> Tuple[str, float]:
"""Return (label, confidence 0-1). Safe to call before training."""
if self.tree is None:
return "Unknown", 0.0
label, proba = self._classify(self.tree, sample, [])
confidence = proba.get(str(label), 0.0) if isinstance(proba, dict) else 1.0
return str(label), round(confidence, 4)
def predict_proba(self, sample: Dict[str, Any]) -> Dict[str, float]:
"""Full class probability distribution."""
if self.tree is None:
return {}
_, proba = self._classify(self.tree, sample, [])
return proba if isinstance(proba, dict) else {str(proba): 1.0}
def explain(self, sample: Dict[str, Any]) -> Dict[str, Any]:
"""Human-readable decision path for audit / dashboard display."""
if self.tree is None:
return {"prediction": "Unknown", "confidence": 0.0, "decision_path": []}
path: List[str] = []
label, proba = self._classify(self.tree, sample, path)
return {
"prediction": str(label),
"confidence": round(proba.get(str(label), 1.0), 4),
"probabilities": proba,
"decision_path": path,
}
# ---------------------------------------------------------- serialisation
def to_dict(self) -> Dict[str, Any]:
    """Serialise the full model state (tree + metadata + hyperparams)
    into a plain, JSON-friendly dict."""
    hyperparams = {
        "max_depth": self.max_depth,
        "min_samples_split": self.min_samples_split,
        "min_info_gain": self.min_info_gain,
        "use_gain_ratio": self.use_gain_ratio,
        "chi2_pruning": self.chi2_pruning,
    }
    return {
        "tree": self.tree,
        "features": self.features,
        "target": self.target,
        "classes": self.classes_,
        "feature_importances": self.feature_importances_,
        "feature_values": self.feature_values,
        "n_samples": self._n_samples,
        "params": hyperparams,
    }
@classmethod
def from_dict(cls, d: Dict[str, Any]) -> "ID3Classifier":
    """Rebuild a classifier from a dict produced by to_dict()."""
    params = d.get("params", {})
    model = cls(
        max_depth=params.get("max_depth", 6),
        min_samples_split=params.get("min_samples_split", 5),
        min_info_gain=params.get("min_info_gain", 0.001),
        use_gain_ratio=params.get("use_gain_ratio", False),
        chi2_pruning=params.get("chi2_pruning", True),
    )
    model.tree = d["tree"]
    model.features = d["features"]
    model.target = d["target"]
    model.classes_ = d["classes"]
    model.feature_importances_ = d.get("feature_importances", {})
    model.feature_values = d.get("feature_values", {})
    model._n_samples = d.get("n_samples", 0)
    return model
def to_json(self) -> str:
    """Serialise the model state to a pretty-printed JSON string."""
    state = self.to_dict()
    return json.dumps(state, indent=2)
@classmethod
def from_json(cls, s: str) -> "ID3Classifier":
    """Inverse of to_json(): rebuild a classifier from its JSON string."""
    payload = json.loads(s)
    return cls.from_dict(payload)
def summary(self) -> Dict[str, Any]:
    """Compact model overview for dashboards / health endpoints."""
    info: Dict[str, Any] = {
        "n_samples": self._n_samples,
        "n_classes": len(self.classes_),
        "classes": self.classes_,
        "n_features": len(self.features),
        "feature_importances": self.feature_importances_,
        "feature_values": self.feature_values,
    }
    info["trained"] = self.tree is not None
    return info
@property
def classes(self) -> List[str]:
    """Read-only alias for ``classes_`` (scikit-learn-style naming compat)."""
    return self.classes_
def get_tree_rules(self) -> List[str]:
    """Extract human-readable if/then rules from the trained tree.

    Returns an empty list when the model has not been trained yet.
    """
    collected: List[str] = []
    if self.tree is not None:
        self._extract_rules(self.tree, [], collected)
    return collected
def _extract_rules(self, node: Any, conditions: List[str], rules: List[str]) -> None:
"""Recursively walk the tree and collect decision paths as strings."""
if not isinstance(node, dict):
return
if node.get("__leaf__"):
label = node.get("__label__", "?")
proba = node.get("__proba__", {})
conf = proba.get(str(label), 0.0)
prefix = " AND ".join(conditions) if conditions else "(root)"
rules.append(f"{prefix} => {label} ({conf:.0%})")
return
feature = node.get("__feature__", "?")
for val, child in node.get("__branches__", {}).items():
self._extract_rules(child, conditions + [f"{feature}={val}"], rules)
# --------------------------------------------------------- tree building
def _build_tree(
    self,
    data: List[Dict[str, Any]],
    features: List[str],
    target: str,
    depth: int,
) -> Any:
    """Recursively grow the ID3 tree over *data*.

    Returns either an internal node dict ({"__feature__", "__gain__",
    "__n__", "__branches__"}) or a leaf dict from _make_leaf().
    """
    counts = Counter(str(row.get(target)) for row in data)
    # Pure node
    if len(counts) == 1:
        return self._make_leaf(data, target)
    # Stopping criteria
    if not features or depth >= self.max_depth or len(data) < self.min_samples_split:
        return self._make_leaf(data, target)
    best_f, best_gain = self._best_split(data, features, target)
    if best_f is None or best_gain < self.min_info_gain:
        return self._make_leaf(data, target)
    # Accumulate gain per feature; train() later normalises this into
    # feature_importances_.
    self._total_gain[best_f] = self._total_gain.get(best_f, 0.0) + best_gain
    remaining = [f for f in features if f != best_f]
    node = {
        "__feature__": best_f,
        "__gain__": round(best_gain, 6),
        "__n__": len(data),
        "__branches__": {},
    }
    # One branch per observed raw value; keys are stringified so the tree
    # stays JSON-serialisable (matches the str() lookups in _classify).
    for val in {row.get(best_f) for row in data}:
        subset = [r for r in data if r.get(best_f) == val]
        node["__branches__"][str(val)] = self._build_tree(
            subset, remaining, target, depth + 1
        )
    return node
def _make_leaf(self, data: List[Dict[str, Any]], target: str) -> Dict[str, Any]:
counts = Counter(str(row.get(target)) for row in data)
total = len(data)
k = len(self.classes_) or 1
# Laplace smoothing
proba = {
cls: round((counts.get(cls, 0) + 1) / (total + k), 4)
for cls in self.classes_
}
label = max(proba, key=proba.get)
return {"__leaf__": True, "__label__": label, "__proba__": proba, "__n__": total}
# ---------------------------------------------------------- splitting
def _best_split(
    self, data: List[Dict[str, Any]], features: List[str], target: str
) -> Tuple[Optional[str], float]:
    """Pick the feature with the highest information gain (or C4.5 gain
    ratio when use_gain_ratio is set). Returns (feature, gain); feature is
    None when *features* is empty."""
    base_entropy = self._entropy(data, target)
    winner: Optional[str] = None
    winner_gain = -1.0
    for feat in features:
        gain = self._info_gain(data, feat, target, base_entropy)
        if self.use_gain_ratio:
            split_info = self._split_info(data, feat)
            gain = gain / split_info if split_info > 0 else 0.0
        if gain > winner_gain:
            winner, winner_gain = feat, gain
    return winner, winner_gain
# ----------------------------------------------------------- pruning
def _prune(self, node: Any, data: List[Dict[str, Any]], target: str) -> Any:
    """Bottom-up chi-squared pruning: collapse statistically weak splits
    into leaves. *data* must be the subset that reached this node."""
    if not isinstance(node, dict) or node.get("__leaf__"):
        return node
    feature = node["__feature__"]
    # Recurse children first
    for val in list(node["__branches__"].keys()):
        subset = [r for r in data if str(r.get(feature)) == str(val)]
        node["__branches__"][val] = self._prune(node["__branches__"][val], subset, target)
    # Chi-squared test: if split is not significant, collapse to leaf
    if not self._chi2_significant(data, feature, target):
        return self._make_leaf(data, target)
    return node
def _chi2_significant(
self, data: List[Dict[str, Any]], feature: str, target: str
) -> bool:
classes = self.classes_
feature_vals = list({str(r.get(feature)) for r in data})
if not classes or len(feature_vals) < 2:
return False
total = len(data)
class_totals = Counter(str(r.get(target)) for r in data)
chi2 = 0.0
for val in feature_vals:
subset = [r for r in data if str(r.get(feature)) == val]
n_val = len(subset)
val_counts = Counter(str(r.get(target)) for r in subset)
for cls in classes:
observed = val_counts.get(cls, 0)
expected = (n_val * class_totals.get(cls, 0)) / total
if expected > 0:
chi2 += (observed - expected) ** 2 / expected
df = (len(feature_vals) - 1) * (len(classes) - 1)
if df <= 0:
return False
# Critical values at p=0.05
crit_table = {1: 3.841, 2: 5.991, 3: 7.815, 4: 9.488, 5: 11.070, 6: 12.592}
crit = crit_table.get(df, 3.841 * df)
return chi2 > crit
# ---------------------------------------------------------- classify
def _classify(
    self, node: Any, row: Dict[str, Any], path: List[str]
) -> Tuple[Any, Any]:
    """Walk the tree for *row*, appending a human-readable trail to *path*.

    Returns (label, proba) where proba is a dict of class -> probability.
    Feature values never seen during training fall back to a weighted vote
    over the sibling leaf children (weighted by leaf sample counts).
    """
    if not isinstance(node, dict):
        # Degenerate/bare node: treat it as a certain prediction.
        return node, {str(node): 1.0}
    if node.get("__leaf__"):
        label = node["__label__"]
        proba = node["__proba__"]
        path.append(f"predict={label} (p={proba.get(label, 0):.2f})")
        return label, proba
    feature = node["__feature__"]
    value = str(row.get(feature, ""))
    path.append(f"{feature}={value}")
    branches = node["__branches__"]
    if value in branches:
        return self._classify(branches[value], row, path)
    # Unseen value: weighted vote from all leaf children
    all_proba: Counter = Counter()
    total_n = 0
    for child in branches.values():
        if isinstance(child, dict) and child.get("__leaf__"):
            n = child.get("__n__", 1)
            total_n += n
            for cls, p in child.get("__proba__", {}).items():
                all_proba[cls] += p * n
    if not total_n:
        # No leaf children to vote with (all branches are subtrees):
        # fall back to the first known class with full confidence.
        fallback = self.classes_[0] if self.classes_ else "Unknown"
        path.append(f"unseen fallback: {fallback}")
        return fallback, {fallback: 1.0}
    proba = {cls: round(v / total_n, 4) for cls, v in all_proba.items()}
    label = max(proba, key=proba.get)
    path.append(f"weighted vote: {label}")
    return label, proba
# ---------------------------------------------------------- entropy math
def _entropy(self, data: List[Dict[str, Any]], target: str) -> float:
if not data:
return 0.0
counts = Counter(str(row.get(target)) for row in data)
total = len(data)
return -sum((c / total) * math.log2(c / total) for c in counts.values() if c > 0)
def _info_gain(
    self,
    data: List[Dict[str, Any]],
    feature: str,
    target: str,
    base_entropy: Optional[float] = None,
) -> float:
    """Information gain of splitting *data* on *feature*.

    *base_entropy* may be supplied so callers looping over candidate
    features compute the parent entropy only once.
    """
    if base_entropy is None:
        base_entropy = self._entropy(data, target)
    total = len(data)
    partitions: Dict[Any, list] = {}
    for row in data:
        partitions.setdefault(row.get(feature), []).append(row)
    child_entropy = 0.0
    for subset in partitions.values():
        child_entropy += (len(subset) / total) * self._entropy(subset, target)
    return base_entropy - child_entropy
def _split_info(self, data: List[Dict[str, Any]], feature: str) -> float:
total = len(data)
counts = Counter(row.get(feature) for row in data)
return -sum((c / total) * math.log2(c / total) for c in counts.values() if c > 0)
# ------------------------------------------------------------------ factory
def get_behavior_model(
    max_depth: int = 5,
    min_samples_split: int = 8,
    min_info_gain: float = 0.005,
    use_gain_ratio: bool = True,
    chi2_pruning: bool = True,
) -> ID3Classifier:
    """Factory for an ID3Classifier with behaviour-analysis defaults.

    Compared with the class defaults (depth 6, split 5, gain 0.001, plain
    info gain) these settings are more conservative: shallower tree,
    larger split minimum, and C4.5 gain ratio enabled.
    """
    config = {
        "max_depth": max_depth,
        "min_samples_split": min_samples_split,
        "min_info_gain": min_info_gain,
        "use_gain_ratio": use_gain_ratio,
        "chi2_pruning": chi2_pruning,
    }
    return ID3Classifier(**config)

View File

@@ -0,0 +1,539 @@
"""
ML Data Collector - Production Grade
======================================
Logs every assignment call (inputs + outcomes) to SQLite.
Key upgrades over the original
--------------------------------
1. FROZEN historical scores - quality_score is written ONCE at log time.
get_training_data() returns scores as-is from the DB (no retroactive mutation).
2. Rich schema - zone_id, city_id, is_peak, weather_code,
sla_breached, avg_delivery_time_min for richer features.
3. SLA tracking - logs whether delivery SLA was breached.
4. Analytics API - get_hourly_stats(), get_strategy_comparison(),
get_quality_histogram(), get_zone_stats() for dashboard consumption.
5. Thread-safe writes - connection-per-write pattern for FastAPI workers.
6. Indexed columns - timestamp, ml_strategy, zone_id for fast queries.
"""
import csv
import io
import logging
import os
import sqlite3
import threading
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
_DB_PATH = os.getenv("ML_DB_PATH", "ml_data/ml_store.db")
_WRITE_LOCK = threading.Lock()
def _std(values: List[float]) -> float:
if len(values) < 2:
return 0.0
mean = sum(values) / len(values)
return (sum((v - mean) ** 2 for v in values) / len(values)) ** 0.5
class MLDataCollector:
    """
    Event logger for assignment service calls.

    Each log_assignment_event() call writes one row capturing:
    - Operating context (time, orders, riders, zone, city)
    - Active hyperparams (exact config snapshot for this call)
    - Measured outcomes (quality score, SLA, latency, distances)

    quality_score is computed once and FROZEN - never retroactively changed.

    Concurrency: writes are serialised through the module-level _WRITE_LOCK
    and every operation opens its own short-lived sqlite3 connection, so a
    single instance may be shared across workers in one process.
    """

    def __init__(self):
        # Schema is created/migrated eagerly so later writes never race on DDL.
        self._db_path = _DB_PATH
        self._ensure_db()

    # ------------------------------------------------------------------
    # Main logging API
    # ------------------------------------------------------------------
    def log_assignment_event(
        self,
        *,
        num_orders: int,
        num_riders: int,
        hyperparams: Dict[str, Any],
        assignments: Dict[int, List[Any]],
        unassigned_count: int,
        elapsed_ms: float,
        zone_id: str = "default",
        city_id: str = "default",
        weather_code: str = "CLEAR",
        sla_minutes: Optional[float] = None,
        avg_delivery_time_min: Optional[float] = None,
    ) -> None:
        """
        Log one assignment event.

        Call this at the END of AssignmentService.assign_orders() once
        outcomes are known. Never raises: failures are logged and swallowed
        so telemetry can never break the assignment path.
        """
        try:
            # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
            # datetime.now(timezone.utc) would change the stored isoformat
            # strings (adds an offset) - migrate deliberately, not silently.
            now = datetime.utcnow()
            hour = now.hour
            day_of_week = now.weekday()
            # Hours (on the UTC clock, per utcnow above) treated as peak.
            is_peak = int(hour in (7, 8, 9, 12, 13, 18, 19, 20))
            # Riders with at least one order; empty lists are excluded from
            # both the load stats and riders_used.
            rider_loads = [len(orders) for orders in assignments.values() if orders]
            riders_used = len(rider_loads)
            total_assigned = sum(rider_loads)
            avg_load = total_assigned / riders_used if riders_used else 0.0
            load_std = _std(rider_loads) if rider_loads else 0.0
            all_orders = [o for orders in assignments.values() if orders for o in orders]
            total_distance_km = sum(self._get_km(o) for o in all_orders)
            ml_strategy = hyperparams.get("ml_strategy", "balanced")
            max_opr = hyperparams.get("max_orders_per_rider", 12)
            sla_breached = 0
            if sla_minutes and avg_delivery_time_min:
                sla_breached = int(avg_delivery_time_min > sla_minutes)
            # Quality score - FROZEN at log time
            quality_score = self._compute_quality_score(
                num_orders=num_orders,
                unassigned_count=unassigned_count,
                load_std=load_std,
                riders_used=riders_used,
                num_riders=num_riders,
                total_distance_km=total_distance_km,
                max_orders_per_rider=max_opr,
                ml_strategy=ml_strategy,
            )
            # Column names must match the assignment_ml_log schema in
            # _ensure_db(); _insert() builds the INSERT from these keys.
            row = {
                "timestamp": now.isoformat(),
                "hour": hour,
                "day_of_week": day_of_week,
                "is_peak": is_peak,
                "zone_id": zone_id,
                "city_id": city_id,
                "weather_code": weather_code,
                "num_orders": num_orders,
                "num_riders": num_riders,
                "max_pickup_distance_km": hyperparams.get("max_pickup_distance_km", 10.0),
                "max_kitchen_distance_km": hyperparams.get("max_kitchen_distance_km", 3.0),
                "max_orders_per_rider": max_opr,
                "ideal_load": hyperparams.get("ideal_load", 6),
                "workload_balance_threshold": hyperparams.get("workload_balance_threshold", 0.7),
                "workload_penalty_weight": hyperparams.get("workload_penalty_weight", 100.0),
                "distance_penalty_weight": hyperparams.get("distance_penalty_weight", 2.0),
                "cluster_radius_km": hyperparams.get("cluster_radius_km", 3.0),
                "search_time_limit_seconds": hyperparams.get("search_time_limit_seconds", 5),
                "road_factor": hyperparams.get("road_factor", 1.3),
                "ml_strategy": ml_strategy,
                "riders_used": riders_used,
                "total_assigned": total_assigned,
                "unassigned_count": unassigned_count,
                "avg_load": round(avg_load, 3),
                "load_std": round(load_std, 3),
                "total_distance_km": round(total_distance_km, 2),
                "elapsed_ms": round(elapsed_ms, 1),
                "sla_breached": sla_breached,
                "avg_delivery_time_min": round(avg_delivery_time_min or 0.0, 2),
                "quality_score": round(quality_score, 2),
            }
            with _WRITE_LOCK:
                self._insert(row)
            logger.info(
                f"[MLCollector] zone={zone_id} orders={num_orders} "
                f"assigned={total_assigned} unassigned={unassigned_count} "
                f"quality={quality_score:.1f} elapsed={elapsed_ms:.0f}ms"
            )
        except Exception as e:
            logger.warning(f"[MLCollector] Logging failed (non-fatal): {e}")

    # ------------------------------------------------------------------
    # Data retrieval for training
    # ------------------------------------------------------------------
    def get_training_data(
        self,
        min_records: int = 30,
        strategy_filter: Optional[str] = None,
        since_hours: Optional[int] = None,
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Return logged rows for model training.

        quality_score is returned AS-IS (frozen at log time - no re-scoring).
        Returns None when fewer than min_records rows match or on any error.
        """
        try:
            # NOTE(review): throughout this class the connection is not closed
            # when a query raises (the except path skips conn.close()) -
            # consider contextlib.closing() or try/finally.
            conn = sqlite3.connect(self._db_path)
            conn.row_factory = sqlite3.Row
            query = "SELECT * FROM assignment_ml_log"
            params: list = []
            clauses: list = []
            if strategy_filter:
                clauses.append("ml_strategy = ?")
                params.append(strategy_filter)
            if since_hours:
                cutoff = (datetime.utcnow() - timedelta(hours=since_hours)).isoformat()
                clauses.append("timestamp >= ?")
                params.append(cutoff)
            if clauses:
                query += " WHERE " + " AND ".join(clauses)
            query += " ORDER BY id ASC"
            rows = conn.execute(query, params).fetchall()
            conn.close()
            if len(rows) < min_records:
                logger.info(f"[MLCollector] {len(rows)} records < {min_records} minimum.")
                return None
            return [dict(r) for r in rows]
        except Exception as e:
            logger.error(f"[MLCollector] get_training_data failed: {e}")
            return None

    # ------------------------------------------------------------------
    # Analytics API
    # ------------------------------------------------------------------
    def get_recent_quality_trend(self, last_n: int = 50) -> Dict[str, Any]:
        """Recent quality scores + series for sparkline charts."""
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute(
                "SELECT quality_score, timestamp, unassigned_count, elapsed_ms "
                "FROM assignment_ml_log ORDER BY id DESC LIMIT ?", (last_n,)
            ).fetchall()
            conn.close()
            if not rows:
                return {"avg_quality": 0.0, "sample_size": 0, "history": []}
            scores = [r[0] for r in rows]
            # Rows come back newest-first; series are reversed so charts
            # read oldest -> newest left to right.
            return {
                "avg_quality": round(sum(scores) / len(scores), 2),
                "min_quality": round(min(scores), 2),
                "max_quality": round(max(scores), 2),
                "sample_size": len(scores),
                "history": list(reversed(scores)),
                "timestamps": list(reversed([r[1] for r in rows])),
                "unassigned_series": list(reversed([r[2] for r in rows])),
                "latency_series": list(reversed([r[3] for r in rows])),
            }
        except Exception:
            return {"avg_quality": 0.0, "sample_size": 0, "history": []}

    def get_hourly_stats(self, last_days: int = 7) -> List[Dict[str, Any]]:
        """Quality, SLA, and call volume aggregated by hour-of-day."""
        try:
            conn = sqlite3.connect(self._db_path)
            cutoff = (datetime.utcnow() - timedelta(days=last_days)).isoformat()
            rows = conn.execute(
                """
                SELECT hour,
                       COUNT(*) AS call_count,
                       AVG(quality_score) AS avg_quality,
                       AVG(unassigned_count) AS avg_unassigned,
                       AVG(elapsed_ms) AS avg_latency_ms,
                       SUM(CASE WHEN sla_breached=1 THEN 1 ELSE 0 END) AS sla_breaches
                FROM assignment_ml_log WHERE timestamp >= ?
                GROUP BY hour ORDER BY hour
                """, (cutoff,)
            ).fetchall()
            conn.close()
            # AVG() over zero rows is NULL -> None; `or 0.0` normalises.
            return [
                {
                    "hour": r[0],
                    "call_count": r[1],
                    "avg_quality": round(r[2] or 0.0, 2),
                    "avg_unassigned": round(r[3] or 0.0, 2),
                    "avg_latency_ms": round(r[4] or 0.0, 1),
                    "sla_breaches": r[5],
                }
                for r in rows
            ]
        except Exception as e:
            logger.error(f"[MLCollector] get_hourly_stats: {e}")
            return []

    def get_strategy_comparison(self) -> List[Dict[str, Any]]:
        """Compare quality metrics across ml_strategy values."""
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute(
                """
                SELECT ml_strategy,
                       COUNT(*) AS call_count,
                       AVG(quality_score) AS avg_quality,
                       MIN(quality_score) AS min_quality,
                       MAX(quality_score) AS max_quality,
                       AVG(unassigned_count) AS avg_unassigned,
                       AVG(total_distance_km) AS avg_distance_km,
                       AVG(elapsed_ms) AS avg_latency_ms
                FROM assignment_ml_log
                GROUP BY ml_strategy ORDER BY avg_quality DESC
                """
            ).fetchall()
            conn.close()
            return [
                {
                    "strategy": r[0],
                    "call_count": r[1],
                    "avg_quality": round(r[2] or 0.0, 2),
                    "min_quality": round(r[3] or 0.0, 2),
                    "max_quality": round(r[4] or 0.0, 2),
                    "avg_unassigned": round(r[5] or 0.0, 2),
                    "avg_distance_km": round(r[6] or 0.0, 2),
                    "avg_latency_ms": round(r[7] or 0.0, 1),
                }
                for r in rows
            ]
        except Exception as e:
            logger.error(f"[MLCollector] get_strategy_comparison: {e}")
            return []

    def get_quality_histogram(self, bins: int = 10) -> List[Dict[str, Any]]:
        """Quality score distribution for histogram chart."""
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute("SELECT quality_score FROM assignment_ml_log").fetchall()
            conn.close()
            scores = [r[0] for r in rows if r[0] is not None]
            if not scores:
                return []
            # Fixed 0-100 domain (quality_score is capped at 100 by
            # _compute_quality_score); a score of exactly 100 falls outside
            # the half-open top bin - NOTE(review): verify if that matters.
            bin_width = 100.0 / bins
            return [
                {
                    "range": f"{i*bin_width:.0f}-{(i+1)*bin_width:.0f}",
                    "count": sum(1 for s in scores if i*bin_width <= s < (i+1)*bin_width)
                }
                for i in range(bins)
            ]
        except Exception as e:
            logger.error(f"[MLCollector] get_quality_histogram: {e}")
            return []

    def get_zone_stats(self) -> List[Dict[str, Any]]:
        """Quality and SLA stats grouped by zone."""
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute(
                """
                SELECT zone_id, COUNT(*) AS call_count,
                       AVG(quality_score) AS avg_quality,
                       SUM(sla_breached) AS sla_breaches,
                       AVG(total_distance_km) AS avg_distance_km
                FROM assignment_ml_log
                GROUP BY zone_id ORDER BY avg_quality DESC
                """
            ).fetchall()
            conn.close()
            return [
                {
                    "zone_id": r[0],
                    "call_count": r[1],
                    "avg_quality": round(r[2] or 0.0, 2),
                    "sla_breaches": r[3],
                    "avg_distance_km": round(r[4] or 0.0, 2),
                }
                for r in rows
            ]
        except Exception as e:
            logger.error(f"[MLCollector] get_zone_stats: {e}")
            return []

    def count_records(self) -> int:
        """Total number of logged assignment events (0 on any error)."""
        try:
            conn = sqlite3.connect(self._db_path)
            count = conn.execute("SELECT COUNT(*) FROM assignment_ml_log").fetchone()[0]
            conn.close()
            return count
        except Exception:
            return 0

    def count_by_strategy(self) -> Dict[str, int]:
        """Event counts keyed by ml_strategy ({} on any error)."""
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute(
                "SELECT ml_strategy, COUNT(*) FROM assignment_ml_log GROUP BY ml_strategy"
            ).fetchall()
            conn.close()
            return {r[0]: r[1] for r in rows}
        except Exception:
            return {}

    def export_csv(self) -> str:
        """Export all records as CSV string."""
        try:
            conn = sqlite3.connect(self._db_path)
            conn.row_factory = sqlite3.Row
            rows = conn.execute("SELECT * FROM assignment_ml_log ORDER BY id ASC").fetchall()
            conn.close()
            if not rows:
                return ""
            buf = io.StringIO()
            writer = csv.DictWriter(buf, fieldnames=rows[0].keys())
            writer.writeheader()
            writer.writerows([dict(r) for r in rows])
            return buf.getvalue()
        except Exception as e:
            logger.error(f"[MLCollector] export_csv failed: {e}")
            return ""

    def purge_old_records(self, keep_days: int = 90) -> int:
        """Delete records older than keep_days. Returns count deleted."""
        try:
            # ISO-8601 timestamps compare correctly as strings.
            cutoff = (datetime.utcnow() - timedelta(days=keep_days)).isoformat()
            conn = sqlite3.connect(self._db_path)
            cursor = conn.execute(
                "DELETE FROM assignment_ml_log WHERE timestamp < ?", (cutoff,)
            )
            deleted = cursor.rowcount
            conn.commit()
            conn.close()
            logger.info(f"[MLCollector] Purged {deleted} records older than {keep_days} days.")
            return deleted
        except Exception as e:
            logger.error(f"[MLCollector] purge failed: {e}")
            return 0

    # ------------------------------------------------------------------
    # Quality Score Formula (frozen at log time - do not change behavior)
    # ------------------------------------------------------------------
    @staticmethod
    def _compute_quality_score(
        num_orders: int, unassigned_count: int, load_std: float,
        riders_used: int, num_riders: int, total_distance_km: float,
        max_orders_per_rider: int, ml_strategy: str = "balanced",
    ) -> float:
        """Blend completion, distance, and balance ratios into a 0-100 score.

        Weights depend on ml_strategy. This formula is part of the stored
        data contract (scores are frozen at log time) - do not alter it.
        """
        if num_orders == 0:
            return 0.0
        assigned_ratio = 1.0 - (unassigned_count / num_orders)
        max_std = max(1.0, max_orders_per_rider / 2.0)
        balance_ratio = max(0.0, 1.0 - (load_std / max_std))
        # Distance budget assumes ~8 km per assigned order - TODO confirm.
        max_dist = max(1.0, float((num_orders - unassigned_count) * 8.0))
        distance_ratio = max(0.0, 1.0 - (total_distance_km / max_dist))
        # (completion_weight, distance_weight, balance_weight) per strategy.
        weights = {
            "aggressive_speed": (80.0, 20.0, 0.0),
            "fuel_saver": (30.0, 70.0, 0.0),
            "zone_strict": (40.0, 30.0, 30.0),
            "balanced": (50.0, 25.0, 25.0),
        }
        w_comp, w_dist, w_bal = weights.get(ml_strategy, (50.0, 25.0, 25.0))
        return min(
            assigned_ratio * w_comp + distance_ratio * w_dist + balance_ratio * w_bal,
            100.0,
        )

    @staticmethod
    def _get_km(order: Any) -> float:
        """Best-effort distance (km) from an order mapping; 0.0 if unreadable."""
        try:
            return float(order.get("kms") or order.get("calculationDistanceKm") or 0.0)
        except Exception:
            return 0.0

    # ------------------------------------------------------------------
    # DB Bootstrap
    # ------------------------------------------------------------------
    def _ensure_db(self) -> None:
        """Create the table, apply additive column migrations, and build indexes.

        Idempotent: ALTER TABLE failures (column already exists) are ignored.
        """
        try:
            os.makedirs(os.path.dirname(self._db_path) or ".", exist_ok=True)
            conn = sqlite3.connect(self._db_path)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS assignment_ml_log (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp TEXT NOT NULL,
                    hour INTEGER,
                    day_of_week INTEGER,
                    is_peak INTEGER DEFAULT 0,
                    zone_id TEXT DEFAULT 'default',
                    city_id TEXT DEFAULT 'default',
                    weather_code TEXT DEFAULT 'CLEAR',
                    num_orders INTEGER,
                    num_riders INTEGER,
                    max_pickup_distance_km REAL,
                    max_kitchen_distance_km REAL,
                    max_orders_per_rider INTEGER,
                    ideal_load INTEGER,
                    workload_balance_threshold REAL,
                    workload_penalty_weight REAL,
                    distance_penalty_weight REAL,
                    cluster_radius_km REAL,
                    search_time_limit_seconds INTEGER,
                    road_factor REAL,
                    ml_strategy TEXT DEFAULT 'balanced',
                    riders_used INTEGER,
                    total_assigned INTEGER,
                    unassigned_count INTEGER,
                    avg_load REAL,
                    load_std REAL,
                    total_distance_km REAL DEFAULT 0.0,
                    elapsed_ms REAL,
                    sla_breached INTEGER DEFAULT 0,
                    avg_delivery_time_min REAL DEFAULT 0.0,
                    quality_score REAL
                )
            """)
            # Additive migrations for DBs created by older schema versions.
            migrations = [
                "ALTER TABLE assignment_ml_log ADD COLUMN is_peak INTEGER DEFAULT 0",
                "ALTER TABLE assignment_ml_log ADD COLUMN zone_id TEXT DEFAULT 'default'",
                "ALTER TABLE assignment_ml_log ADD COLUMN city_id TEXT DEFAULT 'default'",
                "ALTER TABLE assignment_ml_log ADD COLUMN weather_code TEXT DEFAULT 'CLEAR'",
                "ALTER TABLE assignment_ml_log ADD COLUMN sla_breached INTEGER DEFAULT 0",
                "ALTER TABLE assignment_ml_log ADD COLUMN avg_delivery_time_min REAL DEFAULT 0.0",
                "ALTER TABLE assignment_ml_log ADD COLUMN ml_strategy TEXT DEFAULT 'balanced'",
                "ALTER TABLE assignment_ml_log ADD COLUMN total_distance_km REAL DEFAULT 0.0",
            ]
            for ddl in migrations:
                try:
                    conn.execute(ddl)
                except Exception:
                    pass
            for idx in [
                "CREATE INDEX IF NOT EXISTS idx_timestamp ON assignment_ml_log(timestamp)",
                "CREATE INDEX IF NOT EXISTS idx_strategy ON assignment_ml_log(ml_strategy)",
                "CREATE INDEX IF NOT EXISTS idx_zone ON assignment_ml_log(zone_id)",
            ]:
                conn.execute(idx)
            conn.commit()
            conn.close()
        except Exception as e:
            logger.error(f"[MLCollector] DB init failed: {e}")

    def _insert(self, row: Dict[str, Any]) -> None:
        """Insert one event row; keys must be valid column names.

        Callers must hold _WRITE_LOCK (see log_assignment_event). Raises on
        failure - the caller's try/except is the safety net.
        """
        os.makedirs(os.path.dirname(self._db_path) or ".", exist_ok=True)
        conn = sqlite3.connect(self._db_path)
        cols = ", ".join(row.keys())
        placeholders = ", ".join(["?"] * len(row))
        conn.execute(
            f"INSERT INTO assignment_ml_log ({cols}) VALUES ({placeholders})",
            list(row.values()),
        )
        conn.commit()
        conn.close()
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------
_collector: Optional[MLDataCollector] = None
def get_collector() -> MLDataCollector:
    """Lazily create and return the process-wide MLDataCollector singleton."""
    global _collector
    _collector = _collector or MLDataCollector()
    return _collector

View File

@@ -0,0 +1,610 @@
"""
ML Hypertuner - Production Grade
===================================
XGBoost surrogate model + Optuna TPE Bayesian optimization.
Key upgrades over the original
--------------------------------
1. Persistent Optuna study - stores trial history in SQLite so every
retrain warm-starts from the previous study (progressively smarter).
2. Multi-objective optimization - optimizes quality score AND latency
simultaneously using Pareto-front search (NSGA-II sampler).
3. Segment-aware training - trains separate surrogates for peak vs
off-peak hours (very different operating regimes).
4. Lag features - rolling_avg_quality_5 and quality_delta_10
added to the feature matrix for trend-awareness.
5. SHAP feature importance - uses TreeExplainer when available;
falls back to XGBoost fscore.
6. Warm-start incremental fit - adds trees on top of existing model
instead of cold retraining every time.
7. Staleness detection - warns if model is older than 24h.
8. Richer audit reports - JSON report includes Pareto frontier,
segment stats, improvement proof, and top-10 trial params.
"""
import json
import logging
import os
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score, mean_absolute_error
logger = logging.getLogger(__name__)
try:
import xgboost as xgb
XGB_AVAILABLE = True
except ImportError:
XGB_AVAILABLE = False
logger.warning("[Hypertuner] xgboost not installed.")
try:
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)
OPTUNA_AVAILABLE = True
except ImportError:
OPTUNA_AVAILABLE = False
logger.warning("[Hypertuner] optuna not installed.")
try:
import shap
SHAP_AVAILABLE = True
except ImportError:
SHAP_AVAILABLE = False
# ---------------------------------------------------------------------------
# Feature columns
# ---------------------------------------------------------------------------
# Operating-context + hyperparameter columns read from the collector's log.
BASE_FEATURE_COLS = [
    "hour", "day_of_week", "is_peak",
    "num_orders", "num_riders",
    "max_pickup_distance_km", "max_kitchen_distance_km",
    "max_orders_per_rider", "ideal_load",
    "workload_balance_threshold", "workload_penalty_weight",
    "distance_penalty_weight", "cluster_radius_km",
    "search_time_limit_seconds", "road_factor",
]
# Trend features computed in-memory by _add_lag_features (not stored columns).
LAG_FEATURE_COLS = [
    "rolling_avg_quality_5",  # rolling mean of last 5 quality scores
    "quality_delta_10",       # quality[i] - quality[i-10]
]
ALL_FEATURE_COLS = BASE_FEATURE_COLS + LAG_FEATURE_COLS
# Regression target: the frozen quality score logged by MLDataCollector.
LABEL_COL = "quality_score"
# Optuna search space: param name -> (type, low, high).
SEARCH_SPACE = {
    "max_pickup_distance_km": ("float", 4.0, 15.0),
    "max_kitchen_distance_km": ("float", 1.0, 8.0),
    "max_orders_per_rider": ("int", 6, 20),
    "ideal_load": ("int", 2, 10),
    "workload_balance_threshold": ("float", 0.3, 0.95),
    "workload_penalty_weight": ("float", 20.0, 200.0),
    "distance_penalty_weight": ("float", 0.5, 10.0),
    "cluster_radius_km": ("float", 1.0, 8.0),
    "search_time_limit_seconds": ("int", 2, 15),
    "road_factor": ("float", 1.1, 1.6),
}
# Optuna study history shares the collector's SQLite file by default.
_STUDY_DB_PATH = os.getenv("ML_DB_PATH", "ml_data/ml_store.db")
_REPORT_DIR = "ml_data/reports"
# Models older than this many hours are considered stale.
_MAX_MODEL_AGE_H = 24
# ---------------------------------------------------------------------------
# MLHypertuner
# ---------------------------------------------------------------------------
class MLHypertuner:
"""XGBoost surrogate + Optuna TPE / NSGA-II hyperparameter optimizer."""
def __init__(self):
    """Initialise empty model/report slots, then hydrate from the last saved report."""
    self._model: Optional[Any] = None            # global surrogate (all hours)
    self._peak_model: Optional[Any] = None       # peak-hours segment surrogate
    self._offpeak_model: Optional[Any] = None    # off-peak segment surrogate
    self._model_trained_at: Optional[datetime] = None
    self._training_rows: int = 0
    self._latest_validation: Optional[Dict] = None
    self._latest_baseline: Optional[Dict] = None
    self._feature_importance: Optional[Dict[str, float]] = None
    self._top_trials: List[Dict] = []
    self._pareto_frontier: List[Dict] = []
    self._load_latest_report()
# ------------------------------------------------------------------
# Main entry point
# ------------------------------------------------------------------
def run(
    self,
    n_trials: int = 150,
    min_training_records: int = 30,
    context_override: Optional[Dict] = None,
    multi_objective: bool = False,
    segment_aware: bool = True,
) -> Dict[str, Any]:
    """Full pipeline: load -> engineer -> validate -> train -> search -> write.

    Returns a status dict. On success it includes the best params,
    cross-validation metrics, and an improvement proof vs. the logged
    baseline. Missing optional deps or thin data return a status dict
    instead of raising.
    """
    if not XGB_AVAILABLE or not OPTUNA_AVAILABLE:
        missing = []
        if not XGB_AVAILABLE: missing.append("xgboost")
        if not OPTUNA_AVAILABLE: missing.append("optuna")
        return {"status": "error", "message": f"Missing: {', '.join(missing)}"}
    # Imported lazily, presumably to avoid a circular import at module
    # load time - TODO confirm.
    from app.services.ml.ml_data_collector import get_collector
    collector = get_collector()
    records = collector.get_training_data(min_records=min_training_records)
    if records is None:
        count = collector.count_records()
        return {
            "status": "insufficient_data",
            "message": f"{count} records - need >={min_training_records}.",
            "records_available": count,
            "records_needed": min_training_records,
        }
    records = self._add_lag_features(records)
    X, y = self._prepare_data(records, ALL_FEATURE_COLS)
    if X is None or len(X) == 0:
        return {"status": "error", "message": "Data preparation failed."}
    cv_results = self._cross_validate(X, y)
    logger.info(f"[Hypertuner] CV: R2={cv_results['r2_score']:.3f}, MAE={cv_results['mae']:.2f}")
    self._train_model(X, y, model_attr="_model")
    self._latest_validation = cv_results
    # Segment-aware surrogates: peak vs off-peak rows are trained separately
    # when each segment has at least 20 rows (and 60 rows overall exist).
    if segment_aware and len(records) >= 60:
        peak_recs = [r for r in records if r.get("is_peak", 0) == 1]
        offpeak_recs = [r for r in records if r.get("is_peak", 0) == 0]
        if len(peak_recs) >= 20:
            Xp, yp = self._prepare_data(peak_recs, ALL_FEATURE_COLS)
            self._train_model(Xp, yp, model_attr="_peak_model")
        if len(offpeak_recs) >= 20:
            Xo, yo = self._prepare_data(offpeak_recs, ALL_FEATURE_COLS)
            self._train_model(Xo, yo, model_attr="_offpeak_model")
    baseline_stats = self._compute_baseline_stats(records)
    self._latest_baseline = baseline_stats
    context = context_override or self._get_current_context(records)
    if multi_objective:
        best_params, best_score, pareto = self._optuna_search_multi(context, n_trials)
        self._pareto_frontier = pareto
    else:
        best_params, best_score = self._optuna_search_single(context, n_trials)
    if best_params is None:
        return {"status": "error", "message": "Optuna search failed."}
    improvement = round(best_score - baseline_stats["avg_quality"], 2)
    self._compute_feature_importance()
    # Guardrail: refuse to overwrite live config when the surrogate is weak.
    # NOTE(review): the Optuna search above runs even when this gate
    # rejects - moving the R2 check before the search would save compute.
    if cv_results["r2_score"] < 0.5:
        return {
            "status": "model_not_ready",
            "message": f"R2={cv_results['r2_score']:.3f} too low.",
            "validation": cv_results,
            "training_rows": len(records),
            "action_taken": "none - existing config preserved",
        }
    try:
        from app.config.dynamic_config import get_config
        get_config().set_bulk(best_params, source="ml_hypertuner")
    except ImportError:
        logger.info("[Hypertuner] DynamicConfig not available - params not written to config.")
    self._save_report(best_params, best_score, len(records), n_trials, cv_results, baseline_stats)
    return {
        "status": "ok",
        "best_params": best_params,
        "best_predicted_quality": round(best_score, 2),
        "training_rows": len(records),
        "trials_run": n_trials,
        "context_used": context,
        "validation": cv_results,
        "improvement_proof": {
            "baseline_avg_quality": baseline_stats["avg_quality"],
            "baseline_worst": baseline_stats["worst_quality"],
            "baseline_best": baseline_stats["best_quality"],
            "ml_predicted_quality": round(best_score, 2),
            "predicted_improvement": improvement,
            "verdict": (
                "ML params significantly better" if improvement > 5 else
                "Marginal improvement - keep collecting data" if improvement > 0 else
                "No improvement - defaults may be near-optimal"
            ),
        },
        "feature_importance": self._feature_importance,
        "top_trials": self._top_trials[:5],
        "message": "Hyperparameters updated successfully.",
    }
# ------------------------------------------------------------------
# Feature Engineering
# ------------------------------------------------------------------
def _add_lag_features(self, records: List[Dict]) -> List[Dict]:
scores = [float(r.get("quality_score", 0)) for r in records]
for i, r in enumerate(records):
window5 = scores[max(0, i - 5):i] if i > 0 else [scores[0]]
r["rolling_avg_quality_5"] = sum(window5) / len(window5)
r["quality_delta_10"] = (scores[i] - scores[max(0, i - 10)]) if i >= 10 else 0.0
return records
# ------------------------------------------------------------------
# Data Preparation
# ------------------------------------------------------------------
def _prepare_data(
    self, records: List[Dict], feature_cols: List[str]
) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
    """Convert raw training records into (X, y) float32 matrices.

    Missing or non-numeric feature values are coerced to 0.0 so a single
    bad record cannot abort training; label extraction is not guarded, so
    an unconvertible label trips the outer handler and yields (None, None).
    """
    def _as_float(value) -> float:
        # Per-cell coercion: falsy values (None, "", 0) become 0.0.
        try:
            return float(value or 0)
        except (TypeError, ValueError):
            return 0.0

    try:
        feature_matrix = [
            [_as_float(rec.get(col, 0)) for col in feature_cols]
            for rec in records
        ]
        labels = [float(rec.get(LABEL_COL, 0)) for rec in records]
        return (
            np.array(feature_matrix, dtype=np.float32),
            np.array(labels, dtype=np.float32),
        )
    except Exception as e:
        logger.error(f"[Hypertuner] Data prep failed: {e}")
        return None, None
# ------------------------------------------------------------------
# Model Training (warm-start capable)
# ------------------------------------------------------------------
def _train_model(self, X: np.ndarray, y: np.ndarray, model_attr: str = "_model") -> None:
    """Fit an XGBoost regressor on (X, y) and store it on `model_attr`.

    If a model already exists under `model_attr`, attempt a 50-tree warm
    start from its booster; on any failure, fall back to a full cold fit
    with the standard hyperparameters.

    Fix: the warm-start failure was silently swallowed (`except: pass`),
    which hid real errors such as feature-count mismatches; the failure is
    now logged before the cold retrain.
    """
    kwargs = {
        "n_estimators": 300, "max_depth": 5, "learning_rate": 0.04,
        "subsample": 0.8, "colsample_bytree": 0.8,
        "reg_alpha": 0.1, "reg_lambda": 1.0, "random_state": 42, "verbosity": 0,
    }
    existing = getattr(self, model_attr, None)
    if existing is not None:
        try:
            # Continue training from the existing booster with a short run.
            warm_kwargs = {k: v for k, v in kwargs.items() if k != "n_estimators"}
            m = xgb.XGBRegressor(n_estimators=50, **warm_kwargs)
            m.fit(X, y, xgb_model=existing.get_booster())
            setattr(self, model_attr, m)
            if model_attr == "_model":
                self._model_trained_at = datetime.utcnow()
                self._training_rows = len(X)
            logger.info(f"[Hypertuner] XGBoost warm-updated ({model_attr}) - {len(X)} rows.")
            return
        except Exception as e:
            # Fall back to a cold retrain, but surface why warm-start failed.
            logger.warning(f"[Hypertuner] Warm-start failed ({model_attr}): {e} - retraining from scratch.")
    m = xgb.XGBRegressor(**kwargs)
    m.fit(X, y)
    setattr(self, model_attr, m)
    if model_attr == "_model":
        self._model_trained_at = datetime.utcnow()
        self._training_rows = len(X)
    logger.info(f"[Hypertuner] XGBoost trained ({model_attr}) - {len(X)} rows.")
# ------------------------------------------------------------------
# Cross Validation
# ------------------------------------------------------------------
def _cross_validate(self, X: np.ndarray, y: np.ndarray, k: int = 5) -> Dict:
    """Estimate model quality via k-fold CV (holdout split when data is scarce).

    Returns r2/mae plus a coarse trust grade that callers use to decide
    whether tuned parameters are safe to apply.
    """
    def _fresh_model():
        # Same fixed evaluation model for every fold, for comparability.
        return xgb.XGBRegressor(n_estimators=100, max_depth=4, verbosity=0, random_state=42)

    if len(X) < k * 2:
        # Too few rows for k folds: fall back to a single 80/20 holdout.
        cut = max(1, int(len(X) * 0.8))
        X_train, X_test = X[:cut], X[cut:]
        y_train, y_test = y[:cut], y[cut:]
        if len(X_test) == 0:
            return {"r2_score": 0.0, "mae": 99.0, "trust_level": "insufficient_data",
                    "trust_score": 0, "folds": 0}
        model = _fresh_model()
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        r2 = float(r2_score(y_test, preds))
        mae = float(mean_absolute_error(y_test, preds))
        folds_used = 1
    else:
        splitter = KFold(n_splits=k, shuffle=True, random_state=42)
        r2_list, mae_list = [], []
        for train_idx, test_idx in splitter.split(X):
            model = _fresh_model()
            model.fit(X[train_idx], y[train_idx])
            fold_preds = model.predict(X[test_idx])
            r2_list.append(r2_score(y[test_idx], fold_preds))
            mae_list.append(mean_absolute_error(y[test_idx], fold_preds))
        r2 = float(np.mean(r2_list))
        mae = float(np.mean(mae_list))
        folds_used = k
    # Map R2 to a human-readable trust grade; first threshold met wins.
    trust_level, trust_score = "poor - need more data", 1
    for threshold, level, score in [(0.85, "excellent", 5), (0.75, "strong", 4),
                                    (0.60, "good", 3), (0.50, "acceptable", 2)]:
        if r2 >= threshold:
            trust_level, trust_score = level, score
            break
    return {
        "r2_score": round(r2, 4),
        "mae": round(mae, 3),
        "folds": folds_used,
        "trust_level": trust_level,
        "trust_score": trust_score,
        "interpretation": f"Predictions off by +/-{mae:.1f} pts (R2={r2:.2f}, trust={trust_level})",
    }
# ------------------------------------------------------------------
# Optuna - Single Objective (persistent SQLite storage)
# ------------------------------------------------------------------
def _optuna_search_single(self, context: Dict, n_trials: int) -> Tuple[Optional[Dict], float]:
    """Single-objective Optuna search: maximize model-predicted quality.

    The study is persisted to SQLite (load_if_exists=True), so trial
    history accumulates across process restarts and warm-starts the TPE
    sampler. Returns (best_params restricted to SEARCH_SPACE keys,
    best_score), or (None, 0.0) on any failure.
    """
    def objective(trial):
        params = self._sample_params(trial)
        # Hard constraint: ideal load can never exceed the per-rider cap;
        # score such configs 0 so the sampler steers away from the region.
        if params.get("ideal_load", 6) > params.get("max_orders_per_rider", 12):
            return 0.0
        return self._predict_quality(context, params)
    try:
        study = optuna.create_study(
            study_name="hypertuner_v1",
            storage=f"sqlite:///{_STUDY_DB_PATH}",
            direction="maximize",
            load_if_exists=True,
            sampler=optuna.samplers.TPESampler(seed=42),
        )
        study.optimize(objective, n_trials=n_trials, show_progress_bar=False)
        best = study.best_trial
        # Keep a leaderboard of the 10 best completed trials for reporting;
        # trials with value=None (failed/pruned) are filtered out.
        self._top_trials = [
            {"params": t.params, "score": t.value}
            for t in sorted(study.trials, key=lambda x: x.value or 0, reverse=True)[:10]
            if t.value is not None
        ]
        return {k: best.params[k] for k in SEARCH_SPACE if k in best.params}, best.value
    except Exception as e:
        logger.error(f"[Hypertuner] Optuna single-obj failed: {e}", exc_info=True)
        return None, 0.0
# ------------------------------------------------------------------
# Optuna - Multi Objective (quality + latency, NSGA-II)
# ------------------------------------------------------------------
def _optuna_search_multi(
    self, context: Dict, n_trials: int
) -> Tuple[Optional[Dict], float, List[Dict]]:
    """Multi-objective NSGA-II search: maximize quality, minimize latency.

    Latency is a proxy derived from the configured solver time budget;
    the Pareto frontier is returned so callers can trade quality against
    speed. The "best" pick reported here is simply the highest-quality
    point on the frontier. Returns (best_params, best_quality, pareto)
    or (None, 0.0, []) on error.
    """
    def objective(trial):
        params = self._sample_params(trial)
        # Infeasible config (ideal load above hard cap): worst on both axes.
        if params.get("ideal_load", 6) > params.get("max_orders_per_rider", 12):
            return 0.0, 99.0
        quality = self._predict_quality(context, params)
        # Latency proxy: scales linearly with the allotted search seconds.
        # NOTE(review): the 200.0 factor looks like an empirical ms-per-second
        # estimate - confirm against measured solver timings.
        latency_proxy = float(params.get("search_time_limit_seconds", 5)) * 200.0
        return quality, latency_proxy
    try:
        study = optuna.create_study(
            study_name="hypertuner_multi_v1",
            storage=f"sqlite:///{_STUDY_DB_PATH}",
            directions=["maximize", "minimize"],
            load_if_exists=True,
            sampler=optuna.samplers.NSGAIISampler(seed=42),
        )
        study.optimize(objective, n_trials=n_trials, show_progress_bar=False)
        # For multi-objective studies, best_trials is the Pareto-optimal set.
        pareto = [
            {"params": t.params, "quality": t.values[0], "latency_proxy": t.values[1]}
            for t in study.best_trials
        ]
        if not pareto:
            return None, 0.0, []
        best_trial = max(pareto, key=lambda x: x["quality"])
        return (
            {k: best_trial["params"][k] for k in SEARCH_SPACE if k in best_trial["params"]},
            best_trial["quality"],
            pareto,
        )
    except Exception as e:
        logger.error(f"[Hypertuner] Optuna multi-obj failed: {e}", exc_info=True)
        return None, 0.0, []
def _sample_params(self, trial) -> Dict:
    """Draw one candidate configuration from SEARCH_SPACE via the trial.

    SEARCH_SPACE maps name -> (type, low, high); only "float" and "int"
    parameter types are supported, anything else is skipped.
    """
    sampled: Dict = {}
    for name, (param_type, low, high) in SEARCH_SPACE.items():
        if param_type == "float":
            sampled[name] = trial.suggest_float(name, low, high)
        elif param_type == "int":
            sampled[name] = trial.suggest_int(name, int(low), int(high))
    return sampled
# ------------------------------------------------------------------
# Prediction
# ------------------------------------------------------------------
def _predict_quality(self, context: Dict, params: Dict) -> float:
    """Score a candidate parameter set with the trained regressor (0-100).

    Uses the peak/off-peak specialist model when one exists for the
    context's regime, otherwise the general model. Returns 0.0 when no
    model has been trained yet.
    """
    if self._model is None:
        return 0.0
    # Lag features always come from the context (with safe defaults),
    # never from the candidate params.
    features = {**context, **params}
    features["rolling_avg_quality_5"] = context.get("rolling_avg_quality_5", 50.0)
    features["quality_delta_10"] = context.get("quality_delta_10", 0.0)
    vector = []
    for col in ALL_FEATURE_COLS:
        try:
            vector.append(float(features.get(col, 0) or 0))
        except (TypeError, ValueError):
            vector.append(0.0)
    specialist = self._peak_model if int(context.get("is_peak", 0)) else self._offpeak_model
    model = specialist or self._model
    raw = float(model.predict(np.array([vector], dtype=np.float32))[0])
    # Clamp into the valid quality-score range.
    return min(max(raw, 0.0), 100.0)
# ------------------------------------------------------------------
# Feature Importance
# ------------------------------------------------------------------
def _compute_feature_importance(self) -> None:
    """Populate self._feature_importance as {feature: % contribution}.

    Prefers SHAP values (mean |SHAP| over the most recent training rows)
    when the shap package is available; otherwise falls back to XGBoost's
    raw split counts (get_fscore). Both paths normalize to percentages and
    sort descending. No-op when no model has been trained.
    """
    if self._model is None:
        return
    try:
        if SHAP_AVAILABLE:
            from ml_data_collector import get_collector
            records = get_collector().get_training_data(min_records=1) or []
            # Cap at the last 200 rows to keep SHAP computation tractable.
            records = self._add_lag_features(records[-200:])
            X, _ = self._prepare_data(records, ALL_FEATURE_COLS)
            if X is not None and len(X) > 0:
                explainer = shap.TreeExplainer(self._model)
                shap_values = np.abs(explainer.shap_values(X)).mean(axis=0)
                total = max(shap_values.sum(), 1e-9)  # guard div-by-zero
                self._feature_importance = dict(sorted(
                    {ALL_FEATURE_COLS[i]: round(float(shap_values[i] / total) * 100, 2)
                     for i in range(len(ALL_FEATURE_COLS))}.items(),
                    key=lambda x: x[1], reverse=True
                ))
                return
    except Exception:
        pass  # SHAP is best-effort; fall through to the fscore fallback.
    try:
        # Fallback: booster feature keys look like "f0", "f1", ... - map the
        # index back to the column name and normalize split counts to %.
        scores = self._model.get_booster().get_fscore()
        total = max(sum(scores.values()), 1)
        self._feature_importance = dict(sorted(
            {ALL_FEATURE_COLS[int(k[1:])]: round(v / total * 100, 2)
             for k, v in scores.items()
             if k.startswith("f") and k[1:].isdigit() and int(k[1:]) < len(ALL_FEATURE_COLS)
             }.items(),
            key=lambda x: x[1], reverse=True
        ))
    except Exception as e:
        logger.warning(f"[Hypertuner] Feature importance failed: {e}")
def get_feature_importance(self) -> Optional[Dict[str, float]]:
    """Latest per-feature contribution map (%), or None if never computed."""
    importance = self._feature_importance
    return importance
# ------------------------------------------------------------------
# Context
# ------------------------------------------------------------------
def _get_current_context(self, records: List[Dict]) -> Dict:
now = datetime.utcnow()
recent = records[-20:]
avg_orders = sum(r.get("num_orders", 0) for r in recent) / max(len(recent), 1)
avg_riders = sum(r.get("num_riders", 0) for r in recent) / max(len(recent), 1)
recent_scores = [float(r.get("quality_score", 0)) for r in recent]
rolling_avg5 = sum(recent_scores[-5:]) / max(len(recent_scores[-5:]), 1)
delta10 = (recent_scores[-1] - recent_scores[-11]) if len(recent_scores) >= 11 else 0.0
return {
"hour": now.hour,
"day_of_week": now.weekday(),
"is_peak": int(now.hour in (7, 8, 9, 12, 13, 18, 19, 20)),
"num_orders": round(avg_orders),
"num_riders": round(avg_riders),
"rolling_avg_quality_5": round(rolling_avg5, 2),
"quality_delta_10": round(delta10, 2),
}
def _compute_baseline_stats(self, records: List[Dict]) -> Dict:
scores = [float(r.get("quality_score", 0)) for r in records if r.get("quality_score")]
if not scores:
return {"avg_quality": 0.0, "best_quality": 0.0, "worst_quality": 0.0}
return {
"avg_quality": round(sum(scores) / len(scores), 2),
"best_quality": round(max(scores), 2),
"worst_quality": round(min(scores), 2),
"sample_size": len(scores),
}
# ------------------------------------------------------------------
# Model Info
# ------------------------------------------------------------------
def get_model_info(self) -> Dict[str, Any]:
    """Snapshot of tuner state: models, validation, baseline, search space."""
    baseline = self._latest_baseline
    if baseline is None:
        # No tuning run yet: lazily derive a baseline from collected data.
        try:
            from ml_data_collector import get_collector
            records = get_collector().get_training_data(min_records=1)
            if records:
                baseline = self._compute_baseline_stats(records)
        except Exception:
            pass
    trained_at = self._model_trained_at
    search_space = {
        name: {"type": spec[0], "low": spec[1], "high": spec[2]}
        for name, spec in SEARCH_SPACE.items()
    }
    return {
        "model_trained": self._model is not None,
        "trained_at": trained_at.isoformat() if trained_at else None,
        "training_rows": self._training_rows,
        "peak_model_trained": self._peak_model is not None,
        "offpeak_model_trained": self._offpeak_model is not None,
        "features": ALL_FEATURE_COLS,
        "validation": self._latest_validation,
        "baseline": baseline,
        "search_space": search_space,
        "feature_importance": self._feature_importance,
        "top_trials": self._top_trials[:10],
        "pareto_frontier_size": len(self._pareto_frontier),
    }
# ------------------------------------------------------------------
# Report I/O
# ------------------------------------------------------------------
def _save_report(self, best_params, best_score, training_rows,
                 n_trials, cv_results, baseline_stats) -> None:
    """Persist a timestamped JSON tuning report so state survives restarts.

    Failures are logged and swallowed - reporting must never break tuning.
    """
    try:
        os.makedirs(_REPORT_DIR, exist_ok=True)
        payload = {
            "timestamp": datetime.utcnow().isoformat(),
            "training_rows": training_rows,
            "n_trials": n_trials,
            "best_predicted_quality": round(best_score, 2),
            "best_params": best_params,
            "validation": cv_results or {},
            "baseline_stats": baseline_stats or {},
            "feature_importance": self._feature_importance or {},
            "top_trials": self._top_trials[:10],
            "pareto_frontier": self._pareto_frontier[:20],
        }
        filename = f"tuning_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}.json"
        path = os.path.join(_REPORT_DIR, filename)
        with open(path, "w") as fh:
            json.dump(payload, fh, indent=2)
        logger.info(f"[Hypertuner] Report -> {path}")
    except Exception as e:
        logger.warning(f"[Hypertuner] Report save failed: {e}")
def _load_latest_report(self) -> None:
    """Restore tuner state from the newest JSON report on disk, if any.

    Report filenames embed a sortable timestamp, so a reverse lexicographic
    sort puts the newest first. Failures are logged and ignored.
    """
    try:
        if not os.path.isdir(_REPORT_DIR):
            return
        reports = sorted([f for f in os.listdir(_REPORT_DIR) if f.endswith(".json")], reverse=True)
        if not reports:
            return
        newest = reports[0]
        with open(os.path.join(_REPORT_DIR, newest)) as fh:
            snapshot = json.load(fh)
        self._latest_validation = snapshot.get("validation")
        self._latest_baseline = snapshot.get("baseline_stats")
        self._training_rows = snapshot.get("training_rows", 0)
        self._feature_importance = snapshot.get("feature_importance")
        self._top_trials = snapshot.get("top_trials", [])
        self._pareto_frontier = snapshot.get("pareto_frontier", [])
        stamp = snapshot.get("timestamp")
        if stamp:
            self._model_trained_at = datetime.fromisoformat(stamp)
        logger.info(f"[Hypertuner] Restored state from {newest}")
    except Exception as e:
        logger.warning(f"[Hypertuner] Load latest report failed: {e}")
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------
# Lazily-created, process-wide tuner instance.
_tuner: Optional[MLHypertuner] = None


def get_hypertuner() -> MLHypertuner:
    """Return the shared MLHypertuner, creating it on first use."""
    global _tuner
    if _tuner is not None:
        return _tuner
    _tuner = MLHypertuner()
    return _tuner

View File

@@ -0,0 +1,99 @@
import httpx
import logging
from datetime import datetime
from typing import List, Dict, Any, Optional
from app.config.rider_preferences import RIDER_PREFERRED_KITCHENS
logger = logging.getLogger(__name__)
async def fetch_active_riders() -> List[Dict[str, Any]]:
    """
    Fetch active rider logs from the external API for the current date.
    Returns a list of rider log dictionaries.
    """
    try:
        today = datetime.now().strftime("%Y-%m-%d")
        query = {
            "applocationid": 1,
            "partnerid": 44,
            "fromdate": today,
            "todate": today,
            "keyword": ""
        }
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(
                "https://jupiter.nearle.app/live/api/v2/partners/getriderlogs/",
                params=query,
            )
            response.raise_for_status()
            payload = response.json()
            # The raw log list is returned as-is; filtering (preferences,
            # "onduty" flag, etc.) is applied later by the assignment logic.
            if payload and payload.get("code") == 200 and payload.get("details"):
                return payload.get("details", [])
            logger.warning(f"Fetch active riders returned no details: {payload}")
            return []
    except Exception as e:
        logger.error(f"Error fetching active riders: {e}", exc_info=True)
        return []
async def fetch_created_orders() -> List[Dict[str, Any]]:
    """
    Fetch all orders in 'created' state for the current date.
    """
    try:
        today = datetime.now().strftime("%Y-%m-%d")
        # "pagesize" is intentionally omitted so the API returns everything.
        # NOTE(review): confirm the backend really disables paging when
        # pagesize is absent - only pageno=1 is requested here.
        query = {
            "applocationid": 0,
            "tenantid": 0,
            "locationid": 0,
            "status": "created",
            "fromdate": today,
            "todate": today,
            "keyword": "",
            "pageno": 1
        }
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(
                "https://jupiter.nearle.app/live/api/v1/orders/tenant/getorders/",
                params=query,
            )
            response.raise_for_status()
            payload = response.json()
            if payload and payload.get("code") == 200 and payload.get("details"):
                return payload.get("details", [])
            logger.warning(f"Fetch created orders returned no details: {payload}")
            return []
    except Exception as e:
        logger.error(f"Error fetching created orders: {e}", exc_info=True)
        return []
async def fetch_rider_pricing() -> List[Dict[str, Any]]:
    """
    Fetch rider pricing configuration from external API.
    """
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(
                "https://jupiter.nearle.app/live/api/v1/partners/getriderpricing"
            )
            response.raise_for_status()
            payload = response.json()
            if payload and payload.get("code") == 200:
                return payload.get("details", [])
            logger.warning(f"Fetch rider pricing returned no details: {payload}")
            return []
    except Exception as e:
        logger.error(f"Error fetching rider pricing: {e}", exc_info=True)
        return []

View File

@@ -0,0 +1,78 @@
import os
import pickle
import logging
from datetime import datetime
from typing import Dict, Any
logger = logging.getLogger(__name__)

HISTORY_FILE = "rider_history.pkl"


class RiderHistoryService:
    """Long-term per-rider workload tracker.

    Accumulates total kilometers and order counts per rider, persisted to a
    pickle file, and classifies riders as LONG/SHORT route candidates
    relative to the population average.
    """

    def __init__(self, history_file: str = HISTORY_FILE):
        self.history_file = history_file
        self.history = self._load_history()

    def _load_history(self) -> Dict[int, Dict[str, float]]:
        """Deserialize the history pickle; empty dict when missing/corrupt."""
        if not os.path.exists(self.history_file):
            return {}
        try:
            with open(self.history_file, 'rb') as fh:
                return pickle.load(fh)
        except Exception as exc:
            logger.error(f"Failed to load rider history: {exc}")
            return {}

    def _save_history(self):
        """Persist history; failures are logged, never raised."""
        try:
            with open(self.history_file, 'wb') as fh:
                pickle.dump(self.history, fh)
        except Exception as exc:
            logger.error(f"Failed to save rider history: {exc}")

    def update_rider_stats(self, rider_id: int, distance_km: float, order_count: int):
        """Accumulate distance and order count for a rider, then persist."""
        key = int(rider_id)
        entry = self.history.setdefault(key, {
            "total_km": 0.0,
            "total_orders": 0,
            "last_updated": datetime.now().isoformat()
        })
        entry["total_km"] += distance_km
        entry["total_orders"] += order_count
        entry["last_updated"] = datetime.now().isoformat()
        # Auto-save on every update so a crash loses at most one record.
        self._save_history()

    def get_rider_score(self, rider_id: int) -> float:
        """
        Get a score representing the rider's historical 'load' (KMs).
        Higher Score = More KMs driven recently.
        """
        return self.history.get(int(rider_id), {}).get("total_km", 0.0)

    def get_preferred_assignment_type(self, rider_id: int, all_rider_scores: Dict[int, float]) -> str:
        """
        Determine if rider should get 'Long' or 'Short' routes based on population average.

        Below-average mileage -> LONG (catch up); at/above average -> SHORT.
        Returns "ANY" when there is no population to compare against.
        """
        if not all_rider_scores:
            return "ANY"
        population_avg = sum(all_rider_scores.values()) / len(all_rider_scores)
        own_score = self.get_rider_score(rider_id)
        return "LONG" if own_score < population_avg else "SHORT"

View File

@@ -0,0 +1,108 @@
import os
import pickle
import logging
import time
from datetime import datetime
from typing import Dict, Any, List, Set
logger = logging.getLogger(__name__)

STATE_FILE = "rider_active_state.pkl"


class RiderStateManager:
    """
    Manages the 'Short-Term' Active State of Riders for session persistence.
    Tracks:
    - Minutes Committed (Remaining Workload)
    - Active Kitchens (Unique Pickups in current queue)
    - Last Planned Drop Location (for Daisy Chaining)
    - Timestamp of last update (for Time Decay)
    """

    def __init__(self, state_file: str = STATE_FILE):
        self.state_file = state_file
        self.states = self._load_states()

    def _load_states(self) -> Dict[str, Any]:
        """Deserialize persisted states; empty dict when missing/corrupt."""
        if not os.path.exists(self.state_file):
            return {}
        try:
            with open(self.state_file, 'rb') as fh:
                return pickle.load(fh)
        except Exception as exc:
            logger.error(f"Failed to load rider active states: {exc}")
            return {}

    def _save_states(self):
        """Persist states; failures are logged, never raised."""
        try:
            with open(self.state_file, 'wb') as fh:
                pickle.dump(self.states, fh)
        except Exception as exc:
            logger.error(f"Failed to save rider active states: {exc}")

    def get_rider_state(self, rider_id: int) -> Dict[str, Any]:
        """
        Get the current active state of a rider with TIME DECAY applied.
        If the server restarts after 30 mins, the 'minutes_committed' should reduce by 30.
        """
        rider_id = int(rider_id)
        stored = self.states.get(rider_id)
        now_ts = time.time()
        if not stored:
            # Unknown rider: fully free, no kitchens held.
            return {
                'minutes_remaining': 0.0,
                'last_drop_lat': None,
                'last_drop_lon': None,
                'active_kitchens': set(),
                'last_updated_ts': now_ts
            }
        # Decay the committed workload by wall-clock minutes elapsed.
        elapsed_minutes = (now_ts - stored.get('last_updated_ts', now_ts)) / 60.0
        minutes_left = max(0.0, stored.get('minutes_remaining', 0.0) - elapsed_minutes)
        # Almost-finished riders release their kitchen locks (5-minute buffer).
        if minutes_left <= 5.0:
            kitchens = set()
        else:
            kitchens = stored.get('active_kitchens', set())
        return {
            'minutes_remaining': minutes_left,
            'last_drop_lat': stored.get('last_drop_lat'),
            'last_drop_lon': stored.get('last_drop_lon'),
            'active_kitchens': kitchens,
            'last_updated_ts': now_ts
        }

    def update_rider_state(self, rider_id: int, added_minutes: float, new_kitchens: Set[str], last_lat: float, last_lon: float):
        """
        Update the state after a new assignment.
        """
        rider_id = int(rider_id)
        # Decay first, then stack the new workload on top.
        decayed = self.get_rider_state(rider_id)
        self.states[rider_id] = {
            'minutes_remaining': decayed['minutes_remaining'] + added_minutes,
            'last_drop_lat': last_lat,
            'last_drop_lon': last_lon,
            'active_kitchens': decayed['active_kitchens'].union(new_kitchens),
            'last_updated_ts': time.time()
        }
        self._save_states()

    def clear_state(self, rider_id: int):
        """Drop a rider's active state entirely and persist the removal."""
        rider_id = int(rider_id)
        if rider_id in self.states:
            del self.states[rider_id]
            self._save_states()

View File

@@ -0,0 +1,133 @@
"""
Geographic Clustering Service for Order Assignment
Uses K-means clustering to group orders by kitchen location.
"""
import logging
import numpy as np
from typing import List, Dict, Any, Tuple
from collections import defaultdict
from math import radians, cos, sin, asin, sqrt
logger = logging.getLogger(__name__)


class ClusteringService:
    """Clusters orders geographically to enable balanced rider assignment."""

    def __init__(self):
        self.earth_radius_km = 6371

    def haversine(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Great-circle distance between two (lat, lon) points in km."""
        rlat1, rlon1, rlat2, rlon2 = (radians(float(v)) for v in (lat1, lon1, lat2, lon2))
        half_chord = (
            sin((rlat2 - rlat1) / 2) ** 2
            + cos(rlat1) * cos(rlat2) * sin((rlon2 - rlon1) / 2) ** 2
        )
        # min() guards against floating-point drift pushing sqrt(a) above 1.
        return 2 * asin(min(1.0, sqrt(half_chord))) * self.earth_radius_km

    def get_kitchen_location(self, order: Dict[str, Any]) -> Tuple[float, float]:
        """Extract kitchen (pickup) coordinates; (0.0, 0.0) when unavailable."""
        try:
            lat = float(order.get("pickuplat", 0))
            # Some payloads use 'pickuplon', others 'pickuplong'.
            lon = float(order.get("pickuplon") or order.get("pickuplong", 0))
            if lat != 0 and lon != 0:
                return lat, lon
        except (ValueError, TypeError):
            pass
        return 0.0, 0.0

    def cluster_orders_by_kitchen(self, orders: List[Dict[str, Any]], max_cluster_radius_km: float = 3.0) -> List[Dict[str, Any]]:
        """
        Cluster orders by kitchen proximity.
        Returns list of clusters, each containing:
        - centroid: (lat, lon) of cluster center
        - orders: list of orders in this cluster
        - kitchen_names: set of kitchen names in cluster
        - total_orders: count
        """
        if not orders:
            return []

        # Bucket orders per kitchen name, keeping one coordinate per kitchen.
        orders_by_kitchen = defaultdict(list)
        coords_by_kitchen = {}
        for order in orders:
            name = self._get_kitchen_name(order)
            lat, lon = self.get_kitchen_location(order)
            if lat == 0:
                # Pickup coords missing: fall back to the delivery point.
                lat = float(order.get("deliverylat", 0))
                lon = float(order.get("deliverylong", 0))
            if lat != 0:
                orders_by_kitchen[name].append(order)
                coords_by_kitchen[name] = (lat, lon)

        # Greedily merge kitchens within max_cluster_radius_km of a seed.
        clusters = []
        consumed = set()
        for seed_name, seed_orders in orders_by_kitchen.items():
            if seed_name in consumed:
                continue
            consumed.add(seed_name)
            member_names = [seed_name]
            member_orders = list(seed_orders)
            seed_lat, seed_lon = coords_by_kitchen[seed_name]
            for candidate, (cand_lat, cand_lon) in coords_by_kitchen.items():
                if candidate in consumed:
                    continue
                if self.haversine(seed_lat, seed_lon, cand_lat, cand_lon) <= max_cluster_radius_km:
                    member_names.append(candidate)
                    member_orders.extend(orders_by_kitchen[candidate])
                    consumed.add(candidate)

            # Centroid = mean of available pickup coordinates, else the seed.
            pickup_lats = []
            pickup_lons = []
            for member in member_orders:
                p_lat, p_lon = self.get_kitchen_location(member)
                if p_lat != 0:
                    pickup_lats.append(p_lat)
                    pickup_lons.append(p_lon)
            if pickup_lats:
                centroid = (sum(pickup_lats) / len(pickup_lats), sum(pickup_lons) / len(pickup_lons))
            else:
                centroid = (seed_lat, seed_lon)

            clusters.append({
                'centroid': centroid,
                'orders': member_orders,
                'kitchen_names': set(member_names),
                'total_orders': len(member_orders)
            })

        # Largest clusters first.
        clusters.sort(key=lambda c: c['total_orders'], reverse=True)
        logger.info(f"Created {len(clusters)} clusters from {len(orders_by_kitchen)} kitchens")
        return clusters

    def _get_kitchen_name(self, order: Dict[str, Any]) -> str:
        """Best-effort kitchen name lookup across known payload keys."""
        for key in ('storename', 'restaurantname', 'kitchenname', 'partnername', 'store_name'):
            value = order.get(key)
            if value:
                return str(value).strip()
        return "Unknown"

View File

@@ -0,0 +1,326 @@
"""
GPS Kalman Filter \u2014 rider-api
A 1D Kalman filter applied independently to latitude and longitude
to smooth noisy GPS coordinates from riders and delivery points.
Why Kalman for GPS?
- GPS readings contain measurement noise (\u00b15\u201315m typical, \u00b150m poor signal)
- Rider location pings can "jump" due to bad signal or device error
- Kalman filter gives an optimal estimate by balancing:
(1) Previous predicted position (process model)
(2) New GPS measurement (observation model)
Design:
- Separate filter instance per rider (stateful \u2014 preserves history)
- `CoordinateKalmanFilter` \u2014 single lat/lon smoother
- `GPSKalmanFilter` \u2014 wraps two CoordinateKalmanFilters (lat + lon)
- `RiderKalmanRegistry` \u2014 manages per-rider filter instances
- `smooth_coordinates()` \u2014 stateless single-shot smoother for delivery coords
Usage:
# Stateless (one-shot, no history \u2014 for delivery coords):
smooth_lat, smooth_lon = smooth_coordinates(raw_lat, raw_lon)
# Stateful (per-rider, preserves motion history):
registry = RiderKalmanRegistry()
lat, lon = registry.update(rider_id=1116, lat=11.0067, lon=76.9558)
"""
import logging
import time
from typing import Dict, Optional, Tuple
logger = logging.getLogger(__name__)
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# CORE 1D KALMAN FILTER
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
class CoordinateKalmanFilter:
    """
    1-D Kalman filter for a single GPS coordinate (latitude or longitude).

    Constant-position random-walk model: between pings the state is assumed
    stationary while its variance grows by Q; each measurement is fused via
    the gain K = P/(P+R).

    Parameters:
        process_noise (Q): expected drift between measurements.
            Higher = trust new measurements more (less smoothing).
        measurement_noise (R): GPS reading uncertainty.
            Higher = trust history more (more smoothing).
    """

    def __init__(
        self,
        process_noise: float = 1e-4,
        measurement_noise: float = 0.01,
        initial_uncertainty: float = 1.0,
    ):
        self.Q = process_noise
        self.R = measurement_noise
        self._x: Optional[float] = None
        self._P: float = initial_uncertainty

    @property
    def initialized(self) -> bool:
        return self._x is not None

    def update(self, measurement: float) -> float:
        """Process one new measurement and return the filtered estimate."""
        if self._x is None:
            # First reading seeds the state directly.
            self._x = measurement
            return self._x
        # Predict: uncertainty grows while position stays put.
        predicted_var = self._P + self.Q
        # Update: fuse prediction and measurement by the Kalman gain.
        gain = predicted_var / (predicted_var + self.R)
        self._x = self._x + gain * (measurement - self._x)
        self._P = (1.0 - gain) * predicted_var
        return self._x

    def reset(self):
        """Forget all state; the next update re-seeds the filter."""
        self._x = None
        self._P = 1.0
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# 2D GPS KALMAN FILTER (lat + lon)
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
class GPSKalmanFilter:
    """
    Smooths a (lat, lon) stream by running two independent 1-D Kalman
    filters, one per axis.
    """

    def __init__(
        self,
        process_noise: float = 1e-4,
        measurement_noise: float = 0.01,
    ):
        self.lat_filter = CoordinateKalmanFilter(process_noise, measurement_noise)
        self.lon_filter = CoordinateKalmanFilter(process_noise, measurement_noise)
        self.last_updated: float = time.time()
        self.update_count: int = 0

    def update(self, lat: float, lon: float) -> Tuple[float, float]:
        """Feed a new GPS reading and get the smoothed (lat, lon)."""
        if not self._is_valid_coord(lat, lon):
            # Bad fix: hold the last estimate if we have one, else echo input.
            if self.lat_filter.initialized:
                return self.lat_filter._x, self.lon_filter._x
            return lat, lon
        filtered = (self.lat_filter.update(lat), self.lon_filter.update(lon))
        self.last_updated = time.time()
        self.update_count += 1
        return filtered

    def get_estimate(self) -> Optional[Tuple[float, float]]:
        """Current smoothed position, or None before the first valid fix."""
        if not self.lat_filter.initialized:
            return None
        return self.lat_filter._x, self.lon_filter._x

    def reset(self):
        """Clear both axis filters and the update counter."""
        self.lat_filter.reset()
        self.lon_filter.reset()
        self.update_count = 0

    @staticmethod
    def _is_valid_coord(lat: float, lon: float) -> bool:
        """Reject out-of-range values and the (0, 0) 'null island' fix."""
        try:
            lat_f, lon_f = float(lat), float(lon)
        except (TypeError, ValueError):
            return False
        if lat_f == 0.0 and lon_f == 0.0:
            return False
        return -90.0 <= lat_f <= 90.0 and -180.0 <= lon_f <= 180.0
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# PER-RIDER FILTER REGISTRY
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
class RiderKalmanRegistry:
    """
    Keeps one GPSKalmanFilter per rider across calls. Filters silent for
    longer than `stale_seconds` (default 30 min) are reset before reuse.
    """

    def __init__(
        self,
        process_noise: float = 1e-4,
        measurement_noise: float = 0.01,
        stale_seconds: float = 1800.0,
    ):
        self._filters: Dict[str, GPSKalmanFilter] = {}
        self._process_noise = process_noise
        self._measurement_noise = measurement_noise
        self._stale_seconds = stale_seconds

    def _get_or_create(self, rider_id) -> GPSKalmanFilter:
        """Fetch the rider's filter (resetting if stale), creating if absent."""
        key = str(rider_id)
        existing = self._filters.get(key)
        if existing is not None:
            # A long silence means the rider may be far away: start fresh.
            if time.time() - existing.last_updated > self._stale_seconds:
                existing.reset()
            return existing
        fresh = GPSKalmanFilter(
            process_noise=self._process_noise,
            measurement_noise=self._measurement_noise,
        )
        self._filters[key] = fresh
        return fresh

    def update(self, rider_id, lat: float, lon: float) -> Tuple[float, float]:
        """Smooth one GPS ping for the given rider."""
        return self._get_or_create(rider_id).update(lat, lon)

    def get_estimate(self, rider_id) -> Optional[Tuple[float, float]]:
        """Last smoothed position for the rider, or None if unknown."""
        tracked = self._filters.get(str(rider_id))
        return tracked.get_estimate() if tracked is not None else None

    def reset_rider(self, rider_id):
        """Forget a single rider's filter history."""
        tracked = self._filters.get(str(rider_id))
        if tracked is not None:
            tracked.reset()

    def clear_all(self):
        """Drop every rider filter."""
        self._filters.clear()
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# GLOBAL REGISTRY (process-level singleton)
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# Process-wide singleton: all request handlers share one registry so
# per-rider smoothing history accumulates across API calls.
_global_registry = RiderKalmanRegistry()
def get_registry() -> RiderKalmanRegistry:
    """Get the process-level rider Kalman filter registry."""
    return _global_registry
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# STATELESS COORDINATE SMOOTHER
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
def smooth_coordinates(
    lat: float,
    lon: float,
    *,
    prior_lat: Optional[float] = None,
    prior_lon: Optional[float] = None,
    process_noise: float = 1e-4,
    measurement_noise: float = 0.01,
) -> Tuple[float, float]:
    """
    Stateless single-shot GPS smoother.

    A throwaway Kalman filter is seeded with the prior (when both parts
    are present, parseable and valid) so the new reading gets blended
    towards it; otherwise the reading passes through a fresh filter.
    """
    kf = GPSKalmanFilter(process_noise=process_noise, measurement_noise=measurement_noise)
    if prior_lat is not None and prior_lon is not None:
        seed_lat = seed_lon = None
        try:
            seed_lat, seed_lon = float(prior_lat), float(prior_lon)
        except (TypeError, ValueError):
            pass
        if seed_lat is not None and GPSKalmanFilter._is_valid_coord(seed_lat, seed_lon):
            kf.update(seed_lat, seed_lon)
    return kf.update(lat, lon)
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# BATCH SMOOTHERS
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
def smooth_rider_locations(riders: list) -> list:
    """
    Apply Kalman smoothing to a list of rider dicts in-place using
    the global per-rider registry (history preserved across calls).

    Reads/writes: latitude, longitude (and currentlat/currentlong if present).
    Adds: _kalman_smoothed = True on each processed rider.
    """
    registry = get_registry()
    for rider in riders:
        try:
            rid = (
                rider.get("userid")
                or rider.get("riderid")
                or rider.get("id")
                or "unknown"
            )
            lat_in = float(rider.get("latitude") or rider.get("currentlat") or 0)
            lon_in = float(rider.get("longitude") or rider.get("currentlong") or 0)
            # (0, 0) means "no GPS fix yet" -- leave the rider untouched.
            if lat_in == 0.0 and lon_in == 0.0:
                continue
            new_lat, new_lon = registry.update(rid, lat_in, lon_in)
            # Serialise back to strings so the Go consumer can unmarshal them.
            lat_str = str(round(new_lat, 8))
            lon_str = str(round(new_lon, 8))
            rider["latitude"] = lat_str
            rider["longitude"] = lon_str
            if "currentlat" in rider:
                rider["currentlat"] = lat_str
            if "currentlong" in rider:
                rider["currentlong"] = lon_str
            rider["_kalman_smoothed"] = True
        except Exception as e:
            logger.debug(f"Kalman rider smoothing skipped: {e}")
    return riders
def smooth_order_coordinates(orders: list) -> list:
    """
    Apply stateless Kalman smoothing to delivery coordinates in a list
    of order dicts. Uses pickup coords as a seed (prior) when available.
    Modifies orders in-place. Returns the same list.
    """
    for order in orders:
        try:
            drop_lat = float(order.get("deliverylat") or order.get("droplat") or 0)
            drop_lon = float(order.get("deliverylong") or order.get("droplon") or 0)
            if not GPSKalmanFilter._is_valid_coord(drop_lat, drop_lon):
                continue
            # Parse the optional pickup seed; any failure disables the prior.
            seed_lat = seed_lon = None
            try:
                raw_plat = order.get("pickuplat")
                raw_plon = order.get("pickuplon") or order.get("pickuplong")
                seed_lat = float(raw_plat) if raw_plat else None
                seed_lon = float(raw_plon) if raw_plon else None
            except (TypeError, ValueError):
                seed_lat = seed_lon = None
            new_lat, new_lon = smooth_coordinates(
                drop_lat, drop_lon,
                prior_lat=seed_lat,
                prior_lon=seed_lon,
            )
            # Cast back to string for Go compatibility (fixes unmarshal error)
            lat_str = str(round(new_lat, 8))
            lon_str = str(round(new_lon, 8))
            order["deliverylat"] = lat_str
            order["deliverylong"] = lon_str
            if "droplat" in order:
                order["droplat"] = lat_str
            if "droplon" in order:
                order["droplon"] = lon_str
            order["_kalman_smoothed"] = True
        except Exception as e:
            logger.debug(f"Kalman order smoothing skipped: {e}")
    return orders

View File

@@ -0,0 +1,158 @@
"""
Realistic ETA Calculator for Delivery Operations
Accounts for:
- City traffic conditions
- Stop time at pickup/delivery
- Navigation time
- Parking/finding address time
- Different speeds for different order types
"""
import logging
from typing import Dict, Any
logger = logging.getLogger(__name__)
class RealisticETACalculator:
    """
    Calculates realistic ETAs accounting for real-world delivery
    conditions: traffic band, pickup/delivery stop time, navigation
    buffers and distance-dependent effective speeds.
    """

    def __init__(self):
        from app.config.dynamic_config import get_config
        cfg = get_config()
        # BASE SPEED (km/h) - driven by the DB configuration.
        base_speed = cfg.get("avg_speed_kmh", 18.0)
        # Traffic-dependent speeds derived from the tuned base speed.
        self.CITY_SPEED_HEAVY_TRAFFIC = base_speed * 0.7   # usually ~12 km/h
        self.CITY_SPEED_MODERATE = base_speed              # usually ~18 km/h
        self.CITY_SPEED_LIGHT = base_speed * 1.2           # usually ~21.6 km/h
        # Fixed time buffers (minutes).
        self.PICKUP_TIME = cfg.get("eta_pickup_time_min", 3.0)
        self.DELIVERY_TIME = cfg.get("eta_delivery_time_min", 4.0)
        self.NAVIGATION_BUFFER = cfg.get("eta_navigation_buffer_min", 1.5)
        # Distance-based speed multipliers: short hops (<2km) are slower
        # (stop/start traffic), long hauls (>8km) may include faster roads.
        self.SHORT_TRIP_FACTOR = cfg.get("eta_short_trip_factor", 0.8)
        self.LONG_TRIP_FACTOR = cfg.get("eta_long_trip_factor", 1.1)

    def calculate_eta(self,
                      distance_km: float,
                      is_first_order: bool = False,
                      order_type: str = "Economy",
                      time_of_day: str = "peak") -> int:
        """
        Calculate realistic ETA in minutes.

        Args:
            distance_km: Distance to travel in kilometers.
            is_first_order: If True, includes pickup time.
            order_type: "Economy", "Premium", or "Risky" (currently unused).
            time_of_day: "peak", "normal", or "light" traffic.

        Returns:
            ETA in minutes, rounded up for safety (0 for non-positive distance).
        """
        if distance_km <= 0:
            return 0
        # Speed by traffic band; anything unknown falls back to moderate.
        band_speeds = {
            "peak": self.CITY_SPEED_HEAVY_TRAFFIC,
            "light": self.CITY_SPEED_LIGHT,
        }
        base_speed = band_speeds.get(time_of_day, self.CITY_SPEED_MODERATE)
        # Distance-dependent speed adjustment.
        if distance_km < 2.0:
            effective_speed = base_speed * self.SHORT_TRIP_FACTOR
        elif distance_km > 8.0:
            effective_speed = base_speed * self.LONG_TRIP_FACTOR
        else:
            effective_speed = base_speed
        # Pure travel time in minutes.
        total_time = (distance_km / effective_speed) * 60
        if is_first_order:
            # Pickup stop happens only on the first leg of a sequence.
            total_time += self.PICKUP_TIME
        # Hand-over time applies to every drop.
        total_time += self.DELIVERY_TIME
        if distance_km > 3.0:
            # Extra time to navigate / find the address on longer legs.
            total_time += self.NAVIGATION_BUFFER
        # Round up to the next minute: riders prefer arriving early to late.
        return int(total_time) + 1

    def calculate_batch_eta(self, orders: list) -> list:
        """
        Calculate ETAs for a batch of orders in sequence.

        Args:
            orders: List of order dicts with 'previouskms' and 'step' fields.

        Returns:
            Same list with updated 'eta' fields (stringified minutes).
        """
        for order in orders:
            leg_km = float(order.get('previouskms', 0))
            seq_step = order.get('step', 1)
            eta_minutes = self.calculate_eta(
                distance_km=leg_km,
                is_first_order=(seq_step == 1),  # first order includes pickup time
                order_type=order.get('ordertype', 'Economy'),
                time_of_day="normal"  # Default to moderate traffic
            )
            order['eta'] = str(eta_minutes)
            order['eta_realistic'] = True  # Flag to indicate realistic calculation
        return orders
def get_time_of_day_category(hour=None) -> str:
    """
    Classify traffic conditions for an hour of the day.

    Args:
        hour: Hour in 0-23. Defaults to the current local hour, which
            preserves the original zero-argument behavior; passing an
            explicit hour makes the function deterministic (testable).

    Returns:
        "peak"   -- rush windows: 8-10 AM, 12-2 PM, 5-8 PM
        "light"  -- late night / early morning (before 7 or from 22)
        "normal" -- everything else
    """
    if hour is None:
        from datetime import datetime
        hour = datetime.now().hour
    # Peak hours: 8-10 AM, 12-2 PM, 5-8 PM
    if (8 <= hour < 10) or (12 <= hour < 14) or (17 <= hour < 20):
        return "peak"
    # Light traffic: Late night/early morning
    if hour < 7 or hour >= 22:
        return "light"
    return "normal"

View File

@@ -0,0 +1,425 @@
"""Production-grade route optimization using Google OR-Tools.
ALGORITHM: TSP / VRP with Google OR-Tools
- Industry-standard solver (same as used by major logistics companies)
- Constraint-based optimization
- Handles time windows (future proofing)
- Guaranteed optimal or near-optimal solution
FEATURES:
- Automatic outlier detection and coordinate correction
- Hybrid distance calculation (Google Maps + Haversine fallback)
- Robust error handling for invalid inputs
"""
import math
import os
import logging
import asyncio
from typing import Dict, Any, List as _List, Optional, Tuple, Union
from datetime import datetime, timedelta
import httpx
from app.services.routing.kalman_filter import smooth_order_coordinates
import numpy as np
from app.core.arrow_utils import calculate_haversine_matrix_vectorized
from app.config.dynamic_config import get_config
# OR-Tools is an optional dependency: when it is missing, the optimizer
# degrades to the greedy nearest-neighbour solver (RouteOptimizer._solve_greedy).
try:
    from ortools.constraint_solver import routing_enums_pb2
    from ortools.constraint_solver import pywrapcp
    ORTOOLS_AVAILABLE = True
except ImportError:
    ORTOOLS_AVAILABLE = False
    logging.warning("Google OR-Tools not found. Falling back to simple greedy solver.")
logger = logging.getLogger(__name__)
class RouteOptimizer:
    """Route optimization using Google OR-Tools (Async).

    Pipeline (see optimize_provider_payload):
      1. Kalman-smooth delivery coordinates.
      2. Repair outlier/typo coordinates against the order centroid.
      3. Build a road-factor-scaled haversine distance matrix.
      4. Penalize legs the ID3 behavior model flags as risky.
      5. Solve an open TSP (OR-Tools, with a greedy fallback) and annotate
         each order with step, distance, order-type and ETA metadata.
    """

    def __init__(self):
        # Mean Earth radius used by haversine_distance.
        self.earth_radius = 6371  # Earth radius in km
        _cfg = get_config()
        # Initialize Realistic ETA Calculator (imported lazily, presumably
        # to avoid an import cycle -- TODO confirm).
        from app.services.routing.realistic_eta_calculator import RealisticETACalculator, get_time_of_day_category
        self.eta_calculator = RealisticETACalculator()
        self.get_traffic_condition = get_time_of_day_category
        # Speed settings (ML-tuned via DynamicConfig)
        self.avg_speed_kmh = float(_cfg.get("avg_speed_kmh"))
        # Road factor (haversine -> road distance multiplier, ML-tuned)
        self.road_factor = float(_cfg.get("road_factor"))
        # Google Maps API settings: batch distance lookups only run when a key is set.
        self.google_maps_api_key = os.getenv("GOOGLE_MAPS_API_KEY", "")
        self.use_google_maps = bool(self.google_maps_api_key)
        # Solver time limit (ML-tuned)
        self.search_time_limit_seconds = int(_cfg.get("search_time_limit_seconds"))
        # Initialize ID3 Behavior Analyzer (lazy import, see note above).
        from app.services.ml.behavior_analyzer import get_analyzer
        self.behavior_analyzer = get_analyzer()

    def haversine_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Calculate great circle distance between two points on Earth (in km).

        Returns 0.0 on any parse/math error instead of raising.
        """
        try:
            lat1, lon1, lat2, lon2 = map(math.radians, [float(lat1), float(lon1), float(lat2), float(lon2)])
            dlat = lat2 - lat1
            dlon = lon2 - lon1
            a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
            c = 2 * math.asin(math.sqrt(a))
            return self.earth_radius * c
        except Exception:
            return 0.0

    async def _get_google_maps_distances_batch(self, origin_lat: float, origin_lon: float,
                                               destinations: _List[tuple]) -> Dict[tuple, float]:
        """Get road distances for multiple destinations from Google Maps API. (Async, Parallel)

        Destinations are chunked 25 per request (Distance Matrix API limit)
        and the chunks are fetched concurrently. Failed chunks are logged
        and skipped, so the returned mapping may be partial.

        NOTE(review): the values stored are dicts
        {'distance': km, 'duration': minutes-or-None}, not floats as the
        return annotation suggests -- confirm the intended type.
        """
        if not self.use_google_maps or not destinations:
            return {}
        results = {}
        batch_size = 25
        chunks = [destinations[i:i + batch_size] for i in range(0, len(destinations), batch_size)]

        async def process_batch(batch):
            # One Distance Matrix request covering this chunk of destinations.
            batch_result = {}
            try:
                dest_str = "|".join([f"{lat},{lon}" for lat, lon in batch])
                url = "https://maps.googleapis.com/maps/api/distancematrix/json"
                params = {
                    "origins": f"{origin_lat},{origin_lon}",
                    "destinations": dest_str,
                    "key": self.google_maps_api_key,
                    "units": "metric"
                }
                async with httpx.AsyncClient(timeout=10.0) as client:
                    response = await client.get(url, params=params)
                    response.raise_for_status()
                    data = response.json()
                if data.get("status") == "OK":
                    # Single origin -> only the first row is relevant.
                    rows = data.get("rows", [])
                    if rows:
                        elements = rows[0].get("elements", [])
                        for idx, element in enumerate(elements):
                            if idx < len(batch):
                                dest_coord = batch[idx]
                                if element.get("status") == "OK":
                                    dist = element.get("distance", {}).get("value")
                                    dur = element.get("duration", {}).get("value")
                                    if dist is not None:
                                        # Convert metres -> km and seconds -> minutes.
                                        batch_result[dest_coord] = {
                                            'distance': dist / 1000.0,
                                            'duration': dur / 60.0 if dur else None
                                        }
            except Exception as e:
                logger.warning(f"Google Maps batch call failed: {e}")
            return batch_result

        batch_results_list = await asyncio.gather(*[process_batch(chunk) for chunk in chunks])
        for res in batch_results_list:
            results.update(res)
        return results

    def _solve_tsp_ortools(self, locations: _List[Tuple[float, float]], dist_matrix: _List[_List[float]]) -> _List[int]:
        """Solve TSP using Google OR-Tools.

        Returns node indices in visiting order, always starting at the
        depot (index 0). Falls back to greedy nearest-neighbour when
        OR-Tools is unavailable or no solution is found in time.
        """
        if not ORTOOLS_AVAILABLE:
            # Fallback to simple Greedy NN if OR-Tools not installed
            return self._solve_greedy(locations, dist_matrix)
        if not locations or len(locations) <= 1:
            return [0]
        manager = pywrapcp.RoutingIndexManager(len(locations), 1, 0)  # num_nodes, num_vehicles, depot
        routing = pywrapcp.RoutingModel(manager)

        def distance_callback(from_index, to_index):
            from_node = manager.IndexToNode(from_index)
            to_node = manager.IndexToNode(to_index)
            # Open TSP: Returning to the depot (index 0) has zero cost.
            # This ensures the solver optimizes for the path from start to last drop-off
            # rather than a closed circuit that might be reversed if the rider is on the "far" side.
            if to_node == 0:
                return 0
            # OR-Tools works with integers, so we scale by 1000 (meters)
            val = dist_matrix[from_node][to_node]
            return int(val * 1000)

        transit_callback_index = routing.RegisterTransitCallback(distance_callback)
        routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
        search_parameters = pywrapcp.DefaultRoutingSearchParameters()
        search_parameters.first_solution_strategy = (
            routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC
        )
        search_parameters.local_search_metaheuristic = (
            routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH
        )
        search_parameters.time_limit.seconds = self.search_time_limit_seconds
        solution = routing.SolveWithParameters(search_parameters)
        if solution:
            # Walk the solved route from the start node to the end sentinel.
            index = routing.Start(0)
            route = []
            while not routing.IsEnd(index):
                route.append(manager.IndexToNode(index))
                index = solution.Value(routing.NextVar(index))
            return route
        else:
            # No solution within the time limit: degrade gracefully.
            return self._solve_greedy(locations, dist_matrix)

    def _solve_greedy(self, locations, dist_matrix):
        """Simple Greedy Nearest Neighbor fallback (always starts at node 0)."""
        unvisited = set(range(1, len(locations)))
        curr = 0
        route = [0]
        while unvisited:
            nearest = min(unvisited, key=lambda x: dist_matrix[curr][x])
            route.append(nearest)
            unvisited.remove(nearest)
            curr = nearest
        return route

    def _cleanup_coords(self, lat: Any, lon: Any, ref_lat: float, ref_lon: float) -> Tuple[float, float]:
        """
        Heuristic to fix bad coordinates.
        1. Fixes lat==lon typo.
        2. Fixes missing negative signs if needed (not needed for India).
        3. Projects outlier > 500km to reference (centroid).

        Returns (0.0, 0.0) when inputs do not parse as floats; passes
        zeroed components through untouched (treated as "missing").
        """
        try:
            lat = float(lat)
            lon = float(lon)
        except:
            return 0.0, 0.0
        if lat == 0 or lon == 0:
            return lat, lon
        # 1. Check strict equality (typo)
        if abs(lat - lon) < 0.0001:
            if ref_lon != 0:
                # If reference is available, assume lat is correct and fix lon
                # (Common error: copy lat to lon field)
                return lat, ref_lon
        # 2. Check general outlier (e.g. 500km away)
        if ref_lat != 0 and ref_lon != 0:
            dist = self.haversine_distance(lat, lon, ref_lat, ref_lon)
            if dist > 500:
                # Returning reference prevents map explosion
                return ref_lat, ref_lon
        return lat, lon

    async def optimize_provider_payload(self, orders: _List[Dict[str, Any]], start_coords: Optional[tuple] = None) -> _List[Dict[str, Any]]:
        """Optimize delivery route and add step metrics (OR-Tools).

        Args:
            orders: Provider order dicts (reads deliverylat/long,
                pickuplat/lon(g), kms and date fields; rewrites
                coordinates and metrics as strings for the Go consumer).
            start_coords: Optional (lat, lon) rider start point; falls
                back to the first pickup in the orders, then the
                delivery centroid.

        Returns:
            A new list of order dicts in visiting order with step,
            previouskms, cumulativekms, actualkms, kms, ordertype and
            eta fields populated.
        """
        if not orders:
            return []
        # Deep copy (one level: each order dict is copied)
        orders = [dict(order) for order in orders]
        # 0. KALMAN FILTER - Smooth noisy delivery GPS coordinates
        orders = smooth_order_coordinates(orders)
        # Helpers
        def _to_float(v: Any) -> float:
            # Lenient float parse; anything unparseable counts as 0.0.
            try: return float(v)
            except: return 0.0
        def _normalize_dt(val: Any) -> str:
            # Normalize known datetime formats to "YYYY-mm-dd HH:MM:SS";
            # unknown formats pass through unchanged.
            if val in (None, "", 0): return ""
            s = str(val).strip()
            for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%d %H:%M:%S"):
                try: return datetime.strptime(s, fmt).strftime("%Y-%m-%d %H:%M:%S")
                except: pass
            return s
        # 1. PREPARE COORDINATES & CENTROID
        valid_lats = []
        valid_lons = []
        for o in orders:
            lat = _to_float(o.get("deliverylat"))
            lon = _to_float(o.get("deliverylong"))
            if lat != 0 and lon != 0:
                valid_lats.append(lat)
                valid_lons.append(lon)
        centroid_lat = sum(valid_lats)/len(valid_lats) if valid_lats else 0.0
        centroid_lon = sum(valid_lons)/len(valid_lons) if valid_lons else 0.0
        # 2. DETERMINE START LOCATION (With Fix)
        start_lat, start_lon = 0.0, 0.0
        # Try explicit start_coords first
        if start_coords and len(start_coords) == 2:
            try:
                start_lat, start_lon = float(start_coords[0]), float(start_coords[1])
            except: pass
        # Fallback to pickup location in orders
        # NOTE(review): only start_lat is compared to 0 here and below, so a
        # start exactly on the equator would be treated as unset -- confirm.
        if start_lat == 0:
            for o in orders:
                plat = _to_float(o.get("pickuplat"))
                plon = _to_float(o.get("pickuplon") or o.get("pickuplong"))
                if plat != 0:
                    start_lat, start_lon = plat, plon
                    break
        # Fallback to centroid
        if start_lat == 0:
            start_lat, start_lon = centroid_lat, centroid_lon
        # FIX BAD START COORDINATES
        start_lat, start_lon = self._cleanup_coords(start_lat, start_lon, centroid_lat, centroid_lon)
        # 3. BUILD LOCATIONS LIST FOR SOLVER
        # Index 0 is Start (Depot), 1..N are orders
        locations = [(start_lat, start_lon)]
        points_map = []  # Maps solver index 1..N back to original order index
        for idx, order in enumerate(orders):
            lat = _to_float(order.get("deliverylat"))
            lon = _to_float(order.get("deliverylong"))
            # Project coordinates and ensure they are strings for Go compatibility
            lat, lon = self._cleanup_coords(lat, lon, centroid_lat, centroid_lon)
            order_str_lat, order_str_lon = str(lat), str(lon)
            order["deliverylat"] = order_str_lat
            order["deliverylong"] = order_str_lon
            if "droplat" in order: order["droplat"] = order_str_lat
            if "droplon" in order: order["droplon"] = order_str_lon
            locations.append((lat, lon))
            points_map.append(idx)
        # 4. COMPUTE DISTANCE MATRIX (Vectorized with Arrow/NumPy)
        # road_factor is now ML-tuned (was hardcoded 1.3)
        lats = np.array([loc[0] for loc in locations])
        lons = np.array([loc[1] for loc in locations])
        dist_matrix = calculate_haversine_matrix_vectorized(lats, lons) * self.road_factor
        # 5. RISK-AWARE COST MATRIX (ID3 INTELLIGENCE)
        # Apply Risk Penalties to the matrix before solving
        cost_matrix = dist_matrix.copy()
        traffic = self.get_traffic_condition()
        num_locs = len(locations)
        risk_penalty_count = 0
        # NOTE(review): O(N^2) predict() calls -- fine for small routes,
        # may be slow for large N.
        for i in range(num_locs):
            for j in range(num_locs):
                if i == j: continue
                # Predict success risk for this specific leg
                dist_km = dist_matrix[i][j]
                prediction = self.behavior_analyzer.predict(
                    distance_km=dist_km,
                    timestamp_or_band=traffic,
                )
                if prediction.get("label") == "RISK":  # High Risk predicted by ID3
                    # Add 25% penalty to distance to discourage this leg
                    cost_matrix[i][j] *= 1.25
                    risk_penalty_count += 1
        if risk_penalty_count > 0:
            logger.info(f"ID3 Intelligence: Applied {risk_penalty_count} Risk Penalties to optimize for delivery safety.")
        # 6. SOLVE TSP (solved on the risk-penalized cost matrix; reported
        # distances below come from the unpenalized dist_matrix)
        route_indices = self._solve_tsp_ortools(locations, cost_matrix)
        # Remove 0 (depot)
        optimized_order_indices = [i for i in route_indices if i != 0]
        # 6. BUILD RESULT
        # NOTE(review): section numbering repeats "6." (should be 7+).
        result = []
        cumulative_dist = 0.0
        # Track previous location (starts at 0)
        prev_idx = 0
        for step_num, solver_idx in enumerate(optimized_order_indices, start=1):
            order_idx = points_map[solver_idx - 1]
            order = dict(orders[order_idx])
            # Clean fields
            for k in ("step", "previouskms", "cumulativekms", "eta", "actualkms", "ordertype"):
                order.pop(k, None)
            # Normalize dates
            for field in ["orderdate", "deliverytime", "created"]:
                if field in order: order[field] = _normalize_dt(order.get(field))
            # Distance for this leg
            step_dist = dist_matrix[prev_idx][solver_idx]
            cumulative_dist += step_dist
            # Metadata (Step metrics are integers in the Go struct)
            order["step"] = int(step_num)
            order["previouskms"] = int(0 if step_num == 1 else int(round(step_dist)))
            order["cumulativekms"] = int(round(cumulative_dist))
            # 7. METRICS (Calculate actual distance, prioritize provider input)
            plat, plon = start_lat, start_lon
            if plat == 0: plat, plon = _to_float(order.get("pickuplat")), _to_float(order.get("pickuplon") or order.get("pickuplong"))
            dlat, dlon = locations[solver_idx]
            # Baseline: Haversine * 1.3 (estimated road factor)
            # NOTE(review): 1.3 is hardcoded here, unlike the ML-tuned
            # self.road_factor used for the solver matrix -- confirm intended.
            true_dist = self.haversine_distance(plat, plon, dlat, dlon) * 1.3
            provided_kms = order.get("kms")
            if provided_kms not in (None, "", 0, "0"):
                try:
                    # If provider gave us a distance, respect it as the 'actual' distance
                    true_dist = float(provided_kms)
                except:
                    pass
            order["actualkms"] = str(round(true_dist, 2))
            order["kms"] = str(provided_kms) if provided_kms else str(int(round(true_dist)))
            # Financial metrics - keeping as numbers for calculations
            if "rider_charge" in order: order["rider_charge"] = round(float(order["rider_charge"]), 2)
            if "profit" in order: order["profit"] = round(float(order["profit"]), 2)
            # Type & ETA (ordertype by distance band: <=5km Economy,
            # <=12km Premium, otherwise Risky)
            order["ordertype"] = "Economy" if true_dist <= 5 else "Premium" if true_dist <= 12 else "Risky"
            traffic = self.get_traffic_condition()
            eta = self.eta_calculator.calculate_eta(
                distance_km=step_dist,
                is_first_order=(step_num == 1),
                order_type=order["ordertype"],
                time_of_day=traffic
            )
            order["eta"] = str(eta)
            result.append(order)
            prev_idx = solver_idx
        return result
def optimize_route(orders: _List[Dict[str, Any]]) -> _List[Dict[str, Any]]:
    """Synchronous wrapper around RouteOptimizer.optimize_provider_payload.

    Args:
        orders: Provider order dicts to optimize.

    Returns:
        The optimized, annotated order list. Returns [] for empty input,
        and [] (with a warning) when called from inside a running event
        loop, where blocking on the coroutine would deadlock.
    """
    if not orders:
        # Fast path: nothing to optimize; avoids constructing the
        # config/analyzer-backed optimizer entirely.
        return []
    try:
        # Detect a live loop without the deprecated get_event_loop().
        asyncio.get_running_loop()
        running = True
    except RuntimeError:
        running = False
    if running:
        # Preserves the original fallback, but no longer silently:
        # async callers should await optimize_provider_payload directly.
        logging.getLogger(__name__).warning(
            "optimize_route called from a running event loop; returning []"
        )
        return []
    optimizer = RouteOptimizer()
    # asyncio.run creates and tears down a fresh loop, replacing the
    # deprecated get_event_loop()/run_until_complete dance.
    return asyncio.run(optimizer.optimize_provider_payload(orders))

View File

@@ -0,0 +1,196 @@
import logging
from typing import List, Dict, Any, Optional
logger = logging.getLogger(__name__)
class ZoneService:
    """
    Service to classify orders and riders into geographic zones.
    Defaulting to Coimbatore logic as per user context.

    Zones are compass quadrants ("North", "South East", ...) around a
    fixed city centre, with a small "Central" dead-band around it.
    """
    # Approximate Center of Coimbatore (Gandhipuram/Bus Stand area)
    CENTER_LAT = 11.0168
    CENTER_LON = 76.9558

    def __init__(self):
        pass

    def determine_zone(self, lat: float, lon: float, pincode: Optional[str] = None) -> str:
        """
        Determine the zone (North, South, East, West, Central or a
        combination such as "North East") based on coordinates.

        Args:
            lat/lon: Delivery coordinates; a zero component yields "Unknown".
            pincode: Currently unused; reserved for pincode-based zoning.

        Returns:
            Zone name string.
        """
        if lat == 0 or lon == 0:
            return "Unknown"
        lat_diff = lat - self.CENTER_LAT
        lon_diff = lon - self.CENTER_LON
        # Simple Quadrant Logic:
        # North: +Lat, South: -Lat, East: +Lon, West: -Lon.
        # Define a small central buffer (0.01 degrees ~ 1.1km)
        buffer = 0.010
        is_north = lat_diff > buffer
        is_south = lat_diff < -buffer
        is_east = lon_diff > buffer
        is_west = lon_diff < -buffer
        zone_parts = []
        if is_north: zone_parts.append("North")
        elif is_south: zone_parts.append("South")
        if is_east: zone_parts.append("East")
        elif is_west: zone_parts.append("West")
        if not zone_parts:
            return "Central"
        return " ".join(zone_parts)

    def group_by_zones(self, flat_orders: List[Dict[str, Any]], unassigned_orders: List[Dict[str, Any]] = None, fuel_charge: float = 2.5, base_pay: float = 30.0) -> Dict[str, Any]:
        """
        Group a flat list of optimized orders into Zones -> Riders -> Orders.
        Calculates profit per order and per zone (orders are annotated
        in-place with zone_name, rider_charge, profit and ordertype).

        Args:
            flat_orders: Orders already assigned to riders.
            unassigned_orders: Orders without a rider (zoned separately).
            fuel_charge: Per-km component of the rider payment.
            base_pay: Fixed per-order component of the rider payment.

        Returns:
            {"detailed_zones": [...], "zone_analysis": [...]} where
            zone_analysis is a flat per-zone metrics summary.
        """
        zones_map = {}  # zone name -> aggregation bucket
        unassigned_orders = unassigned_orders or []
        # Merge both so every order gets zoned; the flag remembers whether
        # the order belongs under a rider or in the unassigned list.
        all_to_process = []
        for o in flat_orders:
            all_to_process.append((o, True))
        for o in unassigned_orders:
            all_to_process.append((o, False))
        for order, is_assigned in all_to_process:
            # 1. Extract Coords (prefer the delivery location: where the customer is)
            try:
                lat = float(order.get("deliverylat") or order.get("droplat") or 0)
                lon = float(order.get("deliverylong") or order.get("droplon") or 0)
                pincode = str(order.get("deliveryzip") or "")
            except (TypeError, ValueError):
                # Unparseable coordinates -> zone becomes "Unknown" below.
                lat, lon, pincode = 0, 0, ""
            # 2. Get Zone
            zone_name = self.determine_zone(lat, lon, pincode)
            order["zone_name"] = zone_name
            # 3. Initialize Zone Bucket
            if zone_name not in zones_map:
                zones_map[zone_name] = {
                    "riders_map": {},
                    "total_orders": 0,
                    "assigned_orders": 0,
                    "unassigned_orders": [],
                    "total_kms": 0.0,
                    "total_profit": 0.0
                }
            # 4. Add to Rider bucket within Zone
            rider_id = order.get("userid") or order.get("_id")
            # Track kms and profit for this zone; any unparseable numeric
            # field skips the financials for this order only.
            try:
                # 'actualkms' is preferred for delivery distance
                dist = float(order.get("actualkms", order.get("previouskms", 0)))
                zones_map[zone_name]["total_kms"] += dist
                # Individual charge for this order: Fixed Base + Variable Distance
                order_amount = float(order.get("orderamount") or order.get("deliveryamount") or 0)
                rider_payment = base_pay + (dist * fuel_charge)
                profit = order_amount - rider_payment
                order["rider_charge"] = round(rider_payment, 2)
                order["profit"] = round(profit, 2)
                # Profit-based classification (Order Type)
                if profit <= 0:
                    order["ordertype"] = "Loss"
                elif profit <= 5:
                    order["ordertype"] = "Risky"
                elif profit <= 10:
                    order["ordertype"] = "Economy"
                else:
                    order["ordertype"] = "Premium"
                zones_map[zone_name]["total_profit"] += profit
            except (TypeError, ValueError):
                pass
            # If strictly unassigned order (no rider), put in unassigned
            if not is_assigned:
                zones_map[zone_name]["unassigned_orders"].append(order)
            else:
                str_rid = str(rider_id)
                if str_rid not in zones_map[zone_name]["riders_map"]:
                    zones_map[zone_name]["riders_map"][str_rid] = {
                        "rider_details": {
                            "id": str_rid,
                            "name": order.get("username", "Unknown")
                        },
                        "orders": []
                    }
                zones_map[zone_name]["riders_map"][str_rid]["orders"].append(order)
                zones_map[zone_name]["assigned_orders"] += 1
            zones_map[zone_name]["total_orders"] += 1
        # 5. Restructure for API Response
        output_zones = []
        zone_metrics = []
        sorted_zone_names = sorted(zones_map.keys())
        for z_name in sorted_zone_names:
            z_data = zones_map[z_name]
            # Flatten riders map
            riders_list = []
            for r_id, r_data in z_data["riders_map"].items():
                riders_list.append({
                    "rider_id": r_data["rider_details"]["id"],
                    "rider_name": r_data["rider_details"]["name"],
                    "orders_count": len(r_data["orders"]),
                    "orders": r_data["orders"]
                })
            # Create the flat metric summary
            metrics = {
                "zone_name": z_name,
                "total_orders": z_data["total_orders"],
                "assigned_orders": z_data["assigned_orders"],
                "unassigned_orders_count": len(z_data["unassigned_orders"]),
                "active_riders_count": len(riders_list),
                "total_delivery_kms": round(z_data["total_kms"], 2),
                "total_profit": round(z_data["total_profit"], 2)
            }
            zone_metrics.append(metrics)
            # Create the detailed zone object with flattened metrics
            zone_obj = {
                "zone_name": z_name,
                "total_orders": metrics["total_orders"],
                "active_riders_count": metrics["active_riders_count"],
                "assigned_orders": metrics["assigned_orders"],
                "unassigned_orders_count": metrics["unassigned_orders_count"],
                "total_delivery_kms": metrics["total_delivery_kms"],
                "total_profit": metrics["total_profit"],
                "riders": riders_list,
                "unassigned_orders": z_data["unassigned_orders"]
            }
            output_zones.append(zone_obj)
        return {
            "detailed_zones": output_zones,
            "zone_analysis": zone_metrics
        }

File diff suppressed because it is too large Load Diff

36
docker-compose.yml Normal file
View File

@@ -0,0 +1,36 @@
version: "3.9"
networks:
frontend:
external: true
services:
routes_api:
build:
context: .
dockerfile: Dockerfile
image: routes-api:latest
container_name: routes_api
restart: unless-stopped
environment:
- UVICORN_WORKERS=2
- REDIS_URL=redis://:${REDIS_PASSWORD}@routes_redis:6379/0
# Optional: Set cache TTL in seconds (default: 300 = 5 min, 86400 = 24h)
# Uncomment and set in .env file: REDIS_CACHE_TTL_SECONDS=86400
# - REDIS_CACHE_TTL_SECONDS=${REDIS_CACHE_TTL_SECONDS}
# Google Maps API key for accurate road distance calculation (actualkms)
# Set in .env file: GOOGLE_MAPS_API_KEY=your_api_key_here
- GOOGLE_MAPS_API_KEY=${GOOGLE_MAPS_API_KEY}
labels:
- traefik.enable=true
- traefik.http.routers.routes_api.rule=Host(`routes.workolik.com`)
- traefik.http.routers.routes_api.entrypoints=websecure
- traefik.http.routers.routes_api.tls.certresolver=letsencrypt
- traefik.http.services.routes_api.loadbalancer.server.port=8002
- traefik.docker.network=frontend
volumes:
- ./ml_data:/app/ml_data
- ./rider_history.pkl:/app/rider_history.pkl
- ./rider_active_state.pkl:/app/rider_active_state.pkl
networks:
- frontend

11
docker-entrypoint.sh Normal file
View File

@@ -0,0 +1,11 @@
#!/bin/sh
# Container entrypoint: launches the FastAPI app under uvicorn on :8002.
set -e
# Get number of workers from environment or default to 1
WORKERS=${UVICORN_WORKERS:-1}
echo "Starting Route Optimization API with ${WORKERS} worker(s)..."
# Start uvicorn; `exec` replaces the shell so signals (SIGTERM from
# docker stop) reach uvicorn directly for a clean shutdown.
exec uvicorn app.main:app --host 0.0.0.0 --port 8002 --workers ${WORKERS}

18
requirements.txt Normal file
View File

@@ -0,0 +1,18 @@
fastapi
uvicorn
python-dotenv
requests
numpy
pandas
scikit-learn
scipy
openpyxl
xlsxwriter
httpx
ortools
pyarrow
# ML Hypertuning
xgboost>=2.0.0
optuna>=3.5.0
sqlalchemy>=2.0.0
apscheduler>=3.10.0

173
run_simulation.py Normal file
View File

@@ -0,0 +1,173 @@
import json
import logging
import asyncio
from app.services.core.assignment_service import AssignmentService
from app.services.routing.route_optimizer import RouteOptimizer
from app.core.arrow_utils import save_optimized_route_parquet
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Load Environment Variables (best-effort: the simulation still runs
# without python-dotenv, relying on whatever is already in the OS env)
try:
    from dotenv import load_dotenv
    load_dotenv()
    print("✅ Loaded .env file")
except ImportError:
    print("⚠️ python-dotenv not installed, skipping .env load")
async def run_simulation():
    """Run an offline assignment simulation against route.json.

    Pipeline: load orders from route.json (stripping any previous
    assignment fields so they look freshly created), build a mocked rider
    fleet at known Coimbatore starting points, run the assignment service,
    optimize each rider's route concurrently, group results by zone, and
    persist everything to output.json (plus an optional Parquet export).
    """
    print("🚀 Starting Logic Simulation (High Efficiency Mode + K-wMeans)...")
    # 1. Load Orders (using route.json as source)
    try:
        with open('route.json', 'r', encoding='utf-8') as f:
            route_data = json.load(f)
    except FileNotFoundError:
        print("❌ route.json not found.")
        return
    raw_orders = route_data.get('details', [])
    # Strip assignment data to simulate fresh orders
    clean_orders = []
    for o in raw_orders:
        o_copy = o.copy()
        for key in ('userid', 'step', 'cumulativekms', 'eta'):
            o_copy.pop(key, None)
        clean_orders.append(o_copy)
    print(f"📦 Loaded {len(clean_orders)} orders.")
    # 2. Mock Riders — the full active rider fleet (10 riders)
    rider_ids = [753, 883, 1114, 1271, 1116, 1096, 897, 950, 1272, 1133]
    # Rider Starting Locations (Based on "Mostly Available Location")
    # Coordinates approximated for Coimbatore areas
    rider_locations = {
        1116: (11.0067, 76.9558),  # VIVEK ANANDAN: RS PURAM
        1096: (11.0450, 76.9000),  # NARAYANASAMY: VADAVALI
        897: (11.0430, 76.9380),   # VARUN EDWARD: KAVUNDAMPALAYAM
        950: (11.0330, 76.9800),   # JAYASABESH: GANAPATHY
        1114: (11.0450, 77.0000),  # TAMILAZHAGAN: GANDHIMA NAGAR
        883: (11.0200, 77.0000),   # RAJAN: PEELAMEDU
        1272: (10.9950, 77.0000),  # MUTHURAJA: RAMANATHAPURAM
        753: (11.0000, 77.0300),   # MANIKANDAN: SINGANALLUR
        1133: (11.0067, 76.9558),  # THATCHINAMOORTHI: RS PURAM (Covering Kavundampalayam to Kovaipudur)
        1271: (11.0067, 76.9558)   # Legacy ID for Thatchinamoorthi
    }
    riders = []
    for rid in rider_ids:
        # Default to a central point when the rider has no known home area.
        lat, lon = rider_locations.get(rid, (11.0168, 76.9558))
        riders.append({
            "userid": rid,
            "status": "idle",
            "onduty": 1,
            "latitude": str(lat),
            "longitude": str(lon)
        })
    # 3. Run Assignment
    assignment_service = AssignmentService()
    try:
        assignments, unassigned_orders = assignment_service.assign_orders(clean_orders, riders)
    except Exception as e:
        # Best-effort simulation: report the failure with a traceback and bail.
        print(f"❌ Error during assignment: {e}")
        import traceback
        traceback.print_exc()
        return
    # 4. Generate Output (Mirroring API Logic)
    optimizer = RouteOptimizer()
    output_details = []
    distribution = {}
    assigned_count = 0
    # Prepare one route-optimization coroutine per rider with orders,
    # then run them all concurrently.
    tasks = []
    task_rids = []
    for rid, orders in assignments.items():
        if not orders:
            continue
        distribution[rid] = len(orders)
        assigned_count += len(orders)
        mock_rider = next((r for r in riders if r["userid"] == rid), None)
        start_coords = None
        if mock_rider:
            start_coords = (float(mock_rider['latitude']), float(mock_rider['longitude']))
        tasks.append(optimizer.optimize_provider_payload(orders, start_coords=start_coords))
        task_rids.append(rid)
    if tasks:
        results = await asyncio.gather(*tasks)
        for rid, optimized_route in zip(task_rids, results):
            mock_rider = next((r for r in riders if r["userid"] == rid), {})
            # NOTE(review): mocked riders carry no username/contactno keys,
            # so these default to "" — confirm against real rider payloads.
            r_name = mock_rider.get("username", "")
            r_contact = mock_rider.get("contactno", "")
            total_kms = 0.0
            if optimized_route:
                try:
                    # Total distance ridden is the largest cumulative reading.
                    total_kms = max(
                        float(o.get("cumulativekms", 0)) for o in optimized_route
                    )
                except (TypeError, ValueError):
                    # Fallback when cumulative data is missing/non-numeric:
                    # sum the per-leg distances instead.
                    total_kms = sum(
                        float(o.get("actualkms", o.get("kms", 0))) for o in optimized_route
                    )
            # Stamp rider identity and totals onto every order in the route.
            for o in optimized_route:
                o['userid'] = rid
                o['username'] = r_name
                o['rider'] = r_name
                o['ridercontactno'] = r_contact
                o['riderkms'] = str(round(total_kms, 2))
                output_details.append(o)
    # 5. Zone Processing
    fuel_charge = 2.5
    base_pay = 30.0
    from app.services.routing.zone_service import ZoneService
    zone_service = ZoneService()
    zone_data = zone_service.group_by_zones(output_details, unassigned_orders, fuel_charge=fuel_charge, base_pay=base_pay)
    # 6. Save output.json
    output_data = {
        "message": "Success",
        "status": True,
        "details": output_details,
        "zone_summary": zone_data["zone_analysis"],
        "zones": zone_data["detailed_zones"],
        "meta": {
            "total_orders": len(clean_orders),
            "total_riders": len(rider_ids),
            "assigned_orders": assigned_count,
            "unassigned_orders": len(unassigned_orders),
            "total_profit": round(sum(z["total_profit"] for z in zone_data["zone_analysis"]), 2),
            "unassigned_details": [
                {"id": o.get("orderid") or o.get("_id"), "reason": o.get("unassigned_reason")}
                for o in unassigned_orders
            ],
            "distribution_summary": distribution
        }
    }
    with open('output.json', 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=4)
    # Apache Arrow / Parquet Export (optional — never fails the simulation)
    try:
        save_optimized_route_parquet(output_details, 'output.parquet')
        print("📊 Also saved results to output.parquet (Apache Arrow format)")
    except Exception as e:
        print(f"⚠️ Could not save Parquet: {e}")
    print("✅ Simulation Complete. Saved to output.json")
    print("📊 Distribution Summary:")
    print(json.dumps(distribution, indent=4))
# Entry point: run the full simulation when executed directly as a script.
if __name__ == "__main__":
    asyncio.run(run_simulation())

24
start.py Normal file
View File

@@ -0,0 +1,24 @@
#!/usr/bin/env python3
"""Mobile-optimized startup script for the Delivery Route Optimization API."""
import uvicorn
def main():
    """Start the mobile-optimized API server."""
    # Startup banner for whoever launches the service from a terminal.
    banner = (
        "📱 Starting Mobile Delivery Route Optimization API...",
        "⚡ Optimized for real-time mobile apps",
        "🎯 Default algorithm: GREEDY (ultra-fast)",
        "📚 Documentation: http://localhost:8002/docs",
        "=" * 60,
    )
    for line in banner:
        print(line)
    # Development-friendly defaults: auto-reload on code changes,
    # access logging on, bound to all interfaces on port 8002.
    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=8002,
        reload=True,
        access_log=True,
        log_level="info",
    )
# Entry point: allow `python start.py` to launch the server directly.
if __name__ == "__main__":
    main()