initial project setup with README and ignore

This commit is contained in:
2026-04-08 15:13:42 +05:30
commit 2d5688cb35
47 changed files with 7929 additions and 0 deletions

58
.dockerignore Normal file
View File

@@ -0,0 +1,58 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
*.egg-info/
dist/
build/
# Virtual environments
venv/
env/
ENV/
# IDE
.vscode/
.idea/
*.swp
*.swo
# Testing
.pytest_cache/
.coverage
htmlcov/
# Documentation
*.md
!README.md
# Environment
.env
.env.local
# Logs
*.log
# OS
.DS_Store
Thumbs.db
# Git
.git/
.gitignore
# Docker
Dockerfile
docker-compose.yml
.dockerignore
# Test files
test_*.py
*_test.py
# Temporary files
*.tmp
*.bak

12
.gitignore vendored Normal file
View File

@@ -0,0 +1,12 @@
.env
__pycache__/
*.py[cod]
*$py.class
*.pkl
ml_data/
output.json
route.json
ml_params_output.txt
idea.txt
.idea/
.vscode/

25
Dockerfile Normal file
View File

@@ -0,0 +1,25 @@
# syntax=docker/dockerfile:1
FROM python:3.11-slim

# No .pyc files, unbuffered logs, and no pip cache — keeps the image small.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

WORKDIR /app

# Install dependencies first (layer caching: requirements change less often than code)
COPY requirements.txt ./
RUN pip install --upgrade pip \
    && pip install -r requirements.txt

# Copy application code
COPY app ./app
COPY start.py ./start.py
COPY docker-entrypoint.sh ./docker-entrypoint.sh

# Make entrypoint executable
RUN chmod +x docker-entrypoint.sh

# Port must match the uvicorn port configured in app/main.py (8002).
EXPOSE 8002
ENTRYPOINT ["./docker-entrypoint.sh"]

15
README.md Normal file
View File

@@ -0,0 +1,15 @@
# Route Rider API
Centralized Routing Engine for Rider Assignments.
## Setup
1. Install dependencies:
```bash
pip install -r requirements.txt
```
2. Run the application:
```bash
python start.py
```

1
app/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Delivery Route Optimization API

1
app/config/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Configuration package for mobile delivery optimization."""

View File

@@ -0,0 +1,204 @@
"""
Dynamic Configuration - rider-api
Replaces all hardcoded hyperparameters with DB-backed values.
The ML hypertuner writes optimal values here; services read from here.
Fallback: If DB is unavailable or no tuned values exist, defaults are used.
This means zero risk - the system works day 1 with no data.
"""
import json
import logging
import os
import sqlite3
from datetime import datetime
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)

# --- DB Path ------------------------------------------------------------------
# Overridable via ML_DB_PATH so tests / containers can relocate the store.
_DB_PATH = os.getenv("ML_DB_PATH", "ml_data/ml_store.db")

# --- Hard Defaults (What the system used before ML) ---------------------------
# Keys here are the complete set of tunable parameters; the hypertuner writes
# overrides for these same keys into the dynamic_config table.
DEFAULTS: Dict[str, Any] = {
    # System Strategy / Prompt
    "ml_strategy": "balanced",
    # AssignmentService
    "max_pickup_distance_km": 10.0,
    "max_kitchen_distance_km": 3.0,
    "max_orders_per_rider": 12,
    "ideal_load": 6,
    "workload_balance_threshold": 0.7,
    "workload_penalty_weight": 100.0,
    "distance_penalty_weight": 2.0,
    # Negative values are bonuses (they reduce assignment cost).
    "preference_bonus": -15.0,
    "home_zone_bonus_4km": -3.0,
    "home_zone_bonus_2km": -5.0,
    "emergency_load_penalty": 3.0,  # km penalty per order in emergency assign
    # RouteOptimizer
    "search_time_limit_seconds": 5,
    "avg_speed_kmh": 18.0,
    "road_factor": 1.3,
    # ClusteringService
    "cluster_radius_km": 3.0,
    # KalmanFilter
    "kalman_process_noise": 1e-4,
    "kalman_measurement_noise": 0.01,
    # RealisticETACalculator
    "eta_pickup_time_min": 3.0,
    "eta_delivery_time_min": 4.0,
    "eta_navigation_buffer_min": 1.5,
    "eta_short_trip_factor": 0.8,  # speed multiplier for dist < 2km
    "eta_long_trip_factor": 1.1,   # speed multiplier for dist > 8km
}
class DynamicConfig:
    """
    Thread-safe, DB-backed configuration store.

    ML-tuned values live in the SQLite ``dynamic_config`` table and shadow
    the hardcoded ``DEFAULTS``; any DB failure degrades to defaults.

    Usage:
        cfg = DynamicConfig()
        max_dist = cfg.get("max_pickup_distance_km")
        all_params = cfg.get_all()
    """

    # Process-wide singleton instance.
    # NOTE(review): __new__ is not lock-protected; presumably the first
    # instantiation happens at import time before threads start — confirm.
    _instance: Optional["DynamicConfig"] = None

    def __new__(cls) -> "DynamicConfig":
        """Singleton - one config per process."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # Flag so __init__ runs its body only once (Python re-invokes
            # __init__ on every DynamicConfig() call, even for a reused instance).
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return
        self._initialized = True
        # key -> ML-tuned value loaded from SQLite; overrides DEFAULTS.
        self._cache: Dict[str, Any] = {}
        # Timestamp of last successful DB load; drives periodic refresh.
        self._last_loaded: Optional[datetime] = None
        self._ensure_db()
        self._load()

    # --------------------------------------------------------------------------
    # Public API
    # --------------------------------------------------------------------------
    def get(self, key: str, default: Any = None) -> Any:
        """Get a config value. Returns ML-tuned value if available, else default."""
        self._maybe_reload()
        val = self._cache.get(key)
        # NOTE(review): a tuned value stored as JSON null is treated as
        # "unset" here and falls through to the default.
        if val is not None:
            return val
        # Caller-supplied default wins over the module DEFAULTS entry.
        fallback = default if default is not None else DEFAULTS.get(key)
        return fallback

    def get_all(self) -> Dict[str, Any]:
        """Return all current config values (ML-tuned + defaults for missing keys)."""
        self._maybe_reload()
        # Start from defaults, then layer tuned overrides on top.
        result = dict(DEFAULTS)
        result.update(self._cache)
        return result

    def set(self, key: str, value: Any, source: str = "manual") -> None:
        """Write a config value to DB (used by hypertuner).

        Value is JSON-encoded; the row is upserted keyed on ``key``.
        Failures are logged, never raised.
        """
        try:
            os.makedirs(os.path.dirname(_DB_PATH) or ".", exist_ok=True)
            conn = sqlite3.connect(_DB_PATH)
            conn.execute("""
                INSERT INTO dynamic_config (key, value, source, updated_at)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(key) DO UPDATE SET
                    value=excluded.value,
                    source=excluded.source,
                    updated_at=excluded.updated_at
            """, (key, json.dumps(value), source, datetime.utcnow().isoformat()))
            conn.commit()
            conn.close()
            # Update the in-memory cache immediately so readers see it now.
            self._cache[key] = value
            logger.info(f"[DynamicConfig] Set {key}={value} (source={source})")
        except Exception as e:
            logger.error(f"[DynamicConfig] Failed to set {key}: {e}")

    def set_bulk(self, params: Dict[str, Any], source: str = "ml_hypertuner") -> None:
        """Write multiple config values at once (called after each Optuna study).

        Implemented as per-key set() calls, so each key opens its own DB
        connection; acceptable for the small parameter counts used here.
        """
        for key, value in params.items():
            self.set(key, value, source=source)
        logger.info(f"[DynamicConfig] Bulk update: {len(params)} params from {source}")

    def reset_to_defaults(self) -> None:
        """Wipe all ML-tuned values, revert to hardcoded defaults."""
        try:
            conn = sqlite3.connect(_DB_PATH)
            conn.execute("DELETE FROM dynamic_config")
            conn.commit()
            conn.close()
            self._cache.clear()
            logger.warning("[DynamicConfig] Reset to factory defaults.")
        except Exception as e:
            logger.error(f"[DynamicConfig] Reset failed: {e}")

    # --------------------------------------------------------------------------
    # Internal
    # --------------------------------------------------------------------------
    def _ensure_db(self) -> None:
        # Create the parent directory and table if missing; failure is
        # non-fatal (reads will then fall back to DEFAULTS).
        try:
            os.makedirs(os.path.dirname(_DB_PATH) or ".", exist_ok=True)
            conn = sqlite3.connect(_DB_PATH)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS dynamic_config (
                    key TEXT PRIMARY KEY,
                    value TEXT NOT NULL,
                    source TEXT DEFAULT 'manual',
                    updated_at TEXT
                )
            """)
            conn.commit()
            conn.close()
        except Exception as e:
            logger.error(f"[DynamicConfig] DB init failed: {e}")

    def _load(self) -> None:
        # Replace the whole cache with the current DB contents.
        try:
            conn = sqlite3.connect(_DB_PATH)
            rows = conn.execute("SELECT key, value FROM dynamic_config").fetchall()
            conn.close()
            self._cache = {}
            for key, raw in rows:
                try:
                    self._cache[key] = json.loads(raw)
                except Exception:
                    # Legacy/hand-written rows may hold plain strings.
                    self._cache[key] = raw
            self._last_loaded = datetime.utcnow()
            if self._cache:
                logger.info(f"[DynamicConfig] Loaded {len(self._cache)} ML-tuned params from DB")
        except Exception as e:
            logger.warning(f"[DynamicConfig] Could not load from DB (using defaults): {e}")
            self._cache = {}

    def _maybe_reload(self, interval_seconds: int = 300) -> None:
        """Reload from DB every 5 minutes - picks up new tuned params without restart."""
        if self._last_loaded is None:
            self._load()
            return
        delta = (datetime.utcnow() - self._last_loaded).total_seconds()
        if delta > interval_seconds:
            self._load()
# --- Module-level convenience singleton ---------------------------------------
# Constructed at import time; DynamicConfig.__new__ guarantees any later
# DynamicConfig() call returns this same object.
_cfg = DynamicConfig()


def get_config() -> DynamicConfig:
    """Get the global DynamicConfig singleton."""
    return _cfg

View File

@@ -0,0 +1,33 @@
"""Mobile-specific configuration for delivery route optimization."""
# Mobile optimization settings
MOBILE_CONFIG = {
"default_algorithm": "greedy",
"max_deliveries": 100,
"timeout_seconds": 5,
"response_compression": True,
"performance_monitoring": True,
"mobile_headers": True
}
# Performance targets for mobile
PERFORMANCE_TARGETS = {
"greedy_algorithm": {
"max_response_time": 0.1, # 100ms
"max_deliveries": 50,
"description": "Ultra-fast for real-time mobile apps"
},
"tsp_algorithm": {
"max_response_time": 3.0, # 3 seconds
"max_deliveries": 30,
"description": "Optimal but slower, good for planning"
}
}
# Mobile app recommendations
MOBILE_RECOMMENDATIONS = {
"real_time_delivery": "greedy",
"route_planning": "tsp",
"large_batches": "greedy",
"cost_optimization": "tsp"
}

View File

@@ -0,0 +1,50 @@
"""
Rider Preferred Kitchens Configuration
Mapping of Rider ID (int) to list of preferred Kitchen names (str).
Updated based on Deployment Plan.
"""
# Rider ID -> ordered list of preferred kitchen display names.
# Kitchen name strings must match the provider payload exactly — do not edit.
RIDER_PREFERRED_KITCHENS = {
    # 1. VIVEK ANANDAN - LOCAL, RS PURAM TO SELVAPURAM
    1116: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],
    # 2. NARAYANASAMY - VENGATAPURAM, VADAVALI, TADAGAM ROAD
    1096: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],
    # 3. VARUN EDWARD - GN MILLS, KAVUNDAMPALAYAM, THUDIYALUR
    897: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],
    # 4. JAYASABESH - GANAPATHY
    950: ["Daily grubs nandhini", "Vidhya kitchen"],
    # 5. TAMILAZHAGAN - GANDHIMA NAGAR
    1114: ["Daily grubs nandhini", "Vidhya kitchen"],
    # 6. RAJAN - PEELAMEDU
    883: ["Daily grubs nandhini", "Vidhya kitchen"],
    # 7. MUTHURAJA - RAMANATHAPURAM TO SAIBABA COLONY
    1272: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen", "Daily grubs nandhini", "Vidhya kitchen"],
    # 8. MANIKANDAN - SINGANALLUR
    753: ["Daily grubs nandhini", "Vidhya kitchen"],
    # 9. THATCHINAMOORTHI - KOVAI PUDUR TO KAVUNDAMPALAYAM
    1271: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],
    1133: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],  # Active ID
}

# Anchor Coordinates for Riders (Based on Area Name)
# Used as fallback if GPS is missing, or to bias assignment to their Home Zone.
# NOTE(review): rider 1133 has kitchen preferences above but no home location
# here — confirm whether a fallback coordinate is needed for that ID.
RIDER_HOME_LOCATIONS = {
    1116: (11.0067, 76.9558),  # VIVEK ANANDAN: RS PURAM
    1096: (11.0450, 76.9000),  # NARAYANASAMY: VADAVALI
    897: (11.0430, 76.9380),   # VARUN EDWARD: KAVUNDAMPALAYAM
    950: (11.0330, 76.9800),   # JAYASABESH: GANAPATHY
    1114: (11.0450, 77.0000),  # TAMILAZHAGAN: GANDHIMA NAGAR
    883: (11.0200, 77.0000),   # RAJAN: PEELAMEDU
    1272: (10.9950, 77.0000),  # MUTHURAJA: RAMANATHAPURAM
    753: (11.0000, 77.0300),   # MANIKANDAN: SINGANALLUR
    1271: (10.9500, 76.9600),  # THATCHINAMOORTHI: KOVAI PUDUR
}

View File

@@ -0,0 +1,5 @@
"""Controllers package."""
from .route_controller import RouteController
__all__ = ["RouteController"]

View File

@@ -0,0 +1,87 @@
"""Controller for provider payload optimization and forwarding."""
import logging
import hashlib
import json
from typing import Dict, Any
import httpx
from fastapi import HTTPException
from app.core.exceptions import ValidationError, APIException
from app.services.routing.route_optimizer import RouteOptimizer
from app.services import cache
logger = logging.getLogger(__name__)
class RouteController:
    """Controller for optimizing provider payloads and forwarding upstream.

    Forwarding is currently paused: the optimized payload is returned
    directly to the caller instead of being POSTed to ``forward_url``.
    """

    def __init__(self):
        # Single optimizer instance reused for every request handled here.
        self.route_optimizer = RouteOptimizer()

    def _hash_key(self, prefix: str, payload: Dict[str, Any]) -> str:
        """Create a stable cache key from a dict payload."""
        # ensure deterministic json by sorting keys
        serialized = json.dumps(payload, sort_keys=True, separators=(",", ":"))
        digest = hashlib.sha256(serialized.encode("utf-8")).hexdigest()
        return f"routes:{prefix}:{digest}"

    async def optimize_and_forward_provider_payload(self, orders: list[dict], forward_url: str) -> dict:
        """Optimize provider payload and return it (forwarding paused).

        - Input: list of provider orders (dicts)
        - Output: {code, details, message, status} where details is the optimized array

        Note: ``forward_url`` is accepted but unused while forwarding is paused.
        """
        try:
            if not isinstance(orders, list) or not orders:
                raise ValidationError("Orders array is required", field="body")
            optimized = await self.route_optimizer.optimize_provider_payload(orders)
            # Debug sample of optimized payload (first 3 items, select keys).
            # Wrapped in its own try so logging problems never fail the request.
            try:
                sample = [
                    {
                        k: item.get(k)
                        for k in ("orderheaderid", "orderid", "deliverycustomerid", "step", "previouskms", "cumulativekms", "eta")
                    }
                    for item in optimized[:3]
                ]
                logger.debug(f"Optimized payload sample: {sample}")
                trace = [
                    {
                        "orderid": item.get("orderid"),
                        "step": item.get("step"),
                        "prev": item.get("previouskms"),
                        "cum": item.get("cumulativekms"),
                    }
                    for item in optimized
                ]
                logger.debug(f"Optimized order trace: {trace}")
            except Exception:
                logger.debug("Optimized payload sample logging failed")
            # Forwarding paused: return optimized payload directly
            return {
                "code": 200,
                "details": optimized,
                "message": "Success",
                "status": True,
            }
        except ValidationError:
            # Re-raise as-is so the validation handler formats it.
            raise
        except httpx.HTTPStatusError as e:
            # Only reachable once upstream forwarding is re-enabled.
            status_code = e.response.status_code
            body_text = e.response.text
            logger.error(f"Forwarding failed: {status_code} - {body_text}")
            # Surface upstream details to the client for faster debugging
            raise APIException(
                status_code=502,
                message=f"Upstream service error (status {status_code}): {body_text}",
                code="UPSTREAM_ERROR"
            )
        except Exception as e:
            logger.error(f"Error optimizing/forwarding provider payload: {e}", exc_info=True)
            raise APIException(status_code=500, message="Internal server error", code="INTERNAL_ERROR")

# Batch routes removed - use single-route optimization for each pickup location

2
app/core/__init__.py Normal file
View File

@@ -0,0 +1,2 @@
"""Core application components."""

63
app/core/arrow_utils.py Normal file
View File

@@ -0,0 +1,63 @@
"""
High-performance utilities using Apache Arrow and NumPy for geographic data.
Provides vectorized operations for distances and coordinate processing.
"""
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
import logging
from typing import List, Dict, Any, Tuple
logger = logging.getLogger(__name__)
def calculate_haversine_matrix_vectorized(lats: np.ndarray, lons: np.ndarray) -> np.ndarray:
    """Return the N x N pairwise great-circle distance matrix in kilometers.

    Uses the Haversine formula, fully vectorized via NumPy broadcasting:
    a column vector of radians broadcast against a row vector yields the
    full N x N grid of pairwise differences in one shot.
    """
    earth_radius_km = 6371.0

    # Work entirely in radians.
    phi = np.radians(lats)
    lam = np.radians(lons)

    # Column (N,1) minus row (1,N) broadcasts to an (N,N) delta grid.
    phi_col = phi.reshape(-1, 1)
    phi_row = phi.reshape(1, -1)
    delta_phi = phi_col - phi_row
    delta_lam = lam.reshape(-1, 1) - lam.reshape(1, -1)

    # Haversine: a = sin^2(dphi/2) + cos(phi1) * cos(phi2) * sin^2(dlam/2)
    half_chord = (
        np.sin(delta_phi / 2) ** 2
        + np.cos(phi_col) * np.cos(phi_row) * np.sin(delta_lam / 2) ** 2
    )
    central_angle = 2 * np.arctan2(np.sqrt(half_chord), np.sqrt(1 - half_chord))
    return earth_radius_km * central_angle
def orders_to_arrow_table(orders: List[Dict[str, Any]]) -> pa.Table:
    """
    Convert a list of order dictionaries to an Apache Arrow Table.
    This enables zero-copy operations and efficient columnar storage.

    Arrow infers the schema from the dict keys/values; all dicts should
    share the same keys for a clean columnar layout.
    """
    return pa.Table.from_pylist(orders)
def save_optimized_route_parquet(orders: List[Dict[str, Any]], filename: str):
    """
    Save optimized route data to a Parquet file for high-speed analysis.
    Useful for logging and historical simulation replays.

    Args:
        orders: Order dicts to persist (one row each).
        filename: Destination path for the Parquet file.

    Errors are logged, never raised — persistence is best-effort.
    """
    try:
        table = orders_to_arrow_table(orders)
        pq.write_table(table, filename)
        # Fix: log the actual destination path (previously printed a
        # literal placeholder instead of the filename).
        logger.info(f" Saved route data to Parquet: {filename}")
    except Exception as e:
        logger.error(f" Failed to save Parquet: {e}")
def load_route_parquet(filename: str) -> List[Dict[str, Any]]:
    """
    Load route data from a Parquet file and return as a list of dicts.

    Inverse of save_optimized_route_parquet(); raises if the file is
    missing or unreadable (no best-effort guard here, unlike the writer).
    """
    table = pq.read_table(filename)
    return table.to_pylist()

26
app/core/constants.py Normal file
View File

@@ -0,0 +1,26 @@
"""API constants and configuration."""
# API Configuration
API_VERSION = "2.0.0"
API_TITLE = "Route Optimization API"
API_DESCRIPTION = "Professional API for delivery route optimization"
# Route Optimization Limits
MAX_DELIVERIES = 50
MIN_DELIVERIES = 1
# Coordinate Validation
MIN_LATITUDE = -90
MAX_LATITUDE = 90
MIN_LONGITUDE = -180
MAX_LONGITUDE = 180
# Algorithm Types
ALGORITHM_GREEDY = "greedy"
ALGORITHM_TSP = "tsp"
# Response Messages
MESSAGE_SUCCESS = "Route optimized successfully"
MESSAGE_VALIDATION_ERROR = "Request validation failed"
MESSAGE_INTERNAL_ERROR = "An unexpected error occurred"

View File

@@ -0,0 +1,112 @@
"""Professional exception handlers for the API."""
import logging
from fastapi import Request, status
from fastapi.responses import JSONResponse
from fastapi.exceptions import RequestValidationError
from starlette.exceptions import HTTPException as StarletteHTTPException
from app.core.exceptions import APIException
from app.models.errors import ErrorResponse, ErrorDetail
logger = logging.getLogger(__name__)
async def api_exception_handler(request: Request, exc: APIException) -> JSONResponse:
    """Translate a custom APIException into the standard error envelope."""
    rid = getattr(request.state, "request_id", None)
    detail = ErrorDetail(
        field=exc.field,
        message=exc.message,
        code=exc.code,
    )
    envelope = ErrorResponse(
        success=False,
        error=detail,
        path=request.url.path,
        request_id=rid,
    )
    logger.warning(f"API Exception: {exc.code} - {exc.message} (Request ID: {rid})")
    # exclude_none keeps the payload minimal (no field/request_id keys when unset).
    return JSONResponse(
        status_code=exc.status_code,
        content=envelope.model_dump(exclude_none=True),
    )
async def http_exception_handler(request: Request, exc: StarletteHTTPException) -> JSONResponse:
    """Render framework HTTP exceptions using the shared error envelope."""
    rid = getattr(request.state, "request_id", None)
    body = ErrorResponse(
        success=False,
        error=ErrorDetail(message=exc.detail, code="HTTP_ERROR"),
        path=request.url.path,
        request_id=rid,
    ).model_dump(exclude_none=True)
    logger.warning(f"HTTP Exception: {exc.status_code} - {exc.detail} (Request ID: {rid})")
    # Preserve the original status code from the raised exception.
    return JSONResponse(status_code=exc.status_code, content=body)
async def validation_exception_handler(request: Request, exc: RequestValidationError) -> JSONResponse:
    """Report the first pydantic validation failure with its dotted field path."""
    rid = getattr(request.state, "request_id", None)

    # Only the first error is surfaced to keep responses compact.
    field, message = None, "Validation error"
    issues = exc.errors()
    if issues:
        head = issues[0]
        field = ".".join(str(part) for part in head.get("loc", []))
        message = head.get("msg", "Validation error")

    envelope = ErrorResponse(
        success=False,
        error=ErrorDetail(field=field, message=message, code="VALIDATION_ERROR"),
        path=request.url.path,
        request_id=rid,
    )
    logger.warning(f"Validation Error: {message} (Field: {field}, Request ID: {rid})")
    return JSONResponse(
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        content=envelope.model_dump(exclude_none=True),
    )
async def general_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Last-resort handler: log the traceback, return a generic 500 envelope."""
    rid = getattr(request.state, "request_id", None)
    body = ErrorResponse(
        success=False,
        error=ErrorDetail(
            message="An unexpected error occurred. Please try again later.",
            code="INTERNAL_SERVER_ERROR",
        ),
        path=request.url.path,
        request_id=rid,
    ).model_dump(exclude_none=True)
    # exc_info=True captures the full traceback for debugging;
    # the client only ever sees the generic message above.
    logger.error(f"Unexpected Error: {str(exc)} (Request ID: {rid})", exc_info=True)
    return JSONResponse(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        content=body,
    )

70
app/core/exceptions.py Normal file
View File

@@ -0,0 +1,70 @@
"""Custom exceptions for the API."""
from fastapi import HTTPException, status
class APIException(HTTPException):
    """Base API exception carrying a structured error payload.

    Attributes:
        message: Human-readable error description.
        field:   Optional name of the offending field.
        code:    Machine-readable error code; derived from the status
                 code when not supplied explicitly.
    """

    def __init__(
        self,
        status_code: int,
        message: str,
        field: str = None,
        code: str = None,
        detail: str = None
    ):
        self.message = message
        self.field = field
        self.code = code if code else self._get_default_code(status_code)
        # HTTPException wants a detail string; fall back to the message.
        super().__init__(status_code=status_code, detail=detail or message)

    def _get_default_code(self, status_code: int) -> str:
        """Map an HTTP status code to its conventional error-code string."""
        status_to_code = {
            400: "BAD_REQUEST",
            401: "UNAUTHORIZED",
            403: "FORBIDDEN",
            404: "NOT_FOUND",
            409: "CONFLICT",
            422: "VALIDATION_ERROR",
            429: "RATE_LIMIT_EXCEEDED",
            500: "INTERNAL_SERVER_ERROR",
            503: "SERVICE_UNAVAILABLE",
        }
        return status_to_code.get(status_code, "UNKNOWN_ERROR")
class ValidationError(APIException):
    """Raised when request payload validation fails (HTTP 422)."""

    def __init__(self, message: str, field: str = None):
        super().__init__(
            message=message,
            field=field,
            code="VALIDATION_ERROR",
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        )
class NotFoundError(APIException):
    """Raised when the requested resource does not exist (HTTP 404)."""

    def __init__(self, message: str = "Resource not found"):
        super().__init__(
            message=message,
            code="NOT_FOUND",
            status_code=status.HTTP_404_NOT_FOUND,
        )
class RateLimitError(APIException):
    """Raised when a client exceeds its request quota (HTTP 429)."""

    def __init__(self, message: str = "Rate limit exceeded"):
        super().__init__(
            message=message,
            code="RATE_LIMIT_EXCEEDED",
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
        )

263
app/main.py Normal file
View File

@@ -0,0 +1,263 @@
"""Professional FastAPI application for delivery route optimization."""
import logging
import os
import sys
import time
import threading
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, status
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.exceptions import RequestValidationError
from starlette.exceptions import HTTPException as StarletteHTTPException
from app.routes import optimization_router, health_router, cache_router, ml_router, ml_web_router
from app.middleware.request_id import RequestIDMiddleware
from app.core.exceptions import APIException
from app.core.exception_handlers import (
api_exception_handler,
http_exception_handler,
validation_exception_handler,
general_exception_handler
)
# Configure professional logging with env control
_log_level_name = os.getenv("LOG_LEVEL", "INFO").upper()
# Unknown LOG_LEVEL names silently fall back to INFO.
_log_level = getattr(logging, _log_level_name, logging.INFO)
logging.basicConfig(
    level=_log_level,
    format="%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

# Ensure root and key libraries honor desired level
logging.getLogger().setLevel(_log_level)
logging.getLogger("httpx").setLevel(_log_level)
logging.getLogger("uvicorn").setLevel(_log_level)
logging.getLogger("uvicorn.error").setLevel(_log_level)
logging.getLogger("uvicorn.access").setLevel(_log_level)

# --- Smart Post-Call ML Trainer ----------------------------------------------------------
#
# Trains in a BACKGROUND THREAD after every N /riderassign calls.
# - The API response is NEVER blocked - training is fully async.
# - Cooldown prevents overlapping runs (won't train if one is already running).
# - MIN_RECORDS guard: won't attempt if DB doesn't have enough data yet.
#
# Config:
#   TRAIN_EVERY_N_CALLS : retrain after this many calls (default: 10)
#   MIN_RECORDS_TO_TRAIN: minimum DB rows before first train (default: 30)
#   COOLDOWN_SECONDS    : min gap between two training runs (default: 120s)
# -------------------------------------------------------------------
TRAIN_EVERY_N_CALLS = int(os.getenv("ML_TRAIN_EVERY_N", "10"))
MIN_RECORDS_TO_TRAIN = int(os.getenv("ML_MIN_RECORDS", "30"))
COOLDOWN_SECONDS = int(os.getenv("ML_COOLDOWN_SEC", "120"))

# Mutable module state guarded by the locks below.
_call_counter = 0
_counter_lock = threading.Lock()    # protects _call_counter
_training_lock = threading.Lock()   # ensures only one training run at a time
_last_trained_at = 0.0  # epoch seconds
def _run_training_background():
    """
    The actual training job - runs in a daemon thread.
    Fully safe to call while the API is serving requests.

    Skips silently when another run holds the training lock or when the
    ML DB has fewer than MIN_RECORDS_TO_TRAIN rows. Updates the module
    _last_trained_at only on a successful ("ok") hypertuner run.
    """
    global _last_trained_at

    # Acquire lock - only ONE training run at a time
    if not _training_lock.acquire(blocking=False):
        logger.info("[MLTrigger] Training already running - skipping this trigger.")
        return

    try:
        # Imported lazily so app startup never pays the ML import cost.
        from app.services.ml.ml_hypertuner import get_hypertuner
        from app.services.ml.ml_data_collector import get_collector

        count = get_collector().count_records()
        if count < MIN_RECORDS_TO_TRAIN:
            logger.info(f"[MLTrigger] Only {count} records - need >={MIN_RECORDS_TO_TRAIN}. Skipping.")
            return

        logger.info(f"[MLTrigger] [ML] Background hypertuning started ({count} records)...")
        result = get_hypertuner().run(n_trials=100)
        if result.get("status") == "ok":
            _last_trained_at = time.time()
            logger.info(
                f"[MLTrigger] [OK] Hypertuning done - "
                f"quality={result.get('best_predicted_quality', '?')}/100 "
                f"| {result.get('training_rows', '?')} rows "
                f"| {result.get('trials_run', '?')} trials"
            )
        else:
            logger.info(f"[MLTrigger] Hypertuning skipped: {result.get('message', '')}")
    except Exception as e:
        logger.error(f"[MLTrigger] Background training error: {e}", exc_info=True)
    finally:
        _training_lock.release()
def trigger_training_if_due():
    """
    Called after every /riderassign call.
    Increments counter - fires background thread every TRAIN_EVERY_N_CALLS.
    Non-blocking: returns immediately regardless.
    """
    global _call_counter, _last_trained_at

    # Counter increment and the "due" decision happen under the lock;
    # the cooldown check below runs outside it (a near-simultaneous second
    # trigger is still caught by the training lock in the worker).
    with _counter_lock:
        _call_counter += 1
        should_train = (_call_counter % TRAIN_EVERY_N_CALLS == 0)

    if not should_train:
        return

    # Cooldown check - don't train if we just trained recently
    elapsed = time.time() - _last_trained_at
    if elapsed < COOLDOWN_SECONDS:
        logger.info(
            f"[MLTrigger] Cooldown active - "
            f"{int(COOLDOWN_SECONDS - elapsed)}s remaining. Skipping."
        )
        return

    # Fire background thread - does NOT block the API response
    t = threading.Thread(target=_run_training_background, daemon=True, name="ml-hypertuner")
    t.start()
    logger.info(f"[MLTrigger] [START] Background training thread launched (call #{_call_counter})")
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan events.

    Startup: if the ML DB already holds enough records, kick off a
    background hypertuning run immediately; otherwise wait for the
    per-call trigger. All ML failures here are non-fatal.
    """
    logger.info("[START] Starting Route Optimization API...")

    # -- On startup: if enough data exists, train immediately in background --
    try:
        from app.services.ml.ml_data_collector import get_collector
        count = get_collector().count_records()
        if count >= MIN_RECORDS_TO_TRAIN:
            logger.info(f"[Startup] {count} records found -> launching startup hypertuning...")
            t = threading.Thread(target=_run_training_background, daemon=True, name="ml-startup")
            t.start()
        else:
            logger.info(
                f"[Startup] {count}/{MIN_RECORDS_TO_TRAIN} records in ML DB - "
                f"will auto-train after every {TRAIN_EVERY_N_CALLS} /riderassign calls."
            )
    except Exception as e:
        logger.warning(f"[Startup] ML status check failed (non-fatal): {e}")

    logger.info(
        f"[OK] Application initialized - "
        f"ML trains every {TRAIN_EVERY_N_CALLS} calls "
        f"(cooldown {COOLDOWN_SECONDS}s, min {MIN_RECORDS_TO_TRAIN} records)"
    )
    # Application serves requests between startup (above) and shutdown (below).
    yield
    logger.info(" Shutting down Route Optimization API...")
# Create FastAPI application with professional configuration
app = FastAPI(
    title="Route Optimization API",
    version="2.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/api/v1/openapi.json",
    lifespan=lifespan
)

# Add Request ID middleware (must be first)
app.add_middleware(RequestIDMiddleware)

# Add CORS middleware
# NOTE(review): allow_origins=["*"] with allow_credentials=True is wide open -
# configure specific domains in production (see comment below).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure specific domains in production
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["*"],
    expose_headers=["X-Request-ID", "X-Process-Time"]
)

# Add GZIP compression (only for responses >= 1000 bytes)
app.add_middleware(GZipMiddleware, minimum_size=1000)

# Add request timing middleware
@app.middleware("http")
async def add_process_time_header(request: Request, call_next):
    """Add performance monitoring headers."""
    start_time = time.time()
    response = await call_next(request)
    # Wall-clock duration of the whole downstream chain, rounded to 0.1ms.
    process_time = time.time() - start_time
    response.headers["X-Process-Time"] = str(round(process_time, 4))
    response.headers["X-API-Version"] = "2.0.0"
    return response

# Register exception handlers (most specific first)
app.add_exception_handler(APIException, api_exception_handler)
app.add_exception_handler(StarletteHTTPException, http_exception_handler)
app.add_exception_handler(RequestValidationError, validation_exception_handler)
app.add_exception_handler(Exception, general_exception_handler)

# Include routers
app.include_router(optimization_router)
app.include_router(health_router)
app.include_router(cache_router)
app.include_router(ml_router)
app.include_router(ml_web_router)
@app.get("/", tags=["Root"])
async def root(request: Request):
"""
API root endpoint with service information.
Returns API metadata, available endpoints, and usage information.
"""
request_id = getattr(request.state, "request_id", None)
return {
"service": "Route Optimization API",
"version": "2.0.0",
"status": "operational",
"documentation": {
"swagger": "/docs",
"redoc": "/redoc",
"openapi": "/api/v1/openapi.json"
},
"endpoints": {
"createdeliveries": {
"url": "/api/v1/optimization/createdeliveries",
"method": "POST",
"description": "Accept provider array, optimize order, add step/previouskms/cumulativekms, forward upstream"
},
"health": {
"url": "/api/v1/health",
"method": "GET",
"description": "Health check endpoint"
}
},
"features": {
"algorithm": "Greedy Nearest-Neighbor",
"optimization": "Provider array reordering with distance metrics",
"added_fields": ["step", "previouskms", "cumulativekms", "actualkms"]
},
"request_id": request_id
}
if __name__ == "__main__":
import uvicorn
uvicorn.run("app.main:app", host="0.0.0.0", port=8002, reload=True)

View File

@@ -0,0 +1,2 @@
"""Middleware components."""

View File

@@ -0,0 +1,26 @@
"""Request ID middleware for request tracing."""
import uuid
from fastapi import Request
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import Response
class RequestIDMiddleware(BaseHTTPMiddleware):
    """Attach a unique X-Request-ID to every request/response pair."""

    async def dispatch(self, request: Request, call_next):
        # Honour a caller-supplied ID; otherwise mint a fresh UUID4.
        incoming = request.headers.get("X-Request-ID")
        request_id = incoming if incoming else str(uuid.uuid4())

        # Expose it to downstream handlers via request.state ...
        request.state.request_id = request_id
        response = await call_next(request)

        # ... and echo it back so clients can correlate logs.
        response.headers["X-Request-ID"] = request_id
        return response

21
app/models/__init__.py Normal file
View File

@@ -0,0 +1,21 @@
"""Models package."""
from .schemas import (
Location,
Delivery,
RouteOptimizationRequest,
RouteStep,
OptimizedRoute,
PickupLocation,
DeliveryLocation
)
__all__ = [
"Location",
"Delivery",
"RouteOptimizationRequest",
"RouteStep",
"OptimizedRoute",
"PickupLocation",
"DeliveryLocation"
]

45
app/models/errors.py Normal file
View File

@@ -0,0 +1,45 @@
"""Professional error response models for API."""
from typing import Optional, Any, Dict
from pydantic import BaseModel, Field
from datetime import datetime
class ErrorDetail(BaseModel):
    """Detailed error information (nested inside ErrorResponse.error)."""
    field: Optional[str] = Field(None, description="Field name that caused the error")
    message: str = Field(..., description="Error message")
    code: Optional[str] = Field(None, description="Error code")
class ErrorResponse(BaseModel):
    """Standardized error response model.

    Emitted by every exception handler; optional fields are dropped from
    the payload via model_dump(exclude_none=True) at the call sites.
    """
    success: bool = Field(False, description="Request success status")
    error: ErrorDetail = Field(..., description="Error details")
    # NOTE(review): datetime.utcnow() is naive UTC (no offset suffix) and
    # deprecated in Python 3.12 — consider datetime.now(timezone.utc);
    # confirm consumers before changing the timestamp format.
    timestamp: str = Field(default_factory=lambda: datetime.utcnow().isoformat(), description="Error timestamp")
    path: Optional[str] = Field(None, description="Request path")
    request_id: Optional[str] = Field(None, description="Request ID for tracing")

    class Config:
        # Example rendered in the OpenAPI schema.
        json_schema_extra = {
            "example": {
                "success": False,
                "error": {
                    "field": "pickup_location",
                    "message": "Pickup location is required",
                    "code": "VALIDATION_ERROR"
                },
                "timestamp": "2024-01-15T10:30:00.000Z",
                "path": "/api/v1/optimization/single-route",
                "request_id": "req-123456"
            }
        }
class SuccessResponse(BaseModel):
    """Standardized success response wrapper (mirror of ErrorResponse)."""
    success: bool = Field(True, description="Request success status")
    data: Any = Field(..., description="Response data")
    timestamp: str = Field(default_factory=lambda: datetime.utcnow().isoformat(), description="Response timestamp")
    request_id: Optional[str] = Field(None, description="Request ID for tracing")

167
app/models/schemas.py Normal file
View File

@@ -0,0 +1,167 @@
"""Professional Pydantic models for request/response validation."""
from typing import List, Optional
from pydantic import BaseModel, Field, field_validator
from datetime import datetime
class Location(BaseModel):
    """Generic location model with latitude and longitude.

    NOTE(review): unlike PickupLocation/DeliveryLocation, this model has no
    ge/le range constraints on its coordinates — confirm whether that is
    intentional for its callers.
    """
    lat: float = Field(..., description="Latitude")
    lng: float = Field(..., description="Longitude")
class PickupLocation(BaseModel):
    """Pickup (warehouse/store) location with range-validated coordinates."""
    pickuplat: float = Field(
        ...,
        description="Pickup latitude",
        ge=-90,
        le=90,
        examples=[11.0050534]
    )
    pickuplon: float = Field(
        ...,
        description="Pickup longitude",
        ge=-180,
        le=180,
        examples=[76.9508991]
    )
    @field_validator("pickuplat", "pickuplon")
    @classmethod
    def validate_coordinates(cls, v):
        """Coerce coordinates to float.

        NOTE(review): the None check appears unreachable — Pydantic rejects
        None for a required float field before this validator runs.
        """
        if v is None:
            raise ValueError("Coordinate cannot be None")
        return float(v)
class DeliveryLocation(BaseModel):
    """Delivery destination with range-validated coordinates.

    Field names intentionally differ from PickupLocation (deliverylat /
    deliverylong) to match the provider payload schema.
    """
    deliverylat: float = Field(
        ...,
        description="Delivery latitude",
        ge=-90,
        le=90,
        examples=[11.0309723]
    )
    deliverylong: float = Field(
        ...,
        description="Delivery longitude",
        ge=-180,
        le=180,
        examples=[77.0004574]
    )
    @field_validator("deliverylat", "deliverylong")
    @classmethod
    def validate_coordinates(cls, v):
        """Coerce coordinates to float (None check mirrors PickupLocation)."""
        if v is None:
            raise ValueError("Coordinate cannot be None")
        return float(v)
class Delivery(BaseModel):
    """A single delivery order: id, customer, and destination coordinates."""
    deliveryid: str = Field(..., description="Unique delivery identifier")
    deliverycustomerid: int = Field(..., description="Customer ID for this delivery")
    location: DeliveryLocation = Field(..., description="Delivery location coordinates")
class RouteOptimizationRequest(BaseModel):
    """
    Request model for route optimization.

    Optimizes delivery routes starting from a pickup location (warehouse/store)
    to multiple delivery locations. Uses a greedy nearest-neighbor algorithm
    for fast, efficient route calculation.
    """
    pickup_location: PickupLocation = Field(
        ...,
        description="Pickup location (warehouse/store) coordinates - starting point for optimization"
    )
    pickup_location_id: Optional[int] = Field(
        None,
        description="Optional pickup location ID for tracking purposes"
    )
    # FIX: min_items/max_items are Pydantic v1 names (deprecated/removed in v2);
    # the file already uses v2 APIs (field_validator), so use the v2 names
    # min_length/max_length. Validation behavior (1-50 items) is unchanged.
    deliveries: List[Delivery] = Field(
        ...,
        min_length=1,
        max_length=50,
        description="List of delivery locations to optimize (1-50 deliveries supported)"
    )
    class Config:
        # OpenAPI example shown in the generated docs.
        json_schema_extra = {
            "example": {
                "pickup_location": {
                    "pickuplat": 11.0050534,
                    "pickuplon": 76.9508991
                },
                "pickup_location_id": 1,
                "deliveries": [
                    {
                        "deliveryid": "90465",
                        "deliverycustomerid": 1,
                        "location": {
                            "deliverylat": 11.0309723,
                            "deliverylong": 77.0004574
                        }
                    }
                ]
            }
        }
class RouteStep(BaseModel):
    """Single stop in the optimized route, with running distance totals."""
    # 1-based position in the optimized sequence.
    step_number: int = Field(..., description="Step number in the route")
    delivery_id: str = Field(..., description="Delivery ID for this step")
    delivery_customer_id: int = Field(..., description="Customer ID for this delivery")
    location: DeliveryLocation = Field(..., description="Delivery location coordinates")
    distance_from_previous_km: float = Field(..., description="Distance from previous step in kilometers")
    cumulative_distance_km: float = Field(..., description="Total distance traveled so far in kilometers")
class OptimizedRoute(BaseModel):
    """
    Optimized route response with step-by-step delivery sequence.

    Contains the optimized route starting from pickup location, with each step
    showing the delivery order, distance from the previous step, and the
    cumulative distance traveled.
    """
    route_id: str = Field(..., description="Unique route identifier (UUID)")
    pickup_location_id: Optional[int] = Field(None, description="Pickup location ID")
    pickup_location: PickupLocation = Field(..., description="Pickup location (warehouse/store) coordinates")
    total_distance_km: float = Field(
        ...,
        ge=0,
        description="Total route distance in kilometers",
        examples=[12.45]
    )
    total_deliveries: int = Field(
        ...,
        ge=1,
        description="Total number of deliveries in the route",
        examples=[5]
    )
    # Defaults to "greedy" — the only algorithm currently implemented.
    optimization_algorithm: str = Field(
        "greedy",
        description="Algorithm used for optimization",
        examples=["greedy"]
    )
    steps: List[RouteStep] = Field(
        ...,
        description="Ordered list of route steps (Step 1 = nearest from pickup, Step 2 = nearest from Step 1, etc.)"
    )
    # Naive UTC ISO timestamp; utcnow() is deprecated in Python 3.12+ — see ErrorResponse.
    created_at: str = Field(
        default_factory=lambda: datetime.utcnow().isoformat(),
        description="Route creation timestamp (ISO 8601)"
    )
# Batch optimization removed - no rider support needed
# Use single-route optimization for each pickup location

8
app/routes/__init__.py Normal file
View File

@@ -0,0 +1,8 @@
"""Routes package."""
from .optimization import router as optimization_router
from .health import router as health_router
from .cache import router as cache_router
from .ml_admin import router as ml_router, web_router as ml_web_router
__all__ = ["optimization_router", "health_router", "cache_router", "ml_router", "ml_web_router"]

79
app/routes/cache.py Normal file
View File

@@ -0,0 +1,79 @@
"""Cache management API endpoints."""
import logging
from fastapi import APIRouter, HTTPException
from typing import Dict, Any
from app.services import cache
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/cache", tags=["Cache Management"])
@router.get("/stats", response_model=Dict[str, Any])
async def get_cache_stats():
    """
    Return cache counters plus a derived hit rate.

    Response fields:
    - hits / misses / sets: raw counters from the cache layer
    - total_keys: current number of cached route keys
    - enabled: whether Redis cache is enabled
    - hit_rate: hits as a percentage of all lookups (0.0 when no lookups yet)
    """
    try:
        stats = cache.get_stats()
        hit_count = stats.get("hits", 0)
        lookups = hit_count + stats.get("misses", 0)
        # Guard against division by zero before any traffic has been served.
        stats["hit_rate"] = round(hit_count / lookups * 100, 2) if lookups > 0 else 0.0
        return stats
    except Exception as e:
        logger.error(f"Error getting cache stats: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/keys")
async def list_cache_keys(pattern: str = "routes:*"):
    """
    List cache keys matching a Redis glob pattern.

    - **pattern**: Redis key pattern (default: "routes:*")

    Returns the pattern, the total match count, and at most the first
    100 keys (truncated to bound response size).
    """
    try:
        keys = cache.get_keys(pattern)
        return {
            "pattern": pattern,
            "count": len(keys),
            "keys": keys[:100]  # Limit to first 100 for response size
        }
    except Exception as e:
        logger.error(f"Error listing cache keys: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.delete("/clear")
async def clear_cache(pattern: str = "routes:*"):
    """
    Delete cache keys matching a Redis glob pattern.

    - **pattern**: Redis key pattern to delete (default: "routes:*")

    [WARN] **Warning**: This will delete cached route optimizations!
    Returns the pattern and how many keys were removed.
    """
    try:
        deleted_count = cache.delete(pattern)
        logger.info(f"Cleared {deleted_count} cache keys matching pattern: {pattern}")
        return {
            "pattern": pattern,
            "deleted_count": deleted_count,
            "message": f"Cleared {deleted_count} cache keys"
        }
    except Exception as e:
        logger.error(f"Error clearing cache: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")

98
app/routes/health.py Normal file
View File

@@ -0,0 +1,98 @@
"""Professional health check endpoints."""
import time
import logging
import sys
from typing import Optional
from datetime import datetime
from fastapi import APIRouter, Request
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/health", tags=["Health"])
start_time = time.time()
class HealthResponse(BaseModel):
    """Health check response model returned by GET /api/v1/health/."""
    # "healthy" or "unhealthy" (see health_check below).
    status: str = Field(..., description="Service status")
    uptime_seconds: float = Field(..., description="Service uptime in seconds")
    version: str = Field("2.0.0", description="API version")
    timestamp: str = Field(..., description="Health check timestamp (ISO 8601)")
    request_id: Optional[str] = Field(None, description="Request ID for tracing")
@router.get("/", response_model=HealthResponse)
async def health_check(request: Request):
    """
    Health check endpoint.

    Returns the current health status of the API service including:
    - Service status (healthy/unhealthy)
    - Uptime in seconds (measured from module import time)
    - API version
    - Timestamp
    """
    try:
        # Uptime is relative to the module-level start_time captured at import.
        uptime = time.time() - start_time
        request_id = getattr(request.state, "request_id", None)
        return HealthResponse(
            status="healthy",
            uptime_seconds=round(uptime, 2),
            version="2.0.0",
            timestamp=datetime.utcnow().isoformat() + "Z",
            request_id=request_id
        )
    except Exception as e:
        # Still return 200 with status="unhealthy" rather than raising —
        # keeps the probe response shape stable for monitors.
        logger.error(f"Health check failed: {e}", exc_info=True)
        request_id = getattr(request.state, "request_id", None)
        return HealthResponse(
            status="unhealthy",
            uptime_seconds=0.0,
            version="2.0.0",
            timestamp=datetime.utcnow().isoformat() + "Z",
            request_id=request_id
        )
@router.get("/ready")
async def readiness_check(request: Request):
    """
    Readiness probe for load balancers.

    Reports "ready" when the service can accept requests; "not_ready" if the
    (currently empty) dependency checks raise. Always returns HTTP 200.
    """
    req_id = getattr(request.state, "request_id", None)
    try:
        # Check if critical services are available
        # Add your service health checks here
        probe_status = "ready"
    except Exception as e:
        logger.error(f"Readiness check failed: {e}")
        probe_status = "not_ready"
    return {
        "status": probe_status,
        "timestamp": datetime.utcnow().isoformat() + "Z",
        "request_id": req_id
    }
@router.get("/live")
async def liveness_check(request: Request):
    """
    Liveness probe for container orchestration.

    Unconditionally reports the process as alive (HTTP 200).
    """
    payload = {
        "status": "alive",
        "timestamp": datetime.utcnow().isoformat() + "Z",
        "request_id": getattr(request.state, "request_id", None)
    }
    return payload

286
app/routes/ml_admin.py Normal file
View File

@@ -0,0 +1,286 @@
"""
ML Admin API - rider-api
Endpoints:
GET /api/v1/ml/status - DB record count, quality trend, model info
GET /api/v1/ml/config - Current active hyperparameters (ML-tuned + defaults)
POST /api/v1/ml/train - Trigger hypertuning immediately
POST /api/v1/ml/reset - Reset config to factory defaults
GET /api/v1/ml/reports - List past tuning reports
"""
import logging
import os
import json
from fastapi import APIRouter, HTTPException, Body, Request
from fastapi.responses import FileResponse, PlainTextResponse
from typing import Optional
logger = logging.getLogger(__name__)
router = APIRouter(
prefix="/api/v1/ml",
tags=["ML Hypertuner"],
responses={
500: {"description": "Internal server error"}
}
)
web_router = APIRouter(
tags=["ML Monitor Web Dashboard"]
)
# -----------------------------------------------------------------------------
# GET /ml-ops
# -----------------------------------------------------------------------------
@web_router.get("/ml-ops", summary="Visual ML monitoring dashboard")
def ml_dashboard():
    """Serve the static HTML dashboard for visualizing ML progress.

    Resolves the template relative to the process working directory, so the
    app must be launched from the project root; 404s if the file is missing.
    """
    path = os.path.join(os.getcwd(), "app/templates/ml_dashboard.html")
    if not os.path.isfile(path):
        raise HTTPException(status_code=404, detail=f"Dashboard template not found at {path}")
    return FileResponse(path)
# -----------------------------------------------------------------------------
# GET /status
# -----------------------------------------------------------------------------
@router.get("/status", summary="ML system status & quality trend")
def ml_status():
    """
    Returns:
    - How many assignment events are logged
    - Recent quality score trend (avg / min / max over the last 50 calls)
    - Whether the model has been trained
    - Current hyperparameter source (ml_tuned vs defaults)

    Imports are deferred to call time to avoid loading the ML stack at
    module import.
    """
    from app.services.ml.ml_data_collector import get_collector
    from app.services.ml.ml_hypertuner import get_hypertuner
    try:
        collector = get_collector()
        tuner = get_hypertuner()
        record_count = collector.count_records()
        quality_trend = collector.get_recent_quality_trend(last_n=50)
        model_info = tuner.get_model_info()
        from app.services.ml.behavior_analyzer import get_analyzer
        b_analyzer = get_analyzer()
        from app.config.dynamic_config import get_config
        cfg = get_config()
        return {
            "status": "ok",
            "db_records": record_count,
            # 30 records is the training threshold (matches ml_train's default min_records).
            "ready_to_train": record_count >= 30,
            "quality_trend": quality_trend,
            "hourly_stats": collector.get_hourly_stats(),
            "quality_histogram": collector.get_quality_histogram(),
            "strategy_comparison": collector.get_strategy_comparison(),
            "zone_stats": collector.get_zone_stats(),
            # hasattr guard: older analyzer versions may lack get_info().
            "behavior": b_analyzer.get_info() if hasattr(b_analyzer, 'get_info') else {},
            "config": cfg.get_all(),
            "model": model_info,
            # Human-readable summary of the training pipeline state.
            "message": (
                f"Collecting data - need {max(0, 30 - record_count)} more records to train."
                if record_count < 30
                else "Ready to train! Call POST /api/v1/ml/train"
                if not model_info["model_trained"]
                else "Model trained and active."
            )
        }
    except Exception as e:
        logger.error(f"[ML API] Status failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
# -----------------------------------------------------------------------------
# GET /config
# -----------------------------------------------------------------------------
@router.get("/config", summary="Current active hyperparameter values")
def ml_config():
    """
    Returns every hyperparameter currently in use by the system.

    Values marked 'ml_tuned' were set by the ML model.
    Values marked 'default' are factory defaults (not yet tuned).
    """
    from app.config.dynamic_config import get_config, DEFAULTS
    try:
        cfg = get_config()
        all_values = cfg.get_all()
        # NOTE(review): reads the config object's private _cache to decide
        # provenance — consider exposing a public accessor on DynamicConfig.
        cached_keys = set(cfg._cache.keys())
        annotated = {}
        for k, v in all_values.items():
            annotated[k] = {
                "value": v,
                # A key present in the cache was written by the tuner/admin.
                "source": "ml_tuned" if k in cached_keys else "default",
            }
        return {
            "status": "ok",
            "hyperparameters": annotated,
            "total_params": len(annotated),
            "ml_tuned_count": sum(1 for x in annotated.values() if x["source"] == "ml_tuned"),
        }
    except Exception as e:
        logger.error(f"[ML API] Config fetch failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.patch("/config", summary="Update specific ML configuration defaults")
def ml_config_patch(payload: dict = Body(...)):
    """Apply JSON overrides to active parameters. e.g. \{ \"ml_strategy\": \"balanced\" \}

    The payload is passed through as-is; validation of keys/values is
    delegated to DynamicConfig.set_bulk.
    """
    from app.config.dynamic_config import get_config
    try:
        cfg = get_config()
        # Tag writes with source="ml_admin" so provenance is auditable.
        cfg.set_bulk(payload, source="ml_admin")
        return {"status": "ok"}
    except Exception as e:
        logger.error(f"[ML API] Config patch failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
# -----------------------------------------------------------------------------
# POST /train
# -----------------------------------------------------------------------------
@router.post("/train", summary="Trigger XGBoost training + Optuna hyperparameter search")
def ml_train(
    n_trials: int = Body(default=100, embed=True, ge=10, le=500,
                         description="Number of Optuna trials (10500)"),
    min_records: int = Body(default=30, embed=True, ge=10,
                            description="Minimum DB records required")
):
    """
    Runs the full hypertuning pipeline synchronously (blocks until done):
    1. Load logged assignment data from DB
    2. Train XGBoost surrogate model
    3. Run Optuna TPE search (n_trials trials)
    4. Write optimal params to DynamicConfig

    The AssignmentService picks up new params within 5 minutes (auto-reload).
    Returns whatever the hypertuner's run() reports.
    """
    from app.services.ml.ml_hypertuner import get_hypertuner
    try:
        logger.info(f"[ML API] Hypertuning triggered: n_trials={n_trials}, min_records={min_records}")
        tuner = get_hypertuner()
        result = tuner.run(n_trials=n_trials, min_training_records=min_records)
        return result
    except Exception as e:
        logger.error(f"[ML API] Training failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
# -----------------------------------------------------------------------------
# POST /reset
# -----------------------------------------------------------------------------
@router.post("/reset", summary="Reset all hyperparameters to factory defaults")
def ml_reset():
    """
    Wipe every ML-tuned config value and revert all parameters to the
    original hardcoded defaults. Use this if the model produced bad results.
    """
    from app.config.dynamic_config import get_config
    try:
        cfg = get_config()
        cfg.reset_to_defaults()
    except Exception as e:
        logger.error(f"[ML API] Reset failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
    return {
        "status": "ok",
        "message": "All hyperparameters reset to factory defaults.",
    }
# -----------------------------------------------------------------------------
# POST /strategy
# -----------------------------------------------------------------------------
@router.post("/strategy", summary="Change the AI Optimization Prompt/Strategy")
def ml_strategy(strategy: str = Body(default="balanced", embed=True)):
    """
    Changes the mathematical objective of the AI.

    Choices: 'balanced', 'fuel_saver', 'aggressive_speed', 'zone_strict'.
    Historical data is NOT wiped. Instead, the AI dynamically recalculates
    the quality score of all past events using the new strategy rules.

    Raises 400 for an unknown strategy name.
    """
    from app.config.dynamic_config import get_config
    # FIX: removed unused `import sqlite3` — nothing in this handler touches
    # the database directly; the config layer handles persistence.
    valid = ["balanced", "fuel_saver", "aggressive_speed", "zone_strict"]
    if strategy not in valid:
        raise HTTPException(400, f"Invalid strategy. Choose from {valid}")
    try:
        get_config().set("ml_strategy", strategy)
        return {
            "status": "ok",
            "message": f"Strategy changed to '{strategy}'. Historical AI data will be mathematically repurposed to train towards this new goal.",
            "strategy": strategy
        }
    except Exception as e:
        logger.error(f"[ML API] Strategy change failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
# -----------------------------------------------------------------------------
# GET /reports
# -----------------------------------------------------------------------------
@router.get("/reports", summary="List past hypertuning reports")
def ml_reports():
    """Return the last 10 tuning reports (JSON files in ml_data/reports/).

    Files are listed newest-first by filename; unreadable/corrupt files are
    skipped with a logged warning rather than failing the whole request.
    """
    try:
        report_dir = "ml_data/reports"
        if not os.path.isdir(report_dir):
            return {"status": "ok", "reports": [], "message": "No reports yet."}
        files = sorted(
            [f for f in os.listdir(report_dir) if f.endswith(".json")],
            reverse=True
        )[:10]
        reports = []
        for fname in files:
            path = os.path.join(report_dir, fname)
            try:
                with open(path) as f:
                    reports.append(json.load(f))
            except Exception as exc:
                # FIX: was a silent `pass` — log which report was skipped so
                # corrupt/partial report files are visible in the logs.
                logger.warning(f"[ML API] Skipping unreadable report {fname}: {exc}")
        return {"status": "ok", "reports": reports, "count": len(reports)}
    except Exception as e:
        logger.error(f"[ML API] Reports fetch failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
# -----------------------------------------------------------------------------
# GET /export
# -----------------------------------------------------------------------------
@router.get("/export", summary="Export all records as CSV")
def ml_export():
    """Generate a CSV download of all rows in the assignment_ml_log table.

    The whole CSV is built in memory by the collector, then returned with a
    Content-Disposition header so browsers download it as ml_export.csv.
    """
    try:
        from app.services.ml.ml_data_collector import get_collector
        csv_data = get_collector().export_csv()
        response = PlainTextResponse(content=csv_data, media_type="text/csv")
        response.headers["Content-Disposition"] = 'attachment; filename="ml_export.csv"'
        return response
    except Exception as e:
        logger.error(f"[ML API] Export failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))

364
app/routes/optimization.py Normal file
View File

@@ -0,0 +1,364 @@
"""Provider payload optimization endpoints."""
import logging
import time
from fastapi import APIRouter, Request, Depends, status, HTTPException, Query
from app.controllers.route_controller import RouteController
from app.core.exceptions import APIException
from app.core.arrow_utils import save_optimized_route_parquet
import os
logger = logging.getLogger(__name__)
router = APIRouter(
prefix="/api/v1/optimization",
tags=["Route Optimization"],
responses={
400: {"description": "Bad request - Invalid input parameters"},
422: {"description": "Validation error - Request validation failed"},
500: {"description": "Internal server error"}
}
)
def get_route_controller() -> RouteController:
    """FastAPI dependency: provide a fresh RouteController per request."""
    return RouteController()
# Legacy single-route endpoint removed; provider flow only.
@router.post(
    "/createdeliveries",
    status_code=status.HTTP_200_OK,
    summary="Optimize provider payload (forwarding paused)",
    description="""
    Accepts the provider's orders array, reorders it using greedy nearest-neighbor, adds only:
    - step (1..N)
    - previouskms (distance from previous stop in km)
    - cumulativekms (total distance so far in km)
    - actualkms (direct pickup-to-delivery distance)
    Forwarding is temporarily paused: returns the optimized array in the response.
    """,
    responses={
        200: {
            "description": "Upstream response",
            "content": {
                "application/json": {
                    "example": {"code": 200, "details": [], "message": "Success", "status": True}
                }
            }
        }
    }
)
async def provider_optimize_forward(
    body: list[dict],
    controller: RouteController = Depends(get_route_controller)
):
    """
    Accept provider JSON array, reorder by greedy nearest-neighbor, annotate each item with:
    - step (1..N)
    - previouskms (km from previous point)
    - cumulativekms (km so far)
    - actualkms (pickup to delivery distance)
    Then forward the optimized array to the external API and return only its response.
    Snapshot failures are logged and never fail the request.
    """
    try:
        url = "https://jupiter.nearle.app/live/api/v1/deliveries/createdeliveries"
        result = await controller.optimize_and_forward_provider_payload(body, url)
        # Performance Logging: Save a Parquet Snapshot (Async-friendly backup)
        # NOTE(review): this snapshots the raw request `body`, not the optimized
        # `result` — confirm save_optimized_route_parquet re-optimizes internally,
        # or whether the optimized output was intended here.
        try:
            os.makedirs("data/snapshots", exist_ok=True)
            snapshot_path = f"data/snapshots/route_{int(time.time())}.parquet"
            save_optimized_route_parquet(body, snapshot_path)
            logger.info(f"Apache Arrow: Snapshot saved to {snapshot_path}")
        except Exception as e:
            logger.warning(f"Could not save Arrow snapshot: {e}")
        return result
    except APIException:
        # Domain errors carry their own status/detail — propagate untouched.
        raise
    except Exception as e:
        logger.error(f"Unexpected error in provider_optimize_forward: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error")
@router.get(
    "/createdeliveries",
    summary="Usage info for provider optimize forward"
)
async def provider_optimize_forward_info():
    """Return usage info; this endpoint accepts POST only for processing.

    Serves as a friendly hint for anyone probing the endpoint with GET.
    """
    return {
        "message": "Use POST with a JSON array of orders to optimize and forward.",
        "method": "POST",
        # FIX: the advertised path previously pointed at a non-existent route
        # ("/provider-optimize-forward"); the POST handler is registered at
        # /api/v1/optimization/createdeliveries (router prefix + route path).
        "path": "/api/v1/optimization/createdeliveries"
    }
@router.post(
    "/riderassign",
    status_code=status.HTTP_200_OK,
    summary="Assign created orders to active riders",
    description="""
    Assigns orders to riders based on kitchen preferences, proximity, and load.
    - If a payload of orders is provided, processes those.
    - If payload is empty, fetches all 'created' orders from the external API.
    - Fetches active riders and matches them.
    """,
    responses={
        200: {
            "description": "Assignment Result",
            "content": {
                "application/json": {
                    "example": {"code": 200, "details": {"1234": [{"orderid": "..."}]}, "message": "Success", "status": True}
                }
            }
        }
    }
)
async def assign_orders_to_riders(
    request: Request,
    body: list[dict] = None,
    # Three spellings of the same flag are accepted for caller typo tolerance.
    resuffle: bool = Query(False),
    reshuffle: bool = Query(False),
    rehuffle: bool = Query(False),
    hypertuning_params: str = None
):
    """
    Smart assignment of orders to riders.

    Pipeline: fetch riders + pricing in parallel -> resolve order source
    (request payload, else external 'created' orders) -> optional per-request
    ML strategy override -> AssignmentService matching -> per-rider route
    optimization (parallel) -> per-order ETA computation -> zone grouping.
    A ML retraining trigger fires in `finally` and never affects the response.
    """
    from app.services.rider.get_active_riders import fetch_active_riders, fetch_created_orders, fetch_rider_pricing
    from app.services.core.assignment_service import AssignmentService
    from app.services.routing.route_optimizer import RouteOptimizer
    from app.services.routing.realistic_eta_calculator import RealisticETACalculator
    from datetime import datetime, timedelta
    from dateutil.parser import parse as parse_date
    import asyncio
    eta_calculator = RealisticETACalculator()
    try:
        # Check if any variant is present in query params (flag-style) or explicitly true
        q_params = request.query_params
        do_reshuffle = any(k in q_params for k in ["reshuffle", "resuffle", "rehuffle"]) or \
            resuffle or reshuffle or rehuffle
        # 1. Fetch Riders and Pricing (concurrently — both are network calls)
        riders_task = fetch_active_riders()
        pricing_task = fetch_rider_pricing()
        riders, pricing = await asyncio.gather(riders_task, pricing_task)
        # Determine pricing (Default: 30 base + 2.5/km)
        fuel_charge = 2.5
        base_pay = 30.0
        if pricing:
            # Only shift 1 pricing is consulted; other shifts fall back to defaults.
            shift_1 = next((p for p in pricing if p.get("shiftid") == 1), None)
            if shift_1:
                fuel_charge = float(shift_1.get("fuelcharge", 2.5))
                base_pay = float(shift_1.get("basepay") or shift_1.get("base_pay") or 30.0)
        # 2. Determine Orders Source
        orders = body
        if not orders:
            logger.info("No payload provided, fetching created orders from external API.")
            orders = await fetch_created_orders()
        else:
            logger.info(f"Processing {len(orders)} orders from payload.")
        if not orders:
            # Nothing to assign — still a success response for idempotent polling.
            return {
                "code": 200,
                "details": {},
                "message": "No orders found to assign.",
                "status": True,
                "meta": {
                    "active_riders_count": len(riders)
                }
            }
        # 3. Run Assignment (AssignmentService)
        # -- Per-request strategy override --
        from app.config.dynamic_config import get_config
        _cfg = get_config()
        _original_strategy = None
        valid_strategies = ["balanced", "fuel_saver", "aggressive_speed", "zone_strict"]
        if hypertuning_params and hypertuning_params in valid_strategies:
            _original_strategy = _cfg.get("ml_strategy", "balanced")
            # NOTE(review): writes directly to the config's private _cache so the
            # override is process-local and temporary; not safe under concurrent
            # requests mutating the same key — confirm acceptable.
            _cfg._cache["ml_strategy"] = hypertuning_params
            logger.info(f"[HYPERTUNE] Per-request strategy override: {hypertuning_params}")
        service = AssignmentService()
        assignments, unassigned_orders = await service.assign_orders(
            riders=riders,
            orders=orders,
            fuel_charge=fuel_charge,
            base_pay=base_pay,
            reshuffle=do_reshuffle
        )
        # Restore original strategy after this call
        if _original_strategy is not None:
            _cfg._cache["ml_strategy"] = _original_strategy
        if do_reshuffle:
            logger.info("[RESHUFFLE] Retry mode active - exploring alternative rider assignments.")
        # 4. Optimize Routes for Each Rider and Flatten Response
        optimizer = RouteOptimizer()
        flat_orders_list = []
        # Prepare tasks for parallel execution
        # We need to store context (rider_id) to map results back
        optimization_tasks = []
        task_contexts = []
        for rider_id, rider_orders in assignments.items():
            if not rider_orders:
                continue
            # Align with createdeliveries model: Always optimize from the Pickup/Kitchen location.
            # This prevents route reversal if the rider is on the "far" side of the deliveries.
            # The rider's current location (rlat/rlon) is ignored for sequence optimization
            # to ensure the logical flow (Kitchen -> Stop 1 -> Stop 2 -> Stop 3) is followed.
            start_coords = None
            # Add to task list
            optimization_tasks.append(
                optimizer.optimize_provider_payload(rider_orders, start_coords=start_coords)
            )
            task_contexts.append(rider_id)
        total_assigned = 0
        # Execute all optimizations in parallel
        # This dramatically reduces time from Sum(RiderTimes) to Max(RiderTime)
        if optimization_tasks:
            results = await asyncio.gather(*optimization_tasks)
            # Create a lookup for rider details
            rider_info_map = {}
            for r in riders:
                # Use string conversion for robust ID matching
                r_id = str(r.get("userid") or r.get("_id", ""))
                if r_id:
                    rider_info_map[r_id] = {
                        "name": r.get("username", ""),
                        "contactno": r.get("contactno", "")
                    }
            # Process results matching them back to riders
            # (zip order matches because tasks and contexts were appended in lockstep)
            for stored_rider_id, optimized_route in zip(task_contexts, results):
                r_id_str = str(stored_rider_id)
                r_info = rider_info_map.get(r_id_str, {})
                rider_name = r_info.get("name", "")
                rider_contact = r_info.get("contactno", "")
                # Calculate total distance for this rider
                total_rider_kms = 0
                if optimized_route:
                    # Usually the last order has the max cumulative kms if steps are 1..N
                    # NOTE(review): bare except below — any failure (e.g. bad
                    # cumulativekms value) silently falls back to summing per-order kms.
                    try:
                        total_rider_kms = max([float(o.get("cumulativekms", 0)) for o in optimized_route])
                    except:
                        total_rider_kms = sum([float(o.get("actualkms", o.get("kms", 0))) for o in optimized_route])
                for order in optimized_route:
                    # Mutates each order dict in place with rider attribution fields.
                    order["userid"] = stored_rider_id
                    order["username"] = rider_name
                    # Populate the specific fields requested by the user
                    order["rider"] = rider_name
                    order["ridercontactno"] = rider_contact
                    order["riderkms"] = str(round(total_rider_kms, 2))
                    # --- DYNAMIC ETA COMPUTATION -----------------------------
                    # Try various cases and names for pickup slot
                    pickup_slot_str = (
                        order.get("pickupSlot") or
                        order.get("pickupslot") or
                        order.get("pickup_slot") or
                        order.get("pickuptime")
                    )
                    if pickup_slot_str:
                        # Find the actual travel distance for THIS specific order
                        # cumulativekms represents distance from pickup to this delivery stop
                        dist_km = float(order.get("cumulativekms") or order.get("actualkms", order.get("kms", 0)))
                        step = int(order.get("step", 1))
                        order_type = order.get("ordertype", "Economy")
                        try:
                            # Robust date parsing (handles almost any format magically)
                            pickup_time = parse_date(str(pickup_slot_str))
                            eta_mins = eta_calculator.calculate_eta(
                                distance_km=dist_km,
                                is_first_order=(step == 1),
                                order_type=order_type,
                                time_of_day="normal"
                            )
                            expected_time = pickup_time + timedelta(minutes=eta_mins)
                            # Format output as requested: "2026-03-24 08:25 AM"
                            order["expectedDeliveryTime"] = expected_time.strftime("%Y-%m-%d %I:%M %p")
                            order["transitMinutes"] = eta_mins
                            order["calculationDistanceKm"] = round(dist_km, 2)
                        except Exception as e:
                            # ETA is best-effort; the order still ships without one.
                            logger.warning(f"Could not calculate ETA from pickupSlot '{pickup_slot_str}': {e}")
                    # ---------------------------------------------------------
                    flat_orders_list.append(order)
                total_assigned += len(optimized_route)
        # 5. Zone Processing
        from app.services.routing.zone_service import ZoneService
        zone_service = ZoneService()
        zone_data = zone_service.group_by_zones(flat_orders_list, unassigned_orders, fuel_charge=fuel_charge, base_pay=base_pay)
        zones_structure = zone_data["detailed_zones"]
        zone_analysis = zone_data["zone_analysis"]
        return {
            "code": 200,
            "zone_summary": zone_analysis,  # High-level zone metrics
            "zones": zones_structure,  # Detailed data
            "details": flat_orders_list,  # Flat list
            "message": "Success",
            "status": True,
            "meta": {
                "total_orders": len(orders),
                "utilized_riders": len([rid for rid, rl in assignments.items() if rl]),
                "active_riders_pool": len(riders),
                "assigned_orders": total_assigned,
                "unassigned_orders": len(unassigned_orders),
                "total_profit": round(sum(z["total_profit"] for z in zone_analysis), 2),
                "fuel_charge_base": fuel_charge,
                "unassigned_details": [
                    {
                        "orderid": o.get("orderid") or o.get("_id"),
                        "reason": o.get("unassigned_reason", "Unknown capacity/proximity issue")
                    } for o in unassigned_orders
                ],
                "distribution_summary": {rid: len(rl) for rid, rl in assignments.items() if rl},
                "resuffle_mode": do_reshuffle,
                "hypertuning_params": hypertuning_params or "default"
            }
        }
    except Exception as e:
        logger.error(f"Error in rider assignment: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error during assignment")
    finally:
        # -- Fire ML training trigger (non-blocking) -----------------------
        # Runs AFTER response is ready. Every 10th call kicks off a
        # background thread that retrains the model. API is never blocked.
        try:
            from app.main import trigger_training_if_due
            trigger_training_if_due()
        except Exception:
            pass  # Never crash the endpoint due to ML trigger

124
app/services/__init__.py Normal file
View File

@@ -0,0 +1,124 @@
"""Services package."""
from __future__ import annotations
import json
import os
import logging
from typing import Any, Optional, Dict
try:
import redis # type: ignore
except Exception: # pragma: no cover
redis = None # type: ignore
logger = logging.getLogger(__name__)
class RedisCache:
"""Lightweight Redis cache wrapper with graceful fallback."""
    def __init__(self, url_env: str = "REDIS_URL", default_ttl_seconds: Optional[int] = None) -> None:
        """Connect to Redis if configured; otherwise run as a disabled no-op cache.

        Args:
            url_env: Name of the env var holding the Redis URL.
            default_ttl_seconds: Default key TTL; falls back to the
                REDIS_CACHE_TTL_SECONDS env var, then 300s.
        """
        # Allow TTL to be configurable via env var (default 300s = 5 min, or 86400 = 24h)
        ttl_env = os.getenv("REDIS_CACHE_TTL_SECONDS")
        if default_ttl_seconds is None:
            default_ttl_seconds = int(ttl_env) if ttl_env else 300
        self.default_ttl_seconds = default_ttl_seconds
        self._enabled = False
        self._client = None
        # Counters surfaced via get_stats().
        self._stats = {"hits": 0, "misses": 0, "sets": 0}
        url = os.getenv(url_env)
        # Missing URL or missing redis package both degrade to disabled mode.
        if not url or redis is None:
            logger.warning("Redis not configured or client unavailable; caching disabled")
            return
        try:
            self._client = redis.Redis.from_url(url, decode_responses=True)
            # Fail fast: verify connectivity now rather than on first use.
            self._client.ping()
            self._enabled = True
            logger.info(f"Redis cache connected (TTL: {self.default_ttl_seconds}s)")
        except Exception as exc:
            logger.warning(f"Redis connection failed: {exc}; caching disabled")
            self._enabled = False
            self._client = None
    @property
    def enabled(self) -> bool:
        """True when a live Redis client is attached; gates every cache op."""
        return self._enabled and self._client is not None
def get_json(self, key: str) -> Optional[Any]:
if not self.enabled:
self._stats["misses"] += 1
return None
try:
raw = self._client.get(key) # type: ignore[union-attr]
if raw:
self._stats["hits"] += 1
return json.loads(raw)
else:
self._stats["misses"] += 1
return None
except Exception as exc:
logger.debug(f"Redis get_json error for key={key}: {exc}")
self._stats["misses"] += 1
return None
def set_json(self, key: str, value: Any, ttl_seconds: Optional[int] = None) -> None:
if not self.enabled:
return
try:
payload = json.dumps(value, default=lambda o: getattr(o, "model_dump", lambda: o)())
ttl = ttl_seconds if ttl_seconds is not None else self.default_ttl_seconds
# Use -1 for no expiration, otherwise use setex
if ttl > 0:
self._client.setex(key, ttl, payload) # type: ignore[union-attr]
else:
self._client.set(key, payload) # type: ignore[union-attr]
self._stats["sets"] += 1
except Exception as exc:
logger.debug(f"Redis set_json error for key={key}: {exc}")
def delete(self, pattern: str) -> int:
"""Delete keys matching pattern (e.g., 'routes:*'). Returns count deleted."""
if not self.enabled:
return 0
try:
keys = list(self._client.scan_iter(match=pattern)) # type: ignore[union-attr]
if keys:
return self._client.delete(*keys) # type: ignore[union-attr]
return 0
except Exception as exc:
logger.error(f"Redis delete error for pattern={pattern}: {exc}")
return 0
def get_stats(self) -> Dict[str, Any]:
"""Get cache statistics."""
stats = self._stats.copy()
if self.enabled:
try:
# Count cache keys
route_keys = list(self._client.scan_iter(match="routes:*")) # type: ignore[union-attr]
stats["total_keys"] = len(route_keys)
stats["enabled"] = True
except Exception:
stats["total_keys"] = 0
stats["enabled"] = True
else:
stats["total_keys"] = 0
stats["enabled"] = False
return stats
def get_keys(self, pattern: str = "routes:*") -> list[str]:
"""Get list of cache keys matching pattern."""
if not self.enabled:
return []
try:
return list(self._client.scan_iter(match=pattern)) # type: ignore[union-attr]
except Exception as exc:
logger.error(f"Redis get_keys error for pattern={pattern}: {exc}")
return []
# Singleton cache instance for app
cache = RedisCache()

View File

@@ -0,0 +1,515 @@
import logging
import random
import time
from math import radians, cos, sin, asin, sqrt
from typing import List, Dict, Any, Optional
from collections import defaultdict
from app.config.rider_preferences import RIDER_PREFERRED_KITCHENS
from app.services.routing.kalman_filter import smooth_rider_locations, smooth_order_coordinates
from app.config.dynamic_config import get_config
from app.services.ml.ml_data_collector import get_collector
logger = logging.getLogger(__name__)
class AssignmentService:
def __init__(self):
    """Create the service with static preferences and a live config handle."""
    # Handle to the DB-backed DynamicConfig; values are read lazily by
    # _load_config() at the start of every assignment run.
    self._cfg = get_config()
    # Static rider -> preferred-kitchen mapping from configuration.
    self.rider_preferences = RIDER_PREFERRED_KITCHENS
    # Mean Earth radius in kilometres, used by haversine().
    self.earth_radius_km = 6371
def _load_config(self):
"""Load ML-tuned hyperparams fresh on every assignment call."""
cfg = self._cfg
self.MAX_PICKUP_DISTANCE_KM = cfg.get("max_pickup_distance_km")
self.MAX_KITCHEN_DISTANCE_KM = cfg.get("max_kitchen_distance_km")
self.MAX_ORDERS_PER_RIDER = int(cfg.get("max_orders_per_rider"))
self.IDEAL_LOAD = int(cfg.get("ideal_load"))
self.WORKLOAD_BALANCE_THRESHOLD = cfg.get("workload_balance_threshold")
self.WORKLOAD_PENALTY_WEIGHT = cfg.get("workload_penalty_weight")
self.DISTANCE_PENALTY_WEIGHT = cfg.get("distance_penalty_weight")
self.PREFERENCE_BONUS = cfg.get("preference_bonus")
self.HOME_ZONE_BONUS_4KM = cfg.get("home_zone_bonus_4km")
self.HOME_ZONE_BONUS_2KM = cfg.get("home_zone_bonus_2km")
self.EMERGENCY_LOAD_PENALTY = cfg.get("emergency_load_penalty")
def haversine(self, lat1, lon1, lat2, lon2):
"""Calculate the great circle distance between two points."""
lon1, lat1, lon2, lat2 = map(radians, [float(lon1), float(lat1), float(lon2), float(lat2)])
dlon = lon2 - lon1
dlat = lat2 - lat1
a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
c = 2 * asin(min(1.0, sqrt(a))) # Clamp to 1.0 to avoid domain errors
return c * self.earth_radius_km
def get_lat_lon(self, obj: Dict[str, Any], prefix: str = "") -> tuple[float, float]:
"""Generic helper to extract lat/lon from diversely named keys."""
# Try specific prefixes first
candidates = [
(f"{prefix}lat", f"{prefix}lon"),
(f"{prefix}lat", f"{prefix}long"),
(f"{prefix}latitude", f"{prefix}longitude"),
]
# Also try standard keys if prefix fails
candidates.extend([
("lat", "lon"), ("latitude", "longitude"),
("pickuplat", "pickuplon"), ("pickuplat", "pickuplong"),
("deliverylat", "deliverylong"), ("droplat", "droplon")
])
for lat_key, lon_key in candidates:
if lat_key in obj and lon_key in obj and obj[lat_key] and obj[lon_key]:
try:
return float(obj[lat_key]), float(obj[lon_key])
except: pass
# Special case: nested 'pickup_location'
if "pickup_location" in obj:
return self.get_lat_lon(obj["pickup_location"])
return 0.0, 0.0
def get_order_kitchen(self, order: Dict[str, Any]) -> str:
possible_keys = ['storename', 'restaurantname', 'kitchenname', 'partnername', 'store_name']
for key in possible_keys:
if key in order and order[key]:
return str(order[key]).strip()
return "Unknown"
def assign_orders(self, orders: List[Dict[str, Any]], riders: List[Dict[str, Any]], reshuffle: bool = False) -> tuple[Dict[int, List[Dict[str, Any]]], List[Dict[str, Any]]]:
    """
    ENHANCED: Cluster-Based Load-Balanced Assignment.
    Strategy:
    1. Cluster orders by kitchen proximity
    2. Calculate rider workload (current capacity usage)
    3. Assign clusters to best-fit riders (proximity + workload balance)
    4. Rebalance if needed
    If reshuffle=True, controlled randomness is injected into rider scoring
    so that retrying the same input can explore alternative assignments.

    Returns:
        (assignments, unassigned_orders) where assignments maps rider id ->
        list of order dicts, and every order left unassigned carries an
        'unassigned_reason' annotation.
    """
    # Local imports avoid circular dependencies between service modules.
    # NOTE(review): RiderHistoryService is imported here but only used via
    # its own import inside _post_process -- confirm it can be dropped.
    from app.services.rider.rider_history_service import RiderHistoryService
    from app.services.rider.rider_state_manager import RiderStateManager
    from app.services.routing.clustering_service import ClusteringService
    # -- Load ML-tuned hyperparameters (or defaults on first run) ------
    self._load_config()
    _call_start = time.time()
    # 0. Prep
    assignments: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
    unassigned_orders: List[Dict[str, Any]] = []
    rider_states = {}  # Track live load per rider id (lat/lon/kitchens/count)
    # 0a. KALMAN FILTER - Smooth rider GPS locations before scoring
    riders = smooth_rider_locations(list(riders))
    # 0b. KALMAN FILTER - Smooth order delivery coordinates before clustering
    orders = smooth_order_coordinates(list(orders))
    # 1. Parse and Filter Riders
    valid_riders = []
    BLOCKED_RIDERS = [1242, 1266, 1245, 1232, 1240, 1007]  # Test/Blocked IDs
    # Load Existing State (Persistence)
    state_mgr = RiderStateManager()
    for r in riders:
        # Robust ID Extraction
        rid_raw = r.get("userid") or r.get("riderid") or r.get("id") or r.get("_id")
        try:
            rid = int(rid_raw)
        except (ValueError, TypeError):
            continue
        if rid in BLOCKED_RIDERS: continue
        # Robust Status Check
        # Keep if: onduty (1, "1", True) OR status is active/idle/online
        is_onduty = str(r.get("onduty")) in ["1", "True"] or r.get("onduty") is True
        is_active = r.get("status") in ["active", "idle", "online"]
        if not (is_onduty or is_active):
            continue
        # Location
        lat, lon = self.get_lat_lon(r)
        # Fetch previous state to know if they are already busy
        p_state = state_mgr.get_rider_state(rid)
        # If rider has valid GPS, use it. If not, fallback to Last Drop or Home.
        if lat == 0 or lon == 0:
            if p_state['last_drop_lat']:
                lat, lon = p_state['last_drop_lat'], p_state['last_drop_lon']
            else:
                # Home Location Fallback
                from app.config.rider_preferences import RIDER_HOME_LOCATIONS
                lat, lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))
        valid_riders.append({
            "id": rid,
            "lat": lat,
            "lon": lon,
            "obj": r
        })
        # Initialize rider state with existing workload
        existing_load = p_state.get('minutes_remaining', 0) / 15  # Convert minutes to order estimate
        rider_states[rid] = {
            'lat': lat,
            'lon': lon,
            'kitchens': set(),
            'count': int(existing_load),  # Start with existing workload
            'workload_score': existing_load  # For prioritization
        }
    if not valid_riders:
        logger.warning("No riders passed on-duty filter. Retrying with all available riders as emergency rescue...")
        # If no on-duty riders, we take ANY rider provided by the API to ensure assignment
        for r in riders:
            # NOTE(review): this rescue path only reads 'userid' (not the
            # aliases used above) and int() on a bad value would raise here.
            rid = int(r.get("userid", 0))
            if rid in BLOCKED_RIDERS: continue
            lat, lon = self.get_lat_lon(r)
            if lat == 0 or lon == 0:
                from app.config.rider_preferences import RIDER_HOME_LOCATIONS
                lat, lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))
            if lat != 0:
                valid_riders.append({"id": rid, "lat": lat, "lon": lon, "obj": r})
                rider_states[rid] = {
                    'lat': lat, 'lon': lon, 'kitchens': set(),
                    'count': 0, 'workload_score': 0
                }
    if not valid_riders:
        logger.error("DANGER: Absolutely no riders available for assignment.")
        # Mark all as unassigned
        for o in orders:
            o["unassigned_reason"] = "No riders found (check partner online status)."
            unassigned_orders.append(o)
        return assignments, unassigned_orders
    logger.info(f"Found {len(valid_riders)} active riders")
    # 2. CLUSTER ORDERS BY KITCHEN PROXIMITY
    clustering_service = ClusteringService()
    clusters = clustering_service.cluster_orders_by_kitchen(orders, max_cluster_radius_km=self.MAX_KITCHEN_DISTANCE_KM)  # radius from ML
    logger.info(f"Created {len(clusters)} order clusters")
    # 3. ASSIGN CLUSTERS TO RIDERS (Load-Balanced)
    for cluster_idx, cluster in enumerate(clusters):
        centroid_lat, centroid_lon = cluster['centroid']
        cluster_orders = cluster['orders']
        cluster_size = len(cluster_orders)
        logger.info(f"Assigning cluster {cluster_idx+1}/{len(clusters)}: {cluster_size} orders at ({centroid_lat:.4f}, {centroid_lon:.4f})")
        # Find best riders for this cluster
        candidate_riders = []
        for r in valid_riders:
            rid = r["id"]
            r_state = rider_states[rid]
            # Calculate distance to cluster centroid
            dist = self.haversine(r_state['lat'], r_state['lon'], centroid_lat, centroid_lon)
            # Preference bonus & Distance Bypass
            prefs = self.rider_preferences.get(rid, [])
            has_preference = False
            for k_name in cluster['kitchen_names']:
                # Case-insensitive substring match in either direction.
                if any(p.lower() in k_name.lower() or k_name.lower() in p.lower() for p in prefs):
                    has_preference = True
                    break
            # Dynamic Limit: 6km default, 10km for preferred kitchens
            allowed_dist = self.MAX_PICKUP_DISTANCE_KM
            if has_preference:
                allowed_dist = max(allowed_dist, 10.0)
            # Skip if too far
            if dist > allowed_dist:
                continue
            # Calculate workload utilization (0.0 to 1.0)
            utilization = r_state['count'] / self.MAX_ORDERS_PER_RIDER
            # Calculate score (lower is better) - weights from DynamicConfig
            workload_penalty = utilization * self.WORKLOAD_PENALTY_WEIGHT
            distance_penalty = dist * self.DISTANCE_PENALTY_WEIGHT
            # Preference bonus (ML-tuned; presumably negative so it lowers the
            # score -- TODO confirm sign convention in DynamicConfig)
            preference_bonus = self.PREFERENCE_BONUS if has_preference else 0
            # Home zone bonus (ML-tuned)
            from app.config.rider_preferences import RIDER_HOME_LOCATIONS
            h_lat, h_lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))
            home_bonus = 0
            if h_lat != 0:
                home_dist = self.haversine(h_lat, h_lon, centroid_lat, centroid_lon)
                if home_dist <= 4.0:
                    home_bonus = self.HOME_ZONE_BONUS_4KM
                if home_dist <= 2.0:
                    home_bonus = self.HOME_ZONE_BONUS_2KM
            score = workload_penalty + distance_penalty + preference_bonus + home_bonus
            # RESHUFFLE: Add controlled noise so retries explore different riders
            if reshuffle:
                noise = random.uniform(-15.0, 15.0)
                score += noise
            candidate_riders.append({
                'id': rid,
                'score': score,
                'distance': dist,
                'utilization': utilization,
                'current_load': r_state['count']
            })
        if not candidate_riders:
            logger.warning(f"No riders available for cluster {cluster_idx+1}")
            for o in cluster_orders:
                o["unassigned_reason"] = f"No riders within {self.MAX_PICKUP_DISTANCE_KM}km radius of kitchen."
                unassigned_orders.append(o)
            continue
        # Sort by score (best first)
        candidate_riders.sort(key=lambda x: x['score'])
        # SMART DISTRIBUTION: Split cluster if needed
        remaining_orders = cluster_orders[:]
        while remaining_orders and candidate_riders:
            best_rider = candidate_riders[0]
            rid = best_rider['id']
            r_state = rider_states[rid]
            # How many orders can this rider take?
            available_capacity = self.MAX_ORDERS_PER_RIDER - r_state['count']
            if available_capacity <= 0:
                # Rider is full, remove from candidates
                candidate_riders.pop(0)
                continue
            # Decide batch size
            # If rider is underutilized and cluster is small, give all
            # If rider is busy or cluster is large, split it
            if best_rider['utilization'] < self.WORKLOAD_BALANCE_THRESHOLD:
                # Rider has capacity, can take more
                batch_size = min(available_capacity, len(remaining_orders))
            else:
                # Rider is getting busy, be conservative (IDEAL_LOAD from ML)
                batch_size = min(self.IDEAL_LOAD - r_state['count'], len(remaining_orders), available_capacity)
            batch_size = max(1, batch_size)  # At least 1 order
            # Assign batch
            batch = remaining_orders[:batch_size]
            remaining_orders = remaining_orders[batch_size:]
            assignments[rid].extend(batch)
            # Update rider state: rider is assumed to end up at the cluster centroid.
            r_state['count'] += len(batch)
            r_state['lat'] = centroid_lat
            r_state['lon'] = centroid_lon
            r_state['kitchens'].update(cluster['kitchen_names'])
            r_state['workload_score'] = r_state['count'] / self.MAX_ORDERS_PER_RIDER
            logger.info(f" -> Assigned {len(batch)} orders to Rider {rid} (load: {r_state['count']}/{self.MAX_ORDERS_PER_RIDER})")
            # Re-sort candidates by updated scores
            for candidate in candidate_riders:
                if candidate['id'] == rid:
                    candidate['utilization'] = r_state['count'] / self.MAX_ORDERS_PER_RIDER
                    candidate['current_load'] = r_state['count']
                    # Recalculate score
                    # NOTE(review): re-scoring uses hardcoded weights (100 / 2)
                    # instead of WORKLOAD_PENALTY_WEIGHT / DISTANCE_PENALTY_WEIGHT
                    # used above, and drops the preference/home bonuses --
                    # confirm whether this divergence is intended.
                    workload_penalty = candidate['utilization'] * 100
                    distance_penalty = candidate['distance'] * 2
                    candidate['score'] = workload_penalty + distance_penalty
            candidate_riders.sort(key=lambda x: x['score'])
        # If any orders left in the cluster after exhaustion of candidates
        if remaining_orders:
            # Instead of giving up, keep them in a pool for mandatory assignment
            unassigned_orders.extend(remaining_orders)
    # 4. EMERGENCY MANDATORY ASSIGNMENT (Ensures 0 unassigned if riders exist)
    if unassigned_orders and valid_riders:
        logger.info(f"[ALERT] Starting Emergency Mandatory Assignment for {len(unassigned_orders)} orders...")
        force_pool = unassigned_orders[:]
        unassigned_orders.clear()
        for o in force_pool:
            # Determine pickup location
            o_lat, o_lon = self.get_lat_lon(o, prefix="pickup")
            if o_lat == 0:
                o["unassigned_reason"] = "Could not geolocate order (0,0)."
                unassigned_orders.append(o)
                continue
            # Find the 'least bad' rider (Closest + Balanced Load)
            best_emergency_rider = None
            best_emergency_score = float('inf')
            for r in valid_riders:
                rid = r["id"]
                r_state = rider_states[rid]
                dist = self.haversine(r_state['lat'], r_state['lon'], o_lat, o_lon)
                # For emergency: Distance is important, but load prevents one rider taking EVERYTHING
                # Score = distance + ML-tuned penalty per existing order
                e_score = dist + (r_state['count'] * self.EMERGENCY_LOAD_PENALTY)
                if e_score < best_emergency_score:
                    best_emergency_score = e_score
                    best_emergency_rider = rid
            if best_emergency_rider:
                assignments[best_emergency_rider].append(o)
                rider_states[best_emergency_rider]['count'] += 1
                logger.info(f" Force-Assigned order {o.get('orderid')} to Rider {best_emergency_rider} (Score: {best_emergency_score:.2f})")
            else:
                unassigned_orders.append(o)
    # 5. FINAL REBALANCING (Optional)
    # Check if any rider is overloaded while others are idle
    self._rebalance_workload(assignments, rider_states, valid_riders)
    # 6. Commit State and History
    self._post_process(assignments, rider_states)
    # 7. -- ML DATA COLLECTION -----------------------------------------
    try:
        elapsed_ms = (time.time() - _call_start) * 1000
        get_collector().log_assignment_event(
            num_orders=len(orders),
            num_riders=len(riders),
            hyperparams=self._cfg.get_all(),
            assignments=assignments,
            unassigned_count=len(unassigned_orders),
            elapsed_ms=elapsed_ms,
        )
    except Exception as _ml_err:
        logger.debug(f"ML logging skipped: {_ml_err}")
    # Log final distribution
    logger.info("=" * 50)
    logger.info("FINAL ASSIGNMENT DISTRIBUTION:")
    # NOTE(review): this loop rebinds the 'orders' parameter; harmless here
    # because the parameter is not used again afterwards.
    for rid, orders in sorted(assignments.items()):
        logger.info(f" Rider {rid}: {len(orders)} orders")
    if unassigned_orders:
        logger.warning(f" [ALERT] STILL UNASSIGNED: {len(unassigned_orders)} (Reason: No riders online or invalid coords)")
    else:
        logger.info(" [OK] ALL ORDERS ASSIGNED SUCCESSFULLY")
    logger.info("=" * 50)
    return assignments, unassigned_orders
def _rebalance_workload(self, assignments: Dict[int, List], rider_states: Dict, valid_riders: List):
    """
    Rebalance if workload is heavily skewed.
    Move orders from overloaded riders to idle ones if possible.
    Mutates `assignments` and `rider_states` in place; returns None.
    """
    if not assignments:
        return
    # Calculate average load
    total_orders = sum(len(orders) for orders in assignments.values())
    avg_load = total_orders / len(valid_riders) if valid_riders else 0
    # Find overloaded and underutilized riders
    overloaded = []
    underutilized = []
    for r in valid_riders:
        rid = r['id']
        load = rider_states[rid]['count']
        if load > avg_load * 1.5 and load > self.IDEAL_LOAD:  # 50% above average
            overloaded.append(rid)
        elif load < avg_load * 0.5:  # 50% below average
            underutilized.append(rid)
    if not overloaded or not underutilized:
        return
    logger.info(f"Rebalancing: {len(overloaded)} overloaded, {len(underutilized)} underutilized riders")
    # Try to move orders from overloaded to underutilized
    for over_rid in overloaded:
        over_orders = assignments[over_rid]
        over_state = rider_states[over_rid]
        # Try to offload some orders
        for under_rid in underutilized:
            under_state = rider_states[under_rid]
            under_capacity = self.MAX_ORDERS_PER_RIDER - under_state['count']
            if under_capacity <= 0:
                continue
            # Find orders that are closer to underutilized rider
            transferable = []
            for order in over_orders:
                o_lat, o_lon = self.get_lat_lon(order, prefix="pickup")
                if o_lat == 0:
                    continue
                dist_to_under = self.haversine(under_state['lat'], under_state['lon'], o_lat, o_lon)
                dist_to_over = self.haversine(over_state['lat'], over_state['lon'], o_lat, o_lon)
                # Transfer if underutilized rider is closer or similar distance
                # (within 20% of the overloaded rider's distance).
                if dist_to_under <= self.MAX_PICKUP_DISTANCE_KM and dist_to_under <= dist_to_over * 1.2:
                    transferable.append(order)
            if transferable:
                # Transfer up to capacity; never drop the donor below IDEAL_LOAD.
                # NOTE(review): after earlier transfers this min() can reach 0,
                # yielding an empty batch and a "Rebalanced: 0 orders" log line.
                transfer_count = min(len(transferable), under_capacity, over_state['count'] - self.IDEAL_LOAD)
                transfer_batch = transferable[:transfer_count]
                # Move orders
                for order in transfer_batch:
                    over_orders.remove(order)
                    assignments[under_rid].append(order)
                # Update states
                over_state['count'] -= len(transfer_batch)
                under_state['count'] += len(transfer_batch)
                logger.info(f" Rebalanced: {len(transfer_batch)} orders from Rider {over_rid} -> {under_rid}")
def _post_process(self, assignments, rider_states):
    """Update History and Persistence.

    Records per-rider stats and writes each rider's projected state
    (remaining minutes, last drop position, active kitchens) to disk
    so the next assignment run starts from it.
    """
    from app.services.rider.rider_history_service import RiderHistoryService
    from app.services.rider.rider_state_manager import RiderStateManager
    history_service = RiderHistoryService()
    state_mgr = RiderStateManager()
    import time
    ts = time.time()
    for rid, orders in assignments.items():
        if not orders: continue
        # 5.0 appears to be a flat per-batch distance estimate (km?) --
        # TODO confirm against RiderHistoryService.update_rider_stats.
        history_service.update_rider_stats(rid, 5.0, len(orders))
        st = rider_states[rid]
        state_mgr.states[rid] = {
            # 15 minutes estimated per order.
            'minutes_remaining': len(orders) * 15,
            'last_drop_lat': st['lat'],
            'last_drop_lon': st['lon'],
            'active_kitchens': st['kitchens'],
            'last_updated_ts': ts
        }
    state_mgr._save_states()

View File

@@ -0,0 +1,311 @@
"""
Behavior Analyzer - Production Grade
======================================
Analyzes historical assignment data using the ID3 decision tree to classify
assignment outcomes as 'SUCCESS' or 'RISK'.
Key fixes and upgrades over the original
------------------------------------------
1. BUG FIX: distance_band now uses `total_distance_km` (not `num_orders`).
2. BUG FIX: time_band input is always normalized to uppercase before predict.
3. Rich feature set: distance_band, time_band, load_band, order_density_band.
4. Returns (label, confidence) from the classifier - exposes uncertainty.
5. Trend analysis: tracks rolling success rate over recent N windows.
6. Tree persistence: saves/loads trained tree as JSON to survive restarts.
7. Feature importance proxy: logs which features drove the split.
8. Thread-safe lazy training via a simple lock.
"""
import json
import logging
import os
import sqlite3
import threading
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple
from app.services.ml.id3_classifier import ID3Classifier, get_behavior_model
logger = logging.getLogger(__name__)
_DB_PATH = os.getenv("ML_DB_PATH", "ml_data/ml_store.db")
_TREE_PATH = os.getenv("ML_TREE_PATH", "ml_data/behavior_tree.json")
# ---------------------------------------------------------------------------
# Band encoders (discrete labels for ID3)
# ---------------------------------------------------------------------------
def distance_band(km: float) -> str:
    """Total route distance -> discrete band."""
    # Ascending upper bounds; first match wins.
    for upper_km, label in ((5.0, "SHORT"), (15.0, "MID"), (30.0, "LONG")):
        if km <= upper_km:
            return label
    return "VERY_LONG"
def time_band(ts_str: str) -> str:
    """ISO timestamp -> time-of-day band ('UNKNOWN' if unparseable)."""
    try:
        hour = datetime.fromisoformat(ts_str).hour
    except Exception:
        return "UNKNOWN"
    # Half-open [lo, hi) hour ranges; 23:00-05:59 falls through to LATE_NIGHT.
    day_bands = (
        (6, 10, "MORNING_RUSH"),
        (10, 12, "LATE_MORNING"),
        (12, 14, "LUNCH_RUSH"),
        (14, 17, "AFTERNOON"),
        (17, 20, "EVENING_RUSH"),
        (20, 23, "NIGHT"),
    )
    for lo, hi, label in day_bands:
        if lo <= hour < hi:
            return label
    return "LATE_NIGHT"
def load_band(avg_load: float) -> str:
    """Average orders-per-rider -> load band."""
    # Check from heaviest to lightest.
    if avg_load > 8.0:
        return "OVERLOADED"
    if avg_load > 5.0:
        return "HEAVY"
    if avg_load > 2.0:
        return "MODERATE"
    return "LIGHT"
def order_density_band(num_orders: int, num_riders: int) -> str:
    """Orders per available rider -> density band ('NO_RIDERS' if none)."""
    if num_riders == 0:
        return "NO_RIDERS"
    per_rider = num_orders / num_riders
    # Check from densest to sparsest.
    if per_rider > 9.0:
        return "OVERLOADED"
    if per_rider > 5.0:
        return "DENSE"
    if per_rider > 2.0:
        return "NORMAL"
    return "SPARSE"
# ---------------------------------------------------------------------------
# Behavior Analyzer
# ---------------------------------------------------------------------------
class BehaviorAnalyzer:
    """
    Trains an ID3 tree on historical assignment logs and predicts whether
    a new assignment context is likely to SUCCEED or be at RISK.
    Features used
    -------------
    - distance_band : total route distance bucket
    - time_band : time-of-day bucket
    - load_band : average load per rider bucket
    - order_density_band : orders-per-rider ratio bucket
    Target
    ------
    - is_success: "SUCCESS" if unassigned_count == 0, else "RISK"
    """
    TARGET = "is_success"
    FEATURES = ["distance_band", "time_band", "load_band", "order_density_band"]
    def __init__(self):
        # Paths are env-overridable (ML_DB_PATH / ML_TREE_PATH).
        self._db_path = _DB_PATH
        self._tree_path = _TREE_PATH
        self.model: ID3Classifier = get_behavior_model(max_depth=5)
        self.is_trained: bool = False
        # Serializes train_on_history() across threads.
        self._lock = threading.Lock()
        self._training_size: int = 0
        self._success_rate: float = 0.0
        self._rules: List[str] = []
        self._recent_trend: List[float] = []
        # Restore a previously persisted tree so predictions survive restarts.
        self._load_tree()
    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------
    def train_on_history(self, limit: int = 2000) -> Dict[str, Any]:
        """Fetch the most recent rows from SQLite and rebuild the tree.

        Returns a status dict: 'ok' with training metrics, or one of
        'insufficient_data' / 'preprocess_failed' / 'error'.
        """
        with self._lock:
            try:
                rows = self._fetch_rows(limit)
                # Refuse to fit a tree on fewer than 10 rows.
                if len(rows) < 10:
                    logger.warning(f"ID3 BehaviorAnalyzer: only {len(rows)} rows - need >=10.")
                    return {"status": "insufficient_data", "rows": len(rows)}
                training_data, successes = self._preprocess(rows)
                if not training_data:
                    return {"status": "preprocess_failed", "rows": len(rows)}
                self.model.train(
                    data=training_data,
                    target=self.TARGET,
                    features=self.FEATURES,
                )
                self.is_trained = True
                self._training_size = len(training_data)
                self._success_rate = successes / len(training_data)
                self._rules = self.model.get_tree_rules()
                self._compute_trend(rows)
                # Persist so a restart can skip retraining.
                self._save_tree()
                summary = {
                    "status": "ok",
                    "training_rows": self._training_size,
                    "success_rate": round(self._success_rate, 4),
                    "n_rules": len(self._rules),
                    "classes": self.model.classes,
                    "feature_values": self.model.feature_values,
                }
                logger.info(
                    f"ID3 BehaviorAnalyzer trained - rows={self._training_size}, "
                    f"success_rate={self._success_rate:.1%}, rules={len(self._rules)}"
                )
                return summary
            except Exception as e:
                logger.error(f"ID3 BehaviorAnalyzer training failed: {e}", exc_info=True)
                return {"status": "error", "message": str(e)}
    # ------------------------------------------------------------------
    # Prediction
    # ------------------------------------------------------------------
    def predict(self, distance_km: float, timestamp_or_band: str,
                avg_load: float = 4.0, num_orders: int = 5,
                num_riders: int = 2) -> Dict[str, Any]:
        """Predict whether an assignment context will SUCCEED or be at RISK.

        *timestamp_or_band* may be either an ISO timestamp or an
        already-computed time band name (matched case-insensitively).
        """
        # Untrained model: return an optimistic low-confidence default
        # rather than failing.
        if not self.is_trained:
            return {
                "label": "SUCCESS",
                "confidence": 0.5,
                "features_used": {},
                "model_trained": False,
            }
        KNOWN_BANDS = {
            "MORNING_RUSH", "LATE_MORNING", "LUNCH_RUSH",
            "AFTERNOON", "EVENING_RUSH", "NIGHT", "LATE_NIGHT", "UNKNOWN"
        }
        # Accept a band name directly (normalized to uppercase) or derive
        # the band from the timestamp string.
        t_band = (
            timestamp_or_band.upper()
            if timestamp_or_band.upper() in KNOWN_BANDS
            else time_band(timestamp_or_band)
        )
        features_used = {
            "distance_band": distance_band(distance_km),
            "time_band": t_band,
            "load_band": load_band(avg_load),
            "order_density_band": order_density_band(num_orders, num_riders),
        }
        label, confidence = self.model.predict(features_used)
        return {
            "label": label,
            "confidence": round(confidence, 4),
            "features_used": features_used,
            "model_trained": True,
        }
    # ------------------------------------------------------------------
    # Info / Diagnostics
    # ------------------------------------------------------------------
    def get_info(self) -> Dict[str, Any]:
        """Diagnostic snapshot for dashboards (rule list capped at 20)."""
        return {
            "is_trained": self.is_trained,
            "training_rows": self._training_size,
            "success_rate": round(self._success_rate, 4),
            "n_rules": len(self._rules),
            "rules": self._rules[:20],
            "recent_trend": self._recent_trend,
            "feature_names": self.FEATURES,
            "feature_values": self.model.feature_values if self.is_trained else {},
            "classes": self.model.classes if self.is_trained else [],
        }
    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _fetch_rows(self, limit: int) -> List[Dict]:
        """Return up to *limit* most recent assignment-log rows as dicts."""
        # NOTE(review): the connection is not closed if the query raises;
        # a try/finally or context manager would be safer.
        conn = sqlite3.connect(self._db_path)
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT * FROM assignment_ml_log ORDER BY id DESC LIMIT ?", (limit,)
        ).fetchall()
        conn.close()
        return [dict(r) for r in rows]
    def _preprocess(self, rows: List[Dict]) -> Tuple[List[Dict], int]:
        """Convert raw DB rows into banded training rows; count SUCCESS labels."""
        training_data: List[Dict] = []
        successes = 0
        for r in rows:
            try:
                dist_km = float(r.get("total_distance_km") or 0.0)
                ts = str(r.get("timestamp") or "")
                avg_ld = float(r.get("avg_load") or 0.0)
                n_orders = int(r.get("num_orders") or 0)
                n_riders = int(r.get("num_riders") or 1)
                unassigned = int(r.get("unassigned_count") or 0)
                # Binary target: SUCCESS only when nothing was left unassigned.
                label = "SUCCESS" if unassigned == 0 else "RISK"
                if label == "SUCCESS":
                    successes += 1
                training_data.append({
                    "distance_band": distance_band(dist_km),
                    "time_band": time_band(ts),
                    "load_band": load_band(avg_ld),
                    "order_density_band": order_density_band(n_orders, n_riders),
                    self.TARGET: label,
                })
            except Exception:
                # Skip malformed rows rather than aborting training.
                continue
        return training_data, successes
    def _compute_trend(self, rows: List[Dict], window: int = 50) -> None:
        """Rolling success rate per *window* rows; keeps the last 20 windows."""
        trend = []
        for i in range(0, len(rows), window):
            chunk = rows[i:i + window]
            if not chunk:
                break
            # NOTE(review): default of 1 counts rows missing unassigned_count
            # as failures -- confirm that is intended.
            rate = sum(1 for r in chunk if int(r.get("unassigned_count", 1)) == 0) / len(chunk)
            trend.append(round(rate, 4))
        self._recent_trend = trend[-20:]
    def _save_tree(self) -> None:
        """Persist the trained tree as JSON (best-effort; failure only warns)."""
        try:
            os.makedirs(os.path.dirname(self._tree_path) or ".", exist_ok=True)
            with open(self._tree_path, "w") as f:
                f.write(self.model.to_json())
            logger.info(f"ID3 tree persisted -> {self._tree_path}")
        except Exception as e:
            logger.warning(f"ID3 tree save failed: {e}")
    def _load_tree(self) -> None:
        """Restore a persisted tree if present; otherwise stay untrained."""
        try:
            if not os.path.exists(self._tree_path):
                return
            with open(self._tree_path) as f:
                self.model = ID3Classifier.from_json(f.read())
            self.is_trained = True
            self._rules = self.model.get_tree_rules()
            logger.info(f"ID3 tree restored - rules={len(self._rules)}")
        except Exception as e:
            logger.warning(f"ID3 tree load failed (will retrain): {e}")
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------
_analyzer: Optional[BehaviorAnalyzer] = None
_analyzer_lock = threading.Lock()
def get_analyzer() -> BehaviorAnalyzer:
    """Return the process-wide BehaviorAnalyzer, creating it lazily.

    Creation and the first training attempt are guarded by a lock so
    concurrent callers cannot build two analyzers.
    """
    global _analyzer
    with _analyzer_lock:
        if _analyzer is None:
            _analyzer = BehaviorAnalyzer()
            # If no persisted tree was restored, attempt an initial training
            # pass from history.
            # NOTE(review): source indentation was flattened; this nesting
            # (train once at creation) is the assumed reading -- confirm
            # against the original file.
            if not _analyzer.is_trained:
                _analyzer.train_on_history()
        return _analyzer

View File

@@ -0,0 +1,400 @@
"""
ID3 Classifier - Production Grade
Improvements over v1:
- Chi-squared pruning to prevent overfitting on sparse branches
- Confidence scores on every prediction (Laplace smoothed)
- Gain-ratio variant for high-cardinality features
- Serialization (to_dict / from_dict / to_json / from_json)
- Per-feature importance scores
- Full prediction audit trail via explain()
- min_samples_split and min_info_gain stopping criteria
"""
import math
import json
import logging
from collections import Counter
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
class ID3Classifier:
"""
ID3 decision tree (entropy / information-gain splitting).
All predict* methods work even if the model has never been trained -
they return safe defaults rather than raising.
"""
def __init__(
    self,
    max_depth: int = 6,
    min_samples_split: int = 5,
    min_info_gain: float = 0.001,
    use_gain_ratio: bool = False,
    chi2_pruning: bool = True,
):
    """Configure stopping and pruning criteria; the model starts untrained.

    Args:
        max_depth: Maximum tree depth before forcing a leaf.
        min_samples_split: Minimum samples required to split a node.
        min_info_gain: Minimum information gain required to split.
        use_gain_ratio: Use gain ratio instead of raw information gain
            (intended for high-cardinality features).
        chi2_pruning: Apply chi-squared pruning after building the tree.
    """
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_info_gain = min_info_gain
    self.use_gain_ratio = use_gain_ratio
    self.chi2_pruning = chi2_pruning
    # Learned state -- populated by train().
    self.tree: Any = None
    self.features: List[str] = []
    self.target: str = ""
    self.classes_: List[str] = []
    self.feature_importances_: Dict[str, float] = {}
    self.feature_values: Dict[str, List[str]] = {}  # unique values seen per feature
    self._n_samples: int = 0
    self._total_gain: Dict[str, float] = {}  # cumulative info gain per feature
# ------------------------------------------------------------------ train
def train(self, data: List[Dict[str, Any]], target: str, features: List[str]) -> None:
    """Build the decision tree from labelled rows.

    Args:
        data: List of dict rows; each maps feature name -> discrete value
            and contains the *target* key.
        target: Name of the label key in each row.
        features: Feature names considered for splitting.

    No-op (with a warning) when *data* is empty.
    """
    if not data:
        logger.warning("ID3: train() called with empty data.")
        return
    self.target = target
    self.features = list(features)
    self.classes_ = sorted({str(row.get(target)) for row in data})
    self._total_gain = {f: 0.0 for f in features}
    self._n_samples = len(data)
    # Collect unique values per feature for dashboard display
    self.feature_values = {
        f: sorted({str(row.get(f)) for row in data if row.get(f) is not None})
        for f in features
    }
    self.tree = self._build_tree(data, list(features), target, depth=0)
    if self.chi2_pruning:
        self.tree = self._prune(self.tree, data, target)
    # Normalize accumulated gain into relative importances; the `or 1.0`
    # guards against division by zero when no split produced gain.
    total_gain = sum(self._total_gain.values()) or 1.0
    self.feature_importances_ = {
        f: round(v / total_gain, 4) for f, v in self._total_gain.items()
    }
    logger.info(
        f"ID3: trained on {len(data)} samples | "
        f"classes={self.classes_} | importances={self.feature_importances_}"
    )
# ----------------------------------------------------------- predict API
def predict(self, sample: Dict[str, Any]) -> Tuple[str, float]:
"""Return (label, confidence 0-1). Safe to call before training."""
if self.tree is None:
return "Unknown", 0.0
label, proba = self._classify(self.tree, sample, [])
confidence = proba.get(str(label), 0.0) if isinstance(proba, dict) else 1.0
return str(label), round(confidence, 4)
def predict_proba(self, sample: Dict[str, Any]) -> Dict[str, float]:
"""Full class probability distribution."""
if self.tree is None:
return {}
_, proba = self._classify(self.tree, sample, [])
return proba if isinstance(proba, dict) else {str(proba): 1.0}
def explain(self, sample: Dict[str, Any]) -> Dict[str, Any]:
"""Human-readable decision path for audit / dashboard display."""
if self.tree is None:
return {"prediction": "Unknown", "confidence": 0.0, "decision_path": []}
path: List[str] = []
label, proba = self._classify(self.tree, sample, path)
return {
"prediction": str(label),
"confidence": round(proba.get(str(label), 1.0), 4),
"probabilities": proba,
"decision_path": path,
}
# ---------------------------------------------------------- serialisation
def to_dict(self) -> Dict[str, Any]:
    """Serialise the full model state (tree + metadata + hyperparams)
    into a plain, JSON-friendly dict."""
    hyperparams = {
        "max_depth": self.max_depth,
        "min_samples_split": self.min_samples_split,
        "min_info_gain": self.min_info_gain,
        "use_gain_ratio": self.use_gain_ratio,
        "chi2_pruning": self.chi2_pruning,
    }
    return {
        "tree": self.tree,
        "features": self.features,
        "target": self.target,
        "classes": self.classes_,
        "feature_importances": self.feature_importances_,
        "feature_values": self.feature_values,
        "n_samples": self._n_samples,
        "params": hyperparams,
    }
@classmethod
def from_dict(cls, d: Dict[str, Any]) -> "ID3Classifier":
    """Rebuild a classifier from a dict produced by to_dict()."""
    params = d.get("params", {})
    model = cls(
        max_depth=params.get("max_depth", 6),
        min_samples_split=params.get("min_samples_split", 5),
        min_info_gain=params.get("min_info_gain", 0.001),
        use_gain_ratio=params.get("use_gain_ratio", False),
        chi2_pruning=params.get("chi2_pruning", True),
    )
    model.tree = d["tree"]
    model.features = d["features"]
    model.target = d["target"]
    model.classes_ = d["classes"]
    model.feature_importances_ = d.get("feature_importances", {})
    model.feature_values = d.get("feature_values", {})
    model._n_samples = d.get("n_samples", 0)
    return model
def to_json(self) -> str:
    """Serialise the model state to a pretty-printed JSON string."""
    state = self.to_dict()
    return json.dumps(state, indent=2)
@classmethod
def from_json(cls, s: str) -> "ID3Classifier":
    """Inverse of to_json(): rebuild a classifier from its JSON string."""
    payload = json.loads(s)
    return cls.from_dict(payload)
def summary(self) -> Dict[str, Any]:
    """Compact model overview for dashboards / health endpoints."""
    info: Dict[str, Any] = {
        "n_samples": self._n_samples,
        "n_classes": len(self.classes_),
        "classes": self.classes_,
        "n_features": len(self.features),
        "feature_importances": self.feature_importances_,
        "feature_values": self.feature_values,
    }
    info["trained"] = self.tree is not None
    return info
@property
def classes(self) -> List[str]:
    """Read-only alias for ``classes_`` (scikit-learn-style naming compat)."""
    return self.classes_
def get_tree_rules(self) -> List[str]:
    """Extract human-readable if/then rules from the trained tree.

    Returns an empty list when the model has not been trained yet.
    """
    collected: List[str] = []
    if self.tree is not None:
        self._extract_rules(self.tree, [], collected)
    return collected
def _extract_rules(self, node: Any, conditions: List[str], rules: List[str]) -> None:
"""Recursively walk the tree and collect decision paths as strings."""
if not isinstance(node, dict):
return
if node.get("__leaf__"):
label = node.get("__label__", "?")
proba = node.get("__proba__", {})
conf = proba.get(str(label), 0.0)
prefix = " AND ".join(conditions) if conditions else "(root)"
rules.append(f"{prefix} => {label} ({conf:.0%})")
return
feature = node.get("__feature__", "?")
for val, child in node.get("__branches__", {}).items():
self._extract_rules(child, conditions + [f"{feature}={val}"], rules)
# --------------------------------------------------------- tree building
def _build_tree(
    self,
    data: List[Dict[str, Any]],
    features: List[str],
    target: str,
    depth: int,
) -> Any:
    """Recursively grow the ID3 tree over *data*.

    Returns either an internal node dict ({"__feature__", "__gain__",
    "__n__", "__branches__"}) or a leaf dict from _make_leaf().
    """
    counts = Counter(str(row.get(target)) for row in data)
    # Pure node
    if len(counts) == 1:
        return self._make_leaf(data, target)
    # Stopping criteria
    if not features or depth >= self.max_depth or len(data) < self.min_samples_split:
        return self._make_leaf(data, target)
    best_f, best_gain = self._best_split(data, features, target)
    if best_f is None or best_gain < self.min_info_gain:
        return self._make_leaf(data, target)
    # Accumulate gain per feature; train() later normalises this into
    # feature_importances_.
    self._total_gain[best_f] = self._total_gain.get(best_f, 0.0) + best_gain
    remaining = [f for f in features if f != best_f]
    node = {
        "__feature__": best_f,
        "__gain__": round(best_gain, 6),
        "__n__": len(data),
        "__branches__": {},
    }
    # One branch per observed raw value; keys are stringified so the tree
    # stays JSON-serialisable (matches the str() lookups in _classify).
    for val in {row.get(best_f) for row in data}:
        subset = [r for r in data if r.get(best_f) == val]
        node["__branches__"][str(val)] = self._build_tree(
            subset, remaining, target, depth + 1
        )
    return node
def _make_leaf(self, data: List[Dict[str, Any]], target: str) -> Dict[str, Any]:
counts = Counter(str(row.get(target)) for row in data)
total = len(data)
k = len(self.classes_) or 1
# Laplace smoothing
proba = {
cls: round((counts.get(cls, 0) + 1) / (total + k), 4)
for cls in self.classes_
}
label = max(proba, key=proba.get)
return {"__leaf__": True, "__label__": label, "__proba__": proba, "__n__": total}
# ---------------------------------------------------------- splitting
def _best_split(
    self, data: List[Dict[str, Any]], features: List[str], target: str
) -> Tuple[Optional[str], float]:
    """Pick the feature with the highest information gain (or C4.5 gain
    ratio when use_gain_ratio is set). Returns (feature, gain); feature is
    None when *features* is empty."""
    base_entropy = self._entropy(data, target)
    winner: Optional[str] = None
    winner_gain = -1.0
    for feat in features:
        gain = self._info_gain(data, feat, target, base_entropy)
        if self.use_gain_ratio:
            split_info = self._split_info(data, feat)
            gain = gain / split_info if split_info > 0 else 0.0
        if gain > winner_gain:
            winner, winner_gain = feat, gain
    return winner, winner_gain
# ----------------------------------------------------------- pruning
def _prune(self, node: Any, data: List[Dict[str, Any]], target: str) -> Any:
    """Bottom-up chi-squared pruning: collapse statistically weak splits
    into leaves. *data* must be the subset that reached this node."""
    if not isinstance(node, dict) or node.get("__leaf__"):
        return node
    feature = node["__feature__"]
    # Recurse children first
    for val in list(node["__branches__"].keys()):
        subset = [r for r in data if str(r.get(feature)) == str(val)]
        node["__branches__"][val] = self._prune(node["__branches__"][val], subset, target)
    # Chi-squared test: if split is not significant, collapse to leaf
    if not self._chi2_significant(data, feature, target):
        return self._make_leaf(data, target)
    return node
def _chi2_significant(
self, data: List[Dict[str, Any]], feature: str, target: str
) -> bool:
classes = self.classes_
feature_vals = list({str(r.get(feature)) for r in data})
if not classes or len(feature_vals) < 2:
return False
total = len(data)
class_totals = Counter(str(r.get(target)) for r in data)
chi2 = 0.0
for val in feature_vals:
subset = [r for r in data if str(r.get(feature)) == val]
n_val = len(subset)
val_counts = Counter(str(r.get(target)) for r in subset)
for cls in classes:
observed = val_counts.get(cls, 0)
expected = (n_val * class_totals.get(cls, 0)) / total
if expected > 0:
chi2 += (observed - expected) ** 2 / expected
df = (len(feature_vals) - 1) * (len(classes) - 1)
if df <= 0:
return False
# Critical values at p=0.05
crit_table = {1: 3.841, 2: 5.991, 3: 7.815, 4: 9.488, 5: 11.070, 6: 12.592}
crit = crit_table.get(df, 3.841 * df)
return chi2 > crit
# ---------------------------------------------------------- classify
def _classify(
    self, node: Any, row: Dict[str, Any], path: List[str]
) -> Tuple[Any, Any]:
    """Walk the tree for *row*, appending a human-readable trail to *path*.

    Returns (label, proba) where proba is a dict of class -> probability.
    Feature values never seen during training fall back to a weighted vote
    over the sibling leaf children (weighted by leaf sample counts).
    """
    if not isinstance(node, dict):
        # Degenerate/bare node: treat it as a certain prediction.
        return node, {str(node): 1.0}
    if node.get("__leaf__"):
        label = node["__label__"]
        proba = node["__proba__"]
        path.append(f"predict={label} (p={proba.get(label, 0):.2f})")
        return label, proba
    feature = node["__feature__"]
    value = str(row.get(feature, ""))
    path.append(f"{feature}={value}")
    branches = node["__branches__"]
    if value in branches:
        return self._classify(branches[value], row, path)
    # Unseen value: weighted vote from all leaf children
    all_proba: Counter = Counter()
    total_n = 0
    for child in branches.values():
        if isinstance(child, dict) and child.get("__leaf__"):
            n = child.get("__n__", 1)
            total_n += n
            for cls, p in child.get("__proba__", {}).items():
                all_proba[cls] += p * n
    if not total_n:
        # No leaf children to vote with (all branches are subtrees):
        # fall back to the first known class with full confidence.
        fallback = self.classes_[0] if self.classes_ else "Unknown"
        path.append(f"unseen fallback: {fallback}")
        return fallback, {fallback: 1.0}
    proba = {cls: round(v / total_n, 4) for cls, v in all_proba.items()}
    label = max(proba, key=proba.get)
    path.append(f"weighted vote: {label}")
    return label, proba
# ---------------------------------------------------------- entropy math
def _entropy(self, data: List[Dict[str, Any]], target: str) -> float:
if not data:
return 0.0
counts = Counter(str(row.get(target)) for row in data)
total = len(data)
return -sum((c / total) * math.log2(c / total) for c in counts.values() if c > 0)
def _info_gain(
    self,
    data: List[Dict[str, Any]],
    feature: str,
    target: str,
    base_entropy: Optional[float] = None,
) -> float:
    """Information gain of splitting *data* on *feature*.

    *base_entropy* may be supplied so callers looping over candidate
    features compute the parent entropy only once.
    """
    if base_entropy is None:
        base_entropy = self._entropy(data, target)
    total = len(data)
    partitions: Dict[Any, list] = {}
    for row in data:
        partitions.setdefault(row.get(feature), []).append(row)
    child_entropy = 0.0
    for subset in partitions.values():
        child_entropy += (len(subset) / total) * self._entropy(subset, target)
    return base_entropy - child_entropy
def _split_info(self, data: List[Dict[str, Any]], feature: str) -> float:
total = len(data)
counts = Counter(row.get(feature) for row in data)
return -sum((c / total) * math.log2(c / total) for c in counts.values() if c > 0)
# ------------------------------------------------------------------ factory
def get_behavior_model(
    max_depth: int = 5,
    min_samples_split: int = 8,
    min_info_gain: float = 0.005,
    use_gain_ratio: bool = True,
    chi2_pruning: bool = True,
) -> ID3Classifier:
    """Factory for an ID3Classifier with behaviour-analysis defaults.

    Compared with the class defaults (depth 6, split 5, gain 0.001, plain
    info gain) these settings are more conservative: shallower tree,
    larger split minimum, and C4.5 gain ratio enabled.
    """
    config = {
        "max_depth": max_depth,
        "min_samples_split": min_samples_split,
        "min_info_gain": min_info_gain,
        "use_gain_ratio": use_gain_ratio,
        "chi2_pruning": chi2_pruning,
    }
    return ID3Classifier(**config)

View File

@@ -0,0 +1,539 @@
"""
ML Data Collector - Production Grade
======================================
Logs every assignment call (inputs + outcomes) to SQLite.
Key upgrades over the original
--------------------------------
1. FROZEN historical scores - quality_score is written ONCE at log time.
get_training_data() returns scores as-is from the DB (no retroactive mutation).
2. Rich schema - zone_id, city_id, is_peak, weather_code,
sla_breached, avg_delivery_time_min for richer features.
3. SLA tracking - logs whether delivery SLA was breached.
4. Analytics API - get_hourly_stats(), get_strategy_comparison(),
get_quality_histogram(), get_zone_stats() for dashboard consumption.
5. Thread-safe writes - connection-per-write pattern for FastAPI workers.
6. Indexed columns - timestamp, ml_strategy, zone_id for fast queries.
"""
import csv
import io
import logging
import os
import sqlite3
import threading
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
_DB_PATH = os.getenv("ML_DB_PATH", "ml_data/ml_store.db")
_WRITE_LOCK = threading.Lock()
def _std(values: List[float]) -> float:
if len(values) < 2:
return 0.0
mean = sum(values) / len(values)
return (sum((v - mean) ** 2 for v in values) / len(values)) ** 0.5
class MLDataCollector:
    """
    Event logger for assignment service calls.

    Each log_assignment_event() call writes one row capturing:
    - Operating context (time, orders, riders, zone, city)
    - Active hyperparams (exact config snapshot for this call)
    - Measured outcomes (quality score, SLA, latency, distances)

    quality_score is computed once and FROZEN - never retroactively changed.

    Concurrency: writes are serialised through the module-level _WRITE_LOCK
    and every operation opens its own short-lived sqlite3 connection, so a
    single instance may be shared across workers in one process.
    """

    def __init__(self):
        # Schema is created/migrated eagerly so later writes never race on DDL.
        self._db_path = _DB_PATH
        self._ensure_db()

    # ------------------------------------------------------------------
    # Main logging API
    # ------------------------------------------------------------------
    def log_assignment_event(
        self,
        *,
        num_orders: int,
        num_riders: int,
        hyperparams: Dict[str, Any],
        assignments: Dict[int, List[Any]],
        unassigned_count: int,
        elapsed_ms: float,
        zone_id: str = "default",
        city_id: str = "default",
        weather_code: str = "CLEAR",
        sla_minutes: Optional[float] = None,
        avg_delivery_time_min: Optional[float] = None,
    ) -> None:
        """
        Log one assignment event.

        Call this at the END of AssignmentService.assign_orders() once
        outcomes are known. Never raises: failures are logged and swallowed
        so telemetry can never break the assignment path.
        """
        try:
            # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
            # datetime.now(timezone.utc) would change the stored isoformat
            # strings (adds an offset) - migrate deliberately, not silently.
            now = datetime.utcnow()
            hour = now.hour
            day_of_week = now.weekday()
            # Hours (on the UTC clock, per utcnow above) treated as peak.
            is_peak = int(hour in (7, 8, 9, 12, 13, 18, 19, 20))
            # Riders with at least one order; empty lists are excluded from
            # both the load stats and riders_used.
            rider_loads = [len(orders) for orders in assignments.values() if orders]
            riders_used = len(rider_loads)
            total_assigned = sum(rider_loads)
            avg_load = total_assigned / riders_used if riders_used else 0.0
            load_std = _std(rider_loads) if rider_loads else 0.0
            all_orders = [o for orders in assignments.values() if orders for o in orders]
            total_distance_km = sum(self._get_km(o) for o in all_orders)
            ml_strategy = hyperparams.get("ml_strategy", "balanced")
            max_opr = hyperparams.get("max_orders_per_rider", 12)
            sla_breached = 0
            if sla_minutes and avg_delivery_time_min:
                sla_breached = int(avg_delivery_time_min > sla_minutes)
            # Quality score - FROZEN at log time
            quality_score = self._compute_quality_score(
                num_orders=num_orders,
                unassigned_count=unassigned_count,
                load_std=load_std,
                riders_used=riders_used,
                num_riders=num_riders,
                total_distance_km=total_distance_km,
                max_orders_per_rider=max_opr,
                ml_strategy=ml_strategy,
            )
            # Column names must match the assignment_ml_log schema in
            # _ensure_db(); _insert() builds the INSERT from these keys.
            row = {
                "timestamp": now.isoformat(),
                "hour": hour,
                "day_of_week": day_of_week,
                "is_peak": is_peak,
                "zone_id": zone_id,
                "city_id": city_id,
                "weather_code": weather_code,
                "num_orders": num_orders,
                "num_riders": num_riders,
                "max_pickup_distance_km": hyperparams.get("max_pickup_distance_km", 10.0),
                "max_kitchen_distance_km": hyperparams.get("max_kitchen_distance_km", 3.0),
                "max_orders_per_rider": max_opr,
                "ideal_load": hyperparams.get("ideal_load", 6),
                "workload_balance_threshold": hyperparams.get("workload_balance_threshold", 0.7),
                "workload_penalty_weight": hyperparams.get("workload_penalty_weight", 100.0),
                "distance_penalty_weight": hyperparams.get("distance_penalty_weight", 2.0),
                "cluster_radius_km": hyperparams.get("cluster_radius_km", 3.0),
                "search_time_limit_seconds": hyperparams.get("search_time_limit_seconds", 5),
                "road_factor": hyperparams.get("road_factor", 1.3),
                "ml_strategy": ml_strategy,
                "riders_used": riders_used,
                "total_assigned": total_assigned,
                "unassigned_count": unassigned_count,
                "avg_load": round(avg_load, 3),
                "load_std": round(load_std, 3),
                "total_distance_km": round(total_distance_km, 2),
                "elapsed_ms": round(elapsed_ms, 1),
                "sla_breached": sla_breached,
                "avg_delivery_time_min": round(avg_delivery_time_min or 0.0, 2),
                "quality_score": round(quality_score, 2),
            }
            with _WRITE_LOCK:
                self._insert(row)
            logger.info(
                f"[MLCollector] zone={zone_id} orders={num_orders} "
                f"assigned={total_assigned} unassigned={unassigned_count} "
                f"quality={quality_score:.1f} elapsed={elapsed_ms:.0f}ms"
            )
        except Exception as e:
            logger.warning(f"[MLCollector] Logging failed (non-fatal): {e}")

    # ------------------------------------------------------------------
    # Data retrieval for training
    # ------------------------------------------------------------------
    def get_training_data(
        self,
        min_records: int = 30,
        strategy_filter: Optional[str] = None,
        since_hours: Optional[int] = None,
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Return logged rows for model training.

        quality_score is returned AS-IS (frozen at log time - no re-scoring).
        Returns None when fewer than min_records rows match or on any error.
        """
        try:
            # NOTE(review): throughout this class the connection is not closed
            # when a query raises (the except path skips conn.close()) -
            # consider contextlib.closing() or try/finally.
            conn = sqlite3.connect(self._db_path)
            conn.row_factory = sqlite3.Row
            query = "SELECT * FROM assignment_ml_log"
            params: list = []
            clauses: list = []
            if strategy_filter:
                clauses.append("ml_strategy = ?")
                params.append(strategy_filter)
            if since_hours:
                cutoff = (datetime.utcnow() - timedelta(hours=since_hours)).isoformat()
                clauses.append("timestamp >= ?")
                params.append(cutoff)
            if clauses:
                query += " WHERE " + " AND ".join(clauses)
            query += " ORDER BY id ASC"
            rows = conn.execute(query, params).fetchall()
            conn.close()
            if len(rows) < min_records:
                logger.info(f"[MLCollector] {len(rows)} records < {min_records} minimum.")
                return None
            return [dict(r) for r in rows]
        except Exception as e:
            logger.error(f"[MLCollector] get_training_data failed: {e}")
            return None

    # ------------------------------------------------------------------
    # Analytics API
    # ------------------------------------------------------------------
    def get_recent_quality_trend(self, last_n: int = 50) -> Dict[str, Any]:
        """Recent quality scores + series for sparkline charts."""
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute(
                "SELECT quality_score, timestamp, unassigned_count, elapsed_ms "
                "FROM assignment_ml_log ORDER BY id DESC LIMIT ?", (last_n,)
            ).fetchall()
            conn.close()
            if not rows:
                return {"avg_quality": 0.0, "sample_size": 0, "history": []}
            scores = [r[0] for r in rows]
            # Rows come back newest-first; series are reversed so charts
            # read oldest -> newest left to right.
            return {
                "avg_quality": round(sum(scores) / len(scores), 2),
                "min_quality": round(min(scores), 2),
                "max_quality": round(max(scores), 2),
                "sample_size": len(scores),
                "history": list(reversed(scores)),
                "timestamps": list(reversed([r[1] for r in rows])),
                "unassigned_series": list(reversed([r[2] for r in rows])),
                "latency_series": list(reversed([r[3] for r in rows])),
            }
        except Exception:
            return {"avg_quality": 0.0, "sample_size": 0, "history": []}

    def get_hourly_stats(self, last_days: int = 7) -> List[Dict[str, Any]]:
        """Quality, SLA, and call volume aggregated by hour-of-day."""
        try:
            conn = sqlite3.connect(self._db_path)
            cutoff = (datetime.utcnow() - timedelta(days=last_days)).isoformat()
            rows = conn.execute(
                """
                SELECT hour,
                       COUNT(*) AS call_count,
                       AVG(quality_score) AS avg_quality,
                       AVG(unassigned_count) AS avg_unassigned,
                       AVG(elapsed_ms) AS avg_latency_ms,
                       SUM(CASE WHEN sla_breached=1 THEN 1 ELSE 0 END) AS sla_breaches
                FROM assignment_ml_log WHERE timestamp >= ?
                GROUP BY hour ORDER BY hour
                """, (cutoff,)
            ).fetchall()
            conn.close()
            # AVG() over zero rows is NULL -> None; `or 0.0` normalises.
            return [
                {
                    "hour": r[0],
                    "call_count": r[1],
                    "avg_quality": round(r[2] or 0.0, 2),
                    "avg_unassigned": round(r[3] or 0.0, 2),
                    "avg_latency_ms": round(r[4] or 0.0, 1),
                    "sla_breaches": r[5],
                }
                for r in rows
            ]
        except Exception as e:
            logger.error(f"[MLCollector] get_hourly_stats: {e}")
            return []

    def get_strategy_comparison(self) -> List[Dict[str, Any]]:
        """Compare quality metrics across ml_strategy values."""
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute(
                """
                SELECT ml_strategy,
                       COUNT(*) AS call_count,
                       AVG(quality_score) AS avg_quality,
                       MIN(quality_score) AS min_quality,
                       MAX(quality_score) AS max_quality,
                       AVG(unassigned_count) AS avg_unassigned,
                       AVG(total_distance_km) AS avg_distance_km,
                       AVG(elapsed_ms) AS avg_latency_ms
                FROM assignment_ml_log
                GROUP BY ml_strategy ORDER BY avg_quality DESC
                """
            ).fetchall()
            conn.close()
            return [
                {
                    "strategy": r[0],
                    "call_count": r[1],
                    "avg_quality": round(r[2] or 0.0, 2),
                    "min_quality": round(r[3] or 0.0, 2),
                    "max_quality": round(r[4] or 0.0, 2),
                    "avg_unassigned": round(r[5] or 0.0, 2),
                    "avg_distance_km": round(r[6] or 0.0, 2),
                    "avg_latency_ms": round(r[7] or 0.0, 1),
                }
                for r in rows
            ]
        except Exception as e:
            logger.error(f"[MLCollector] get_strategy_comparison: {e}")
            return []

    def get_quality_histogram(self, bins: int = 10) -> List[Dict[str, Any]]:
        """Quality score distribution for histogram chart."""
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute("SELECT quality_score FROM assignment_ml_log").fetchall()
            conn.close()
            scores = [r[0] for r in rows if r[0] is not None]
            if not scores:
                return []
            # Fixed 0-100 domain (quality_score is capped at 100 by
            # _compute_quality_score); a score of exactly 100 falls outside
            # the half-open top bin - NOTE(review): verify if that matters.
            bin_width = 100.0 / bins
            return [
                {
                    "range": f"{i*bin_width:.0f}-{(i+1)*bin_width:.0f}",
                    "count": sum(1 for s in scores if i*bin_width <= s < (i+1)*bin_width)
                }
                for i in range(bins)
            ]
        except Exception as e:
            logger.error(f"[MLCollector] get_quality_histogram: {e}")
            return []

    def get_zone_stats(self) -> List[Dict[str, Any]]:
        """Quality and SLA stats grouped by zone."""
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute(
                """
                SELECT zone_id, COUNT(*) AS call_count,
                       AVG(quality_score) AS avg_quality,
                       SUM(sla_breached) AS sla_breaches,
                       AVG(total_distance_km) AS avg_distance_km
                FROM assignment_ml_log
                GROUP BY zone_id ORDER BY avg_quality DESC
                """
            ).fetchall()
            conn.close()
            return [
                {
                    "zone_id": r[0],
                    "call_count": r[1],
                    "avg_quality": round(r[2] or 0.0, 2),
                    "sla_breaches": r[3],
                    "avg_distance_km": round(r[4] or 0.0, 2),
                }
                for r in rows
            ]
        except Exception as e:
            logger.error(f"[MLCollector] get_zone_stats: {e}")
            return []

    def count_records(self) -> int:
        """Total number of logged assignment events (0 on any error)."""
        try:
            conn = sqlite3.connect(self._db_path)
            count = conn.execute("SELECT COUNT(*) FROM assignment_ml_log").fetchone()[0]
            conn.close()
            return count
        except Exception:
            return 0

    def count_by_strategy(self) -> Dict[str, int]:
        """Event counts keyed by ml_strategy ({} on any error)."""
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute(
                "SELECT ml_strategy, COUNT(*) FROM assignment_ml_log GROUP BY ml_strategy"
            ).fetchall()
            conn.close()
            return {r[0]: r[1] for r in rows}
        except Exception:
            return {}

    def export_csv(self) -> str:
        """Export all records as CSV string."""
        try:
            conn = sqlite3.connect(self._db_path)
            conn.row_factory = sqlite3.Row
            rows = conn.execute("SELECT * FROM assignment_ml_log ORDER BY id ASC").fetchall()
            conn.close()
            if not rows:
                return ""
            buf = io.StringIO()
            writer = csv.DictWriter(buf, fieldnames=rows[0].keys())
            writer.writeheader()
            writer.writerows([dict(r) for r in rows])
            return buf.getvalue()
        except Exception as e:
            logger.error(f"[MLCollector] export_csv failed: {e}")
            return ""

    def purge_old_records(self, keep_days: int = 90) -> int:
        """Delete records older than keep_days. Returns count deleted."""
        try:
            # ISO-8601 timestamps compare correctly as strings.
            cutoff = (datetime.utcnow() - timedelta(days=keep_days)).isoformat()
            conn = sqlite3.connect(self._db_path)
            cursor = conn.execute(
                "DELETE FROM assignment_ml_log WHERE timestamp < ?", (cutoff,)
            )
            deleted = cursor.rowcount
            conn.commit()
            conn.close()
            logger.info(f"[MLCollector] Purged {deleted} records older than {keep_days} days.")
            return deleted
        except Exception as e:
            logger.error(f"[MLCollector] purge failed: {e}")
            return 0

    # ------------------------------------------------------------------
    # Quality Score Formula (frozen at log time - do not change behavior)
    # ------------------------------------------------------------------
    @staticmethod
    def _compute_quality_score(
        num_orders: int, unassigned_count: int, load_std: float,
        riders_used: int, num_riders: int, total_distance_km: float,
        max_orders_per_rider: int, ml_strategy: str = "balanced",
    ) -> float:
        """Blend completion, distance, and balance ratios into a 0-100 score.

        Weights depend on ml_strategy. This formula is part of the stored
        data contract (scores are frozen at log time) - do not alter it.
        """
        if num_orders == 0:
            return 0.0
        assigned_ratio = 1.0 - (unassigned_count / num_orders)
        max_std = max(1.0, max_orders_per_rider / 2.0)
        balance_ratio = max(0.0, 1.0 - (load_std / max_std))
        # Distance budget assumes ~8 km per assigned order - TODO confirm.
        max_dist = max(1.0, float((num_orders - unassigned_count) * 8.0))
        distance_ratio = max(0.0, 1.0 - (total_distance_km / max_dist))
        # (completion_weight, distance_weight, balance_weight) per strategy.
        weights = {
            "aggressive_speed": (80.0, 20.0, 0.0),
            "fuel_saver": (30.0, 70.0, 0.0),
            "zone_strict": (40.0, 30.0, 30.0),
            "balanced": (50.0, 25.0, 25.0),
        }
        w_comp, w_dist, w_bal = weights.get(ml_strategy, (50.0, 25.0, 25.0))
        return min(
            assigned_ratio * w_comp + distance_ratio * w_dist + balance_ratio * w_bal,
            100.0,
        )

    @staticmethod
    def _get_km(order: Any) -> float:
        """Best-effort distance (km) from an order mapping; 0.0 if unreadable."""
        try:
            return float(order.get("kms") or order.get("calculationDistanceKm") or 0.0)
        except Exception:
            return 0.0

    # ------------------------------------------------------------------
    # DB Bootstrap
    # ------------------------------------------------------------------
    def _ensure_db(self) -> None:
        """Create the table, apply additive column migrations, and build indexes.

        Idempotent: ALTER TABLE failures (column already exists) are ignored.
        """
        try:
            os.makedirs(os.path.dirname(self._db_path) or ".", exist_ok=True)
            conn = sqlite3.connect(self._db_path)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS assignment_ml_log (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp TEXT NOT NULL,
                    hour INTEGER,
                    day_of_week INTEGER,
                    is_peak INTEGER DEFAULT 0,
                    zone_id TEXT DEFAULT 'default',
                    city_id TEXT DEFAULT 'default',
                    weather_code TEXT DEFAULT 'CLEAR',
                    num_orders INTEGER,
                    num_riders INTEGER,
                    max_pickup_distance_km REAL,
                    max_kitchen_distance_km REAL,
                    max_orders_per_rider INTEGER,
                    ideal_load INTEGER,
                    workload_balance_threshold REAL,
                    workload_penalty_weight REAL,
                    distance_penalty_weight REAL,
                    cluster_radius_km REAL,
                    search_time_limit_seconds INTEGER,
                    road_factor REAL,
                    ml_strategy TEXT DEFAULT 'balanced',
                    riders_used INTEGER,
                    total_assigned INTEGER,
                    unassigned_count INTEGER,
                    avg_load REAL,
                    load_std REAL,
                    total_distance_km REAL DEFAULT 0.0,
                    elapsed_ms REAL,
                    sla_breached INTEGER DEFAULT 0,
                    avg_delivery_time_min REAL DEFAULT 0.0,
                    quality_score REAL
                )
            """)
            # Additive migrations for DBs created by older schema versions.
            migrations = [
                "ALTER TABLE assignment_ml_log ADD COLUMN is_peak INTEGER DEFAULT 0",
                "ALTER TABLE assignment_ml_log ADD COLUMN zone_id TEXT DEFAULT 'default'",
                "ALTER TABLE assignment_ml_log ADD COLUMN city_id TEXT DEFAULT 'default'",
                "ALTER TABLE assignment_ml_log ADD COLUMN weather_code TEXT DEFAULT 'CLEAR'",
                "ALTER TABLE assignment_ml_log ADD COLUMN sla_breached INTEGER DEFAULT 0",
                "ALTER TABLE assignment_ml_log ADD COLUMN avg_delivery_time_min REAL DEFAULT 0.0",
                "ALTER TABLE assignment_ml_log ADD COLUMN ml_strategy TEXT DEFAULT 'balanced'",
                "ALTER TABLE assignment_ml_log ADD COLUMN total_distance_km REAL DEFAULT 0.0",
            ]
            for ddl in migrations:
                try:
                    conn.execute(ddl)
                except Exception:
                    pass
            for idx in [
                "CREATE INDEX IF NOT EXISTS idx_timestamp ON assignment_ml_log(timestamp)",
                "CREATE INDEX IF NOT EXISTS idx_strategy ON assignment_ml_log(ml_strategy)",
                "CREATE INDEX IF NOT EXISTS idx_zone ON assignment_ml_log(zone_id)",
            ]:
                conn.execute(idx)
            conn.commit()
            conn.close()
        except Exception as e:
            logger.error(f"[MLCollector] DB init failed: {e}")

    def _insert(self, row: Dict[str, Any]) -> None:
        """Insert one event row; keys must be valid column names.

        Callers must hold _WRITE_LOCK (see log_assignment_event). Raises on
        failure - the caller's try/except is the safety net.
        """
        os.makedirs(os.path.dirname(self._db_path) or ".", exist_ok=True)
        conn = sqlite3.connect(self._db_path)
        cols = ", ".join(row.keys())
        placeholders = ", ".join(["?"] * len(row))
        conn.execute(
            f"INSERT INTO assignment_ml_log ({cols}) VALUES ({placeholders})",
            list(row.values()),
        )
        conn.commit()
        conn.close()
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------
_collector: Optional[MLDataCollector] = None
def get_collector() -> MLDataCollector:
    """Lazily create and return the process-wide MLDataCollector singleton."""
    global _collector
    _collector = _collector or MLDataCollector()
    return _collector

View File

@@ -0,0 +1,610 @@
"""
ML Hypertuner - Production Grade
===================================
XGBoost surrogate model + Optuna TPE Bayesian optimization.
Key upgrades over the original
--------------------------------
1. Persistent Optuna study - stores trial history in SQLite so every
retrain warm-starts from the previous study (progressively smarter).
2. Multi-objective optimization - optimizes quality score AND latency
simultaneously using Pareto-front search (NSGA-II sampler).
3. Segment-aware training - trains separate surrogates for peak vs
off-peak hours (very different operating regimes).
4. Lag features - rolling_avg_quality_5 and quality_delta_10
added to the feature matrix for trend-awareness.
5. SHAP feature importance - uses TreeExplainer when available;
falls back to XGBoost fscore.
6. Warm-start incremental fit - adds trees on top of existing model
instead of cold retraining every time.
7. Staleness detection - warns if model is older than 24h.
8. Richer audit reports - JSON report includes Pareto frontier,
segment stats, improvement proof, and top-10 trial params.
"""
import json
import logging
import os
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score, mean_absolute_error
logger = logging.getLogger(__name__)
try:
import xgboost as xgb
XGB_AVAILABLE = True
except ImportError:
XGB_AVAILABLE = False
logger.warning("[Hypertuner] xgboost not installed.")
try:
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)
OPTUNA_AVAILABLE = True
except ImportError:
OPTUNA_AVAILABLE = False
logger.warning("[Hypertuner] optuna not installed.")
try:
import shap
SHAP_AVAILABLE = True
except ImportError:
SHAP_AVAILABLE = False
# ---------------------------------------------------------------------------
# Feature columns
# ---------------------------------------------------------------------------
# Operating-context + hyperparameter columns read from the collector's log.
BASE_FEATURE_COLS = [
    "hour", "day_of_week", "is_peak",
    "num_orders", "num_riders",
    "max_pickup_distance_km", "max_kitchen_distance_km",
    "max_orders_per_rider", "ideal_load",
    "workload_balance_threshold", "workload_penalty_weight",
    "distance_penalty_weight", "cluster_radius_km",
    "search_time_limit_seconds", "road_factor",
]
# Trend features computed in-memory by _add_lag_features (not stored columns).
LAG_FEATURE_COLS = [
    "rolling_avg_quality_5",  # rolling mean of last 5 quality scores
    "quality_delta_10",       # quality[i] - quality[i-10]
]
ALL_FEATURE_COLS = BASE_FEATURE_COLS + LAG_FEATURE_COLS
# Regression target: the frozen quality score logged by MLDataCollector.
LABEL_COL = "quality_score"
# Optuna search space: param name -> (type, low, high).
SEARCH_SPACE = {
    "max_pickup_distance_km": ("float", 4.0, 15.0),
    "max_kitchen_distance_km": ("float", 1.0, 8.0),
    "max_orders_per_rider": ("int", 6, 20),
    "ideal_load": ("int", 2, 10),
    "workload_balance_threshold": ("float", 0.3, 0.95),
    "workload_penalty_weight": ("float", 20.0, 200.0),
    "distance_penalty_weight": ("float", 0.5, 10.0),
    "cluster_radius_km": ("float", 1.0, 8.0),
    "search_time_limit_seconds": ("int", 2, 15),
    "road_factor": ("float", 1.1, 1.6),
}
# Optuna study history shares the collector's SQLite file by default.
_STUDY_DB_PATH = os.getenv("ML_DB_PATH", "ml_data/ml_store.db")
_REPORT_DIR = "ml_data/reports"
# Models older than this many hours are considered stale.
_MAX_MODEL_AGE_H = 24
# ---------------------------------------------------------------------------
# MLHypertuner
# ---------------------------------------------------------------------------
class MLHypertuner:
"""XGBoost surrogate + Optuna TPE / NSGA-II hyperparameter optimizer."""
def __init__(self):
    """Initialise empty model/report slots, then hydrate from the last saved report."""
    self._model: Optional[Any] = None            # global surrogate (all hours)
    self._peak_model: Optional[Any] = None       # peak-hours segment surrogate
    self._offpeak_model: Optional[Any] = None    # off-peak segment surrogate
    self._model_trained_at: Optional[datetime] = None
    self._training_rows: int = 0
    self._latest_validation: Optional[Dict] = None
    self._latest_baseline: Optional[Dict] = None
    self._feature_importance: Optional[Dict[str, float]] = None
    self._top_trials: List[Dict] = []
    self._pareto_frontier: List[Dict] = []
    self._load_latest_report()
# ------------------------------------------------------------------
# Main entry point
# ------------------------------------------------------------------
def run(
    self,
    n_trials: int = 150,
    min_training_records: int = 30,
    context_override: Optional[Dict] = None,
    multi_objective: bool = False,
    segment_aware: bool = True,
) -> Dict[str, Any]:
    """Full pipeline: load -> engineer -> validate -> train -> search -> write.

    Returns a status dict. On success it includes the best params,
    cross-validation metrics, and an improvement proof vs. the logged
    baseline. Missing optional deps or thin data return a status dict
    instead of raising.
    """
    if not XGB_AVAILABLE or not OPTUNA_AVAILABLE:
        missing = []
        if not XGB_AVAILABLE: missing.append("xgboost")
        if not OPTUNA_AVAILABLE: missing.append("optuna")
        return {"status": "error", "message": f"Missing: {', '.join(missing)}"}
    # Imported lazily, presumably to avoid a circular import at module
    # load time - TODO confirm.
    from app.services.ml.ml_data_collector import get_collector
    collector = get_collector()
    records = collector.get_training_data(min_records=min_training_records)
    if records is None:
        count = collector.count_records()
        return {
            "status": "insufficient_data",
            "message": f"{count} records - need >={min_training_records}.",
            "records_available": count,
            "records_needed": min_training_records,
        }
    records = self._add_lag_features(records)
    X, y = self._prepare_data(records, ALL_FEATURE_COLS)
    if X is None or len(X) == 0:
        return {"status": "error", "message": "Data preparation failed."}
    cv_results = self._cross_validate(X, y)
    logger.info(f"[Hypertuner] CV: R2={cv_results['r2_score']:.3f}, MAE={cv_results['mae']:.2f}")
    self._train_model(X, y, model_attr="_model")
    self._latest_validation = cv_results
    # Segment-aware surrogates: peak vs off-peak rows are trained separately
    # when each segment has at least 20 rows (and 60 rows overall exist).
    if segment_aware and len(records) >= 60:
        peak_recs = [r for r in records if r.get("is_peak", 0) == 1]
        offpeak_recs = [r for r in records if r.get("is_peak", 0) == 0]
        if len(peak_recs) >= 20:
            Xp, yp = self._prepare_data(peak_recs, ALL_FEATURE_COLS)
            self._train_model(Xp, yp, model_attr="_peak_model")
        if len(offpeak_recs) >= 20:
            Xo, yo = self._prepare_data(offpeak_recs, ALL_FEATURE_COLS)
            self._train_model(Xo, yo, model_attr="_offpeak_model")
    baseline_stats = self._compute_baseline_stats(records)
    self._latest_baseline = baseline_stats
    context = context_override or self._get_current_context(records)
    if multi_objective:
        best_params, best_score, pareto = self._optuna_search_multi(context, n_trials)
        self._pareto_frontier = pareto
    else:
        best_params, best_score = self._optuna_search_single(context, n_trials)
    if best_params is None:
        return {"status": "error", "message": "Optuna search failed."}
    improvement = round(best_score - baseline_stats["avg_quality"], 2)
    self._compute_feature_importance()
    # Guardrail: refuse to overwrite live config when the surrogate is weak.
    # NOTE(review): the Optuna search above runs even when this gate
    # rejects - moving the R2 check before the search would save compute.
    if cv_results["r2_score"] < 0.5:
        return {
            "status": "model_not_ready",
            "message": f"R2={cv_results['r2_score']:.3f} too low.",
            "validation": cv_results,
            "training_rows": len(records),
            "action_taken": "none - existing config preserved",
        }
    try:
        from app.config.dynamic_config import get_config
        get_config().set_bulk(best_params, source="ml_hypertuner")
    except ImportError:
        logger.info("[Hypertuner] DynamicConfig not available - params not written to config.")
    self._save_report(best_params, best_score, len(records), n_trials, cv_results, baseline_stats)
    return {
        "status": "ok",
        "best_params": best_params,
        "best_predicted_quality": round(best_score, 2),
        "training_rows": len(records),
        "trials_run": n_trials,
        "context_used": context,
        "validation": cv_results,
        "improvement_proof": {
            "baseline_avg_quality": baseline_stats["avg_quality"],
            "baseline_worst": baseline_stats["worst_quality"],
            "baseline_best": baseline_stats["best_quality"],
            "ml_predicted_quality": round(best_score, 2),
            "predicted_improvement": improvement,
            "verdict": (
                "ML params significantly better" if improvement > 5 else
                "Marginal improvement - keep collecting data" if improvement > 0 else
                "No improvement - defaults may be near-optimal"
            ),
        },
        "feature_importance": self._feature_importance,
        "top_trials": self._top_trials[:5],
        "message": "Hyperparameters updated successfully.",
    }
# ------------------------------------------------------------------
# Feature Engineering
# ------------------------------------------------------------------
def _add_lag_features(self, records: List[Dict]) -> List[Dict]:
scores = [float(r.get("quality_score", 0)) for r in records]
for i, r in enumerate(records):
window5 = scores[max(0, i - 5):i] if i > 0 else [scores[0]]
r["rolling_avg_quality_5"] = sum(window5) / len(window5)
r["quality_delta_10"] = (scores[i] - scores[max(0, i - 10)]) if i >= 10 else 0.0
return records
# ------------------------------------------------------------------
# Data Preparation
# ------------------------------------------------------------------
def _prepare_data(
    self, records: List[Dict], feature_cols: List[str]
) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
    """Convert raw training records into (X, y) float32 matrices.

    Missing or non-numeric feature values are coerced to 0.0 so a single
    bad record cannot abort training; label extraction is not guarded, so
    an unconvertible label trips the outer handler and yields (None, None).
    """
    def _as_float(value) -> float:
        # Per-cell coercion: falsy values (None, "", 0) become 0.0.
        try:
            return float(value or 0)
        except (TypeError, ValueError):
            return 0.0

    try:
        feature_matrix = [
            [_as_float(rec.get(col, 0)) for col in feature_cols]
            for rec in records
        ]
        labels = [float(rec.get(LABEL_COL, 0)) for rec in records]
        return (
            np.array(feature_matrix, dtype=np.float32),
            np.array(labels, dtype=np.float32),
        )
    except Exception as e:
        logger.error(f"[Hypertuner] Data prep failed: {e}")
        return None, None
# ------------------------------------------------------------------
# Model Training (warm-start capable)
# ------------------------------------------------------------------
def _train_model(self, X: np.ndarray, y: np.ndarray, model_attr: str = "_model") -> None:
    """Fit an XGBoost regressor on (X, y) and store it on `model_attr`.

    If a model already exists under `model_attr`, attempt a 50-tree warm
    start from its booster; on any failure, fall back to a full cold fit
    with the standard hyperparameters.

    Fix: the warm-start failure was silently swallowed (`except: pass`),
    which hid real errors such as feature-count mismatches; the failure is
    now logged before the cold retrain.
    """
    kwargs = {
        "n_estimators": 300, "max_depth": 5, "learning_rate": 0.04,
        "subsample": 0.8, "colsample_bytree": 0.8,
        "reg_alpha": 0.1, "reg_lambda": 1.0, "random_state": 42, "verbosity": 0,
    }
    existing = getattr(self, model_attr, None)
    if existing is not None:
        try:
            # Continue training from the existing booster with a short run.
            warm_kwargs = {k: v for k, v in kwargs.items() if k != "n_estimators"}
            m = xgb.XGBRegressor(n_estimators=50, **warm_kwargs)
            m.fit(X, y, xgb_model=existing.get_booster())
            setattr(self, model_attr, m)
            if model_attr == "_model":
                self._model_trained_at = datetime.utcnow()
                self._training_rows = len(X)
            logger.info(f"[Hypertuner] XGBoost warm-updated ({model_attr}) - {len(X)} rows.")
            return
        except Exception as e:
            # Fall back to a cold retrain, but surface why warm-start failed.
            logger.warning(f"[Hypertuner] Warm-start failed ({model_attr}): {e} - retraining from scratch.")
    m = xgb.XGBRegressor(**kwargs)
    m.fit(X, y)
    setattr(self, model_attr, m)
    if model_attr == "_model":
        self._model_trained_at = datetime.utcnow()
        self._training_rows = len(X)
    logger.info(f"[Hypertuner] XGBoost trained ({model_attr}) - {len(X)} rows.")
# ------------------------------------------------------------------
# Cross Validation
# ------------------------------------------------------------------
def _cross_validate(self, X: np.ndarray, y: np.ndarray, k: int = 5) -> Dict:
    """Estimate model quality via k-fold CV (holdout split when data is scarce).

    Returns r2/mae plus a coarse trust grade that callers use to decide
    whether tuned parameters are safe to apply.
    """
    def _fresh_model():
        # Same fixed evaluation model for every fold, for comparability.
        return xgb.XGBRegressor(n_estimators=100, max_depth=4, verbosity=0, random_state=42)

    if len(X) < k * 2:
        # Too few rows for k folds: fall back to a single 80/20 holdout.
        cut = max(1, int(len(X) * 0.8))
        X_train, X_test = X[:cut], X[cut:]
        y_train, y_test = y[:cut], y[cut:]
        if len(X_test) == 0:
            return {"r2_score": 0.0, "mae": 99.0, "trust_level": "insufficient_data",
                    "trust_score": 0, "folds": 0}
        model = _fresh_model()
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        r2 = float(r2_score(y_test, preds))
        mae = float(mean_absolute_error(y_test, preds))
        folds_used = 1
    else:
        splitter = KFold(n_splits=k, shuffle=True, random_state=42)
        r2_list, mae_list = [], []
        for train_idx, test_idx in splitter.split(X):
            model = _fresh_model()
            model.fit(X[train_idx], y[train_idx])
            fold_preds = model.predict(X[test_idx])
            r2_list.append(r2_score(y[test_idx], fold_preds))
            mae_list.append(mean_absolute_error(y[test_idx], fold_preds))
        r2 = float(np.mean(r2_list))
        mae = float(np.mean(mae_list))
        folds_used = k
    # Map R2 to a human-readable trust grade; first threshold met wins.
    trust_level, trust_score = "poor - need more data", 1
    for threshold, level, score in [(0.85, "excellent", 5), (0.75, "strong", 4),
                                    (0.60, "good", 3), (0.50, "acceptable", 2)]:
        if r2 >= threshold:
            trust_level, trust_score = level, score
            break
    return {
        "r2_score": round(r2, 4),
        "mae": round(mae, 3),
        "folds": folds_used,
        "trust_level": trust_level,
        "trust_score": trust_score,
        "interpretation": f"Predictions off by +/-{mae:.1f} pts (R2={r2:.2f}, trust={trust_level})",
    }
# ------------------------------------------------------------------
# Optuna - Single Objective (persistent SQLite storage)
# ------------------------------------------------------------------
def _optuna_search_single(self, context: Dict, n_trials: int) -> Tuple[Optional[Dict], float]:
    """Single-objective Optuna search: maximize model-predicted quality.

    The study is persisted to SQLite (load_if_exists=True), so trial
    history accumulates across process restarts and warm-starts the TPE
    sampler. Returns (best_params restricted to SEARCH_SPACE keys,
    best_score), or (None, 0.0) on any failure.
    """
    def objective(trial):
        params = self._sample_params(trial)
        # Hard constraint: ideal load can never exceed the per-rider cap;
        # score such configs 0 so the sampler steers away from the region.
        if params.get("ideal_load", 6) > params.get("max_orders_per_rider", 12):
            return 0.0
        return self._predict_quality(context, params)
    try:
        study = optuna.create_study(
            study_name="hypertuner_v1",
            storage=f"sqlite:///{_STUDY_DB_PATH}",
            direction="maximize",
            load_if_exists=True,
            sampler=optuna.samplers.TPESampler(seed=42),
        )
        study.optimize(objective, n_trials=n_trials, show_progress_bar=False)
        best = study.best_trial
        # Keep a leaderboard of the 10 best completed trials for reporting;
        # trials with value=None (failed/pruned) are filtered out.
        self._top_trials = [
            {"params": t.params, "score": t.value}
            for t in sorted(study.trials, key=lambda x: x.value or 0, reverse=True)[:10]
            if t.value is not None
        ]
        return {k: best.params[k] for k in SEARCH_SPACE if k in best.params}, best.value
    except Exception as e:
        logger.error(f"[Hypertuner] Optuna single-obj failed: {e}", exc_info=True)
        return None, 0.0
# ------------------------------------------------------------------
# Optuna - Multi Objective (quality + latency, NSGA-II)
# ------------------------------------------------------------------
def _optuna_search_multi(
    self, context: Dict, n_trials: int
) -> Tuple[Optional[Dict], float, List[Dict]]:
    """Multi-objective NSGA-II search: maximize quality, minimize latency.

    Latency is a proxy derived from the configured solver time budget;
    the Pareto frontier is returned so callers can trade quality against
    speed. The "best" pick reported here is simply the highest-quality
    point on the frontier. Returns (best_params, best_quality, pareto)
    or (None, 0.0, []) on error.
    """
    def objective(trial):
        params = self._sample_params(trial)
        # Infeasible config (ideal load above hard cap): worst on both axes.
        if params.get("ideal_load", 6) > params.get("max_orders_per_rider", 12):
            return 0.0, 99.0
        quality = self._predict_quality(context, params)
        # Latency proxy: scales linearly with the allotted search seconds.
        # NOTE(review): the 200.0 factor looks like an empirical ms-per-second
        # estimate - confirm against measured solver timings.
        latency_proxy = float(params.get("search_time_limit_seconds", 5)) * 200.0
        return quality, latency_proxy
    try:
        study = optuna.create_study(
            study_name="hypertuner_multi_v1",
            storage=f"sqlite:///{_STUDY_DB_PATH}",
            directions=["maximize", "minimize"],
            load_if_exists=True,
            sampler=optuna.samplers.NSGAIISampler(seed=42),
        )
        study.optimize(objective, n_trials=n_trials, show_progress_bar=False)
        # For multi-objective studies, best_trials is the Pareto-optimal set.
        pareto = [
            {"params": t.params, "quality": t.values[0], "latency_proxy": t.values[1]}
            for t in study.best_trials
        ]
        if not pareto:
            return None, 0.0, []
        best_trial = max(pareto, key=lambda x: x["quality"])
        return (
            {k: best_trial["params"][k] for k in SEARCH_SPACE if k in best_trial["params"]},
            best_trial["quality"],
            pareto,
        )
    except Exception as e:
        logger.error(f"[Hypertuner] Optuna multi-obj failed: {e}", exc_info=True)
        return None, 0.0, []
def _sample_params(self, trial) -> Dict:
    """Draw one candidate configuration from SEARCH_SPACE via the trial.

    SEARCH_SPACE maps name -> (type, low, high); only "float" and "int"
    parameter types are supported, anything else is skipped.
    """
    sampled: Dict = {}
    for name, (param_type, low, high) in SEARCH_SPACE.items():
        if param_type == "float":
            sampled[name] = trial.suggest_float(name, low, high)
        elif param_type == "int":
            sampled[name] = trial.suggest_int(name, int(low), int(high))
    return sampled
# ------------------------------------------------------------------
# Prediction
# ------------------------------------------------------------------
def _predict_quality(self, context: Dict, params: Dict) -> float:
    """Score a candidate parameter set with the trained regressor (0-100).

    Uses the peak/off-peak specialist model when one exists for the
    context's regime, otherwise the general model. Returns 0.0 when no
    model has been trained yet.
    """
    if self._model is None:
        return 0.0
    # Lag features always come from the context (with safe defaults),
    # never from the candidate params.
    features = {**context, **params}
    features["rolling_avg_quality_5"] = context.get("rolling_avg_quality_5", 50.0)
    features["quality_delta_10"] = context.get("quality_delta_10", 0.0)
    vector = []
    for col in ALL_FEATURE_COLS:
        try:
            vector.append(float(features.get(col, 0) or 0))
        except (TypeError, ValueError):
            vector.append(0.0)
    specialist = self._peak_model if int(context.get("is_peak", 0)) else self._offpeak_model
    model = specialist or self._model
    raw = float(model.predict(np.array([vector], dtype=np.float32))[0])
    # Clamp into the valid quality-score range.
    return min(max(raw, 0.0), 100.0)
# ------------------------------------------------------------------
# Feature Importance
# ------------------------------------------------------------------
def _compute_feature_importance(self) -> None:
    """Populate self._feature_importance as {feature: % contribution}.

    Prefers SHAP values (mean |SHAP| over the most recent training rows)
    when the shap package is available; otherwise falls back to XGBoost's
    raw split counts (get_fscore). Both paths normalize to percentages and
    sort descending. No-op when no model has been trained.
    """
    if self._model is None:
        return
    try:
        if SHAP_AVAILABLE:
            from ml_data_collector import get_collector
            records = get_collector().get_training_data(min_records=1) or []
            # Cap at the last 200 rows to keep SHAP computation tractable.
            records = self._add_lag_features(records[-200:])
            X, _ = self._prepare_data(records, ALL_FEATURE_COLS)
            if X is not None and len(X) > 0:
                explainer = shap.TreeExplainer(self._model)
                shap_values = np.abs(explainer.shap_values(X)).mean(axis=0)
                total = max(shap_values.sum(), 1e-9)  # guard div-by-zero
                self._feature_importance = dict(sorted(
                    {ALL_FEATURE_COLS[i]: round(float(shap_values[i] / total) * 100, 2)
                     for i in range(len(ALL_FEATURE_COLS))}.items(),
                    key=lambda x: x[1], reverse=True
                ))
                return
    except Exception:
        pass  # SHAP is best-effort; fall through to the fscore fallback.
    try:
        # Fallback: booster feature keys look like "f0", "f1", ... - map the
        # index back to the column name and normalize split counts to %.
        scores = self._model.get_booster().get_fscore()
        total = max(sum(scores.values()), 1)
        self._feature_importance = dict(sorted(
            {ALL_FEATURE_COLS[int(k[1:])]: round(v / total * 100, 2)
             for k, v in scores.items()
             if k.startswith("f") and k[1:].isdigit() and int(k[1:]) < len(ALL_FEATURE_COLS)
             }.items(),
            key=lambda x: x[1], reverse=True
        ))
    except Exception as e:
        logger.warning(f"[Hypertuner] Feature importance failed: {e}")
def get_feature_importance(self) -> Optional[Dict[str, float]]:
    """Latest per-feature contribution map (%), or None if never computed."""
    importance = self._feature_importance
    return importance
# ------------------------------------------------------------------
# Context
# ------------------------------------------------------------------
def _get_current_context(self, records: List[Dict]) -> Dict:
now = datetime.utcnow()
recent = records[-20:]
avg_orders = sum(r.get("num_orders", 0) for r in recent) / max(len(recent), 1)
avg_riders = sum(r.get("num_riders", 0) for r in recent) / max(len(recent), 1)
recent_scores = [float(r.get("quality_score", 0)) for r in recent]
rolling_avg5 = sum(recent_scores[-5:]) / max(len(recent_scores[-5:]), 1)
delta10 = (recent_scores[-1] - recent_scores[-11]) if len(recent_scores) >= 11 else 0.0
return {
"hour": now.hour,
"day_of_week": now.weekday(),
"is_peak": int(now.hour in (7, 8, 9, 12, 13, 18, 19, 20)),
"num_orders": round(avg_orders),
"num_riders": round(avg_riders),
"rolling_avg_quality_5": round(rolling_avg5, 2),
"quality_delta_10": round(delta10, 2),
}
def _compute_baseline_stats(self, records: List[Dict]) -> Dict:
scores = [float(r.get("quality_score", 0)) for r in records if r.get("quality_score")]
if not scores:
return {"avg_quality": 0.0, "best_quality": 0.0, "worst_quality": 0.0}
return {
"avg_quality": round(sum(scores) / len(scores), 2),
"best_quality": round(max(scores), 2),
"worst_quality": round(min(scores), 2),
"sample_size": len(scores),
}
# ------------------------------------------------------------------
# Model Info
# ------------------------------------------------------------------
def get_model_info(self) -> Dict[str, Any]:
    """Snapshot of tuner state: models, validation, baseline, search space."""
    baseline = self._latest_baseline
    if baseline is None:
        # No tuning run yet: lazily derive a baseline from collected data.
        try:
            from ml_data_collector import get_collector
            records = get_collector().get_training_data(min_records=1)
            if records:
                baseline = self._compute_baseline_stats(records)
        except Exception:
            pass
    trained_at = self._model_trained_at
    search_space = {
        name: {"type": spec[0], "low": spec[1], "high": spec[2]}
        for name, spec in SEARCH_SPACE.items()
    }
    return {
        "model_trained": self._model is not None,
        "trained_at": trained_at.isoformat() if trained_at else None,
        "training_rows": self._training_rows,
        "peak_model_trained": self._peak_model is not None,
        "offpeak_model_trained": self._offpeak_model is not None,
        "features": ALL_FEATURE_COLS,
        "validation": self._latest_validation,
        "baseline": baseline,
        "search_space": search_space,
        "feature_importance": self._feature_importance,
        "top_trials": self._top_trials[:10],
        "pareto_frontier_size": len(self._pareto_frontier),
    }
# ------------------------------------------------------------------
# Report I/O
# ------------------------------------------------------------------
def _save_report(self, best_params, best_score, training_rows,
                 n_trials, cv_results, baseline_stats) -> None:
    """Persist a timestamped JSON tuning report so state survives restarts.

    Failures are logged and swallowed - reporting must never break tuning.
    """
    try:
        os.makedirs(_REPORT_DIR, exist_ok=True)
        payload = {
            "timestamp": datetime.utcnow().isoformat(),
            "training_rows": training_rows,
            "n_trials": n_trials,
            "best_predicted_quality": round(best_score, 2),
            "best_params": best_params,
            "validation": cv_results or {},
            "baseline_stats": baseline_stats or {},
            "feature_importance": self._feature_importance or {},
            "top_trials": self._top_trials[:10],
            "pareto_frontier": self._pareto_frontier[:20],
        }
        filename = f"tuning_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}.json"
        path = os.path.join(_REPORT_DIR, filename)
        with open(path, "w") as fh:
            json.dump(payload, fh, indent=2)
        logger.info(f"[Hypertuner] Report -> {path}")
    except Exception as e:
        logger.warning(f"[Hypertuner] Report save failed: {e}")
def _load_latest_report(self) -> None:
    """Restore tuner state from the newest JSON report on disk, if any.

    Report filenames embed a sortable timestamp, so a reverse lexicographic
    sort puts the newest first. Failures are logged and ignored.
    """
    try:
        if not os.path.isdir(_REPORT_DIR):
            return
        reports = sorted([f for f in os.listdir(_REPORT_DIR) if f.endswith(".json")], reverse=True)
        if not reports:
            return
        newest = reports[0]
        with open(os.path.join(_REPORT_DIR, newest)) as fh:
            snapshot = json.load(fh)
        self._latest_validation = snapshot.get("validation")
        self._latest_baseline = snapshot.get("baseline_stats")
        self._training_rows = snapshot.get("training_rows", 0)
        self._feature_importance = snapshot.get("feature_importance")
        self._top_trials = snapshot.get("top_trials", [])
        self._pareto_frontier = snapshot.get("pareto_frontier", [])
        stamp = snapshot.get("timestamp")
        if stamp:
            self._model_trained_at = datetime.fromisoformat(stamp)
        logger.info(f"[Hypertuner] Restored state from {newest}")
    except Exception as e:
        logger.warning(f"[Hypertuner] Load latest report failed: {e}")
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------
# Lazily-created, process-wide tuner instance.
_tuner: Optional[MLHypertuner] = None


def get_hypertuner() -> MLHypertuner:
    """Return the shared MLHypertuner, creating it on first use."""
    global _tuner
    if _tuner is not None:
        return _tuner
    _tuner = MLHypertuner()
    return _tuner

View File

@@ -0,0 +1,99 @@
import httpx
import logging
from datetime import datetime
from typing import List, Dict, Any, Optional
from app.config.rider_preferences import RIDER_PREFERRED_KITCHENS
logger = logging.getLogger(__name__)
async def fetch_active_riders() -> List[Dict[str, Any]]:
    """
    Fetch active rider logs from the external API for the current date.
    Returns a list of rider log dictionaries.
    """
    try:
        today = datetime.now().strftime("%Y-%m-%d")
        query = {
            "applocationid": 1,
            "partnerid": 44,
            "fromdate": today,
            "todate": today,
            "keyword": ""
        }
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(
                "https://jupiter.nearle.app/live/api/v2/partners/getriderlogs/",
                params=query,
            )
            response.raise_for_status()
            payload = response.json()
            # The raw log list is returned as-is; filtering (preferences,
            # "onduty" flag, etc.) is applied later by the assignment logic.
            if payload and payload.get("code") == 200 and payload.get("details"):
                return payload.get("details", [])
            logger.warning(f"Fetch active riders returned no details: {payload}")
            return []
    except Exception as e:
        logger.error(f"Error fetching active riders: {e}", exc_info=True)
        return []
async def fetch_created_orders() -> List[Dict[str, Any]]:
    """
    Fetch all orders in 'created' state for the current date.
    """
    try:
        today = datetime.now().strftime("%Y-%m-%d")
        # "pagesize" is intentionally omitted so the API returns everything.
        # NOTE(review): confirm the backend really disables paging when
        # pagesize is absent - only pageno=1 is requested here.
        query = {
            "applocationid": 0,
            "tenantid": 0,
            "locationid": 0,
            "status": "created",
            "fromdate": today,
            "todate": today,
            "keyword": "",
            "pageno": 1
        }
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(
                "https://jupiter.nearle.app/live/api/v1/orders/tenant/getorders/",
                params=query,
            )
            response.raise_for_status()
            payload = response.json()
            if payload and payload.get("code") == 200 and payload.get("details"):
                return payload.get("details", [])
            logger.warning(f"Fetch created orders returned no details: {payload}")
            return []
    except Exception as e:
        logger.error(f"Error fetching created orders: {e}", exc_info=True)
        return []
async def fetch_rider_pricing() -> List[Dict[str, Any]]:
    """
    Fetch rider pricing configuration from external API.
    """
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(
                "https://jupiter.nearle.app/live/api/v1/partners/getriderpricing"
            )
            response.raise_for_status()
            payload = response.json()
            if payload and payload.get("code") == 200:
                return payload.get("details", [])
            logger.warning(f"Fetch rider pricing returned no details: {payload}")
            return []
    except Exception as e:
        logger.error(f"Error fetching rider pricing: {e}", exc_info=True)
        return []

View File

@@ -0,0 +1,78 @@
import os
import pickle
import logging
from datetime import datetime
from typing import Dict, Any
logger = logging.getLogger(__name__)

HISTORY_FILE = "rider_history.pkl"


class RiderHistoryService:
    """Long-term per-rider workload tracker.

    Accumulates total kilometers and order counts per rider, persisted to a
    pickle file, and classifies riders as LONG/SHORT route candidates
    relative to the population average.
    """

    def __init__(self, history_file: str = HISTORY_FILE):
        self.history_file = history_file
        self.history = self._load_history()

    def _load_history(self) -> Dict[int, Dict[str, float]]:
        """Deserialize the history pickle; empty dict when missing/corrupt."""
        if not os.path.exists(self.history_file):
            return {}
        try:
            with open(self.history_file, 'rb') as fh:
                return pickle.load(fh)
        except Exception as exc:
            logger.error(f"Failed to load rider history: {exc}")
            return {}

    def _save_history(self):
        """Persist history; failures are logged, never raised."""
        try:
            with open(self.history_file, 'wb') as fh:
                pickle.dump(self.history, fh)
        except Exception as exc:
            logger.error(f"Failed to save rider history: {exc}")

    def update_rider_stats(self, rider_id: int, distance_km: float, order_count: int):
        """Accumulate distance and order count for a rider, then persist."""
        key = int(rider_id)
        entry = self.history.setdefault(key, {
            "total_km": 0.0,
            "total_orders": 0,
            "last_updated": datetime.now().isoformat()
        })
        entry["total_km"] += distance_km
        entry["total_orders"] += order_count
        entry["last_updated"] = datetime.now().isoformat()
        # Auto-save on every update so a crash loses at most one record.
        self._save_history()

    def get_rider_score(self, rider_id: int) -> float:
        """
        Get a score representing the rider's historical 'load' (KMs).
        Higher Score = More KMs driven recently.
        """
        return self.history.get(int(rider_id), {}).get("total_km", 0.0)

    def get_preferred_assignment_type(self, rider_id: int, all_rider_scores: Dict[int, float]) -> str:
        """
        Determine if rider should get 'Long' or 'Short' routes based on population average.

        Below-average mileage -> LONG (catch up); at/above average -> SHORT.
        Returns "ANY" when there is no population to compare against.
        """
        if not all_rider_scores:
            return "ANY"
        population_avg = sum(all_rider_scores.values()) / len(all_rider_scores)
        own_score = self.get_rider_score(rider_id)
        return "LONG" if own_score < population_avg else "SHORT"

View File

@@ -0,0 +1,108 @@
import os
import pickle
import logging
import time
from datetime import datetime
from typing import Dict, Any, List, Set
logger = logging.getLogger(__name__)

STATE_FILE = "rider_active_state.pkl"


class RiderStateManager:
    """
    Manages the 'Short-Term' Active State of Riders for session persistence.
    Tracks:
    - Minutes Committed (Remaining Workload)
    - Active Kitchens (Unique Pickups in current queue)
    - Last Planned Drop Location (for Daisy Chaining)
    - Timestamp of last update (for Time Decay)
    """

    def __init__(self, state_file: str = STATE_FILE):
        self.state_file = state_file
        self.states = self._load_states()

    def _load_states(self) -> Dict[str, Any]:
        """Deserialize persisted states; empty dict when missing/corrupt."""
        if not os.path.exists(self.state_file):
            return {}
        try:
            with open(self.state_file, 'rb') as fh:
                return pickle.load(fh)
        except Exception as exc:
            logger.error(f"Failed to load rider active states: {exc}")
            return {}

    def _save_states(self):
        """Persist states; failures are logged, never raised."""
        try:
            with open(self.state_file, 'wb') as fh:
                pickle.dump(self.states, fh)
        except Exception as exc:
            logger.error(f"Failed to save rider active states: {exc}")

    def get_rider_state(self, rider_id: int) -> Dict[str, Any]:
        """
        Get the current active state of a rider with TIME DECAY applied.
        If the server restarts after 30 mins, the 'minutes_committed' should reduce by 30.
        """
        rider_id = int(rider_id)
        stored = self.states.get(rider_id)
        now_ts = time.time()
        if not stored:
            # Unknown rider: fully free, no kitchens held.
            return {
                'minutes_remaining': 0.0,
                'last_drop_lat': None,
                'last_drop_lon': None,
                'active_kitchens': set(),
                'last_updated_ts': now_ts
            }
        # Decay the committed workload by wall-clock minutes elapsed.
        elapsed_minutes = (now_ts - stored.get('last_updated_ts', now_ts)) / 60.0
        minutes_left = max(0.0, stored.get('minutes_remaining', 0.0) - elapsed_minutes)
        # Almost-finished riders release their kitchen locks (5-minute buffer).
        if minutes_left <= 5.0:
            kitchens = set()
        else:
            kitchens = stored.get('active_kitchens', set())
        return {
            'minutes_remaining': minutes_left,
            'last_drop_lat': stored.get('last_drop_lat'),
            'last_drop_lon': stored.get('last_drop_lon'),
            'active_kitchens': kitchens,
            'last_updated_ts': now_ts
        }

    def update_rider_state(self, rider_id: int, added_minutes: float, new_kitchens: Set[str], last_lat: float, last_lon: float):
        """
        Update the state after a new assignment.
        """
        rider_id = int(rider_id)
        # Decay first, then stack the new workload on top.
        decayed = self.get_rider_state(rider_id)
        self.states[rider_id] = {
            'minutes_remaining': decayed['minutes_remaining'] + added_minutes,
            'last_drop_lat': last_lat,
            'last_drop_lon': last_lon,
            'active_kitchens': decayed['active_kitchens'].union(new_kitchens),
            'last_updated_ts': time.time()
        }
        self._save_states()

    def clear_state(self, rider_id: int):
        """Drop a rider's active state entirely and persist the removal."""
        rider_id = int(rider_id)
        if rider_id in self.states:
            del self.states[rider_id]
            self._save_states()

View File

@@ -0,0 +1,133 @@
"""
Geographic Clustering Service for Order Assignment
Uses K-means clustering to group orders by kitchen location.
"""
import logging
import numpy as np
from typing import List, Dict, Any, Tuple
from collections import defaultdict
from math import radians, cos, sin, asin, sqrt
logger = logging.getLogger(__name__)


class ClusteringService:
    """Clusters orders geographically to enable balanced rider assignment."""

    def __init__(self):
        self.earth_radius_km = 6371

    def haversine(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Great-circle distance between two (lat, lon) points in km."""
        rlat1, rlon1, rlat2, rlon2 = (radians(float(v)) for v in (lat1, lon1, lat2, lon2))
        half_chord = (
            sin((rlat2 - rlat1) / 2) ** 2
            + cos(rlat1) * cos(rlat2) * sin((rlon2 - rlon1) / 2) ** 2
        )
        # min() guards against floating-point drift pushing sqrt(a) above 1.
        return 2 * asin(min(1.0, sqrt(half_chord))) * self.earth_radius_km

    def get_kitchen_location(self, order: Dict[str, Any]) -> Tuple[float, float]:
        """Extract kitchen (pickup) coordinates; (0.0, 0.0) when unavailable."""
        try:
            lat = float(order.get("pickuplat", 0))
            # Some payloads use 'pickuplon', others 'pickuplong'.
            lon = float(order.get("pickuplon") or order.get("pickuplong", 0))
            if lat != 0 and lon != 0:
                return lat, lon
        except (ValueError, TypeError):
            pass
        return 0.0, 0.0

    def cluster_orders_by_kitchen(self, orders: List[Dict[str, Any]], max_cluster_radius_km: float = 3.0) -> List[Dict[str, Any]]:
        """
        Cluster orders by kitchen proximity.
        Returns list of clusters, each containing:
        - centroid: (lat, lon) of cluster center
        - orders: list of orders in this cluster
        - kitchen_names: set of kitchen names in cluster
        - total_orders: count
        """
        if not orders:
            return []

        # Bucket orders per kitchen name, keeping one coordinate per kitchen.
        orders_by_kitchen = defaultdict(list)
        coords_by_kitchen = {}
        for order in orders:
            name = self._get_kitchen_name(order)
            lat, lon = self.get_kitchen_location(order)
            if lat == 0:
                # Pickup coords missing: fall back to the delivery point.
                lat = float(order.get("deliverylat", 0))
                lon = float(order.get("deliverylong", 0))
            if lat != 0:
                orders_by_kitchen[name].append(order)
                coords_by_kitchen[name] = (lat, lon)

        # Greedily merge kitchens within max_cluster_radius_km of a seed.
        clusters = []
        consumed = set()
        for seed_name, seed_orders in orders_by_kitchen.items():
            if seed_name in consumed:
                continue
            consumed.add(seed_name)
            member_names = [seed_name]
            member_orders = list(seed_orders)
            seed_lat, seed_lon = coords_by_kitchen[seed_name]
            for candidate, (cand_lat, cand_lon) in coords_by_kitchen.items():
                if candidate in consumed:
                    continue
                if self.haversine(seed_lat, seed_lon, cand_lat, cand_lon) <= max_cluster_radius_km:
                    member_names.append(candidate)
                    member_orders.extend(orders_by_kitchen[candidate])
                    consumed.add(candidate)

            # Centroid = mean of available pickup coordinates, else the seed.
            pickup_lats = []
            pickup_lons = []
            for member in member_orders:
                p_lat, p_lon = self.get_kitchen_location(member)
                if p_lat != 0:
                    pickup_lats.append(p_lat)
                    pickup_lons.append(p_lon)
            if pickup_lats:
                centroid = (sum(pickup_lats) / len(pickup_lats), sum(pickup_lons) / len(pickup_lons))
            else:
                centroid = (seed_lat, seed_lon)

            clusters.append({
                'centroid': centroid,
                'orders': member_orders,
                'kitchen_names': set(member_names),
                'total_orders': len(member_orders)
            })

        # Largest clusters first.
        clusters.sort(key=lambda c: c['total_orders'], reverse=True)
        logger.info(f"Created {len(clusters)} clusters from {len(orders_by_kitchen)} kitchens")
        return clusters

    def _get_kitchen_name(self, order: Dict[str, Any]) -> str:
        """Best-effort kitchen name lookup across known payload keys."""
        for key in ('storename', 'restaurantname', 'kitchenname', 'partnername', 'store_name'):
            value = order.get(key)
            if value:
                return str(value).strip()
        return "Unknown"

View File

@@ -0,0 +1,326 @@
"""
GPS Kalman Filter \u2014 rider-api
A 1D Kalman filter applied independently to latitude and longitude
to smooth noisy GPS coordinates from riders and delivery points.
Why Kalman for GPS?
- GPS readings contain measurement noise (\u00b15\u201315m typical, \u00b150m poor signal)
- Rider location pings can "jump" due to bad signal or device error
- Kalman filter gives an optimal estimate by balancing:
(1) Previous predicted position (process model)
(2) New GPS measurement (observation model)
Design:
- Separate filter instance per rider (stateful \u2014 preserves history)
- `CoordinateKalmanFilter` \u2014 single lat/lon smoother
- `GPSKalmanFilter` \u2014 wraps two CoordinateKalmanFilters (lat + lon)
- `RiderKalmanRegistry` \u2014 manages per-rider filter instances
- `smooth_coordinates()` \u2014 stateless single-shot smoother for delivery coords
Usage:
# Stateless (one-shot, no history \u2014 for delivery coords):
smooth_lat, smooth_lon = smooth_coordinates(raw_lat, raw_lon)
# Stateful (per-rider, preserves motion history):
registry = RiderKalmanRegistry()
lat, lon = registry.update(rider_id=1116, lat=11.0067, lon=76.9558)
"""
import logging
import time
from typing import Dict, Optional, Tuple
logger = logging.getLogger(__name__)
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# CORE 1D KALMAN FILTER
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
class CoordinateKalmanFilter:
    """
    1-D Kalman filter for a single GPS coordinate (latitude or longitude).

    Constant-position random-walk model: between pings the state is assumed
    stationary while its variance grows by Q; each measurement is fused via
    the gain K = P/(P+R).

    Parameters:
        process_noise (Q): expected drift between measurements.
            Higher = trust new measurements more (less smoothing).
        measurement_noise (R): GPS reading uncertainty.
            Higher = trust history more (more smoothing).
    """

    def __init__(
        self,
        process_noise: float = 1e-4,
        measurement_noise: float = 0.01,
        initial_uncertainty: float = 1.0,
    ):
        self.Q = process_noise
        self.R = measurement_noise
        self._x: Optional[float] = None
        self._P: float = initial_uncertainty

    @property
    def initialized(self) -> bool:
        return self._x is not None

    def update(self, measurement: float) -> float:
        """Process one new measurement and return the filtered estimate."""
        if self._x is None:
            # First reading seeds the state directly.
            self._x = measurement
            return self._x
        # Predict: uncertainty grows while position stays put.
        predicted_var = self._P + self.Q
        # Update: fuse prediction and measurement by the Kalman gain.
        gain = predicted_var / (predicted_var + self.R)
        self._x = self._x + gain * (measurement - self._x)
        self._P = (1.0 - gain) * predicted_var
        return self._x

    def reset(self):
        """Forget all state; the next update re-seeds the filter."""
        self._x = None
        self._P = 1.0
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# 2D GPS KALMAN FILTER (lat + lon)
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
class GPSKalmanFilter:
    """
    Smooths a (lat, lon) stream by running two independent 1-D Kalman
    filters, one per axis.
    """

    def __init__(
        self,
        process_noise: float = 1e-4,
        measurement_noise: float = 0.01,
    ):
        self.lat_filter = CoordinateKalmanFilter(process_noise, measurement_noise)
        self.lon_filter = CoordinateKalmanFilter(process_noise, measurement_noise)
        self.last_updated: float = time.time()
        self.update_count: int = 0

    def update(self, lat: float, lon: float) -> Tuple[float, float]:
        """Feed a new GPS reading and get the smoothed (lat, lon)."""
        if not self._is_valid_coord(lat, lon):
            # Bad fix: hold the last estimate if we have one, else echo input.
            if self.lat_filter.initialized:
                return self.lat_filter._x, self.lon_filter._x
            return lat, lon
        filtered = (self.lat_filter.update(lat), self.lon_filter.update(lon))
        self.last_updated = time.time()
        self.update_count += 1
        return filtered

    def get_estimate(self) -> Optional[Tuple[float, float]]:
        """Current smoothed position, or None before the first valid fix."""
        if not self.lat_filter.initialized:
            return None
        return self.lat_filter._x, self.lon_filter._x

    def reset(self):
        """Clear both axis filters and the update counter."""
        self.lat_filter.reset()
        self.lon_filter.reset()
        self.update_count = 0

    @staticmethod
    def _is_valid_coord(lat: float, lon: float) -> bool:
        """Reject out-of-range values and the (0, 0) 'null island' fix."""
        try:
            lat_f, lon_f = float(lat), float(lon)
        except (TypeError, ValueError):
            return False
        if lat_f == 0.0 and lon_f == 0.0:
            return False
        return -90.0 <= lat_f <= 90.0 and -180.0 <= lon_f <= 180.0
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# PER-RIDER FILTER REGISTRY
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
class RiderKalmanRegistry:
    """
    Keeps one GPSKalmanFilter per rider across calls. Filters silent for
    longer than `stale_seconds` (default 30 min) are reset before reuse.
    """

    def __init__(
        self,
        process_noise: float = 1e-4,
        measurement_noise: float = 0.01,
        stale_seconds: float = 1800.0,
    ):
        self._filters: Dict[str, GPSKalmanFilter] = {}
        self._process_noise = process_noise
        self._measurement_noise = measurement_noise
        self._stale_seconds = stale_seconds

    def _get_or_create(self, rider_id) -> GPSKalmanFilter:
        """Fetch the rider's filter (resetting if stale), creating if absent."""
        key = str(rider_id)
        existing = self._filters.get(key)
        if existing is not None:
            # A long silence means the rider may be far away: start fresh.
            if time.time() - existing.last_updated > self._stale_seconds:
                existing.reset()
            return existing
        fresh = GPSKalmanFilter(
            process_noise=self._process_noise,
            measurement_noise=self._measurement_noise,
        )
        self._filters[key] = fresh
        return fresh

    def update(self, rider_id, lat: float, lon: float) -> Tuple[float, float]:
        """Smooth one GPS ping for the given rider."""
        return self._get_or_create(rider_id).update(lat, lon)

    def get_estimate(self, rider_id) -> Optional[Tuple[float, float]]:
        """Last smoothed position for the rider, or None if unknown."""
        tracked = self._filters.get(str(rider_id))
        return tracked.get_estimate() if tracked is not None else None

    def reset_rider(self, rider_id):
        """Forget a single rider's filter history."""
        tracked = self._filters.get(str(rider_id))
        if tracked is not None:
            tracked.reset()

    def clear_all(self):
        """Drop every rider filter."""
        self._filters.clear()
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# GLOBAL REGISTRY (process-level singleton)
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# Process-wide singleton: all request handlers share one registry so
# per-rider smoothing history accumulates across API calls.
_global_registry = RiderKalmanRegistry()
def get_registry() -> RiderKalmanRegistry:
    """Get the process-level rider Kalman filter registry."""
    return _global_registry
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# STATELESS COORDINATE SMOOTHER
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
def smooth_coordinates(
    lat: float,
    lon: float,
    *,
    prior_lat: Optional[float] = None,
    prior_lon: Optional[float] = None,
    process_noise: float = 1e-4,
    measurement_noise: float = 0.01,
) -> Tuple[float, float]:
    """
    Stateless single-shot GPS smoother.

    A throwaway Kalman filter is seeded with the prior (when both parts
    are present, parseable and valid) so the new reading gets blended
    towards it; otherwise the reading passes through a fresh filter.
    """
    kf = GPSKalmanFilter(process_noise=process_noise, measurement_noise=measurement_noise)
    if prior_lat is not None and prior_lon is not None:
        seed_lat = seed_lon = None
        try:
            seed_lat, seed_lon = float(prior_lat), float(prior_lon)
        except (TypeError, ValueError):
            pass
        if seed_lat is not None and GPSKalmanFilter._is_valid_coord(seed_lat, seed_lon):
            kf.update(seed_lat, seed_lon)
    return kf.update(lat, lon)
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# BATCH SMOOTHERS
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
def smooth_rider_locations(riders: list) -> list:
    """
    Apply Kalman smoothing to a list of rider dicts in-place using
    the global per-rider registry (history preserved across calls).

    Reads/writes: latitude, longitude (and currentlat/currentlong if present).
    Adds: _kalman_smoothed = True on each processed rider.
    """
    registry = get_registry()
    for rider in riders:
        try:
            rid = (
                rider.get("userid")
                or rider.get("riderid")
                or rider.get("id")
                or "unknown"
            )
            lat_in = float(rider.get("latitude") or rider.get("currentlat") or 0)
            lon_in = float(rider.get("longitude") or rider.get("currentlong") or 0)
            # (0, 0) means "no GPS fix yet" -- leave the rider untouched.
            if lat_in == 0.0 and lon_in == 0.0:
                continue
            new_lat, new_lon = registry.update(rid, lat_in, lon_in)
            # Serialise back to strings so the Go consumer can unmarshal them.
            lat_str = str(round(new_lat, 8))
            lon_str = str(round(new_lon, 8))
            rider["latitude"] = lat_str
            rider["longitude"] = lon_str
            if "currentlat" in rider:
                rider["currentlat"] = lat_str
            if "currentlong" in rider:
                rider["currentlong"] = lon_str
            rider["_kalman_smoothed"] = True
        except Exception as e:
            logger.debug(f"Kalman rider smoothing skipped: {e}")
    return riders
def smooth_order_coordinates(orders: list) -> list:
    """
    Apply stateless Kalman smoothing to delivery coordinates in a list
    of order dicts. Uses pickup coords as a seed (prior) when available.
    Modifies orders in-place. Returns the same list.
    """
    for order in orders:
        try:
            drop_lat = float(order.get("deliverylat") or order.get("droplat") or 0)
            drop_lon = float(order.get("deliverylong") or order.get("droplon") or 0)
            if not GPSKalmanFilter._is_valid_coord(drop_lat, drop_lon):
                continue
            # Parse the optional pickup seed; any failure disables the prior.
            seed_lat = seed_lon = None
            try:
                raw_plat = order.get("pickuplat")
                raw_plon = order.get("pickuplon") or order.get("pickuplong")
                seed_lat = float(raw_plat) if raw_plat else None
                seed_lon = float(raw_plon) if raw_plon else None
            except (TypeError, ValueError):
                seed_lat = seed_lon = None
            new_lat, new_lon = smooth_coordinates(
                drop_lat, drop_lon,
                prior_lat=seed_lat,
                prior_lon=seed_lon,
            )
            # Cast back to string for Go compatibility (fixes unmarshal error)
            lat_str = str(round(new_lat, 8))
            lon_str = str(round(new_lon, 8))
            order["deliverylat"] = lat_str
            order["deliverylong"] = lon_str
            if "droplat" in order:
                order["droplat"] = lat_str
            if "droplon" in order:
                order["droplon"] = lon_str
            order["_kalman_smoothed"] = True
        except Exception as e:
            logger.debug(f"Kalman order smoothing skipped: {e}")
    return orders

View File

@@ -0,0 +1,158 @@
"""
Realistic ETA Calculator for Delivery Operations
Accounts for:
- City traffic conditions
- Stop time at pickup/delivery
- Navigation time
- Parking/finding address time
- Different speeds for different order types
"""
import logging
from typing import Dict, Any
logger = logging.getLogger(__name__)
class RealisticETACalculator:
    """
    Calculates realistic ETAs accounting for real-world delivery
    conditions: traffic band, pickup/delivery stop time, navigation
    buffers and distance-dependent effective speeds.
    """

    def __init__(self):
        from app.config.dynamic_config import get_config
        cfg = get_config()
        # BASE SPEED (km/h) - driven by the DB configuration.
        base_speed = cfg.get("avg_speed_kmh", 18.0)
        # Traffic-dependent speeds derived from the tuned base speed.
        self.CITY_SPEED_HEAVY_TRAFFIC = base_speed * 0.7   # usually ~12 km/h
        self.CITY_SPEED_MODERATE = base_speed              # usually ~18 km/h
        self.CITY_SPEED_LIGHT = base_speed * 1.2           # usually ~21.6 km/h
        # Fixed time buffers (minutes).
        self.PICKUP_TIME = cfg.get("eta_pickup_time_min", 3.0)
        self.DELIVERY_TIME = cfg.get("eta_delivery_time_min", 4.0)
        self.NAVIGATION_BUFFER = cfg.get("eta_navigation_buffer_min", 1.5)
        # Distance-based speed multipliers: short hops (<2km) are slower
        # (stop/start traffic), long hauls (>8km) may include faster roads.
        self.SHORT_TRIP_FACTOR = cfg.get("eta_short_trip_factor", 0.8)
        self.LONG_TRIP_FACTOR = cfg.get("eta_long_trip_factor", 1.1)

    def calculate_eta(self,
                      distance_km: float,
                      is_first_order: bool = False,
                      order_type: str = "Economy",
                      time_of_day: str = "peak") -> int:
        """
        Calculate realistic ETA in minutes.

        Args:
            distance_km: Distance to travel in kilometers.
            is_first_order: If True, includes pickup time.
            order_type: "Economy", "Premium", or "Risky" (currently unused).
            time_of_day: "peak", "normal", or "light" traffic.

        Returns:
            ETA in minutes, rounded up for safety (0 for non-positive distance).
        """
        if distance_km <= 0:
            return 0
        # Speed by traffic band; anything unknown falls back to moderate.
        band_speeds = {
            "peak": self.CITY_SPEED_HEAVY_TRAFFIC,
            "light": self.CITY_SPEED_LIGHT,
        }
        base_speed = band_speeds.get(time_of_day, self.CITY_SPEED_MODERATE)
        # Distance-dependent speed adjustment.
        if distance_km < 2.0:
            effective_speed = base_speed * self.SHORT_TRIP_FACTOR
        elif distance_km > 8.0:
            effective_speed = base_speed * self.LONG_TRIP_FACTOR
        else:
            effective_speed = base_speed
        # Pure travel time in minutes.
        total_time = (distance_km / effective_speed) * 60
        if is_first_order:
            # Pickup stop happens only on the first leg of a sequence.
            total_time += self.PICKUP_TIME
        # Hand-over time applies to every drop.
        total_time += self.DELIVERY_TIME
        if distance_km > 3.0:
            # Extra time to navigate / find the address on longer legs.
            total_time += self.NAVIGATION_BUFFER
        # Round up to the next minute: riders prefer arriving early to late.
        return int(total_time) + 1

    def calculate_batch_eta(self, orders: list) -> list:
        """
        Calculate ETAs for a batch of orders in sequence.

        Args:
            orders: List of order dicts with 'previouskms' and 'step' fields.

        Returns:
            Same list with updated 'eta' fields (stringified minutes).
        """
        for order in orders:
            leg_km = float(order.get('previouskms', 0))
            seq_step = order.get('step', 1)
            eta_minutes = self.calculate_eta(
                distance_km=leg_km,
                is_first_order=(seq_step == 1),  # first order includes pickup time
                order_type=order.get('ordertype', 'Economy'),
                time_of_day="normal"  # Default to moderate traffic
            )
            order['eta'] = str(eta_minutes)
            order['eta_realistic'] = True  # Flag to indicate realistic calculation
        return orders
def get_time_of_day_category(hour=None) -> str:
    """
    Classify traffic conditions for an hour of the day.

    Args:
        hour: Hour in 0-23. Defaults to the current local hour, which
            preserves the original zero-argument behavior; passing an
            explicit hour makes the function deterministic (testable).

    Returns:
        "peak"   -- rush windows: 8-10 AM, 12-2 PM, 5-8 PM
        "light"  -- late night / early morning (before 7 or from 22)
        "normal" -- everything else
    """
    if hour is None:
        from datetime import datetime
        hour = datetime.now().hour
    # Peak hours: 8-10 AM, 12-2 PM, 5-8 PM
    if (8 <= hour < 10) or (12 <= hour < 14) or (17 <= hour < 20):
        return "peak"
    # Light traffic: Late night/early morning
    if hour < 7 or hour >= 22:
        return "light"
    return "normal"

View File

@@ -0,0 +1,425 @@
"""Production-grade route optimization using Google OR-Tools.
ALGORITHM: TSP / VRP with Google OR-Tools
- Industry-standard solver (same as used by major logistics companies)
- Constraint-based optimization
- Handles time windows (future proofing)
- Guaranteed optimal or near-optimal solution
FEATURES:
- Automatic outlier detection and coordinate correction
- Hybrid distance calculation (Google Maps + Haversine fallback)
- Robust error handling for invalid inputs
"""
import math
import os
import logging
import asyncio
from typing import Dict, Any, List as _List, Optional, Tuple, Union
from datetime import datetime, timedelta
import httpx
from app.services.routing.kalman_filter import smooth_order_coordinates
import numpy as np
from app.core.arrow_utils import calculate_haversine_matrix_vectorized
from app.config.dynamic_config import get_config
# OR-Tools is an optional dependency: when it is missing, the optimizer
# degrades to the greedy nearest-neighbour solver (RouteOptimizer._solve_greedy).
try:
    from ortools.constraint_solver import routing_enums_pb2
    from ortools.constraint_solver import pywrapcp
    ORTOOLS_AVAILABLE = True
except ImportError:
    ORTOOLS_AVAILABLE = False
    logging.warning("Google OR-Tools not found. Falling back to simple greedy solver.")
logger = logging.getLogger(__name__)
class RouteOptimizer:
    """Route optimization using Google OR-Tools (Async).

    Pipeline (see optimize_provider_payload):
      1. Kalman-smooth delivery coordinates.
      2. Repair outlier/typo coordinates against the order centroid.
      3. Build a road-factor-scaled haversine distance matrix.
      4. Penalize legs the ID3 behavior model flags as risky.
      5. Solve an open TSP (OR-Tools, with a greedy fallback) and annotate
         each order with step, distance, order-type and ETA metadata.
    """

    def __init__(self):
        # Mean Earth radius used by haversine_distance.
        self.earth_radius = 6371  # Earth radius in km
        _cfg = get_config()
        # Initialize Realistic ETA Calculator (imported lazily, presumably
        # to avoid an import cycle -- TODO confirm).
        from app.services.routing.realistic_eta_calculator import RealisticETACalculator, get_time_of_day_category
        self.eta_calculator = RealisticETACalculator()
        self.get_traffic_condition = get_time_of_day_category
        # Speed settings (ML-tuned via DynamicConfig)
        self.avg_speed_kmh = float(_cfg.get("avg_speed_kmh"))
        # Road factor (haversine -> road distance multiplier, ML-tuned)
        self.road_factor = float(_cfg.get("road_factor"))
        # Google Maps API settings: batch distance lookups only run when a key is set.
        self.google_maps_api_key = os.getenv("GOOGLE_MAPS_API_KEY", "")
        self.use_google_maps = bool(self.google_maps_api_key)
        # Solver time limit (ML-tuned)
        self.search_time_limit_seconds = int(_cfg.get("search_time_limit_seconds"))
        # Initialize ID3 Behavior Analyzer (lazy import, see note above).
        from app.services.ml.behavior_analyzer import get_analyzer
        self.behavior_analyzer = get_analyzer()

    def haversine_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Calculate great circle distance between two points on Earth (in km).

        Returns 0.0 on any parse/math error instead of raising.
        """
        try:
            lat1, lon1, lat2, lon2 = map(math.radians, [float(lat1), float(lon1), float(lat2), float(lon2)])
            dlat = lat2 - lat1
            dlon = lon2 - lon1
            a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
            c = 2 * math.asin(math.sqrt(a))
            return self.earth_radius * c
        except Exception:
            return 0.0

    async def _get_google_maps_distances_batch(self, origin_lat: float, origin_lon: float,
                                               destinations: _List[tuple]) -> Dict[tuple, float]:
        """Get road distances for multiple destinations from Google Maps API. (Async, Parallel)

        Destinations are chunked 25 per request (Distance Matrix API limit)
        and the chunks are fetched concurrently. Failed chunks are logged
        and skipped, so the returned mapping may be partial.

        NOTE(review): the values stored are dicts
        {'distance': km, 'duration': minutes-or-None}, not floats as the
        return annotation suggests -- confirm the intended type.
        """
        if not self.use_google_maps or not destinations:
            return {}
        results = {}
        batch_size = 25
        chunks = [destinations[i:i + batch_size] for i in range(0, len(destinations), batch_size)]

        async def process_batch(batch):
            # One Distance Matrix request covering this chunk of destinations.
            batch_result = {}
            try:
                dest_str = "|".join([f"{lat},{lon}" for lat, lon in batch])
                url = "https://maps.googleapis.com/maps/api/distancematrix/json"
                params = {
                    "origins": f"{origin_lat},{origin_lon}",
                    "destinations": dest_str,
                    "key": self.google_maps_api_key,
                    "units": "metric"
                }
                async with httpx.AsyncClient(timeout=10.0) as client:
                    response = await client.get(url, params=params)
                    response.raise_for_status()
                    data = response.json()
                if data.get("status") == "OK":
                    # Single origin -> only the first row is relevant.
                    rows = data.get("rows", [])
                    if rows:
                        elements = rows[0].get("elements", [])
                        for idx, element in enumerate(elements):
                            if idx < len(batch):
                                dest_coord = batch[idx]
                                if element.get("status") == "OK":
                                    dist = element.get("distance", {}).get("value")
                                    dur = element.get("duration", {}).get("value")
                                    if dist is not None:
                                        # Convert metres -> km and seconds -> minutes.
                                        batch_result[dest_coord] = {
                                            'distance': dist / 1000.0,
                                            'duration': dur / 60.0 if dur else None
                                        }
            except Exception as e:
                logger.warning(f"Google Maps batch call failed: {e}")
            return batch_result

        batch_results_list = await asyncio.gather(*[process_batch(chunk) for chunk in chunks])
        for res in batch_results_list:
            results.update(res)
        return results

    def _solve_tsp_ortools(self, locations: _List[Tuple[float, float]], dist_matrix: _List[_List[float]]) -> _List[int]:
        """Solve TSP using Google OR-Tools.

        Returns node indices in visiting order, always starting at the
        depot (index 0). Falls back to greedy nearest-neighbour when
        OR-Tools is unavailable or no solution is found in time.
        """
        if not ORTOOLS_AVAILABLE:
            # Fallback to simple Greedy NN if OR-Tools not installed
            return self._solve_greedy(locations, dist_matrix)
        if not locations or len(locations) <= 1:
            return [0]
        manager = pywrapcp.RoutingIndexManager(len(locations), 1, 0)  # num_nodes, num_vehicles, depot
        routing = pywrapcp.RoutingModel(manager)

        def distance_callback(from_index, to_index):
            from_node = manager.IndexToNode(from_index)
            to_node = manager.IndexToNode(to_index)
            # Open TSP: Returning to the depot (index 0) has zero cost.
            # This ensures the solver optimizes for the path from start to last drop-off
            # rather than a closed circuit that might be reversed if the rider is on the "far" side.
            if to_node == 0:
                return 0
            # OR-Tools works with integers, so we scale by 1000 (meters)
            val = dist_matrix[from_node][to_node]
            return int(val * 1000)

        transit_callback_index = routing.RegisterTransitCallback(distance_callback)
        routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
        search_parameters = pywrapcp.DefaultRoutingSearchParameters()
        search_parameters.first_solution_strategy = (
            routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC
        )
        search_parameters.local_search_metaheuristic = (
            routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH
        )
        search_parameters.time_limit.seconds = self.search_time_limit_seconds
        solution = routing.SolveWithParameters(search_parameters)
        if solution:
            # Walk the solved route from the start node to the end sentinel.
            index = routing.Start(0)
            route = []
            while not routing.IsEnd(index):
                route.append(manager.IndexToNode(index))
                index = solution.Value(routing.NextVar(index))
            return route
        else:
            # No solution within the time limit: degrade gracefully.
            return self._solve_greedy(locations, dist_matrix)

    def _solve_greedy(self, locations, dist_matrix):
        """Simple Greedy Nearest Neighbor fallback (always starts at node 0)."""
        unvisited = set(range(1, len(locations)))
        curr = 0
        route = [0]
        while unvisited:
            nearest = min(unvisited, key=lambda x: dist_matrix[curr][x])
            route.append(nearest)
            unvisited.remove(nearest)
            curr = nearest
        return route

    def _cleanup_coords(self, lat: Any, lon: Any, ref_lat: float, ref_lon: float) -> Tuple[float, float]:
        """
        Heuristic to fix bad coordinates.
        1. Fixes lat==lon typo.
        2. Fixes missing negative signs if needed (not needed for India).
        3. Projects outlier > 500km to reference (centroid).

        Returns (0.0, 0.0) when inputs do not parse as floats; passes
        zeroed components through untouched (treated as "missing").
        """
        try:
            lat = float(lat)
            lon = float(lon)
        except:
            return 0.0, 0.0
        if lat == 0 or lon == 0:
            return lat, lon
        # 1. Check strict equality (typo)
        if abs(lat - lon) < 0.0001:
            if ref_lon != 0:
                # If reference is available, assume lat is correct and fix lon
                # (Common error: copy lat to lon field)
                return lat, ref_lon
        # 2. Check general outlier (e.g. 500km away)
        if ref_lat != 0 and ref_lon != 0:
            dist = self.haversine_distance(lat, lon, ref_lat, ref_lon)
            if dist > 500:
                # Returning reference prevents map explosion
                return ref_lat, ref_lon
        return lat, lon

    async def optimize_provider_payload(self, orders: _List[Dict[str, Any]], start_coords: Optional[tuple] = None) -> _List[Dict[str, Any]]:
        """Optimize delivery route and add step metrics (OR-Tools).

        Args:
            orders: Provider order dicts (reads deliverylat/long,
                pickuplat/lon(g), kms and date fields; rewrites
                coordinates and metrics as strings for the Go consumer).
            start_coords: Optional (lat, lon) rider start point; falls
                back to the first pickup in the orders, then the
                delivery centroid.

        Returns:
            A new list of order dicts in visiting order with step,
            previouskms, cumulativekms, actualkms, kms, ordertype and
            eta fields populated.
        """
        if not orders:
            return []
        # Deep copy (one level: each order dict is copied)
        orders = [dict(order) for order in orders]
        # 0. KALMAN FILTER - Smooth noisy delivery GPS coordinates
        orders = smooth_order_coordinates(orders)
        # Helpers
        def _to_float(v: Any) -> float:
            # Lenient float parse; anything unparseable counts as 0.0.
            try: return float(v)
            except: return 0.0
        def _normalize_dt(val: Any) -> str:
            # Normalize known datetime formats to "YYYY-mm-dd HH:MM:SS";
            # unknown formats pass through unchanged.
            if val in (None, "", 0): return ""
            s = str(val).strip()
            for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%d %H:%M:%S"):
                try: return datetime.strptime(s, fmt).strftime("%Y-%m-%d %H:%M:%S")
                except: pass
            return s
        # 1. PREPARE COORDINATES & CENTROID
        valid_lats = []
        valid_lons = []
        for o in orders:
            lat = _to_float(o.get("deliverylat"))
            lon = _to_float(o.get("deliverylong"))
            if lat != 0 and lon != 0:
                valid_lats.append(lat)
                valid_lons.append(lon)
        centroid_lat = sum(valid_lats)/len(valid_lats) if valid_lats else 0.0
        centroid_lon = sum(valid_lons)/len(valid_lons) if valid_lons else 0.0
        # 2. DETERMINE START LOCATION (With Fix)
        start_lat, start_lon = 0.0, 0.0
        # Try explicit start_coords first
        if start_coords and len(start_coords) == 2:
            try:
                start_lat, start_lon = float(start_coords[0]), float(start_coords[1])
            except: pass
        # Fallback to pickup location in orders
        # NOTE(review): only start_lat is compared to 0 here and below, so a
        # start exactly on the equator would be treated as unset -- confirm.
        if start_lat == 0:
            for o in orders:
                plat = _to_float(o.get("pickuplat"))
                plon = _to_float(o.get("pickuplon") or o.get("pickuplong"))
                if plat != 0:
                    start_lat, start_lon = plat, plon
                    break
        # Fallback to centroid
        if start_lat == 0:
            start_lat, start_lon = centroid_lat, centroid_lon
        # FIX BAD START COORDINATES
        start_lat, start_lon = self._cleanup_coords(start_lat, start_lon, centroid_lat, centroid_lon)
        # 3. BUILD LOCATIONS LIST FOR SOLVER
        # Index 0 is Start (Depot), 1..N are orders
        locations = [(start_lat, start_lon)]
        points_map = []  # Maps solver index 1..N back to original order index
        for idx, order in enumerate(orders):
            lat = _to_float(order.get("deliverylat"))
            lon = _to_float(order.get("deliverylong"))
            # Project coordinates and ensure they are strings for Go compatibility
            lat, lon = self._cleanup_coords(lat, lon, centroid_lat, centroid_lon)
            order_str_lat, order_str_lon = str(lat), str(lon)
            order["deliverylat"] = order_str_lat
            order["deliverylong"] = order_str_lon
            if "droplat" in order: order["droplat"] = order_str_lat
            if "droplon" in order: order["droplon"] = order_str_lon
            locations.append((lat, lon))
            points_map.append(idx)
        # 4. COMPUTE DISTANCE MATRIX (Vectorized with Arrow/NumPy)
        # road_factor is now ML-tuned (was hardcoded 1.3)
        lats = np.array([loc[0] for loc in locations])
        lons = np.array([loc[1] for loc in locations])
        dist_matrix = calculate_haversine_matrix_vectorized(lats, lons) * self.road_factor
        # 5. RISK-AWARE COST MATRIX (ID3 INTELLIGENCE)
        # Apply Risk Penalties to the matrix before solving
        cost_matrix = dist_matrix.copy()
        traffic = self.get_traffic_condition()
        num_locs = len(locations)
        risk_penalty_count = 0
        # NOTE(review): O(N^2) predict() calls -- fine for small routes,
        # may be slow for large N.
        for i in range(num_locs):
            for j in range(num_locs):
                if i == j: continue
                # Predict success risk for this specific leg
                dist_km = dist_matrix[i][j]
                prediction = self.behavior_analyzer.predict(
                    distance_km=dist_km,
                    timestamp_or_band=traffic,
                )
                if prediction.get("label") == "RISK":  # High Risk predicted by ID3
                    # Add 25% penalty to distance to discourage this leg
                    cost_matrix[i][j] *= 1.25
                    risk_penalty_count += 1
        if risk_penalty_count > 0:
            logger.info(f"ID3 Intelligence: Applied {risk_penalty_count} Risk Penalties to optimize for delivery safety.")
        # 6. SOLVE TSP (solved on the risk-penalized cost matrix; reported
        # distances below come from the unpenalized dist_matrix)
        route_indices = self._solve_tsp_ortools(locations, cost_matrix)
        # Remove 0 (depot)
        optimized_order_indices = [i for i in route_indices if i != 0]
        # 6. BUILD RESULT
        # NOTE(review): section numbering repeats "6." (should be 7+).
        result = []
        cumulative_dist = 0.0
        # Track previous location (starts at 0)
        prev_idx = 0
        for step_num, solver_idx in enumerate(optimized_order_indices, start=1):
            order_idx = points_map[solver_idx - 1]
            order = dict(orders[order_idx])
            # Clean fields
            for k in ("step", "previouskms", "cumulativekms", "eta", "actualkms", "ordertype"):
                order.pop(k, None)
            # Normalize dates
            for field in ["orderdate", "deliverytime", "created"]:
                if field in order: order[field] = _normalize_dt(order.get(field))
            # Distance for this leg
            step_dist = dist_matrix[prev_idx][solver_idx]
            cumulative_dist += step_dist
            # Metadata (Step metrics are integers in the Go struct)
            order["step"] = int(step_num)
            order["previouskms"] = int(0 if step_num == 1 else int(round(step_dist)))
            order["cumulativekms"] = int(round(cumulative_dist))
            # 7. METRICS (Calculate actual distance, prioritize provider input)
            plat, plon = start_lat, start_lon
            if plat == 0: plat, plon = _to_float(order.get("pickuplat")), _to_float(order.get("pickuplon") or order.get("pickuplong"))
            dlat, dlon = locations[solver_idx]
            # Baseline: Haversine * 1.3 (estimated road factor)
            # NOTE(review): 1.3 is hardcoded here, unlike the ML-tuned
            # self.road_factor used for the solver matrix -- confirm intended.
            true_dist = self.haversine_distance(plat, plon, dlat, dlon) * 1.3
            provided_kms = order.get("kms")
            if provided_kms not in (None, "", 0, "0"):
                try:
                    # If provider gave us a distance, respect it as the 'actual' distance
                    true_dist = float(provided_kms)
                except:
                    pass
            order["actualkms"] = str(round(true_dist, 2))
            order["kms"] = str(provided_kms) if provided_kms else str(int(round(true_dist)))
            # Financial metrics - keeping as numbers for calculations
            if "rider_charge" in order: order["rider_charge"] = round(float(order["rider_charge"]), 2)
            if "profit" in order: order["profit"] = round(float(order["profit"]), 2)
            # Type & ETA (ordertype by distance band: <=5km Economy,
            # <=12km Premium, otherwise Risky)
            order["ordertype"] = "Economy" if true_dist <= 5 else "Premium" if true_dist <= 12 else "Risky"
            traffic = self.get_traffic_condition()
            eta = self.eta_calculator.calculate_eta(
                distance_km=step_dist,
                is_first_order=(step_num == 1),
                order_type=order["ordertype"],
                time_of_day=traffic
            )
            order["eta"] = str(eta)
            result.append(order)
            prev_idx = solver_idx
        return result
def optimize_route(orders: _List[Dict[str, Any]]) -> _List[Dict[str, Any]]:
    """Synchronous wrapper around RouteOptimizer.optimize_provider_payload.

    Args:
        orders: Provider order dicts to optimize.

    Returns:
        The optimized, annotated order list. Returns [] for empty input,
        and [] (with a warning) when called from inside a running event
        loop, where blocking on the coroutine would deadlock.
    """
    if not orders:
        # Fast path: nothing to optimize; avoids constructing the
        # config/analyzer-backed optimizer entirely.
        return []
    try:
        # Detect a live loop without the deprecated get_event_loop().
        asyncio.get_running_loop()
        running = True
    except RuntimeError:
        running = False
    if running:
        # Preserves the original fallback, but no longer silently:
        # async callers should await optimize_provider_payload directly.
        logging.getLogger(__name__).warning(
            "optimize_route called from a running event loop; returning []"
        )
        return []
    optimizer = RouteOptimizer()
    # asyncio.run creates and tears down a fresh loop, replacing the
    # deprecated get_event_loop()/run_until_complete dance.
    return asyncio.run(optimizer.optimize_provider_payload(orders))

View File

@@ -0,0 +1,196 @@
import logging
from typing import List, Dict, Any, Optional
logger = logging.getLogger(__name__)
class ZoneService:
    """
    Service to classify orders and riders into geographic zones.
    Defaulting to Coimbatore logic as per user context.

    Zones are compass quadrants ("North", "South East", ...) around a
    fixed city centre, with a small "Central" dead-band around it.
    """
    # Approximate Center of Coimbatore (Gandhipuram/Bus Stand area)
    CENTER_LAT = 11.0168
    CENTER_LON = 76.9558

    def __init__(self):
        pass

    def determine_zone(self, lat: float, lon: float, pincode: Optional[str] = None) -> str:
        """
        Determine the zone (North, South, East, West, Central or a
        combination such as "North East") based on coordinates.

        Args:
            lat/lon: Delivery coordinates; a zero component yields "Unknown".
            pincode: Currently unused; reserved for pincode-based zoning.

        Returns:
            Zone name string.
        """
        if lat == 0 or lon == 0:
            return "Unknown"
        lat_diff = lat - self.CENTER_LAT
        lon_diff = lon - self.CENTER_LON
        # Simple Quadrant Logic:
        # North: +Lat, South: -Lat, East: +Lon, West: -Lon.
        # Define a small central buffer (0.01 degrees ~ 1.1km)
        buffer = 0.010
        is_north = lat_diff > buffer
        is_south = lat_diff < -buffer
        is_east = lon_diff > buffer
        is_west = lon_diff < -buffer
        zone_parts = []
        if is_north: zone_parts.append("North")
        elif is_south: zone_parts.append("South")
        if is_east: zone_parts.append("East")
        elif is_west: zone_parts.append("West")
        if not zone_parts:
            return "Central"
        return " ".join(zone_parts)

    def group_by_zones(self, flat_orders: List[Dict[str, Any]], unassigned_orders: List[Dict[str, Any]] = None, fuel_charge: float = 2.5, base_pay: float = 30.0) -> Dict[str, Any]:
        """
        Group a flat list of optimized orders into Zones -> Riders -> Orders.
        Calculates profit per order and per zone (orders are annotated
        in-place with zone_name, rider_charge, profit and ordertype).

        Args:
            flat_orders: Orders already assigned to riders.
            unassigned_orders: Orders without a rider (zoned separately).
            fuel_charge: Per-km component of the rider payment.
            base_pay: Fixed per-order component of the rider payment.

        Returns:
            {"detailed_zones": [...], "zone_analysis": [...]} where
            zone_analysis is a flat per-zone metrics summary.
        """
        zones_map = {}  # zone name -> aggregation bucket
        unassigned_orders = unassigned_orders or []
        # Merge both so every order gets zoned; the flag remembers whether
        # the order belongs under a rider or in the unassigned list.
        all_to_process = []
        for o in flat_orders:
            all_to_process.append((o, True))
        for o in unassigned_orders:
            all_to_process.append((o, False))
        for order, is_assigned in all_to_process:
            # 1. Extract Coords (prefer the delivery location: where the customer is)
            try:
                lat = float(order.get("deliverylat") or order.get("droplat") or 0)
                lon = float(order.get("deliverylong") or order.get("droplon") or 0)
                pincode = str(order.get("deliveryzip") or "")
            except (TypeError, ValueError):
                # Unparseable coordinates -> zone becomes "Unknown" below.
                lat, lon, pincode = 0, 0, ""
            # 2. Get Zone
            zone_name = self.determine_zone(lat, lon, pincode)
            order["zone_name"] = zone_name
            # 3. Initialize Zone Bucket
            if zone_name not in zones_map:
                zones_map[zone_name] = {
                    "riders_map": {},
                    "total_orders": 0,
                    "assigned_orders": 0,
                    "unassigned_orders": [],
                    "total_kms": 0.0,
                    "total_profit": 0.0
                }
            # 4. Add to Rider bucket within Zone
            rider_id = order.get("userid") or order.get("_id")
            # Track kms and profit for this zone; any unparseable numeric
            # field skips the financials for this order only.
            try:
                # 'actualkms' is preferred for delivery distance
                dist = float(order.get("actualkms", order.get("previouskms", 0)))
                zones_map[zone_name]["total_kms"] += dist
                # Individual charge for this order: Fixed Base + Variable Distance
                order_amount = float(order.get("orderamount") or order.get("deliveryamount") or 0)
                rider_payment = base_pay + (dist * fuel_charge)
                profit = order_amount - rider_payment
                order["rider_charge"] = round(rider_payment, 2)
                order["profit"] = round(profit, 2)
                # Profit-based classification (Order Type)
                if profit <= 0:
                    order["ordertype"] = "Loss"
                elif profit <= 5:
                    order["ordertype"] = "Risky"
                elif profit <= 10:
                    order["ordertype"] = "Economy"
                else:
                    order["ordertype"] = "Premium"
                zones_map[zone_name]["total_profit"] += profit
            except (TypeError, ValueError):
                pass
            # If strictly unassigned order (no rider), put in unassigned
            if not is_assigned:
                zones_map[zone_name]["unassigned_orders"].append(order)
            else:
                str_rid = str(rider_id)
                if str_rid not in zones_map[zone_name]["riders_map"]:
                    zones_map[zone_name]["riders_map"][str_rid] = {
                        "rider_details": {
                            "id": str_rid,
                            "name": order.get("username", "Unknown")
                        },
                        "orders": []
                    }
                zones_map[zone_name]["riders_map"][str_rid]["orders"].append(order)
                zones_map[zone_name]["assigned_orders"] += 1
            zones_map[zone_name]["total_orders"] += 1
        # 5. Restructure for API Response
        output_zones = []
        zone_metrics = []
        sorted_zone_names = sorted(zones_map.keys())
        for z_name in sorted_zone_names:
            z_data = zones_map[z_name]
            # Flatten riders map
            riders_list = []
            for r_id, r_data in z_data["riders_map"].items():
                riders_list.append({
                    "rider_id": r_data["rider_details"]["id"],
                    "rider_name": r_data["rider_details"]["name"],
                    "orders_count": len(r_data["orders"]),
                    "orders": r_data["orders"]
                })
            # Create the flat metric summary
            metrics = {
                "zone_name": z_name,
                "total_orders": z_data["total_orders"],
                "assigned_orders": z_data["assigned_orders"],
                "unassigned_orders_count": len(z_data["unassigned_orders"]),
                "active_riders_count": len(riders_list),
                "total_delivery_kms": round(z_data["total_kms"], 2),
                "total_profit": round(z_data["total_profit"], 2)
            }
            zone_metrics.append(metrics)
            # Create the detailed zone object with flattened metrics
            zone_obj = {
                "zone_name": z_name,
                "total_orders": metrics["total_orders"],
                "active_riders_count": metrics["active_riders_count"],
                "assigned_orders": metrics["assigned_orders"],
                "unassigned_orders_count": metrics["unassigned_orders_count"],
                "total_delivery_kms": metrics["total_delivery_kms"],
                "total_profit": metrics["total_profit"],
                "riders": riders_list,
                "unassigned_orders": z_data["unassigned_orders"]
            }
            output_zones.append(zone_obj)
        return {
            "detailed_zones": output_zones,
            "zone_analysis": zone_metrics
        }

File diff suppressed because it is too large Load Diff

36
docker-compose.yml Normal file
View File

@@ -0,0 +1,36 @@
version: "3.9"
networks:
frontend:
external: true
services:
routes_api:
build:
context: .
dockerfile: Dockerfile
image: routes-api:latest
container_name: routes_api
restart: unless-stopped
environment:
- UVICORN_WORKERS=2
- REDIS_URL=redis://:${REDIS_PASSWORD}@routes_redis:6379/0
# Optional: Set cache TTL in seconds (default: 300 = 5 min, 86400 = 24h)
# Uncomment and set in .env file: REDIS_CACHE_TTL_SECONDS=86400
# - REDIS_CACHE_TTL_SECONDS=${REDIS_CACHE_TTL_SECONDS}
# Google Maps API key for accurate road distance calculation (actualkms)
# Set in .env file: GOOGLE_MAPS_API_KEY=your_api_key_here
- GOOGLE_MAPS_API_KEY=${GOOGLE_MAPS_API_KEY}
labels:
- traefik.enable=true
- traefik.http.routers.routes_api.rule=Host(`routes.workolik.com`)
- traefik.http.routers.routes_api.entrypoints=websecure
- traefik.http.routers.routes_api.tls.certresolver=letsencrypt
- traefik.http.services.routes_api.loadbalancer.server.port=8002
- traefik.docker.network=frontend
volumes:
- ./ml_data:/app/ml_data
- ./rider_history.pkl:/app/rider_history.pkl
- ./rider_active_state.pkl:/app/rider_active_state.pkl
networks:
- frontend

11
docker-entrypoint.sh Normal file
View File

@@ -0,0 +1,11 @@
#!/bin/sh
# Container entrypoint: launches the FastAPI app under uvicorn on :8002.
set -e
# Get number of workers from environment or default to 1
WORKERS=${UVICORN_WORKERS:-1}
echo "Starting Route Optimization API with ${WORKERS} worker(s)..."
# Start uvicorn; `exec` replaces the shell so signals (SIGTERM from
# docker stop) reach uvicorn directly for a clean shutdown.
exec uvicorn app.main:app --host 0.0.0.0 --port 8002 --workers ${WORKERS}

18
requirements.txt Normal file
View File

@@ -0,0 +1,18 @@
fastapi
uvicorn
python-dotenv
requests
numpy
pandas
scikit-learn
scipy
openpyxl
xlsxwriter
httpx
ortools
pyarrow
# ML Hypertuning
xgboost>=2.0.0
optuna>=3.5.0
sqlalchemy>=2.0.0
apscheduler>=3.10.0

173
run_simulation.py Normal file
View File

@@ -0,0 +1,173 @@
import json
import logging
import asyncio
from app.services.core.assignment_service import AssignmentService
from app.services.routing.route_optimizer import RouteOptimizer
from app.core.arrow_utils import save_optimized_route_parquet
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Load Environment Variables (best-effort: the simulation still runs
# without python-dotenv, relying on whatever is already in the OS env)
try:
    from dotenv import load_dotenv
    load_dotenv()
    print("✅ Loaded .env file")
except ImportError:
    print("⚠️ python-dotenv not installed, skipping .env load")
async def run_simulation():
    """Run an offline assignment simulation against route.json.

    Pipeline: load orders from route.json (stripping any previous
    assignment fields so they look freshly created), build a mocked rider
    fleet at known Coimbatore starting points, run the assignment service,
    optimize each rider's route concurrently, group results by zone, and
    persist everything to output.json (plus an optional Parquet export).
    """
    print("🚀 Starting Logic Simulation (High Efficiency Mode + K-wMeans)...")
    # 1. Load Orders (using route.json as source)
    try:
        with open('route.json', 'r', encoding='utf-8') as f:
            route_data = json.load(f)
    except FileNotFoundError:
        print("❌ route.json not found.")
        return
    raw_orders = route_data.get('details', [])
    # Strip assignment data to simulate fresh orders
    clean_orders = []
    for o in raw_orders:
        o_copy = o.copy()
        for key in ('userid', 'step', 'cumulativekms', 'eta'):
            o_copy.pop(key, None)
        clean_orders.append(o_copy)
    print(f"📦 Loaded {len(clean_orders)} orders.")
    # 2. Mock Riders — the full active rider fleet (10 riders)
    rider_ids = [753, 883, 1114, 1271, 1116, 1096, 897, 950, 1272, 1133]
    # Rider Starting Locations (Based on "Mostly Available Location")
    # Coordinates approximated for Coimbatore areas
    rider_locations = {
        1116: (11.0067, 76.9558),  # VIVEK ANANDAN: RS PURAM
        1096: (11.0450, 76.9000),  # NARAYANASAMY: VADAVALI
        897: (11.0430, 76.9380),   # VARUN EDWARD: KAVUNDAMPALAYAM
        950: (11.0330, 76.9800),   # JAYASABESH: GANAPATHY
        1114: (11.0450, 77.0000),  # TAMILAZHAGAN: GANDHIMA NAGAR
        883: (11.0200, 77.0000),   # RAJAN: PEELAMEDU
        1272: (10.9950, 77.0000),  # MUTHURAJA: RAMANATHAPURAM
        753: (11.0000, 77.0300),   # MANIKANDAN: SINGANALLUR
        1133: (11.0067, 76.9558),  # THATCHINAMOORTHI: RS PURAM (Covering Kavundampalayam to Kovaipudur)
        1271: (11.0067, 76.9558)   # Legacy ID for Thatchinamoorthi
    }
    riders = []
    for rid in rider_ids:
        # Default to a central point when the rider has no known home area.
        lat, lon = rider_locations.get(rid, (11.0168, 76.9558))
        riders.append({
            "userid": rid,
            "status": "idle",
            "onduty": 1,
            "latitude": str(lat),
            "longitude": str(lon)
        })
    # 3. Run Assignment
    assignment_service = AssignmentService()
    try:
        assignments, unassigned_orders = assignment_service.assign_orders(clean_orders, riders)
    except Exception as e:
        # Best-effort simulation: report the failure with a traceback and bail.
        print(f"❌ Error during assignment: {e}")
        import traceback
        traceback.print_exc()
        return
    # 4. Generate Output (Mirroring API Logic)
    optimizer = RouteOptimizer()
    output_details = []
    distribution = {}
    assigned_count = 0
    # Prepare one route-optimization coroutine per rider with orders,
    # then run them all concurrently.
    tasks = []
    task_rids = []
    for rid, orders in assignments.items():
        if not orders:
            continue
        distribution[rid] = len(orders)
        assigned_count += len(orders)
        mock_rider = next((r for r in riders if r["userid"] == rid), None)
        start_coords = None
        if mock_rider:
            start_coords = (float(mock_rider['latitude']), float(mock_rider['longitude']))
        tasks.append(optimizer.optimize_provider_payload(orders, start_coords=start_coords))
        task_rids.append(rid)
    if tasks:
        results = await asyncio.gather(*tasks)
        for rid, optimized_route in zip(task_rids, results):
            mock_rider = next((r for r in riders if r["userid"] == rid), {})
            # NOTE(review): mocked riders carry no username/contactno keys,
            # so these default to "" — confirm against real rider payloads.
            r_name = mock_rider.get("username", "")
            r_contact = mock_rider.get("contactno", "")
            total_kms = 0.0
            if optimized_route:
                try:
                    # Total distance ridden is the largest cumulative reading.
                    total_kms = max(
                        float(o.get("cumulativekms", 0)) for o in optimized_route
                    )
                except (TypeError, ValueError):
                    # Fallback when cumulative data is missing/non-numeric:
                    # sum the per-leg distances instead.
                    total_kms = sum(
                        float(o.get("actualkms", o.get("kms", 0))) for o in optimized_route
                    )
            # Stamp rider identity and totals onto every order in the route.
            for o in optimized_route:
                o['userid'] = rid
                o['username'] = r_name
                o['rider'] = r_name
                o['ridercontactno'] = r_contact
                o['riderkms'] = str(round(total_kms, 2))
                output_details.append(o)
    # 5. Zone Processing
    fuel_charge = 2.5
    base_pay = 30.0
    from app.services.routing.zone_service import ZoneService
    zone_service = ZoneService()
    zone_data = zone_service.group_by_zones(output_details, unassigned_orders, fuel_charge=fuel_charge, base_pay=base_pay)
    # 6. Save output.json
    output_data = {
        "message": "Success",
        "status": True,
        "details": output_details,
        "zone_summary": zone_data["zone_analysis"],
        "zones": zone_data["detailed_zones"],
        "meta": {
            "total_orders": len(clean_orders),
            "total_riders": len(rider_ids),
            "assigned_orders": assigned_count,
            "unassigned_orders": len(unassigned_orders),
            "total_profit": round(sum(z["total_profit"] for z in zone_data["zone_analysis"]), 2),
            "unassigned_details": [
                {"id": o.get("orderid") or o.get("_id"), "reason": o.get("unassigned_reason")}
                for o in unassigned_orders
            ],
            "distribution_summary": distribution
        }
    }
    with open('output.json', 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=4)
    # Apache Arrow / Parquet Export (optional — never fails the simulation)
    try:
        save_optimized_route_parquet(output_details, 'output.parquet')
        print("📊 Also saved results to output.parquet (Apache Arrow format)")
    except Exception as e:
        print(f"⚠️ Could not save Parquet: {e}")
    print("✅ Simulation Complete. Saved to output.json")
    print("📊 Distribution Summary:")
    print(json.dumps(distribution, indent=4))
# Entry point: run the full simulation when executed directly as a script.
if __name__ == "__main__":
    asyncio.run(run_simulation())

24
start.py Normal file
View File

@@ -0,0 +1,24 @@
#!/usr/bin/env python3
"""Mobile-optimized startup script for the Delivery Route Optimization API."""
import uvicorn
def main():
    """Start the mobile-optimized API server."""
    # Startup banner for whoever launches the service from a terminal.
    banner = (
        "📱 Starting Mobile Delivery Route Optimization API...",
        "⚡ Optimized for real-time mobile apps",
        "🎯 Default algorithm: GREEDY (ultra-fast)",
        "📚 Documentation: http://localhost:8002/docs",
        "=" * 60,
    )
    for line in banner:
        print(line)
    # Development-friendly defaults: auto-reload on code changes,
    # access logging on, bound to all interfaces on port 8002.
    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=8002,
        reload=True,
        access_log=True,
        log_level="info",
    )
# Entry point: allow `python start.py` to launch the server directly.
if __name__ == "__main__":
    main()