initial project setup with README and ignore
This commit is contained in:
58
.dockerignore
Normal file
58
.dockerignore
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
|
*.egg-info/
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
|
||||||
|
# Virtual environments
|
||||||
|
venv/
|
||||||
|
env/
|
||||||
|
ENV/
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
.pytest_cache/
|
||||||
|
.coverage
|
||||||
|
htmlcov/
|
||||||
|
|
||||||
|
# Documentation
|
||||||
|
*.md
|
||||||
|
!README.md
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Git
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
|
||||||
|
# Docker
|
||||||
|
Dockerfile
|
||||||
|
docker-compose.yml
|
||||||
|
.dockerignore
|
||||||
|
|
||||||
|
# Test files
|
||||||
|
test_*.py
|
||||||
|
*_test.py
|
||||||
|
|
||||||
|
# Temporary files
|
||||||
|
*.tmp
|
||||||
|
*.bak
|
||||||
|
|
||||||
12
.gitignore
vendored
Normal file
12
.gitignore
vendored
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
.env
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.pkl
|
||||||
|
ml_data/
|
||||||
|
output.json
|
||||||
|
route.json
|
||||||
|
ml_params_output.txt
|
||||||
|
idea.txt
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
25
Dockerfile
Normal file
25
Dockerfile
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# syntax=docker/dockerfile:1
|
||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||||
|
PYTHONUNBUFFERED=1 \
|
||||||
|
PIP_NO_CACHE_DIR=1
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install dependencies first
|
||||||
|
COPY requirements.txt ./
|
||||||
|
RUN pip install --upgrade pip \
|
||||||
|
&& pip install -r requirements.txt
|
||||||
|
|
||||||
|
# Copy application code
|
||||||
|
COPY app ./app
|
||||||
|
COPY start.py ./start.py
|
||||||
|
COPY docker-entrypoint.sh ./docker-entrypoint.sh
|
||||||
|
|
||||||
|
# Make entrypoint executable
|
||||||
|
RUN chmod +x docker-entrypoint.sh
|
||||||
|
|
||||||
|
EXPOSE 8002
|
||||||
|
|
||||||
|
ENTRYPOINT ["./docker-entrypoint.sh"]
|
||||||
15
README.md
Normal file
15
README.md
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Route Rider API
|
||||||
|
|
||||||
|
Centralized Routing Engine for Rider Assignments.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
1. Install dependencies:
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Run the application:
|
||||||
|
```bash
|
||||||
|
python start.py
|
||||||
|
```
|
||||||
1
app/__init__.py
Normal file
1
app/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# Delivery Route Optimization API
|
||||||
1
app/config/__init__.py
Normal file
1
app/config/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Configuration package for mobile delivery optimization."""
|
||||||
204
app/config/dynamic_config.py
Normal file
204
app/config/dynamic_config.py
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
"""
|
||||||
|
Dynamic Configuration - rider-api
|
||||||
|
|
||||||
|
Replaces all hardcoded hyperparameters with DB-backed values.
|
||||||
|
The ML hypertuner writes optimal values here; services read from here.
|
||||||
|
|
||||||
|
Fallback: If DB is unavailable or no tuned values exist, defaults are used.
|
||||||
|
This means zero risk - the system works day 1 with no data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# --- DB Path ------------------------------------------------------------------
|
||||||
|
_DB_PATH = os.getenv("ML_DB_PATH", "ml_data/ml_store.db")
|
||||||
|
|
||||||
|
|
||||||
|
# --- Hard Defaults (What the system used before ML) ---------------------------
|
||||||
|
DEFAULTS: Dict[str, Any] = {
|
||||||
|
# System Strategy / Prompt
|
||||||
|
"ml_strategy": "balanced",
|
||||||
|
|
||||||
|
# AssignmentService
|
||||||
|
"max_pickup_distance_km": 10.0,
|
||||||
|
"max_kitchen_distance_km": 3.0,
|
||||||
|
"max_orders_per_rider": 12,
|
||||||
|
"ideal_load": 6,
|
||||||
|
"workload_balance_threshold": 0.7,
|
||||||
|
"workload_penalty_weight": 100.0,
|
||||||
|
"distance_penalty_weight": 2.0,
|
||||||
|
"preference_bonus": -15.0,
|
||||||
|
"home_zone_bonus_4km": -3.0,
|
||||||
|
"home_zone_bonus_2km": -5.0,
|
||||||
|
"emergency_load_penalty": 3.0, # km penalty per order in emergency assign
|
||||||
|
|
||||||
|
# RouteOptimizer
|
||||||
|
"search_time_limit_seconds": 5,
|
||||||
|
"avg_speed_kmh": 18.0,
|
||||||
|
"road_factor": 1.3,
|
||||||
|
|
||||||
|
# ClusteringService
|
||||||
|
"cluster_radius_km": 3.0,
|
||||||
|
|
||||||
|
# KalmanFilter
|
||||||
|
"kalman_process_noise": 1e-4,
|
||||||
|
"kalman_measurement_noise": 0.01,
|
||||||
|
|
||||||
|
# RealisticETACalculator
|
||||||
|
"eta_pickup_time_min": 3.0,
|
||||||
|
"eta_delivery_time_min": 4.0,
|
||||||
|
"eta_navigation_buffer_min": 1.5,
|
||||||
|
"eta_short_trip_factor": 0.8, # speed multiplier for dist < 2km
|
||||||
|
"eta_long_trip_factor": 1.1, # speed multiplier for dist > 8km
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class DynamicConfig:
    """
    Thread-safe, DB-backed configuration store.

    NOTE(review): no explicit lock is visible in this class; the singleton is
    created eagerly at import time (module-level ``_cfg``), which avoids most
    first-construction races in practice - confirm if multi-threaded
    construction before import completes is possible.

    Usage:
        cfg = DynamicConfig()
        max_dist = cfg.get("max_pickup_distance_km")
        all_params = cfg.get_all()
    """

    # Class-level slot holding the one instance (None until first construction).
    _instance: Optional["DynamicConfig"] = None

    def __new__(cls) -> "DynamicConfig":
        """Singleton - one config per process."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # Flag consumed by __init__ so repeated construction is a no-op.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self) -> None:
        # __new__ always returns the same object, so __init__ runs on every
        # DynamicConfig() call; bail out after the first full initialization.
        if self._initialized:
            return
        self._initialized = True
        self._cache: Dict[str, Any] = {}  # ML-tuned values loaded from the DB
        self._last_loaded: Optional[datetime] = None  # timestamp of last DB read
        self._ensure_db()
        self._load()

    # --------------------------------------------------------------------------
    # Public API
    # --------------------------------------------------------------------------

    def get(self, key: str, default: Any = None) -> Any:
        """Get a config value. Returns ML-tuned value if available, else default."""
        self._maybe_reload()
        val = self._cache.get(key)
        # NOTE: a stored value of None is indistinguishable from "missing" and
        # falls through to the fallback below.
        if val is not None:
            return val
        # Caller-supplied default wins over the module DEFAULTS table.
        fallback = default if default is not None else DEFAULTS.get(key)
        return fallback

    def get_all(self) -> Dict[str, Any]:
        """Return all current config values (ML-tuned + defaults for missing keys)."""
        self._maybe_reload()
        # Start from DEFAULTS, then overlay tuned values so they take priority.
        result = dict(DEFAULTS)
        result.update(self._cache)
        return result

    def set(self, key: str, value: Any, source: str = "manual") -> None:
        """Write a config value to DB (used by hypertuner)."""
        try:
            # Create the DB directory lazily; "." guard covers bare filenames.
            os.makedirs(os.path.dirname(_DB_PATH) or ".", exist_ok=True)
            conn = sqlite3.connect(_DB_PATH)
            # SQLite upsert: insert or overwrite the row for this key.
            conn.execute("""
                INSERT INTO dynamic_config (key, value, source, updated_at)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(key) DO UPDATE SET
                    value=excluded.value,
                    source=excluded.source,
                    updated_at=excluded.updated_at
            """, (key, json.dumps(value), source, datetime.utcnow().isoformat()))
            # NOTE(review): datetime.utcnow() is deprecated in Python 3.12+;
            # consider datetime.now(timezone.utc) in a future change.
            conn.commit()
            conn.close()
            # Keep the in-process cache coherent with what was just persisted.
            self._cache[key] = value
            logger.info(f"[DynamicConfig] Set {key}={value} (source={source})")
        except Exception as e:
            # Best-effort persistence: failures are logged, never raised.
            logger.error(f"[DynamicConfig] Failed to set {key}: {e}")

    def set_bulk(self, params: Dict[str, Any], source: str = "ml_hypertuner") -> None:
        """Write multiple config values at once (called after each Optuna study)."""
        # One DB round-trip per key via set(); fine for the small param counts here.
        for key, value in params.items():
            self.set(key, value, source=source)
        logger.info(f"[DynamicConfig] Bulk update: {len(params)} params from {source}")

    def reset_to_defaults(self) -> None:
        """Wipe all ML-tuned values, revert to hardcoded defaults."""
        try:
            conn = sqlite3.connect(_DB_PATH)
            conn.execute("DELETE FROM dynamic_config")
            conn.commit()
            conn.close()
            # Clearing the cache makes get()/get_all() fall back to DEFAULTS.
            self._cache.clear()
            logger.warning("[DynamicConfig] Reset to factory defaults.")
        except Exception as e:
            logger.error(f"[DynamicConfig] Reset failed: {e}")

    # --------------------------------------------------------------------------
    # Internal
    # --------------------------------------------------------------------------

    def _ensure_db(self) -> None:
        """Create the backing SQLite file and dynamic_config table if missing."""
        try:
            os.makedirs(os.path.dirname(_DB_PATH) or ".", exist_ok=True)
            conn = sqlite3.connect(_DB_PATH)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS dynamic_config (
                    key TEXT PRIMARY KEY,
                    value TEXT NOT NULL,
                    source TEXT DEFAULT 'manual',
                    updated_at TEXT
                )
            """)
            conn.commit()
            conn.close()
        except Exception as e:
            # A failed init degrades to defaults-only operation (see _load).
            logger.error(f"[DynamicConfig] DB init failed: {e}")

    def _load(self) -> None:
        """Replace the in-memory cache with all rows currently in the DB."""
        try:
            conn = sqlite3.connect(_DB_PATH)
            rows = conn.execute("SELECT key, value FROM dynamic_config").fetchall()
            conn.close()
            self._cache = {}
            for key, raw in rows:
                # Values are stored JSON-encoded by set(); tolerate plain text
                # written by other tools by falling back to the raw string.
                try:
                    self._cache[key] = json.loads(raw)
                except Exception:
                    self._cache[key] = raw
            self._last_loaded = datetime.utcnow()
            if self._cache:
                logger.info(f"[DynamicConfig] Loaded {len(self._cache)} ML-tuned params from DB")
        except Exception as e:
            # DB unreachable -> run on DEFAULTS only; this is the designed fallback.
            logger.warning(f"[DynamicConfig] Could not load from DB (using defaults): {e}")
            self._cache = {}

    def _maybe_reload(self, interval_seconds: int = 300) -> None:
        """Reload from DB every 5 minutes - picks up new tuned params without restart."""
        if self._last_loaded is None:
            self._load()
            return
        delta = (datetime.utcnow() - self._last_loaded).total_seconds()
        if delta > interval_seconds:
            self._load()
|
# --- Module-level convenience singleton ---------------------------------------
# Instantiated eagerly at import time; DynamicConfig.__new__ guarantees any
# later direct construction returns this same object.
_cfg = DynamicConfig()


def get_config() -> DynamicConfig:
    """Get the global DynamicConfig singleton."""
    return _cfg
33
app/config/mobile_config.py
Normal file
33
app/config/mobile_config.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
"""Mobile-specific configuration for delivery route optimization."""
|
||||||
|
|
||||||
|
# Mobile optimization settings
|
||||||
|
MOBILE_CONFIG = {
|
||||||
|
"default_algorithm": "greedy",
|
||||||
|
"max_deliveries": 100,
|
||||||
|
"timeout_seconds": 5,
|
||||||
|
"response_compression": True,
|
||||||
|
"performance_monitoring": True,
|
||||||
|
"mobile_headers": True
|
||||||
|
}
|
||||||
|
|
||||||
|
# Performance targets for mobile
|
||||||
|
PERFORMANCE_TARGETS = {
|
||||||
|
"greedy_algorithm": {
|
||||||
|
"max_response_time": 0.1, # 100ms
|
||||||
|
"max_deliveries": 50,
|
||||||
|
"description": "Ultra-fast for real-time mobile apps"
|
||||||
|
},
|
||||||
|
"tsp_algorithm": {
|
||||||
|
"max_response_time": 3.0, # 3 seconds
|
||||||
|
"max_deliveries": 30,
|
||||||
|
"description": "Optimal but slower, good for planning"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Mobile app recommendations
|
||||||
|
MOBILE_RECOMMENDATIONS = {
|
||||||
|
"real_time_delivery": "greedy",
|
||||||
|
"route_planning": "tsp",
|
||||||
|
"large_batches": "greedy",
|
||||||
|
"cost_optimization": "tsp"
|
||||||
|
}
|
||||||
50
app/config/rider_preferences.py
Normal file
50
app/config/rider_preferences.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
"""
|
||||||
|
Rider Preferred Kitchens Configuration
|
||||||
|
Mapping of Rider ID (int) to list of preferred Kitchen names (str).
|
||||||
|
Updated based on Deployment Plan.
|
||||||
|
"""
|
||||||
|
|
||||||
|
RIDER_PREFERRED_KITCHENS = {
|
||||||
|
# 1. VIVEK ANANDHAN - LOCAL, RS PURAM TO SELVAPURAM
|
||||||
|
1116: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],
|
||||||
|
|
||||||
|
# 2. NARAYANASAMY - VENGATAPURAM, VADAVALI, TADAGAM ROAD
|
||||||
|
1096: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],
|
||||||
|
|
||||||
|
# 3. VARUN EDWARD - GN MILLS, KAVUNDAMPALAYAM, THUDIYALUR
|
||||||
|
897: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],
|
||||||
|
|
||||||
|
# 4. JAYASABAESH - GANAPTHY
|
||||||
|
950: ["Daily grubs nandhini", "Vidhya kitchen"],
|
||||||
|
|
||||||
|
# 5. TAMILALAHZAN - GANDHIMA NAGAR
|
||||||
|
1114: ["Daily grubs nandhini", "Vidhya kitchen"],
|
||||||
|
|
||||||
|
# 6. RAJAN - PEELAMDU
|
||||||
|
883: ["Daily grubs nandhini", "Vidhya kitchen"],
|
||||||
|
|
||||||
|
# 7. MUTHURAJ - RAMANATHAPURAM TO SAIBABACOLONY
|
||||||
|
1272: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen", "Daily grubs nandhini", "Vidhya kitchen"],
|
||||||
|
|
||||||
|
# 8. MANIKANDAN - SINGNALLUR
|
||||||
|
753: ["Daily grubs nandhini", "Vidhya kitchen"],
|
||||||
|
|
||||||
|
# 9. TACHANAMOORTHI - KOVAI PUTHUR TO KAVUNDAMPALAYAM
|
||||||
|
1271: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"],
|
||||||
|
1133: ["Daily grubs(jayanthi kitchen)", "Bhuvaneshwari kitchen", "Hilda kitchen", "Kalpana kitchen"], # Active ID
|
||||||
|
}
|
||||||
|
|
||||||
|
# Anchor Coordinates for Riders (Based on Area Name)
|
||||||
|
# Used as fallback if GPS is missing, or to bias assignment to their Home Zone.
|
||||||
|
RIDER_HOME_LOCATIONS = {
|
||||||
|
1116: (11.0067, 76.9558), # VIVEK ANANDAN: RS PURAM
|
||||||
|
1096: (11.0450, 76.9000), # NARAYANASAMY: VADAVALI
|
||||||
|
897: (11.0430, 76.9380), # VARUN EDWARD: KAVUNDAMPALAYAM
|
||||||
|
950: (11.0330, 76.9800), # JAYASABESH: GANAPATHY
|
||||||
|
1114: (11.0450, 77.0000), # TAMILAZHAGAN: GANDHIMA NAGAR
|
||||||
|
883: (11.0200, 77.0000), # RAJAN: PEELAMEDU
|
||||||
|
1272: (10.9950, 77.0000), # MUTHURAJA: RAMANATHAPURAM
|
||||||
|
753: (11.0000, 77.0300), # MANIKANDAN: SINGANALLUR
|
||||||
|
1271: (10.9500, 76.9600), # THATCHINAMOORTHI: KOVAI PUDUR
|
||||||
|
|
||||||
|
}
|
||||||
5
app/controllers/__init__.py
Normal file
5
app/controllers/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
"""Controllers package."""
|
||||||
|
|
||||||
|
from .route_controller import RouteController
|
||||||
|
|
||||||
|
__all__ = ["RouteController"]
|
||||||
87
app/controllers/route_controller.py
Normal file
87
app/controllers/route_controller.py
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
"""Controller for provider payload optimization and forwarding."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
from typing import Dict, Any
|
||||||
|
import httpx
|
||||||
|
from fastapi import HTTPException
|
||||||
|
|
||||||
|
from app.core.exceptions import ValidationError, APIException
|
||||||
|
from app.services.routing.route_optimizer import RouteOptimizer
|
||||||
|
from app.services import cache
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class RouteController:
    """Controller for optimizing provider payloads and forwarding upstream."""

    def __init__(self):
        # One optimizer per controller instance; used for all payload requests.
        self.route_optimizer = RouteOptimizer()

    def _hash_key(self, prefix: str, payload: Dict[str, Any]) -> str:
        """Create a stable cache key from a dict payload.

        NOTE(review): not called anywhere in this class - presumably kept for a
        caching layer (the `cache` import); confirm before removing.
        """
        # ensure deterministic json by sorting keys
        serialized = json.dumps(payload, sort_keys=True, separators=(",", ":"))
        digest = hashlib.sha256(serialized.encode("utf-8")).hexdigest()
        return f"routes:{prefix}:{digest}"

    async def optimize_and_forward_provider_payload(self, orders: list[dict], forward_url: str) -> dict:
        """Optimize provider payload and return it (forwarding paused).

        - Input: list of provider orders (dicts)
        - Output: {code, details, message, status} where details is the optimized array

        NOTE(review): `forward_url` is currently unused because forwarding is
        paused (see comment below); the httpx handler remains for when it is
        re-enabled.

        Raises:
            ValidationError: when `orders` is not a non-empty list.
            APIException: 502 on upstream HTTP errors, 500 on anything else.
        """
        try:
            # Reject anything that is not a non-empty list before optimizing.
            if not isinstance(orders, list) or not orders:
                raise ValidationError("Orders array is required", field="body")

            optimized = await self.route_optimizer.optimize_provider_payload(orders)

            # Debug sample of optimized payload (first 3 items, select keys)
            try:
                sample = [
                    {
                        k: item.get(k)
                        for k in ("orderheaderid", "orderid", "deliverycustomerid", "step", "previouskms", "cumulativekms", "eta")
                    }
                    for item in optimized[:3]
                ]
                logger.debug(f"Optimized payload sample: {sample}")
                # Full ordering trace (every item) for distance debugging.
                trace = [
                    {
                        "orderid": item.get("orderid"),
                        "step": item.get("step"),
                        "prev": item.get("previouskms"),
                        "cum": item.get("cumulativekms"),
                    }
                    for item in optimized
                ]
                logger.debug(f"Optimized order trace: {trace}")
            except Exception:
                # Logging is best-effort; never let it break the response.
                logger.debug("Optimized payload sample logging failed")

            # Forwarding paused: return optimized payload directly
            return {
                "code": 200,
                "details": optimized,
                "message": "Success",
                "status": True,
            }
        except ValidationError:
            # Re-raise untouched so the validation handler formats it.
            raise
        except httpx.HTTPStatusError as e:
            status_code = e.response.status_code
            body_text = e.response.text
            logger.error(f"Forwarding failed: {status_code} - {body_text}")
            # Surface upstream details to the client for faster debugging
            raise APIException(
                status_code=502,
                message=f"Upstream service error (status {status_code}): {body_text}",
                code="UPSTREAM_ERROR"
            )
        except Exception as e:
            logger.error(f"Error optimizing/forwarding provider payload: {e}", exc_info=True)
            raise APIException(status_code=500, message="Internal server error", code="INTERNAL_ERROR")

    # Batch routes removed - use single-route optimization for each pickup location
||||||
2
app/core/__init__.py
Normal file
2
app/core/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
"""Core application components."""
|
||||||
|
|
||||||
63
app/core/arrow_utils.py
Normal file
63
app/core/arrow_utils.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
"""
|
||||||
|
High-performance utilities using Apache Arrow and NumPy for geographic data.
|
||||||
|
Provides vectorized operations for distances and coordinate processing.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pyarrow as pa
|
||||||
|
import pyarrow.parquet as pq
|
||||||
|
import logging
|
||||||
|
from typing import List, Dict, Any, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def calculate_haversine_matrix_vectorized(lats: np.ndarray, lons: np.ndarray) -> np.ndarray:
    """
    Calculate an N x N pairwise distance matrix using the Haversine formula.

    Fully vectorized using NumPy for O(N^2) speed improvement over Python loops.

    Args:
        lats: 1-D array of latitudes in decimal degrees.
        lons: 1-D array of longitudes in decimal degrees (same length as lats).

    Returns:
        (N, N) ndarray of great-circle distances in kilometers. The diagonal
        is zero and the matrix is symmetric.
    """
    # Earth's mean radius in kilometers
    R = 6371.0

    # Convert degrees to radians
    lats_rad = np.radians(lats)
    lons_rad = np.radians(lons)

    # Broadcasting a column vector (N, 1) against a row vector (1, N)
    # yields the full N x N matrix of pairwise differences in one shot.
    dlat = lats_rad.reshape(-1, 1) - lats_rad.reshape(1, -1)
    dlon = lons_rad.reshape(-1, 1) - lons_rad.reshape(1, -1)

    # Haversine formula
    a = np.sin(dlat / 2)**2 + np.cos(lats_rad.reshape(-1, 1)) * np.cos(lats_rad.reshape(1, -1)) * np.sin(dlon / 2)**2

    # Bug fix: floating-point rounding can push `a` marginally outside [0, 1]
    # (e.g. for near-antipodal points), making sqrt(1 - a) return NaN.
    # Clamp into the valid domain before taking square roots.
    a = np.clip(a, 0.0, 1.0)

    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return R * c
|
|
||||||
|
def orders_to_arrow_table(orders: List[Dict[str, Any]]) -> pa.Table:
    """
    Build an Apache Arrow Table from a list of order dictionaries.

    Columnar storage enables zero-copy slicing and efficient analytics.
    """
    table = pa.Table.from_pylist(orders)
    return table
|
|
||||||
|
def save_optimized_route_parquet(orders: List[Dict[str, Any]], filename: str):
    """
    Save optimized route data to a Parquet file for high-speed analysis.

    Useful for logging and historical simulation replays.

    Args:
        orders: List of order dicts, one row per order.
        filename: Destination .parquet path.
    """
    try:
        table = orders_to_arrow_table(orders)
        pq.write_table(table, filename)
        # Bug fix: the success log previously contained a literal broken
        # placeholder instead of the written path, making the log useless.
        logger.info(f" Saved route data to Parquet: {filename}")
    except Exception as e:
        # Best-effort persistence: failures are logged, never raised.
        logger.error(f" Failed to save Parquet: {e}")
|
|
||||||
|
def load_route_parquet(filename: str) -> List[Dict[str, Any]]:
    """
    Read a Parquet file back into route data as a list of row dictionaries.
    """
    return pq.read_table(filename).to_pylist()
||||||
26
app/core/constants.py
Normal file
26
app/core/constants.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
"""API constants and configuration."""
|
||||||
|
|
||||||
|
# API Configuration
|
||||||
|
API_VERSION = "2.0.0"
|
||||||
|
API_TITLE = "Route Optimization API"
|
||||||
|
API_DESCRIPTION = "Professional API for delivery route optimization"
|
||||||
|
|
||||||
|
# Route Optimization Limits
|
||||||
|
MAX_DELIVERIES = 50
|
||||||
|
MIN_DELIVERIES = 1
|
||||||
|
|
||||||
|
# Coordinate Validation
|
||||||
|
MIN_LATITUDE = -90
|
||||||
|
MAX_LATITUDE = 90
|
||||||
|
MIN_LONGITUDE = -180
|
||||||
|
MAX_LONGITUDE = 180
|
||||||
|
|
||||||
|
# Algorithm Types
|
||||||
|
ALGORITHM_GREEDY = "greedy"
|
||||||
|
ALGORITHM_TSP = "tsp"
|
||||||
|
|
||||||
|
# Response Messages
|
||||||
|
MESSAGE_SUCCESS = "Route optimized successfully"
|
||||||
|
MESSAGE_VALIDATION_ERROR = "Request validation failed"
|
||||||
|
MESSAGE_INTERNAL_ERROR = "An unexpected error occurred"
|
||||||
|
|
||||||
112
app/core/exception_handlers.py
Normal file
112
app/core/exception_handlers.py
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
"""Professional exception handlers for the API."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from fastapi import Request, status
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
from fastapi.exceptions import RequestValidationError
|
||||||
|
from starlette.exceptions import HTTPException as StarletteHTTPException
|
||||||
|
|
||||||
|
from app.core.exceptions import APIException
|
||||||
|
from app.models.errors import ErrorResponse, ErrorDetail
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def api_exception_handler(request: Request, exc: APIException) -> JSONResponse:
    """Handle custom API exceptions."""
    request_id = getattr(request.state, "request_id", None)

    detail = ErrorDetail(field=exc.field, message=exc.message, code=exc.code)
    payload = ErrorResponse(
        success=False,
        error=detail,
        path=request.url.path,
        request_id=request_id,
    ).model_dump(exclude_none=True)

    logger.warning(f"API Exception: {exc.code} - {exc.message} (Request ID: {request_id})")

    return JSONResponse(status_code=exc.status_code, content=payload)
||||||
|
|
||||||
|
|
||||||
|
async def http_exception_handler(request: Request, exc: StarletteHTTPException) -> JSONResponse:
    """Handle HTTP exceptions."""
    request_id = getattr(request.state, "request_id", None)

    detail = ErrorDetail(message=exc.detail, code="HTTP_ERROR")
    payload = ErrorResponse(
        success=False,
        error=detail,
        path=request.url.path,
        request_id=request_id,
    ).model_dump(exclude_none=True)

    logger.warning(f"HTTP Exception: {exc.status_code} - {exc.detail} (Request ID: {request_id})")

    return JSONResponse(status_code=exc.status_code, content=payload)
||||||
|
|
||||||
|
|
||||||
|
async def validation_exception_handler(request: Request, exc: RequestValidationError) -> JSONResponse:
    """Handle validation errors with detailed field information."""
    request_id = getattr(request.state, "request_id", None)

    # Surface only the first validation error; its location path becomes the
    # dotted field name in the response.
    field = None
    message = "Validation error"
    errors = exc.errors()
    if errors:
        head = errors[0]
        field = ".".join(str(loc) for loc in head.get("loc", []))
        message = head.get("msg", "Validation error")

    payload = ErrorResponse(
        success=False,
        error=ErrorDetail(field=field, message=message, code="VALIDATION_ERROR"),
        path=request.url.path,
        request_id=request_id,
    ).model_dump(exclude_none=True)

    logger.warning(f"Validation Error: {message} (Field: {field}, Request ID: {request_id})")

    return JSONResponse(
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        content=payload,
    )
||||||
|
|
||||||
|
|
||||||
|
async def general_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Handle unexpected exceptions."""
    request_id = getattr(request.state, "request_id", None)

    # Never leak internals to the client; the real traceback goes to the log.
    payload = ErrorResponse(
        success=False,
        error=ErrorDetail(
            message="An unexpected error occurred. Please try again later.",
            code="INTERNAL_SERVER_ERROR",
        ),
        path=request.url.path,
        request_id=request_id,
    ).model_dump(exclude_none=True)

    logger.error(f"Unexpected Error: {str(exc)} (Request ID: {request_id})", exc_info=True)

    return JSONResponse(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        content=payload,
    )
||||||
|
|
||||||
70
app/core/exceptions.py
Normal file
70
app/core/exceptions.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
"""Custom exceptions for the API."""
|
||||||
|
|
||||||
|
from fastapi import HTTPException, status
|
||||||
|
|
||||||
|
|
||||||
|
class APIException(HTTPException):
    """Base API exception carrying a structured error payload.

    Extends FastAPI's HTTPException with a message, an optional offending
    field, and a symbolic error code derived from the status when omitted.
    """

    def __init__(
        self,
        status_code: int,
        message: str,
        field: str = None,
        code: str = None,
        detail: str = None
    ):
        self.message = message
        self.field = field
        self.code = code if code else self._get_default_code(status_code)
        super().__init__(status_code=status_code, detail=detail if detail else message)

    def _get_default_code(self, status_code: int) -> str:
        """Map an HTTP status code to its default symbolic error code."""
        known_codes = {
            400: "BAD_REQUEST",
            401: "UNAUTHORIZED",
            403: "FORBIDDEN",
            404: "NOT_FOUND",
            409: "CONFLICT",
            422: "VALIDATION_ERROR",
            429: "RATE_LIMIT_EXCEEDED",
            500: "INTERNAL_SERVER_ERROR",
            503: "SERVICE_UNAVAILABLE",
        }
        return known_codes.get(status_code, "UNKNOWN_ERROR")
||||||
|
|
||||||
|
|
||||||
|
class ValidationError(APIException):
    """Raised when request payload validation fails (HTTP 422)."""

    def __init__(self, message: str, field: str = None):
        super().__init__(
            message=message,
            field=field,
            code="VALIDATION_ERROR",
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        )
||||||
|
|
||||||
|
|
||||||
|
class NotFoundError(APIException):
    """Raised when a requested resource does not exist (HTTP 404)."""

    def __init__(self, message: str = "Resource not found"):
        super().__init__(
            message=message,
            code="NOT_FOUND",
            status_code=status.HTTP_404_NOT_FOUND,
        )
||||||
|
|
||||||
|
|
||||||
|
class RateLimitError(APIException):
    """Raised when a client exceeds its request quota (HTTP 429)."""

    def __init__(self, message: str = "Rate limit exceeded"):
        super().__init__(
            message=message,
            code="RATE_LIMIT_EXCEEDED",
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
        )
||||||
|
|
||||||
263
app/main.py
Normal file
263
app/main.py
Normal file
@@ -0,0 +1,263 @@
|
|||||||
|
"""Professional FastAPI application for delivery route optimization."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import threading
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from fastapi import FastAPI, Request, status
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from fastapi.middleware.gzip import GZipMiddleware
|
||||||
|
from fastapi.exceptions import RequestValidationError
|
||||||
|
from starlette.exceptions import HTTPException as StarletteHTTPException
|
||||||
|
|
||||||
|
from app.routes import optimization_router, health_router, cache_router, ml_router, ml_web_router
|
||||||
|
from app.middleware.request_id import RequestIDMiddleware
|
||||||
|
from app.core.exceptions import APIException
|
||||||
|
from app.core.exception_handlers import (
|
||||||
|
api_exception_handler,
|
||||||
|
http_exception_handler,
|
||||||
|
validation_exception_handler,
|
||||||
|
general_exception_handler
|
||||||
|
)
|
||||||
|
|
||||||
|
# Configure professional logging with env control
|
||||||
|
_log_level_name = os.getenv("LOG_LEVEL", "INFO").upper()
|
||||||
|
_log_level = getattr(logging, _log_level_name, logging.INFO)
|
||||||
|
logging.basicConfig(
|
||||||
|
level=_log_level,
|
||||||
|
format="%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s",
|
||||||
|
datefmt="%Y-%m-%d %H:%M:%S",
|
||||||
|
handlers=[
|
||||||
|
logging.StreamHandler(sys.stdout)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Ensure root and key libraries honor desired level
|
||||||
|
logging.getLogger().setLevel(_log_level)
|
||||||
|
logging.getLogger("httpx").setLevel(_log_level)
|
||||||
|
logging.getLogger("uvicorn").setLevel(_log_level)
|
||||||
|
logging.getLogger("uvicorn.error").setLevel(_log_level)
|
||||||
|
logging.getLogger("uvicorn.access").setLevel(_log_level)
|
||||||
|
|
||||||
|
|
||||||
|
# --- Smart Post-Call ML Trainer ----------------------------------------------------------
|
||||||
|
#
|
||||||
|
# Trains in a BACKGROUND THREAD after every N /riderassign calls.
|
||||||
|
# - The API response is NEVER blocked - training is fully async.
|
||||||
|
# - Cooldown prevents overlapping runs (won't train if one is already running).
|
||||||
|
# - MIN_RECORDS guard: won't attempt if DB doesn't have enough data yet.
|
||||||
|
#
|
||||||
|
# Config:
|
||||||
|
# TRAIN_EVERY_N_CALLS : retrain after this many calls (default: 10)
|
||||||
|
# MIN_RECORDS_TO_TRAIN: minimum DB rows before first train (default: 30)
|
||||||
|
# COOLDOWN_SECONDS : min gap between two training runs (default: 120s)
|
||||||
|
# -------------------------------------------------------------------
|
||||||
|
|
||||||
|
TRAIN_EVERY_N_CALLS = int(os.getenv("ML_TRAIN_EVERY_N", "10"))
|
||||||
|
MIN_RECORDS_TO_TRAIN = int(os.getenv("ML_MIN_RECORDS", "30"))
|
||||||
|
COOLDOWN_SECONDS = int(os.getenv("ML_COOLDOWN_SEC", "120"))
|
||||||
|
|
||||||
|
_call_counter = 0
|
||||||
|
_counter_lock = threading.Lock()
|
||||||
|
_training_lock = threading.Lock()
|
||||||
|
_last_trained_at = 0.0 # epoch seconds
|
||||||
|
|
||||||
|
|
||||||
|
def _run_training_background():
    """
    The actual training job - runs in a daemon thread.
    Fully safe to call while the API is serving requests.

    Flow: grab the non-blocking training lock, check the collector has at
    least MIN_RECORDS_TO_TRAIN rows, then run the hypertuner for 100 trials.
    On a successful run the module-level _last_trained_at timestamp is
    refreshed; trigger_training_if_due() reads it for its cooldown check.
    """
    global _last_trained_at

    # Acquire lock - only ONE training run at a time. blocking=False makes a
    # concurrent trigger bail out immediately instead of queueing behind us.
    if not _training_lock.acquire(blocking=False):
        logger.info("[MLTrigger] Training already running - skipping this trigger.")
        return

    try:
        # Imported lazily so module import stays cheap and the ML stack is
        # only loaded when a training run actually fires.
        from app.services.ml.ml_hypertuner import get_hypertuner
        from app.services.ml.ml_data_collector import get_collector

        # Guard: not enough data yet -> skip (finally still releases the lock).
        count = get_collector().count_records()
        if count < MIN_RECORDS_TO_TRAIN:
            logger.info(f"[MLTrigger] Only {count} records - need >={MIN_RECORDS_TO_TRAIN}. Skipping.")
            return

        logger.info(f"[MLTrigger] [ML] Background hypertuning started ({count} records)...")
        result = get_hypertuner().run(n_trials=100)

        if result.get("status") == "ok":
            # Refresh the cooldown anchor only on a successful run, so a
            # failed run doesn't delay the next attempt.
            _last_trained_at = time.time()
            logger.info(
                f"[MLTrigger] [OK] Hypertuning done - "
                f"quality={result.get('best_predicted_quality', '?')}/100 "
                f"| {result.get('training_rows', '?')} rows "
                f"| {result.get('trials_run', '?')} trials"
            )
        else:
            logger.info(f"[MLTrigger] Hypertuning skipped: {result.get('message', '')}")

    except Exception as e:
        # Broad catch is deliberate: a failed tuning run must never kill the
        # daemon thread with an unlogged traceback.
        logger.error(f"[MLTrigger] Background training error: {e}", exc_info=True)
    finally:
        # Always release, including the early-return path inside the try.
        _training_lock.release()
|
def trigger_training_if_due():
    """
    Called after every /riderassign call.
    Increments counter - fires background thread every TRAIN_EVERY_N_CALLS.
    Non-blocking: returns immediately regardless.
    """
    # Only _call_counter is assigned here; _last_trained_at is merely read,
    # so declaring it `global` was dead (and misleading) - removed.
    global _call_counter

    # Counter bump and the modulo decision happen under the lock so two
    # concurrent requests can't both see the same "due" count.
    with _counter_lock:
        _call_counter += 1
        should_train = (_call_counter % TRAIN_EVERY_N_CALLS == 0)

    if not should_train:
        return

    # Cooldown check - don't train if we just trained recently.
    # NOTE(review): _last_trained_at is read without a lock; worst case is an
    # extra skipped/started run, which the training lock itself tolerates.
    elapsed = time.time() - _last_trained_at
    if elapsed < COOLDOWN_SECONDS:
        logger.info(
            f"[MLTrigger] Cooldown active - "
            f"{int(COOLDOWN_SECONDS - elapsed)}s remaining. Skipping."
        )
        return

    # Fire background thread - does NOT block the API response
    t = threading.Thread(target=_run_training_background, daemon=True, name="ml-hypertuner")
    t.start()
    logger.info(f"[MLTrigger] [START] Background training thread launched (call #{_call_counter})")
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan events.

    On startup: if the ML DB already holds enough records, kick off a
    background hypertuning run immediately; otherwise log how training will
    be triggered later. Everything before `yield` runs at startup, the line
    after it at shutdown.
    """
    logger.info("[START] Starting Route Optimization API...")

    # -- On startup: if enough data exists, train immediately in background --
    try:
        from app.services.ml.ml_data_collector import get_collector
        count = get_collector().count_records()
        if count >= MIN_RECORDS_TO_TRAIN:
            logger.info(f"[Startup] {count} records found -> launching startup hypertuning...")
            # Daemon thread: never blocks startup, dies with the process.
            t = threading.Thread(target=_run_training_background, daemon=True, name="ml-startup")
            t.start()
        else:
            logger.info(
                f"[Startup] {count}/{MIN_RECORDS_TO_TRAIN} records in ML DB - "
                f"will auto-train after every {TRAIN_EVERY_N_CALLS} /riderassign calls."
            )
    except Exception as e:
        # Best-effort: a broken ML subsystem must not prevent the API
        # from starting.
        logger.warning(f"[Startup] ML status check failed (non-fatal): {e}")

    logger.info(
        f"[OK] Application initialized - "
        f"ML trains every {TRAIN_EVERY_N_CALLS} calls "
        f"(cooldown {COOLDOWN_SECONDS}s, min {MIN_RECORDS_TO_TRAIN} records)"
    )
    yield

    logger.info(" Shutting down Route Optimization API...")
|
||||||
|
# Create FastAPI application with professional configuration
app = FastAPI(
    title="Route Optimization API",
    version="2.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/api/v1/openapi.json",
    lifespan=lifespan
)

# Add Request ID middleware (must be first, so every later middleware and
# handler sees request.state.request_id)
app.add_middleware(RequestIDMiddleware)

# Add CORS middleware
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers per the CORS spec - confirm and pin explicit origins
# in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure specific domains in production
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["*"],
    expose_headers=["X-Request-ID", "X-Process-Time"]
)

# Add GZIP compression (only for responses >= 1000 bytes)
app.add_middleware(GZipMiddleware, minimum_size=1000)
|
# Add request timing middleware
|
||||||
|
@app.middleware("http")
async def add_process_time_header(request: Request, call_next):
    """Attach performance headers (X-Process-Time, X-API-Version) to each response."""
    started = time.time()
    response = await call_next(request)
    elapsed = round(time.time() - started, 4)
    response.headers["X-Process-Time"] = str(elapsed)
    response.headers["X-API-Version"] = "2.0.0"
    return response
|
|
||||||
|
# Register exception handlers - most specific first; the bare Exception
# handler is the catch-all safety net when nothing else matched.
app.add_exception_handler(APIException, api_exception_handler)
app.add_exception_handler(StarletteHTTPException, http_exception_handler)
app.add_exception_handler(RequestValidationError, validation_exception_handler)
app.add_exception_handler(Exception, general_exception_handler)

# Include routers (each router carries its own prefix/tags)
app.include_router(optimization_router)
app.include_router(health_router)
app.include_router(cache_router)
app.include_router(ml_router)
app.include_router(ml_web_router)
||||||
|
|
||||||
|
@app.get("/", tags=["Root"])
async def root(request: Request):
    """
    API root endpoint with service information.

    Returns API metadata, available endpoints, and usage information.
    """
    # Assemble the sections separately, then compose the final payload.
    docs_links = {
        "swagger": "/docs",
        "redoc": "/redoc",
        "openapi": "/api/v1/openapi.json"
    }
    endpoint_catalog = {
        "createdeliveries": {
            "url": "/api/v1/optimization/createdeliveries",
            "method": "POST",
            "description": "Accept provider array, optimize order, add step/previouskms/cumulativekms, forward upstream"
        },
        "health": {
            "url": "/api/v1/health",
            "method": "GET",
            "description": "Health check endpoint"
        }
    }
    feature_summary = {
        "algorithm": "Greedy Nearest-Neighbor",
        "optimization": "Provider array reordering with distance metrics",
        "added_fields": ["step", "previouskms", "cumulativekms", "actualkms"]
    }
    return {
        "service": "Route Optimization API",
        "version": "2.0.0",
        "status": "operational",
        "documentation": docs_links,
        "endpoints": endpoint_catalog,
        "features": feature_summary,
        "request_id": getattr(request.state, "request_id", None)
    }
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import uvicorn
    # Local development entry point; reload=True is a dev-only setting.
    uvicorn.run("app.main:app", host="0.0.0.0", port=8002, reload=True)
|
||||||
2
app/middleware/__init__.py
Normal file
2
app/middleware/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
"""Middleware components."""
|
||||||
|
|
||||||
26
app/middleware/request_id.py
Normal file
26
app/middleware/request_id.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
"""Request ID middleware for request tracing."""
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from fastapi import Request
|
||||||
|
from starlette.middleware.base import BaseHTTPMiddleware
|
||||||
|
from starlette.responses import Response
|
||||||
|
|
||||||
|
|
||||||
|
class RequestIDMiddleware(BaseHTTPMiddleware):
    """Attach a unique request ID to every request and its response.

    Honors an incoming X-Request-ID header when present (and non-empty);
    otherwise generates a fresh UUID4. The ID is exposed on
    request.state.request_id and echoed back in the response headers.
    """

    async def dispatch(self, request: Request, call_next):
        incoming = request.headers.get("X-Request-ID")
        rid = incoming if incoming else str(uuid.uuid4())

        # Make the ID available to downstream handlers for log correlation.
        request.state.request_id = rid

        response = await call_next(request)

        # Echo it so clients can report the ID when debugging.
        response.headers["X-Request-ID"] = rid
        return response
|
||||||
21
app/models/__init__.py
Normal file
21
app/models/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
"""Models package."""
|
||||||
|
|
||||||
|
from .schemas import (
|
||||||
|
Location,
|
||||||
|
Delivery,
|
||||||
|
RouteOptimizationRequest,
|
||||||
|
RouteStep,
|
||||||
|
OptimizedRoute,
|
||||||
|
PickupLocation,
|
||||||
|
DeliveryLocation
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"Location",
|
||||||
|
"Delivery",
|
||||||
|
"RouteOptimizationRequest",
|
||||||
|
"RouteStep",
|
||||||
|
"OptimizedRoute",
|
||||||
|
"PickupLocation",
|
||||||
|
"DeliveryLocation"
|
||||||
|
]
|
||||||
45
app/models/errors.py
Normal file
45
app/models/errors.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
"""Professional error response models for API."""
|
||||||
|
|
||||||
|
from typing import Optional, Any, Dict
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
class ErrorDetail(BaseModel):
    """Detailed error information carried inside ErrorResponse.error."""
    # Name of the request field that caused the error, when known.
    field: Optional[str] = Field(None, description="Field name that caused the error")
    # Human-readable description of what went wrong (required).
    message: str = Field(..., description="Error message")
    # Machine-readable code, e.g. "VALIDATION_ERROR".
    code: Optional[str] = Field(None, description="Error code")
|
||||||
|
class ErrorResponse(BaseModel):
    """Standardized error envelope returned for failed requests."""
    # Always False for errors; lets clients branch on a single flag.
    success: bool = Field(False, description="Request success status")
    error: ErrorDetail = Field(..., description="Error details")
    # NOTE(review): datetime.utcnow() is naive and deprecated since Python
    # 3.12; switching to datetime.now(timezone.utc) would change the
    # serialized format (adds an offset) - confirm before changing.
    timestamp: str = Field(default_factory=lambda: datetime.utcnow().isoformat(), description="Error timestamp")
    path: Optional[str] = Field(None, description="Request path")
    request_id: Optional[str] = Field(None, description="Request ID for tracing")

    class Config:
        # Example payload surfaced in the OpenAPI schema.
        json_schema_extra = {
            "example": {
                "success": False,
                "error": {
                    "field": "pickup_location",
                    "message": "Pickup location is required",
                    "code": "VALIDATION_ERROR"
                },
                "timestamp": "2024-01-15T10:30:00.000Z",
                "path": "/api/v1/optimization/single-route",
                "request_id": "req-123456"
            }
        }
|
|
||||||
|
class SuccessResponse(BaseModel):
    """Standardized success response wrapper around arbitrary payloads."""
    # Always True for successes; mirrors ErrorResponse.success.
    success: bool = Field(True, description="Request success status")
    data: Any = Field(..., description="Response data")
    # Naive UTC ISO timestamp (see NOTE on ErrorResponse about utcnow()).
    timestamp: str = Field(default_factory=lambda: datetime.utcnow().isoformat(), description="Response timestamp")
    request_id: Optional[str] = Field(None, description="Request ID for tracing")
||||||
167
app/models/schemas.py
Normal file
167
app/models/schemas.py
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
"""Professional Pydantic models for request/response validation."""
|
||||||
|
|
||||||
|
from typing import List, Optional
|
||||||
|
from pydantic import BaseModel, Field, field_validator
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
class Location(BaseModel):
    """Generic location model with latitude and longitude.

    NOTE(review): unlike PickupLocation/DeliveryLocation, this model applies
    no ge/le coordinate bounds - confirm whether that is intentional.
    """
    lat: float = Field(..., description="Latitude")
    lng: float = Field(..., description="Longitude")
||||||
|
class PickupLocation(BaseModel):
    """Pickup (warehouse/store) coordinates; range-checked by Field bounds."""
    pickuplat: float = Field(
        ...,
        description="Pickup latitude",
        ge=-90,
        le=90,
        examples=[11.0050534]
    )
    pickuplon: float = Field(
        ...,
        description="Pickup longitude",
        ge=-180,
        le=180,
        examples=[76.9508991]
    )

    @field_validator("pickuplat", "pickuplon")
    @classmethod
    def validate_coordinates(cls, v):
        """Coerce coordinates to float.

        NOTE(review): the None check appears unreachable - pydantic rejects
        None for required float fields before validators run - confirm.
        """
        if v is None:
            raise ValueError("Coordinate cannot be None")
        return float(v)
||||||
|
class DeliveryLocation(BaseModel):
    """Delivery destination coordinates; range-checked by Field bounds."""
    deliverylat: float = Field(
        ...,
        description="Delivery latitude",
        ge=-90,
        le=90,
        examples=[11.0309723]
    )
    # Note the asymmetric field name: "deliverylong" (not "deliverylon").
    deliverylong: float = Field(
        ...,
        description="Delivery longitude",
        ge=-180,
        le=180,
        examples=[77.0004574]
    )

    @field_validator("deliverylat", "deliverylong")
    @classmethod
    def validate_coordinates(cls, v):
        """Coerce coordinates to float.

        NOTE(review): the None check appears unreachable - pydantic rejects
        None for required float fields before validators run - confirm.
        """
        if v is None:
            raise ValueError("Coordinate cannot be None")
        return float(v)
|
||||||
|
class Delivery(BaseModel):
    """A single delivery order to be placed on the optimized route."""
    # External identifier is a string (e.g. "90465"), not an int.
    deliveryid: str = Field(..., description="Unique delivery identifier")
    deliverycustomerid: int = Field(..., description="Customer ID for this delivery")
    location: DeliveryLocation = Field(..., description="Delivery location coordinates")
||||||
|
class RouteOptimizationRequest(BaseModel):
    """
    Request model for route optimization.

    Optimizes delivery routes starting from a pickup location (warehouse/store) to multiple delivery locations.
    Uses greedy nearest-neighbor algorithm for fast, efficient route calculation.
    """
    pickup_location: PickupLocation = Field(
        ...,
        description="Pickup location (warehouse/store) coordinates - starting point for optimization"
    )
    pickup_location_id: Optional[int] = Field(
        None,
        description="Optional pickup location ID for tracking purposes"
    )
    # Pydantic v2 (this module already uses field_validator/json_schema_extra):
    # min_length/max_length are the supported list constraints; the old
    # min_items/max_items Field kwargs are deprecated.
    deliveries: List[Delivery] = Field(
        ...,
        min_length=1,
        max_length=50,
        description="List of delivery locations to optimize (1-50 deliveries supported)"
    )

    class Config:
        # Example payload surfaced in the OpenAPI schema.
        json_schema_extra = {
            "example": {
                "pickup_location": {
                    "pickuplat": 11.0050534,
                    "pickuplon": 76.9508991
                },
                "pickup_location_id": 1,
                "deliveries": [
                    {
                        "deliveryid": "90465",
                        "deliverycustomerid": 1,
                        "location": {
                            "deliverylat": 11.0309723,
                            "deliverylong": 77.0004574
                        }
                    }
                ]
            }
        }
||||||
|
class RouteStep(BaseModel):
    """Single stop in the optimized route, with per-leg distance metrics."""
    step_number: int = Field(..., description="Step number in the route")
    delivery_id: str = Field(..., description="Delivery ID for this step")
    delivery_customer_id: int = Field(..., description="Customer ID for this delivery")
    location: DeliveryLocation = Field(..., description="Delivery location coordinates")
    distance_from_previous_km: float = Field(..., description="Distance from previous step in kilometers")
    cumulative_distance_km: float = Field(..., description="Total distance traveled so far in kilometers")
||||||
|
class OptimizedRoute(BaseModel):
    """
    Optimized route response with step-by-step delivery sequence.

    Contains the optimized route starting from pickup location, with each step showing:
    - Delivery order (Step 1, Step 2, etc.)
    - Distance from previous step
    - Cumulative distance traveled
    """
    route_id: str = Field(..., description="Unique route identifier (UUID)")
    pickup_location_id: Optional[int] = Field(None, description="Pickup location ID")
    pickup_location: PickupLocation = Field(..., description="Pickup location (warehouse/store) coordinates")
    total_distance_km: float = Field(
        ...,
        ge=0,
        description="Total route distance in kilometers",
        examples=[12.45]
    )
    total_deliveries: int = Field(
        ...,
        ge=1,
        description="Total number of deliveries in the route",
        examples=[5]
    )
    # Currently always "greedy"; kept as a field for forward compatibility.
    optimization_algorithm: str = Field(
        "greedy",
        description="Algorithm used for optimization",
        examples=["greedy"]
    )
    steps: List[RouteStep] = Field(
        ...,
        description="Ordered list of route steps (Step 1 = nearest from pickup, Step 2 = nearest from Step 1, etc.)"
    )
    # Naive UTC ISO timestamp; see NOTE about utcnow() deprecation elsewhere.
    created_at: str = Field(
        default_factory=lambda: datetime.utcnow().isoformat(),
        description="Route creation timestamp (ISO 8601)"
    )
||||||
|
# Batch optimization removed - no rider support needed
|
||||||
|
# Use single-route optimization for each pickup location
|
||||||
8
app/routes/__init__.py
Normal file
8
app/routes/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
"""Routes package."""
|
||||||
|
|
||||||
|
from .optimization import router as optimization_router
|
||||||
|
from .health import router as health_router
|
||||||
|
from .cache import router as cache_router
|
||||||
|
from .ml_admin import router as ml_router, web_router as ml_web_router
|
||||||
|
|
||||||
|
__all__ = ["optimization_router", "health_router", "cache_router", "ml_router", "ml_web_router"]
|
||||||
79
app/routes/cache.py
Normal file
79
app/routes/cache.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
"""Cache management API endpoints."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from fastapi import APIRouter, HTTPException
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
from app.services import cache
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/api/v1/cache", tags=["Cache Management"])
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/stats", response_model=Dict[str, Any])
async def get_cache_stats():
    """
    Get cache statistics.

    Returns:
        - hits: Number of cache hits
        - misses: Number of cache misses
        - sets: Number of cache writes
        - total_keys: Current number of cached route keys
        - enabled: Whether Redis cache is enabled
    """
    try:
        stats = cache.get_stats()
        # Derive hit rate from hits vs total lookups (guard the empty case).
        hits = stats.get("hits", 0)
        lookups = hits + stats.get("misses", 0)
        stats["hit_rate"] = round(hits / lookups * 100, 2) if lookups > 0 else 0.0
        return stats
    except Exception as e:
        logger.error(f"Error getting cache stats: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
||||||
|
@router.get("/keys")
async def list_cache_keys(pattern: str = "routes:*"):
    """
    List cache keys matching pattern.

    - **pattern**: Redis key pattern (default: "routes:*")
    """
    try:
        matched = cache.get_keys(pattern)
        # Cap the listing at 100 keys to keep the response small.
        payload = {
            "pattern": pattern,
            "count": len(matched),
            "keys": matched[:100]
        }
        return payload
    except Exception as e:
        logger.error(f"Error listing cache keys: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
||||||
|
@router.delete("/clear")
async def clear_cache(pattern: str = "routes:*"):
    """
    Clear cache keys matching pattern.

    - **pattern**: Redis key pattern to delete (default: "routes:*")

    [WARN] **Warning**: This will delete cached route optimizations!
    """
    try:
        removed = cache.delete(pattern)
        logger.info(f"Cleared {removed} cache keys matching pattern: {pattern}")
        result = {
            "pattern": pattern,
            "deleted_count": removed,
            "message": f"Cleared {removed} cache keys"
        }
        return result
    except Exception as e:
        logger.error(f"Error clearing cache: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
||||||
98
app/routes/health.py
Normal file
98
app/routes/health.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
"""Professional health check endpoints."""
|
||||||
|
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from typing import Optional
|
||||||
|
from datetime import datetime
|
||||||
|
from fastapi import APIRouter, Request
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/api/v1/health", tags=["Health"])
|
||||||
|
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
|
||||||
|
class HealthResponse(BaseModel):
    """Health check response model returned by GET /api/v1/health/."""
    # "healthy" or "unhealthy" (see health_check below).
    status: str = Field(..., description="Service status")
    uptime_seconds: float = Field(..., description="Service uptime in seconds")
    version: str = Field("2.0.0", description="API version")
    timestamp: str = Field(..., description="Health check timestamp (ISO 8601)")
    request_id: Optional[str] = Field(None, description="Request ID for tracing")
|
|
||||||
|
@router.get("/", response_model=HealthResponse)
async def health_check(request: Request):
    """
    Health check endpoint.

    Returns the current health status of the API service including:
    - Service status (healthy/unhealthy)
    - Uptime in seconds
    - API version
    - Timestamp
    """
    rid = getattr(request.state, "request_id", None)
    try:
        return HealthResponse(
            status="healthy",
            uptime_seconds=round(time.time() - start_time, 2),
            version="2.0.0",
            timestamp=datetime.utcnow().isoformat() + "Z",
            request_id=rid,
        )
    except Exception as e:
        # Report unhealthy rather than raising, so monitors get a body.
        logger.error(f"Health check failed: {e}", exc_info=True)
        return HealthResponse(
            status="unhealthy",
            uptime_seconds=0.0,
            version="2.0.0",
            timestamp=datetime.utcnow().isoformat() + "Z",
            request_id=rid,
        )
|
|
||||||
|
@router.get("/ready")
async def readiness_check(request: Request):
    """
    Readiness check endpoint for load balancers.

    Returns 200 if the service is ready to accept requests.

    NOTE(review): the not_ready branch also returns HTTP 200 with a JSON
    body - most load balancers key on the status code, so they would still
    route traffic here. Consider returning 503 for not_ready (behavior
    change - confirm with consumers first).
    """
    try:
        # Check if critical services are available
        # Add your service health checks here

        return {
            "status": "ready",
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "request_id": getattr(request.state, "request_id", None)
        }
    except Exception as e:
        logger.error(f"Readiness check failed: {e}")
        return {
            "status": "not_ready",
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "request_id": getattr(request.state, "request_id", None)
        }
|
||||||
|
@router.get("/live")
async def liveness_check(request: Request):
    """
    Liveness check endpoint for container orchestration.

    Returns 200 if the service is alive.
    """
    # Unconditional: if this handler runs at all, the process is alive.
    payload = {
        "status": "alive",
        "timestamp": datetime.utcnow().isoformat() + "Z",
        "request_id": getattr(request.state, "request_id", None)
    }
    return payload
|
||||||
286
app/routes/ml_admin.py
Normal file
286
app/routes/ml_admin.py
Normal file
@@ -0,0 +1,286 @@
|
|||||||
|
"""
|
||||||
|
ML Admin API - rider-api
|
||||||
|
|
||||||
|
Endpoints:
|
||||||
|
GET /api/v1/ml/status - DB record count, quality trend, model info
|
||||||
|
GET /api/v1/ml/config - Current active hyperparameters (ML-tuned + defaults)
|
||||||
|
POST /api/v1/ml/train - Trigger hypertuning immediately
|
||||||
|
POST /api/v1/ml/reset - Reset config to factory defaults
|
||||||
|
GET /api/v1/ml/reports - List past tuning reports
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
from fastapi import APIRouter, HTTPException, Body, Request
|
||||||
|
from fastapi.responses import FileResponse, PlainTextResponse
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(
|
||||||
|
prefix="/api/v1/ml",
|
||||||
|
tags=["ML Hypertuner"],
|
||||||
|
responses={
|
||||||
|
500: {"description": "Internal server error"}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
web_router = APIRouter(
|
||||||
|
tags=["ML Monitor Web Dashboard"]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# GET /ml-ops
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@web_router.get("/ml-ops", summary="Visual ML monitoring dashboard")
def ml_dashboard():
    """Returns the beautiful HTML dashboard for visualizing ML progress."""
    # Resolve the template relative to the process working directory.
    template_path = os.path.join(os.getcwd(), "app/templates/ml_dashboard.html")
    if os.path.isfile(template_path):
        return FileResponse(template_path)
    raise HTTPException(status_code=404, detail=f"Dashboard template not found at {template_path}")
|
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# GET /status
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.get("/status", summary="ML system status & quality trend")
def ml_status():
    """
    Returns:
        - How many assignment events are logged
        - Recent quality score trend (avg / min / max over the last 50 calls)
        - Whether the model has been trained
        - Current hyperparameter source (ml_tuned vs defaults)
    """
    # Lazy imports keep the ML stack out of the module import path.
    from app.services.ml.ml_data_collector import get_collector
    from app.services.ml.ml_hypertuner import get_hypertuner

    try:
        collector = get_collector()
        tuner = get_hypertuner()

        record_count = collector.count_records()
        quality_trend = collector.get_recent_quality_trend(last_n=50)
        model_info = tuner.get_model_info()

        from app.services.ml.behavior_analyzer import get_analyzer
        b_analyzer = get_analyzer()

        from app.config.dynamic_config import get_config
        cfg = get_config()

        # NOTE(review): the threshold 30 is hardcoded here; app.main reads
        # ML_MIN_RECORDS (default 30) - confirm these should stay in sync.
        return {
            "status": "ok",
            "db_records": record_count,
            "ready_to_train": record_count >= 30,
            "quality_trend": quality_trend,
            "hourly_stats": collector.get_hourly_stats(),
            "quality_histogram": collector.get_quality_histogram(),
            "strategy_comparison": collector.get_strategy_comparison(),
            "zone_stats": collector.get_zone_stats(),
            # Defensive: older analyzers without get_info() yield {}.
            "behavior": b_analyzer.get_info() if hasattr(b_analyzer, 'get_info') else {},
            "config": cfg.get_all(),
            "model": model_info,
            # Nested conditional: collecting -> ready-to-train -> trained.
            "message": (
                f"Collecting data - need {max(0, 30 - record_count)} more records to train."
                if record_count < 30
                else "Ready to train! Call POST /api/v1/ml/train"
                if not model_info["model_trained"]
                else "Model trained and active."
            )
        }
    except Exception as e:
        logger.error(f"[ML API] Status failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# GET /config
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.get("/config", summary="Current active hyperparameter values")
def ml_config():
    """
    Return every hyperparameter currently in use by the system.

    Each value is annotated with its origin:
    - 'ml_tuned' -> set by the ML model (key present in the config cache)
    - 'default'  -> factory default, not yet tuned

    Raises:
        HTTPException: 500 with the underlying error message on failure.
    """
    # Fix: dropped the unused `DEFAULTS` import.
    from app.config.dynamic_config import get_config

    try:
        cfg = get_config()
        all_values = cfg.get_all()
        # NOTE(review): reaches into the private `_cache` attribute and assumes
        # cached keys == ML-tuned keys -- confirm against dynamic_config.
        cached_keys = set(cfg._cache.keys())

        annotated = {
            k: {
                "value": v,
                "source": "ml_tuned" if k in cached_keys else "default",
            }
            for k, v in all_values.items()
        }

        return {
            "status": "ok",
            "hyperparameters": annotated,
            "total_params": len(annotated),
            "ml_tuned_count": sum(1 for x in annotated.values() if x["source"] == "ml_tuned"),
        }
    except Exception as e:
        logger.error(f"[ML API] Config fetch failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
@router.patch("/config", summary="Update specific ML configuration defaults")
def ml_config_patch(payload: dict = Body(...)):
    """Apply JSON overrides to any active parameter, e.g. {"ml_strategy": "balanced"}."""
    from app.config.dynamic_config import get_config

    try:
        # Bulk-write all keys in one call, tagged so the config layer can
        # distinguish admin overrides from model-tuned values.
        get_config().set_bulk(payload, source="ml_admin")
        return {"status": "ok"}
    except Exception as e:
        logger.error(f"[ML API] Config patch failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# POST /train
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.post("/train", summary="Trigger XGBoost training + Optuna hyperparameter search")
def ml_train(
    n_trials: int = Body(default=100, embed=True, ge=10, le=500,
                         description="Number of Optuna trials (10-500)"),
    min_records: int = Body(default=30, embed=True, ge=10,
                            description="Minimum DB records required")
):
    """
    Run the full hypertuning pipeline:

    1. Load logged assignment data from DB
    2. Train XGBoost surrogate model
    3. Run Optuna TPE search (n_trials trials)
    4. Write optimal params to DynamicConfig

    The AssignmentService picks up new params within 5 minutes (auto-reload).

    Raises:
        HTTPException: 500 with the underlying error message if the tuner fails.
    """
    from app.services.ml.ml_hypertuner import get_hypertuner

    try:
        logger.info(f"[ML API] Hypertuning triggered: n_trials={n_trials}, min_records={min_records}")
        tuner = get_hypertuner()
        # The tuner enforces the minimum record count itself and returns a report dict.
        result = tuner.run(n_trials=n_trials, min_training_records=min_records)
        return result
    except Exception as e:
        logger.error(f"[ML API] Training failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# POST /reset
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.post("/reset", summary="Reset all hyperparameters to factory defaults")
def ml_reset():
    """
    Wipe all ML-tuned config values and revert every parameter to the
    original hardcoded defaults. Useful if the model produced bad results.
    """
    from app.config.dynamic_config import get_config

    try:
        cfg = get_config()
        cfg.reset_to_defaults()
        return {
            "status": "ok",
            "message": "All hyperparameters reset to factory defaults.",
        }
    except Exception as e:
        logger.error(f"[ML API] Reset failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# POST /strategy
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.post("/strategy", summary="Change the AI Optimization Prompt/Strategy")
def ml_strategy(strategy: str = Body(default="balanced", embed=True)):
    """
    Change the mathematical objective of the AI.

    Choices: 'balanced', 'fuel_saver', 'aggressive_speed', 'zone_strict'

    Historical data is NOT wiped. Instead, the AI dynamically recalculates
    the quality score of all past events using the new strategy rules.

    Raises:
        HTTPException: 400 for an unknown strategy name, 500 on config errors.
    """
    # Fix: removed the unused `import sqlite3` (nothing in this handler
    # touches the database directly).
    from app.config.dynamic_config import get_config

    valid = ["balanced", "fuel_saver", "aggressive_speed", "zone_strict"]
    if strategy not in valid:
        raise HTTPException(400, f"Invalid strategy. Choose from {valid}")

    try:
        get_config().set("ml_strategy", strategy)

        return {
            "status": "ok",
            "message": f"Strategy changed to '{strategy}'. Historical AI data will be mathematically repurposed to train towards this new goal.",
            "strategy": strategy
        }
    except Exception as e:
        logger.error(f"[ML API] Strategy change failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# GET /reports
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.get("/reports", summary="List past hypertuning reports")
def ml_reports():
    """Return the 10 most recent tuning reports (JSON files in ml_data/reports/).

    Files are sorted by name descending; report filenames are assumed to
    embed a sortable timestamp -- TODO confirm against the tuner's writer.
    Unreadable or corrupt files are skipped with a warning instead of
    silently (fix: previously a bare `except Exception: pass`).
    """
    try:
        report_dir = "ml_data/reports"
        if not os.path.isdir(report_dir):
            return {"status": "ok", "reports": [], "message": "No reports yet."}

        files = sorted(
            [f for f in os.listdir(report_dir) if f.endswith(".json")],
            reverse=True
        )[:10]

        reports = []
        for fname in files:
            path = os.path.join(report_dir, fname)
            try:
                with open(path) as f:
                    reports.append(json.load(f))
            except (OSError, json.JSONDecodeError) as exc:
                # Best-effort: one corrupt report must not break the listing,
                # but leave a trace rather than swallowing the error.
                logger.warning(f"[ML API] Skipping unreadable report {path}: {exc}")

        return {"status": "ok", "reports": reports, "count": len(reports)}
    except Exception as e:
        logger.error(f"[ML API] Reports fetch failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# GET /export
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.get("/export", summary="Export all records as CSV")
def ml_export():
    """Return every row of the assignment_ml_log table as a downloadable CSV."""
    try:
        from app.services.ml.ml_data_collector import get_collector

        collector = get_collector()
        response = PlainTextResponse(content=collector.export_csv(), media_type="text/csv")
        # Force a file download in the browser rather than inline display.
        response.headers["Content-Disposition"] = 'attachment; filename="ml_export.csv"'
        return response
    except Exception as e:
        logger.error(f"[ML API] Export failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||||
364
app/routes/optimization.py
Normal file
364
app/routes/optimization.py
Normal file
@@ -0,0 +1,364 @@
|
|||||||
|
"""Provider payload optimization endpoints."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from fastapi import APIRouter, Request, Depends, status, HTTPException, Query
|
||||||
|
|
||||||
|
from app.controllers.route_controller import RouteController
|
||||||
|
from app.core.exceptions import APIException
|
||||||
|
from app.core.arrow_utils import save_optimized_route_parquet
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Module-level logger for this router.
logger = logging.getLogger(__name__)

# All endpoints below are mounted under /api/v1/optimization; the responses
# map documents the error shapes shared by every route in this file.
router = APIRouter(
    prefix="/api/v1/optimization",
    tags=["Route Optimization"],
    responses={
        400: {"description": "Bad request - Invalid input parameters"},
        422: {"description": "Validation error - Request validation failed"},
        500: {"description": "Internal server error"}
    }
)
|
||||||
|
|
||||||
|
def get_route_controller() -> RouteController:
    """FastAPI dependency that supplies a fresh RouteController per request."""
    controller = RouteController()
    return controller
|
||||||
|
|
||||||
|
|
||||||
|
# Legacy single-route endpoint removed; provider flow only.
|
||||||
|
@router.post(
    "/createdeliveries",
    status_code=status.HTTP_200_OK,
    summary="Optimize provider payload (forwarding paused)",
    description="""
    Accepts the provider's orders array, reorders it using greedy nearest-neighbor, adds only:
    - step (1..N)
    - previouskms (distance from previous stop in km)
    - cumulativekms (total distance so far in km)
    - actualkms (direct pickup-to-delivery distance)

    Forwarding is temporarily paused: returns the optimized array in the response.
    """,
    responses={
        200: {
            "description": "Upstream response",
            "content": {
                "application/json": {
                    "example": {"code": 200, "details": [], "message": "Success", "status": True}
                }
            }
        }
    }
)
async def provider_optimize_forward(
    body: list[dict],
    controller: RouteController = Depends(get_route_controller)
):
    """
    Accept provider JSON array, reorder by greedy nearest-neighbor, annotate each item with:
    - step (1..N)
    - previouskms (km from previous point)
    - cumulativekms (km so far)
    - actualkms (pickup to delivery distance)
    Then forward the optimized array to the external API and return only its response.

    Raises:
        APIException: re-raised unchanged so the framework's handler formats it.
        HTTPException: 500 for any other unexpected failure.
    """
    try:
        # Upstream target for the optimized payload (forwarding behavior lives
        # in the controller; see optimize_and_forward_provider_payload).
        url = "https://jupiter.nearle.app/live/api/v1/deliveries/createdeliveries"
        result = await controller.optimize_and_forward_provider_payload(body, url)

        # Performance Logging: Save a Parquet Snapshot (Async-friendly backup)
        # NOTE(review): this snapshots the raw incoming `body`, not the
        # optimized `result` -- confirm whether the pre-optimization payload
        # is what should be archived here.
        try:
            os.makedirs("data/snapshots", exist_ok=True)
            snapshot_path = f"data/snapshots/route_{int(time.time())}.parquet"
            save_optimized_route_parquet(body, snapshot_path)
            logger.info(f"Apache Arrow: Snapshot saved to {snapshot_path}")
        except Exception as e:
            # Snapshotting is best-effort; never fail the request over it.
            logger.warning(f"Could not save Arrow snapshot: {e}")

        return result
    except APIException:
        raise
    except Exception as e:
        logger.error(f"Unexpected error in provider_optimize_forward: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error")
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
    "/createdeliveries",
    summary="Usage info for provider optimize forward"
)
async def provider_optimize_forward_info():
    """Return usage info; this endpoint accepts POST only for processing."""
    return {
        "message": "Use POST with a JSON array of orders to optimize and forward.",
        "method": "POST",
        # Fix: previously advertised a stale '/provider-optimize-forward'
        # path; the POST handler is registered at this same route.
        "path": "/api/v1/optimization/createdeliveries"
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
    "/riderassign",
    status_code=status.HTTP_200_OK,
    summary="Assign created orders to active riders",
    description="""
    Assigns orders to riders based on kitchen preferences, proximity, and load.

    - If a payload of orders is provided, processes those.
    - If payload is empty, fetches all 'created' orders from the external API.
    - Fetches active riders and matches them.
    """,
    responses={
        200: {
            "description": "Assignment Result",
            "content": {
                "application/json": {
                    "example": {"code": 200, "details": {"1234": [{"orderid": "..."}]}, "message": "Success", "status": True}
                }
            }
        }
    }
)
async def assign_orders_to_riders(
    request: Request,
    body: list[dict] = None,
    resuffle: bool = Query(False),
    reshuffle: bool = Query(False),
    rehuffle: bool = Query(False),
    hypertuning_params: str = None
):
    """
    Smart assignment of orders to riders.

    Flow:
      1. Fetch active riders and pricing concurrently.
      2. Use the request body as the order list, or fetch 'created' orders
         from the external API when the body is empty.
      3. Run AssignmentService (optionally with a per-request strategy
         override via `hypertuning_params`).
      4. Optimize each rider's route in parallel and annotate every order
         with rider info, distances, and a computed ETA.
      5. Group results by zone and return flat + zoned structures.

    The `reshuffle`/`resuffle`/`rehuffle` query params are accepted in any
    spelling; mere presence of the key (flag-style) also counts as true.
    """
    from app.services.rider.get_active_riders import fetch_active_riders, fetch_created_orders, fetch_rider_pricing
    from app.services.core.assignment_service import AssignmentService
    from app.services.routing.route_optimizer import RouteOptimizer
    from app.services.routing.realistic_eta_calculator import RealisticETACalculator
    from datetime import datetime, timedelta
    from dateutil.parser import parse as parse_date
    import asyncio

    eta_calculator = RealisticETACalculator()

    try:
        # Check if any variant is present in query params (flag-style) or explicitly true
        q_params = request.query_params
        do_reshuffle = any(k in q_params for k in ["reshuffle", "resuffle", "rehuffle"]) or \
                       resuffle or reshuffle or rehuffle

        # 1. Fetch Riders and Pricing (concurrently; both are independent HTTP calls)
        riders_task = fetch_active_riders()
        pricing_task = fetch_rider_pricing()

        riders, pricing = await asyncio.gather(riders_task, pricing_task)

        # Determine pricing (Default: 30 base + 2.5/km)
        fuel_charge = 2.5
        base_pay = 30.0
        if pricing:
            # Only shift 1 pricing is used here -- presumably the default
            # shift; TODO confirm against the pricing API semantics.
            shift_1 = next((p for p in pricing if p.get("shiftid") == 1), None)
            if shift_1:
                fuel_charge = float(shift_1.get("fuelcharge", 2.5))
                base_pay = float(shift_1.get("basepay") or shift_1.get("base_pay") or 30.0)

        # 2. Determine Orders Source
        orders = body
        if not orders:
            logger.info("No payload provided, fetching created orders from external API.")
            orders = await fetch_created_orders()
        else:
            logger.info(f"Processing {len(orders)} orders from payload.")

        # Early exit when neither the payload nor the external API yielded orders.
        if not orders:
            return {
                "code": 200,
                "details": {},
                "message": "No orders found to assign.",
                "status": True,
                "meta": {
                    "active_riders_count": len(riders)
                }
            }

        # 3. Run Assignment (AssignmentService)
        # -- Per-request strategy override --
        from app.config.dynamic_config import get_config
        _cfg = get_config()
        _original_strategy = None

        valid_strategies = ["balanced", "fuel_saver", "aggressive_speed", "zone_strict"]
        if hypertuning_params and hypertuning_params in valid_strategies:
            _original_strategy = _cfg.get("ml_strategy", "balanced")
            # NOTE(review): mutates the shared config cache directly, so a
            # concurrent request can observe this override until it is
            # restored below -- confirm whether per-request isolation matters.
            _cfg._cache["ml_strategy"] = hypertuning_params
            logger.info(f"[HYPERTUNE] Per-request strategy override: {hypertuning_params}")

        service = AssignmentService()
        assignments, unassigned_orders = await service.assign_orders(
            riders=riders,
            orders=orders,
            fuel_charge=fuel_charge,
            base_pay=base_pay,
            reshuffle=do_reshuffle
        )

        # Restore original strategy after this call
        if _original_strategy is not None:
            _cfg._cache["ml_strategy"] = _original_strategy

        if do_reshuffle:
            logger.info("[RESHUFFLE] Retry mode active - exploring alternative rider assignments.")

        # 4. Optimize Routes for Each Rider and Flatten Response
        optimizer = RouteOptimizer()
        flat_orders_list = []

        # Prepare tasks for parallel execution
        # We need to store context (rider_id) to map results back
        optimization_tasks = []
        task_contexts = []

        for rider_id, rider_orders in assignments.items():
            if not rider_orders:
                continue

            # Align with createdeliveries model: Always optimize from the Pickup/Kitchen location.
            # This prevents route reversal if the rider is on the "far" side of the deliveries.
            # The rider's current location (rlat/rlon) is ignored for sequence optimization
            # to ensure the logical flow (Kitchen -> Stop 1 -> Stop 2 -> Stop 3) is followed.
            start_coords = None

            # Add to task list
            optimization_tasks.append(
                optimizer.optimize_provider_payload(rider_orders, start_coords=start_coords)
            )
            task_contexts.append(rider_id)

        total_assigned = 0

        # Execute all optimizations in parallel
        # This dramatically reduces time from Sum(RiderTimes) to Max(RiderTime)
        if optimization_tasks:
            # gather() preserves order, so results[i] belongs to task_contexts[i].
            results = await asyncio.gather(*optimization_tasks)

            # Create a lookup for rider details
            rider_info_map = {}
            for r in riders:
                # Use string conversion for robust ID matching
                r_id = str(r.get("userid") or r.get("_id", ""))
                if r_id:
                    rider_info_map[r_id] = {
                        "name": r.get("username", ""),
                        "contactno": r.get("contactno", "")
                    }

            # Process results matching them back to riders
            for stored_rider_id, optimized_route in zip(task_contexts, results):
                r_id_str = str(stored_rider_id)
                r_info = rider_info_map.get(r_id_str, {})
                rider_name = r_info.get("name", "")
                rider_contact = r_info.get("contactno", "")

                # Calculate total distance for this rider
                total_rider_kms = 0
                if optimized_route:
                    # Usually the last order has the max cumulative kms if steps are 1..N
                    try:
                        total_rider_kms = max([float(o.get("cumulativekms", 0)) for o in optimized_route])
                    # NOTE(review): bare except also swallows SystemExit /
                    # KeyboardInterrupt -- narrowing to (TypeError, ValueError)
                    # would be safer here.
                    except:
                        total_rider_kms = sum([float(o.get("actualkms", o.get("kms", 0))) for o in optimized_route])

                for order in optimized_route:
                    order["userid"] = stored_rider_id
                    order["username"] = rider_name
                    # Populate the specific fields requested by the user
                    order["rider"] = rider_name
                    order["ridercontactno"] = rider_contact
                    order["riderkms"] = str(round(total_rider_kms, 2))

                    # --- DYNAMIC ETA COMPUTATION -----------------------------
                    # Try various cases and names for pickup slot
                    pickup_slot_str = (
                        order.get("pickupSlot") or
                        order.get("pickupslot") or
                        order.get("pickup_slot") or
                        order.get("pickuptime")
                    )

                    if pickup_slot_str:
                        # Find the actual travel distance for THIS specific order
                        # cumulativekms represents distance from pickup to this delivery stop
                        dist_km = float(order.get("cumulativekms") or order.get("actualkms", order.get("kms", 0)))
                        step = int(order.get("step", 1))
                        order_type = order.get("ordertype", "Economy")

                        try:
                            # Robust date parsing (handles almost any format magically)
                            pickup_time = parse_date(str(pickup_slot_str))

                            eta_mins = eta_calculator.calculate_eta(
                                distance_km=dist_km,
                                is_first_order=(step == 1),
                                order_type=order_type,
                                time_of_day="normal"
                            )
                            expected_time = pickup_time + timedelta(minutes=eta_mins)

                            # Format output as requested: "2026-03-24 08:25 AM"
                            order["expectedDeliveryTime"] = expected_time.strftime("%Y-%m-%d %I:%M %p")
                            order["transitMinutes"] = eta_mins
                            order["calculationDistanceKm"] = round(dist_km, 2)
                        except Exception as e:
                            # ETA is an enrichment; a bad slot string must not
                            # drop the order from the response.
                            logger.warning(f"Could not calculate ETA from pickupSlot '{pickup_slot_str}': {e}")
                    # ---------------------------------------------------------

                    flat_orders_list.append(order)
                total_assigned += len(optimized_route)

        # 5. Zone Processing
        from app.services.routing.zone_service import ZoneService
        zone_service = ZoneService()
        zone_data = zone_service.group_by_zones(flat_orders_list, unassigned_orders, fuel_charge=fuel_charge, base_pay=base_pay)

        zones_structure = zone_data["detailed_zones"]
        zone_analysis = zone_data["zone_analysis"]

        return {
            "code": 200,
            "zone_summary": zone_analysis,  # High-level zone metrics
            "zones": zones_structure,       # Detailed data
            "details": flat_orders_list,    # Flat list
            "message": "Success",
            "status": True,
            "meta": {
                "total_orders": len(orders),
                "utilized_riders": len([rid for rid, rl in assignments.items() if rl]),
                "active_riders_pool": len(riders),
                "assigned_orders": total_assigned,
                "unassigned_orders": len(unassigned_orders),
                "total_profit": round(sum(z["total_profit"] for z in zone_analysis), 2),
                "fuel_charge_base": fuel_charge,
                "unassigned_details": [
                    {
                        "orderid": o.get("orderid") or o.get("_id"),
                        "reason": o.get("unassigned_reason", "Unknown capacity/proximity issue")
                    } for o in unassigned_orders
                ],
                "distribution_summary": {rid: len(rl) for rid, rl in assignments.items() if rl},
                "resuffle_mode": do_reshuffle,
                "hypertuning_params": hypertuning_params or "default"
            }
        }

    except Exception as e:
        logger.error(f"Error in rider assignment: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error during assignment")

    finally:
        # -- Fire ML training trigger (non-blocking) -----------------------
        # Runs AFTER response is ready. Every 10th call kicks off a
        # background thread that retrains the model. API is never blocked.
        try:
            from app.main import trigger_training_if_due
            trigger_training_if_due()
        except Exception:
            pass  # Never crash the endpoint due to ML trigger
|
||||||
124
app/services/__init__.py
Normal file
124
app/services/__init__.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
"""Services package."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from typing import Any, Optional, Dict
|
||||||
|
|
||||||
|
try:
|
||||||
|
import redis # type: ignore
|
||||||
|
except Exception: # pragma: no cover
|
||||||
|
redis = None # type: ignore
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class RedisCache:
    """Lightweight Redis cache wrapper with graceful fallback.

    If the URL env var is unset, the redis package is missing, or the server
    is unreachable, the instance disables itself: get_json() always misses
    and set_json()/delete() become no-ops, so callers never need to branch
    on cache availability.
    """

    def __init__(self, url_env: str = "REDIS_URL", default_ttl_seconds: Optional[int] = None) -> None:
        """Connect to the Redis server named by *url_env*; disable on any failure.

        TTL resolution order: explicit argument, then the
        REDIS_CACHE_TTL_SECONDS env var, then 300 seconds.
        """
        # Allow TTL to be configurable via env var (default 300s = 5 min, or 86400 = 24h)
        ttl_env = os.getenv("REDIS_CACHE_TTL_SECONDS")
        if default_ttl_seconds is None:
            default_ttl_seconds = int(ttl_env) if ttl_env else 300

        self.default_ttl_seconds = default_ttl_seconds
        self._enabled = False  # flips to True only after a successful ping
        self._client = None
        self._stats = {"hits": 0, "misses": 0, "sets": 0}  # in-process counters

        url = os.getenv(url_env)
        if not url or redis is None:
            logger.warning("Redis not configured or client unavailable; caching disabled")
            return
        try:
            self._client = redis.Redis.from_url(url, decode_responses=True)
            # Fail fast: a dead server disables the cache up front instead of
            # erroring on every later call.
            self._client.ping()
            self._enabled = True
            logger.info(f"Redis cache connected (TTL: {self.default_ttl_seconds}s)")
        except Exception as exc:
            logger.warning(f"Redis connection failed: {exc}; caching disabled")
            self._enabled = False
            self._client = None

    @property
    def enabled(self) -> bool:
        """True when a live client is attached (ping succeeded at init)."""
        return self._enabled and self._client is not None

    def get_json(self, key: str) -> Optional[Any]:
        """Fetch and JSON-decode *key*; None on miss, error, or disabled cache."""
        if not self.enabled:
            self._stats["misses"] += 1
            return None
        try:
            raw = self._client.get(key)  # type: ignore[union-attr]
            if raw:
                self._stats["hits"] += 1
                return json.loads(raw)
            else:
                self._stats["misses"] += 1
                return None
        except Exception as exc:
            # Cache errors are counted as misses; callers fall through to
            # their normal (uncached) path.
            logger.debug(f"Redis get_json error for key={key}: {exc}")
            self._stats["misses"] += 1
            return None

    def set_json(self, key: str, value: Any, ttl_seconds: Optional[int] = None) -> None:
        """JSON-encode *value* and store it under *key*; no-op when disabled.

        ttl_seconds: overrides the instance default; any value <= 0 stores
        the key without expiration.
        """
        if not self.enabled:
            return
        try:
            # Objects exposing model_dump (e.g. pydantic models) are serialized
            # via that method; anything else unserializable is returned as-is,
            # which makes json.dumps raise -- the except below then skips the
            # write. NOTE(review): confirm every cached type has model_dump.
            payload = json.dumps(value, default=lambda o: getattr(o, "model_dump", lambda: o)())
            ttl = ttl_seconds if ttl_seconds is not None else self.default_ttl_seconds
            # ttl <= 0 means "no expiration" (plain SET); positive ttl uses SETEX
            if ttl > 0:
                self._client.setex(key, ttl, payload)  # type: ignore[union-attr]
            else:
                self._client.set(key, payload)  # type: ignore[union-attr]
            self._stats["sets"] += 1
        except Exception as exc:
            logger.debug(f"Redis set_json error for key={key}: {exc}")

    def delete(self, pattern: str) -> int:
        """Delete keys matching pattern (e.g., 'routes:*'). Returns count deleted."""
        if not self.enabled:
            return 0
        try:
            # scan_iter avoids blocking the server the way KEYS would.
            keys = list(self._client.scan_iter(match=pattern))  # type: ignore[union-attr]
            if keys:
                return self._client.delete(*keys)  # type: ignore[union-attr]
            return 0
        except Exception as exc:
            logger.error(f"Redis delete error for pattern={pattern}: {exc}")
            return 0

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics: hit/miss/set counters plus key count and state."""
        stats = self._stats.copy()
        if self.enabled:
            try:
                # Count cache keys (only the 'routes:' namespace is counted)
                route_keys = list(self._client.scan_iter(match="routes:*"))  # type: ignore[union-attr]
                stats["total_keys"] = len(route_keys)
                stats["enabled"] = True
            except Exception:
                # Server hiccup: still report as enabled, just without a count.
                stats["total_keys"] = 0
                stats["enabled"] = True
        else:
            stats["total_keys"] = 0
            stats["enabled"] = False
        return stats

    def get_keys(self, pattern: str = "routes:*") -> list[str]:
        """Get list of cache keys matching pattern; empty when disabled or on error."""
        if not self.enabled:
            return []
        try:
            return list(self._client.scan_iter(match=pattern))  # type: ignore[union-attr]
        except Exception as exc:
            logger.error(f"Redis get_keys error for pattern={pattern}: {exc}")
            return []
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton cache instance for app (created at import time; disables itself
# gracefully when REDIS_URL is unset or the server is unreachable).
cache = RedisCache()
|
||||||
515
app/services/core/assignment_service.py
Normal file
515
app/services/core/assignment_service.py
Normal file
@@ -0,0 +1,515 @@
|
|||||||
|
|
||||||
|
import logging
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
from math import radians, cos, sin, asin, sqrt
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
from collections import defaultdict
|
||||||
|
from app.config.rider_preferences import RIDER_PREFERRED_KITCHENS
|
||||||
|
from app.services.routing.kalman_filter import smooth_rider_locations, smooth_order_coordinates
|
||||||
|
from app.config.dynamic_config import get_config
|
||||||
|
from app.services.ml.ml_data_collector import get_collector
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class AssignmentService:
|
||||||
|
    def __init__(self):
        """Set up static preferences, geo constants, and the dynamic-config handle."""
        # Static per-rider kitchen preference mapping (from config module).
        self.rider_preferences = RIDER_PREFERRED_KITCHENS
        # Mean Earth radius in km, used by haversine().
        self.earth_radius_km = 6371
        # Dynamic config handle; hyperparameters are re-read via _load_config().
        self._cfg = get_config()
|
||||||
|
|
||||||
|
def _load_config(self):
|
||||||
|
"""Load ML-tuned hyperparams fresh on every assignment call."""
|
||||||
|
cfg = self._cfg
|
||||||
|
self.MAX_PICKUP_DISTANCE_KM = cfg.get("max_pickup_distance_km")
|
||||||
|
self.MAX_KITCHEN_DISTANCE_KM = cfg.get("max_kitchen_distance_km")
|
||||||
|
self.MAX_ORDERS_PER_RIDER = int(cfg.get("max_orders_per_rider"))
|
||||||
|
self.IDEAL_LOAD = int(cfg.get("ideal_load"))
|
||||||
|
self.WORKLOAD_BALANCE_THRESHOLD = cfg.get("workload_balance_threshold")
|
||||||
|
self.WORKLOAD_PENALTY_WEIGHT = cfg.get("workload_penalty_weight")
|
||||||
|
self.DISTANCE_PENALTY_WEIGHT = cfg.get("distance_penalty_weight")
|
||||||
|
self.PREFERENCE_BONUS = cfg.get("preference_bonus")
|
||||||
|
self.HOME_ZONE_BONUS_4KM = cfg.get("home_zone_bonus_4km")
|
||||||
|
self.HOME_ZONE_BONUS_2KM = cfg.get("home_zone_bonus_2km")
|
||||||
|
self.EMERGENCY_LOAD_PENALTY = cfg.get("emergency_load_penalty")
|
||||||
|
|
||||||
|
def haversine(self, lat1, lon1, lat2, lon2):
|
||||||
|
"""Calculate the great circle distance between two points."""
|
||||||
|
lon1, lat1, lon2, lat2 = map(radians, [float(lon1), float(lat1), float(lon2), float(lat2)])
|
||||||
|
dlon = lon2 - lon1
|
||||||
|
dlat = lat2 - lat1
|
||||||
|
a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
|
||||||
|
c = 2 * asin(min(1.0, sqrt(a))) # Clamp to 1.0 to avoid domain errors
|
||||||
|
return c * self.earth_radius_km
|
||||||
|
|
||||||
|
def get_lat_lon(self, obj: Dict[str, Any], prefix: str = "") -> tuple[float, float]:
|
||||||
|
"""Generic helper to extract lat/lon from diversely named keys."""
|
||||||
|
# Try specific prefixes first
|
||||||
|
candidates = [
|
||||||
|
(f"{prefix}lat", f"{prefix}lon"),
|
||||||
|
(f"{prefix}lat", f"{prefix}long"),
|
||||||
|
(f"{prefix}latitude", f"{prefix}longitude"),
|
||||||
|
]
|
||||||
|
# Also try standard keys if prefix fails
|
||||||
|
candidates.extend([
|
||||||
|
("lat", "lon"), ("latitude", "longitude"),
|
||||||
|
("pickuplat", "pickuplon"), ("pickuplat", "pickuplong"),
|
||||||
|
("deliverylat", "deliverylong"), ("droplat", "droplon")
|
||||||
|
])
|
||||||
|
|
||||||
|
for lat_key, lon_key in candidates:
|
||||||
|
if lat_key in obj and lon_key in obj and obj[lat_key] and obj[lon_key]:
|
||||||
|
try:
|
||||||
|
return float(obj[lat_key]), float(obj[lon_key])
|
||||||
|
except: pass
|
||||||
|
|
||||||
|
# Special case: nested 'pickup_location'
|
||||||
|
if "pickup_location" in obj:
|
||||||
|
return self.get_lat_lon(obj["pickup_location"])
|
||||||
|
|
||||||
|
return 0.0, 0.0
|
||||||
|
|
||||||
|
def get_order_kitchen(self, order: Dict[str, Any]) -> str:
    """Return the kitchen/store name for an order, or "Unknown".

    Checks a fixed list of commonly used key names in priority order and
    returns the first non-empty value, stripped of surrounding whitespace.
    """
    for key in ('storename', 'restaurantname', 'kitchenname',
                'partnername', 'store_name'):
        value = order.get(key)
        if value:
            return str(value).strip()
    return "Unknown"
|
def assign_orders(self, orders: List[Dict[str, Any]], riders: List[Dict[str, Any]], reshuffle: bool = False) -> tuple[Dict[int, List[Dict[str, Any]]], List[Dict[str, Any]]]:
    """
    ENHANCED: Cluster-Based Load-Balanced Assignment.

    Strategy:
    1. Cluster orders by kitchen proximity
    2. Calculate rider workload (current capacity usage)
    3. Assign clusters to best-fit riders (proximity + workload balance)
    4. Rebalance if needed

    If reshuffle=True, controlled randomness is injected into rider scoring
    so that retrying the same input can explore alternative assignments.

    Returns:
        (assignments, unassigned_orders) where assignments maps rider id ->
        list of order dicts, and unassigned_orders carry an
        "unassigned_reason" key explaining why they could not be placed.
    """
    from app.services.rider.rider_state_manager import RiderStateManager
    from app.services.routing.clustering_service import ClusteringService

    # -- Load ML-tuned hyperparameters (or defaults on first run) ------
    self._load_config()
    _call_start = time.time()

    # 0. Prep
    assignments: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
    unassigned_orders: List[Dict[str, Any]] = []
    rider_states: Dict[int, Dict[str, Any]] = {}  # Track live load per rider

    # 0a/0b. KALMAN FILTER - smooth rider GPS and order delivery
    # coordinates before scoring/clustering.
    riders = smooth_rider_locations(list(riders))
    orders = smooth_order_coordinates(list(orders))

    # 1. Parse and Filter Riders
    valid_riders = []
    BLOCKED_RIDERS = [1242, 1266, 1245, 1232, 1240, 1007]  # Test/Blocked IDs

    # Load existing per-rider state (persistence across runs)
    state_mgr = RiderStateManager()

    for r in riders:
        # Robust ID extraction across API key variants
        rid_raw = r.get("userid") or r.get("riderid") or r.get("id") or r.get("_id")
        try:
            rid = int(rid_raw)
        except (ValueError, TypeError):
            continue

        if rid in BLOCKED_RIDERS:
            continue

        # Robust status check: onduty (1 / "1" / True) OR active/idle/online
        is_onduty = str(r.get("onduty")) in ["1", "True"] or r.get("onduty") is True
        is_active = r.get("status") in ["active", "idle", "online"]
        if not (is_onduty or is_active):
            continue

        lat, lon = self.get_lat_lon(r)

        # Fetch previous state to know if they are already busy
        p_state = state_mgr.get_rider_state(rid)

        # Valid GPS wins; otherwise fall back to last drop, then home.
        if lat == 0 or lon == 0:
            if p_state['last_drop_lat']:
                lat, lon = p_state['last_drop_lat'], p_state['last_drop_lon']
            else:
                from app.config.rider_preferences import RIDER_HOME_LOCATIONS
                lat, lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))

        valid_riders.append({
            "id": rid,
            "lat": lat,
            "lon": lon,
            "obj": r
        })

        # Initialize rider state with existing workload
        # (~15 min per order, so minutes_remaining / 15 ~= pending orders).
        existing_load = p_state.get('minutes_remaining', 0) / 15
        rider_states[rid] = {
            'lat': lat,
            'lon': lon,
            'kitchens': set(),
            'count': int(existing_load),      # Start with existing workload
            'workload_score': existing_load   # For prioritization
        }

    if not valid_riders:
        logger.warning("No riders passed on-duty filter. Retrying with all available riders as emergency rescue...")
        # If no on-duty riders, take ANY rider provided by the API so the
        # orders still get assigned.
        for r in riders:
            # FIX: use the same robust ID extraction as the main loop; the
            # old int(r.get("userid", 0)) crashed on None or non-numeric
            # ids and ignored alternate id keys.
            rid_raw = r.get("userid") or r.get("riderid") or r.get("id") or r.get("_id")
            try:
                rid = int(rid_raw)
            except (ValueError, TypeError):
                continue
            if rid in BLOCKED_RIDERS:
                continue

            lat, lon = self.get_lat_lon(r)
            if lat == 0 or lon == 0:
                from app.config.rider_preferences import RIDER_HOME_LOCATIONS
                lat, lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))

            if lat != 0:
                valid_riders.append({"id": rid, "lat": lat, "lon": lon, "obj": r})
                rider_states[rid] = {
                    'lat': lat, 'lon': lon, 'kitchens': set(),
                    'count': 0, 'workload_score': 0
                }

    if not valid_riders:
        logger.error("DANGER: Absolutely no riders available for assignment.")
        # Mark all as unassigned
        for o in orders:
            o["unassigned_reason"] = "No riders found (check partner online status)."
            unassigned_orders.append(o)
        return assignments, unassigned_orders

    logger.info(f"Found {len(valid_riders)} active riders")

    # 2. CLUSTER ORDERS BY KITCHEN PROXIMITY (radius from ML config)
    clustering_service = ClusteringService()
    clusters = clustering_service.cluster_orders_by_kitchen(orders, max_cluster_radius_km=self.MAX_KITCHEN_DISTANCE_KM)
    logger.info(f"Created {len(clusters)} order clusters")

    # 3. ASSIGN CLUSTERS TO RIDERS (Load-Balanced)
    for cluster_idx, cluster in enumerate(clusters):
        centroid_lat, centroid_lon = cluster['centroid']
        cluster_orders = cluster['orders']
        cluster_size = len(cluster_orders)

        logger.info(f"Assigning cluster {cluster_idx+1}/{len(clusters)}: {cluster_size} orders at ({centroid_lat:.4f}, {centroid_lon:.4f})")

        # Find best riders for this cluster
        candidate_riders = []

        for r in valid_riders:
            rid = r["id"]
            r_state = rider_states[rid]

            # Distance to cluster centroid
            dist = self.haversine(r_state['lat'], r_state['lon'], centroid_lat, centroid_lon)

            # Preference bonus & distance bypass: substring match either way
            # between rider preferences and cluster kitchen names.
            prefs = self.rider_preferences.get(rid, [])
            has_preference = False
            for k_name in cluster['kitchen_names']:
                if any(p.lower() in k_name.lower() or k_name.lower() in p.lower() for p in prefs):
                    has_preference = True
                    break

            # Dynamic limit: configured radius, stretched to 10km for
            # preferred kitchens.
            allowed_dist = self.MAX_PICKUP_DISTANCE_KM
            if has_preference:
                allowed_dist = max(allowed_dist, 10.0)

            if dist > allowed_dist:
                continue

            # Workload utilization (0.0 to 1.0)
            utilization = r_state['count'] / self.MAX_ORDERS_PER_RIDER

            # Score (lower is better) - weights from DynamicConfig
            workload_penalty = utilization * self.WORKLOAD_PENALTY_WEIGHT
            distance_penalty = dist * self.DISTANCE_PENALTY_WEIGHT

            # Preference bonus (ML-tuned; expected to be <= 0 to attract)
            preference_bonus = self.PREFERENCE_BONUS if has_preference else 0

            # Home zone bonus (ML-tuned)
            from app.config.rider_preferences import RIDER_HOME_LOCATIONS
            h_lat, h_lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))
            home_bonus = 0
            if h_lat != 0:
                home_dist = self.haversine(h_lat, h_lon, centroid_lat, centroid_lon)
                if home_dist <= 4.0:
                    home_bonus = self.HOME_ZONE_BONUS_4KM
                if home_dist <= 2.0:
                    home_bonus = self.HOME_ZONE_BONUS_2KM

            score = workload_penalty + distance_penalty + preference_bonus + home_bonus

            # RESHUFFLE: controlled noise so retries explore different riders
            if reshuffle:
                score += random.uniform(-15.0, 15.0)

            candidate_riders.append({
                'id': rid,
                'score': score,
                'distance': dist,
                'utilization': utilization,
                'current_load': r_state['count']
            })

        if not candidate_riders:
            logger.warning(f"No riders available for cluster {cluster_idx+1}")
            for o in cluster_orders:
                o["unassigned_reason"] = f"No riders within {self.MAX_PICKUP_DISTANCE_KM}km radius of kitchen."
                unassigned_orders.append(o)
            continue

        # Sort by score (best first)
        candidate_riders.sort(key=lambda x: x['score'])

        # SMART DISTRIBUTION: split the cluster across riders if needed
        remaining_orders = cluster_orders[:]

        while remaining_orders and candidate_riders:
            best_rider = candidate_riders[0]
            rid = best_rider['id']
            r_state = rider_states[rid]

            available_capacity = self.MAX_ORDERS_PER_RIDER - r_state['count']
            if available_capacity <= 0:
                # Rider is full, remove from candidates
                candidate_riders.pop(0)
                continue

            # Batch size: an underutilized rider takes everything it can;
            # a busy rider is topped up conservatively to IDEAL_LOAD.
            if best_rider['utilization'] < self.WORKLOAD_BALANCE_THRESHOLD:
                batch_size = min(available_capacity, len(remaining_orders))
            else:
                batch_size = min(self.IDEAL_LOAD - r_state['count'], len(remaining_orders), available_capacity)
                batch_size = max(1, batch_size)  # At least 1 order

            # Assign batch
            batch = remaining_orders[:batch_size]
            remaining_orders = remaining_orders[batch_size:]
            assignments[rid].extend(batch)

            # Update rider state: the rider is now "at" the cluster centroid
            r_state['count'] += len(batch)
            r_state['lat'] = centroid_lat
            r_state['lon'] = centroid_lon
            r_state['kitchens'].update(cluster['kitchen_names'])
            r_state['workload_score'] = r_state['count'] / self.MAX_ORDERS_PER_RIDER

            logger.info(f" -> Assigned {len(batch)} orders to Rider {rid} (load: {r_state['count']}/{self.MAX_ORDERS_PER_RIDER})")

            # Re-score the rider we just loaded and re-sort candidates.
            # FIX: use the ML-tuned weights here too; the old code
            # hard-coded 100 / 2 and drifted from the initial scoring.
            # NOTE(review): the recalculated score intentionally omits the
            # preference/home bonuses and reshuffle noise, as the original
            # did — confirm whether they should be re-applied.
            for candidate in candidate_riders:
                if candidate['id'] == rid:
                    candidate['utilization'] = r_state['count'] / self.MAX_ORDERS_PER_RIDER
                    candidate['current_load'] = r_state['count']
                    workload_penalty = candidate['utilization'] * self.WORKLOAD_PENALTY_WEIGHT
                    distance_penalty = candidate['distance'] * self.DISTANCE_PENALTY_WEIGHT
                    candidate['score'] = workload_penalty + distance_penalty

            candidate_riders.sort(key=lambda x: x['score'])

        # Orders left after exhausting candidates go to the force pool
        if remaining_orders:
            unassigned_orders.extend(remaining_orders)

    # 4. EMERGENCY MANDATORY ASSIGNMENT (ensures 0 unassigned if riders exist)
    if unassigned_orders and valid_riders:
        logger.info(f"[ALERT] Starting Emergency Mandatory Assignment for {len(unassigned_orders)} orders...")
        force_pool = unassigned_orders[:]
        unassigned_orders.clear()

        for o in force_pool:
            # Determine pickup location
            o_lat, o_lon = self.get_lat_lon(o, prefix="pickup")
            if o_lat == 0:
                o["unassigned_reason"] = "Could not geolocate order (0,0)."
                unassigned_orders.append(o)
                continue

            # Find the 'least bad' rider (closest + balanced load)
            best_emergency_rider = None
            best_emergency_score = float('inf')

            for r in valid_riders:
                rid = r["id"]
                r_state = rider_states[rid]

                dist = self.haversine(r_state['lat'], r_state['lon'], o_lat, o_lon)
                # Distance matters, but the load term prevents one rider
                # taking EVERYTHING (ML-tuned penalty per existing order).
                e_score = dist + (r_state['count'] * self.EMERGENCY_LOAD_PENALTY)

                if e_score < best_emergency_score:
                    best_emergency_score = e_score
                    best_emergency_rider = rid

            # FIX: explicit None check — a rider with id 0 is falsy, so the
            # old truthiness test silently dropped its assignment.
            if best_emergency_rider is not None:
                assignments[best_emergency_rider].append(o)
                rider_states[best_emergency_rider]['count'] += 1
                logger.info(f" Force-Assigned order {o.get('orderid')} to Rider {best_emergency_rider} (Score: {best_emergency_score:.2f})")
            else:
                unassigned_orders.append(o)

    # 5. FINAL REBALANCING (Optional): shift orders off overloaded riders
    self._rebalance_workload(assignments, rider_states, valid_riders)

    # 6. Commit State and History
    self._post_process(assignments, rider_states)

    # 7. -- ML DATA COLLECTION -----------------------------------------
    try:
        elapsed_ms = (time.time() - _call_start) * 1000
        get_collector().log_assignment_event(
            num_orders=len(orders),
            num_riders=len(riders),
            hyperparams=self._cfg.get_all(),
            assignments=assignments,
            unassigned_count=len(unassigned_orders),
            elapsed_ms=elapsed_ms,
        )
    except Exception as _ml_err:
        # Best-effort telemetry: never fail an assignment run over logging.
        logger.debug(f"ML logging skipped: {_ml_err}")

    # Log final distribution
    logger.info("=" * 50)
    logger.info("FINAL ASSIGNMENT DISTRIBUTION:")
    # FIX: loop variable renamed so it no longer shadows the `orders`
    # parameter.
    for rid, assigned in sorted(assignments.items()):
        logger.info(f" Rider {rid}: {len(assigned)} orders")

    if unassigned_orders:
        logger.warning(f" [ALERT] STILL UNASSIGNED: {len(unassigned_orders)} (Reason: No riders online or invalid coords)")
    else:
        logger.info(" [OK] ALL ORDERS ASSIGNED SUCCESSFULLY")
    logger.info("=" * 50)

    return assignments, unassigned_orders
||||||
|
def _rebalance_workload(self, assignments: Dict[int, List], rider_states: Dict, valid_riders: List):
    """
    Rebalance if workload is heavily skewed.
    Move orders from overloaded riders to idle ones if possible.

    Mutates `assignments` and `rider_states` in place; returns None.
    "Overloaded" = load > 1.5x the mean AND above IDEAL_LOAD;
    "underutilized" = load < 0.5x the mean.
    """
    if not assignments:
        return

    # Calculate average load across ALL valid riders (not just those with
    # assignments), so idle riders pull the average down.
    total_orders = sum(len(orders) for orders in assignments.values())
    avg_load = total_orders / len(valid_riders) if valid_riders else 0

    # Find overloaded and underutilized riders
    overloaded = []
    underutilized = []

    for r in valid_riders:
        rid = r['id']
        load = rider_states[rid]['count']

        if load > avg_load * 1.5 and load > self.IDEAL_LOAD:  # 50% above average
            overloaded.append(rid)
        elif load < avg_load * 0.5:  # 50% below average
            underutilized.append(rid)

    if not overloaded or not underutilized:
        return

    logger.info(f"Rebalancing: {len(overloaded)} overloaded, {len(underutilized)} underutilized riders")

    # Try to move orders from overloaded to underutilized
    for over_rid in overloaded:
        # NOTE: over_orders aliases assignments[over_rid]; removals below
        # update the shared assignment map directly.
        over_orders = assignments[over_rid]
        over_state = rider_states[over_rid]

        # Try to offload some orders
        for under_rid in underutilized:
            under_state = rider_states[under_rid]
            under_capacity = self.MAX_ORDERS_PER_RIDER - under_state['count']

            if under_capacity <= 0:
                continue

            # Find orders that are closer to underutilized rider.
            # Orders without a resolvable pickup location are skipped.
            transferable = []
            for order in over_orders:
                o_lat, o_lon = self.get_lat_lon(order, prefix="pickup")
                if o_lat == 0:
                    continue

                dist_to_under = self.haversine(under_state['lat'], under_state['lon'], o_lat, o_lon)
                dist_to_over = self.haversine(over_state['lat'], over_state['lon'], o_lat, o_lon)

                # Transfer if underutilized rider is closer or similar
                # distance (within 20% of the overloaded rider's distance).
                if dist_to_under <= self.MAX_PICKUP_DISTANCE_KM and dist_to_under <= dist_to_over * 1.2:
                    transferable.append(order)

            if transferable:
                # Transfer up to capacity, but never drop the overloaded
                # rider below IDEAL_LOAD.
                transfer_count = min(len(transferable), under_capacity, over_state['count'] - self.IDEAL_LOAD)
                transfer_batch = transferable[:transfer_count]

                # Move orders (list.remove matches by dict equality; orders
                # are assumed distinct enough for this — TODO confirm).
                for order in transfer_batch:
                    over_orders.remove(order)
                    assignments[under_rid].append(order)

                # Update states
                over_state['count'] -= len(transfer_batch)
                under_state['count'] += len(transfer_batch)

                logger.info(f" Rebalanced: {len(transfer_batch)} orders from Rider {over_rid} -> {under_rid}")
||||||
|
def _post_process(self, assignments, rider_states):
    """Persist per-rider history and live state after an assignment run."""
    from app.services.rider.rider_history_service import RiderHistoryService
    from app.services.rider.rider_state_manager import RiderStateManager

    import time

    history_service = RiderHistoryService()
    state_mgr = RiderStateManager()
    now = time.time()

    for rid, rider_orders in assignments.items():
        if not rider_orders:
            continue

        # Record run stats (fixed 5.0 km estimate, plus order count).
        history_service.update_rider_stats(rid, 5.0, len(rider_orders))

        # Snapshot the rider's final position/load into persistent state
        # (~15 minutes of remaining work per order).
        snapshot = rider_states[rid]
        state_mgr.states[rid] = {
            'minutes_remaining': len(rider_orders) * 15,
            'last_drop_lat': snapshot['lat'],
            'last_drop_lon': snapshot['lon'],
            'active_kitchens': snapshot['kitchens'],
            'last_updated_ts': now,
        }

    state_mgr._save_states()
||||||
311
app/services/ml/behavior_analyzer.py
Normal file
311
app/services/ml/behavior_analyzer.py
Normal file
@@ -0,0 +1,311 @@
|
|||||||
|
"""
|
||||||
|
Behavior Analyzer - Production Grade
|
||||||
|
======================================
|
||||||
|
Analyzes historical assignment data using the ID3 decision tree to classify
|
||||||
|
assignment outcomes as 'SUCCESS' or 'RISK'.
|
||||||
|
|
||||||
|
Key fixes and upgrades over the original
|
||||||
|
------------------------------------------
|
||||||
|
1. BUG FIX: distance_band now uses `total_distance_km` (not `num_orders`).
|
||||||
|
2. BUG FIX: time_band input is always normalized to uppercase before predict.
|
||||||
|
3. Rich feature set: distance_band, time_band, load_band, order_density_band.
|
||||||
|
4. Returns (label, confidence) from the classifier - exposes uncertainty.
|
||||||
|
5. Trend analysis: tracks rolling success rate over recent N windows.
|
||||||
|
6. Tree persistence: saves/loads trained tree as JSON to survive restarts.
|
||||||
|
7. Feature importance proxy: logs which features drove the split.
|
||||||
|
8. Thread-safe lazy training via a simple lock.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import threading
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
from app.services.ml.id3_classifier import ID3Classifier, get_behavior_model
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_DB_PATH = os.getenv("ML_DB_PATH", "ml_data/ml_store.db")
|
||||||
|
_TREE_PATH = os.getenv("ML_TREE_PATH", "ml_data/behavior_tree.json")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Band encoders (discrete labels for ID3)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def distance_band(km: float) -> str:
    """Bucket a total route distance (km) into a discrete band."""
    for limit, label in ((5.0, "SHORT"), (15.0, "MID"), (30.0, "LONG")):
        if km <= limit:
            return label
    return "VERY_LONG"
|
||||||
|
def time_band(ts_str: str) -> str:
    """Map an ISO timestamp to a time-of-day band ("UNKNOWN" on parse failure)."""
    try:
        hour = datetime.fromisoformat(ts_str).hour
    except Exception:
        return "UNKNOWN"

    # Half-open [start, end) hour ranges; anything outside falls through
    # to LATE_NIGHT (23:00-06:00).
    ranges = (
        (6, 10, "MORNING_RUSH"),
        (10, 12, "LATE_MORNING"),
        (12, 14, "LUNCH_RUSH"),
        (14, 17, "AFTERNOON"),
        (17, 20, "EVENING_RUSH"),
        (20, 23, "NIGHT"),
    )
    for start, end, label in ranges:
        if start <= hour < end:
            return label
    return "LATE_NIGHT"
|
def load_band(avg_load: float) -> str:
    """Bucket average orders-per-rider into a load band."""
    for limit, label in ((2.0, "LIGHT"), (5.0, "MODERATE"), (8.0, "HEAVY")):
        if avg_load <= limit:
            return label
    return "OVERLOADED"
|
||||||
|
def order_density_band(num_orders: int, num_riders: int) -> str:
    """Bucket the orders-per-available-rider ratio into a density band."""
    if num_riders == 0:
        # Guard against division by zero: no riders at all.
        return "NO_RIDERS"

    ratio = num_orders / num_riders
    if ratio <= 2.0:
        return "SPARSE"
    if ratio <= 5.0:
        return "NORMAL"
    return "DENSE" if ratio <= 9.0 else "OVERLOADED"
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Behavior Analyzer
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class BehaviorAnalyzer:
    """
    Trains an ID3 tree on historical assignment logs and predicts whether
    a new assignment context is likely to SUCCEED or be at RISK.

    Features used
    -------------
    - distance_band       : total route distance bucket
    - time_band           : time-of-day bucket
    - load_band           : average load per rider bucket
    - order_density_band  : orders-per-rider ratio bucket

    Target
    ------
    - is_success: "SUCCESS" if unassigned_count == 0, else "RISK"
    """

    TARGET = "is_success"
    FEATURES = ["distance_band", "time_band", "load_band", "order_density_band"]

    def __init__(self):
        # Paths come from env-overridable module constants.
        self._db_path = _DB_PATH
        self._tree_path = _TREE_PATH
        self.model: ID3Classifier = get_behavior_model(max_depth=5)
        self.is_trained: bool = False
        self._lock = threading.Lock()         # serializes retraining
        self._training_size: int = 0          # rows used in last training
        self._success_rate: float = 0.0       # SUCCESS fraction of training set
        self._rules: List[str] = []           # human-readable tree rules
        self._recent_trend: List[float] = []  # rolling success rates

        # Restore a persisted tree (if any) so the model survives restarts.
        self._load_tree()

    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------

    def train_on_history(self, limit: int = 2000) -> Dict[str, Any]:
        """Fetch the most recent rows from SQLite and rebuild the tree.

        Returns a status dict: "ok" with training stats on success, or
        "insufficient_data" / "preprocess_failed" / "error" otherwise.
        Never raises — failures are logged and reported in the dict.
        """
        with self._lock:
            try:
                rows = self._fetch_rows(limit)
                if len(rows) < 10:
                    logger.warning(f"ID3 BehaviorAnalyzer: only {len(rows)} rows - need >=10.")
                    return {"status": "insufficient_data", "rows": len(rows)}

                training_data, successes = self._preprocess(rows)

                if not training_data:
                    return {"status": "preprocess_failed", "rows": len(rows)}

                self.model.train(
                    data=training_data,
                    target=self.TARGET,
                    features=self.FEATURES,
                )
                self.is_trained = True
                self._training_size = len(training_data)
                self._success_rate = successes / len(training_data)
                self._rules = self.model.get_tree_rules()
                self._compute_trend(rows)
                self._save_tree()

                summary = {
                    "status": "ok",
                    "training_rows": self._training_size,
                    "success_rate": round(self._success_rate, 4),
                    "n_rules": len(self._rules),
                    "classes": self.model.classes,
                    "feature_values": self.model.feature_values,
                }
                logger.info(
                    f"ID3 BehaviorAnalyzer trained - rows={self._training_size}, "
                    f"success_rate={self._success_rate:.1%}, rules={len(self._rules)}"
                )
                return summary

            except Exception as e:
                logger.error(f"ID3 BehaviorAnalyzer training failed: {e}", exc_info=True)
                return {"status": "error", "message": str(e)}

    # ------------------------------------------------------------------
    # Prediction
    # ------------------------------------------------------------------

    def predict(self, distance_km: float, timestamp_or_band: str,
                avg_load: float = 4.0, num_orders: int = 5,
                num_riders: int = 2) -> Dict[str, Any]:
        """Predict whether an assignment context will SUCCEED or be at RISK.

        ``timestamp_or_band`` accepts either an ISO timestamp or an already
        computed time-band name (case-insensitive).
        """
        if not self.is_trained:
            # Optimistic safe default when no model is available yet.
            return {
                "label": "SUCCESS",
                "confidence": 0.5,
                "features_used": {},
                "model_trained": False,
            }

        KNOWN_BANDS = {
            "MORNING_RUSH", "LATE_MORNING", "LUNCH_RUSH",
            "AFTERNOON", "EVENING_RUSH", "NIGHT", "LATE_NIGHT", "UNKNOWN"
        }
        # FIX: uppercase once instead of twice.
        upper_band = timestamp_or_band.upper()
        t_band = upper_band if upper_band in KNOWN_BANDS else time_band(timestamp_or_band)

        features_used = {
            "distance_band": distance_band(distance_km),
            "time_band": t_band,
            "load_band": load_band(avg_load),
            "order_density_band": order_density_band(num_orders, num_riders),
        }

        label, confidence = self.model.predict(features_used)
        return {
            "label": label,
            "confidence": round(confidence, 4),
            "features_used": features_used,
            "model_trained": True,
        }

    # ------------------------------------------------------------------
    # Info / Diagnostics
    # ------------------------------------------------------------------

    def get_info(self) -> Dict[str, Any]:
        """Return a diagnostics snapshot of the model state (no side effects)."""
        return {
            "is_trained": self.is_trained,
            "training_rows": self._training_size,
            "success_rate": round(self._success_rate, 4),
            "n_rules": len(self._rules),
            "rules": self._rules[:20],
            "recent_trend": self._recent_trend,
            "feature_names": self.FEATURES,
            "feature_values": self.model.feature_values if self.is_trained else {},
            "classes": self.model.classes if self.is_trained else [],
        }

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _fetch_rows(self, limit: int) -> List[Dict]:
        """Load the most recent `limit` log rows as plain dicts."""
        conn = sqlite3.connect(self._db_path)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(
                "SELECT * FROM assignment_ml_log ORDER BY id DESC LIMIT ?", (limit,)
            ).fetchall()
            return [dict(r) for r in rows]
        finally:
            # FIX: close the connection even when the query raises; the old
            # code leaked the handle on any sqlite error.
            conn.close()

    def _preprocess(self, rows: List[Dict]) -> Tuple[List[Dict], int]:
        """Convert raw log rows into banded training samples.

        Returns (training_data, success_count); malformed rows are skipped.
        """
        training_data: List[Dict] = []
        successes = 0
        for r in rows:
            try:
                dist_km = float(r.get("total_distance_km") or 0.0)
                ts = str(r.get("timestamp") or "")
                avg_ld = float(r.get("avg_load") or 0.0)
                n_orders = int(r.get("num_orders") or 0)
                n_riders = int(r.get("num_riders") or 1)
                unassigned = int(r.get("unassigned_count") or 0)

                label = "SUCCESS" if unassigned == 0 else "RISK"
                if label == "SUCCESS":
                    successes += 1

                training_data.append({
                    "distance_band": distance_band(dist_km),
                    "time_band": time_band(ts),
                    "load_band": load_band(avg_ld),
                    "order_density_band": order_density_band(n_orders, n_riders),
                    self.TARGET: label,
                })
            except Exception:
                # Skip rows with unparseable fields rather than aborting.
                continue
        return training_data, successes

    def _compute_trend(self, rows: List[Dict], window: int = 50) -> None:
        """Compute rolling success rate per `window` rows (newest first)."""
        trend = []
        for i in range(0, len(rows), window):
            chunk = rows[i:i + window]
            if not chunk:
                break
            # FIX: treat a missing unassigned_count as 0 (success), matching
            # _preprocess; the old default of 1 silently counted missing
            # values as failures.
            rate = sum(1 for r in chunk if int(r.get("unassigned_count") or 0) == 0) / len(chunk)
            trend.append(round(rate, 4))
        # Keep only the 20 most recent windows.
        self._recent_trend = trend[-20:]

    def _save_tree(self) -> None:
        """Persist the trained tree as JSON (best-effort, never raises)."""
        try:
            os.makedirs(os.path.dirname(self._tree_path) or ".", exist_ok=True)
            with open(self._tree_path, "w") as f:
                f.write(self.model.to_json())
            logger.info(f"ID3 tree persisted -> {self._tree_path}")
        except Exception as e:
            logger.warning(f"ID3 tree save failed: {e}")

    def _load_tree(self) -> None:
        """Restore a persisted tree if present (best-effort, never raises)."""
        try:
            if not os.path.exists(self._tree_path):
                return
            with open(self._tree_path) as f:
                self.model = ID3Classifier.from_json(f.read())
            self.is_trained = True
            self._rules = self.model.get_tree_rules()
            logger.info(f"ID3 tree restored - rules={len(self._rules)}")
        except Exception as e:
            logger.warning(f"ID3 tree load failed (will retrain): {e}")
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Module-level singleton
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Process-wide singleton instance, created lazily by get_analyzer().
_analyzer: Optional[BehaviorAnalyzer] = None
# Guards creation and first-time training of the singleton.
_analyzer_lock = threading.Lock()


def get_analyzer() -> BehaviorAnalyzer:
    """Return the shared BehaviorAnalyzer, creating it lazily.

    Thread-safe: construction and the first training pass happen under a
    module-level lock, so concurrent callers get the same instance.
    """
    global _analyzer
    with _analyzer_lock:
        if _analyzer is None:
            _analyzer = BehaviorAnalyzer()
            if not _analyzer.is_trained:
                # No persisted tree was restored: train from history now.
                _analyzer.train_on_history()
        return _analyzer
|
||||||
400
app/services/ml/id3_classifier.py
Normal file
400
app/services/ml/id3_classifier.py
Normal file
@@ -0,0 +1,400 @@
|
|||||||
|
"""
|
||||||
|
ID3 Classifier - Production Grade
|
||||||
|
|
||||||
|
Improvements over v1:
|
||||||
|
- Chi-squared pruning to prevent overfitting on sparse branches
|
||||||
|
- Confidence scores on every prediction (Laplace smoothed)
|
||||||
|
- Gain-ratio variant for high-cardinality features
|
||||||
|
- Serialization (to_dict / from_dict / to_json / from_json)
|
||||||
|
- Per-feature importance scores
|
||||||
|
- Full prediction audit trail via explain()
|
||||||
|
- min_samples_split and min_info_gain stopping criteria
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from collections import Counter
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ID3Classifier:
    """
    ID3 decision tree (entropy / information-gain splitting).

    All predict* methods work even if the model has never been trained -
    they return safe defaults rather than raising.

    Tree representation (plain dicts so the model is JSON-serializable):
      internal node: {"__feature__", "__gain__", "__n__", "__branches__"}
                     where __branches__ maps str(value) -> child node
      leaf node:     {"__leaf__", "__label__", "__proba__", "__n__"}
                     with Laplace-smoothed class probabilities.
    """

    def __init__(
        self,
        max_depth: int = 6,
        min_samples_split: int = 5,
        min_info_gain: float = 0.001,
        use_gain_ratio: bool = False,
        chi2_pruning: bool = True,
    ):
        # Hyperparameters: stopping criteria, split scoring, pruning.
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_info_gain = min_info_gain
        self.use_gain_ratio = use_gain_ratio
        self.chi2_pruning = chi2_pruning

        # Learned state; populated by train() or from_dict().
        self.tree: Any = None
        self.features: List[str] = []
        self.target: str = ""
        self.classes_: List[str] = []
        self.feature_importances_: Dict[str, float] = {}
        self.feature_values: Dict[str, List[str]] = {}  # unique values seen per feature
        self._n_samples: int = 0
        self._total_gain: Dict[str, float] = {}  # raw (unnormalized) gain per feature

    # ------------------------------------------------------------------ train

    def train(self, data: List[Dict[str, Any]], target: str, features: List[str]) -> None:
        """
        Fit the tree on `data` (a list of row dicts).

        Args:
            data: training rows; feature/label values are compared by str().
            target: key of the class label in each row.
            features: keys of the categorical features to split on.
        """
        if not data:
            logging.getLogger(__name__).warning("ID3: train() called with empty data.")
            return

        self.target = target
        self.features = list(features)
        self.classes_ = sorted({str(row.get(target)) for row in data})
        self._total_gain = {f: 0.0 for f in features}
        self._n_samples = len(data)

        # Collect unique values per feature for dashboard display
        self.feature_values = {
            f: sorted({str(row.get(f)) for row in data if row.get(f) is not None})
            for f in features
        }

        self.tree = self._build_tree(data, list(features), target, depth=0)

        if self.chi2_pruning:
            self.tree = self._prune(self.tree, data, target)

        # Normalize accumulated gains into importances summing to ~1.0.
        total_gain = sum(self._total_gain.values()) or 1.0
        self.feature_importances_ = {
            f: round(v / total_gain, 4) for f, v in self._total_gain.items()
        }
        # Logger fetched locally so the class body has no free module-level
        # names (keeps it importable and testable standalone).
        logging.getLogger(__name__).info(
            f"ID3: trained on {len(data)} samples | "
            f"classes={self.classes_} | importances={self.feature_importances_}"
        )

    # ----------------------------------------------------------- predict API

    def predict(self, sample: Dict[str, Any]) -> Tuple[str, float]:
        """Return (label, confidence 0-1). Safe to call before training."""
        if self.tree is None:
            return "Unknown", 0.0
        label, proba = self._classify(self.tree, sample, [])
        confidence = proba.get(str(label), 0.0) if isinstance(proba, dict) else 1.0
        return str(label), round(confidence, 4)

    def predict_proba(self, sample: Dict[str, Any]) -> Dict[str, float]:
        """Full class probability distribution ({} before training)."""
        if self.tree is None:
            return {}
        _, proba = self._classify(self.tree, sample, [])
        return proba if isinstance(proba, dict) else {str(proba): 1.0}

    def explain(self, sample: Dict[str, Any]) -> Dict[str, Any]:
        """Human-readable decision path for audit / dashboard display."""
        if self.tree is None:
            return {"prediction": "Unknown", "confidence": 0.0, "decision_path": []}
        path: List[str] = []
        label, proba = self._classify(self.tree, sample, path)
        return {
            "prediction": str(label),
            # FIX: default to 0.0 (was 1.0) so a label missing from the
            # distribution is not reported as fully confident - now
            # consistent with predict().
            "confidence": round(proba.get(str(label), 0.0), 4),
            "probabilities": proba,
            "decision_path": path,
        }

    # ---------------------------------------------------------- serialisation

    def to_dict(self) -> Dict[str, Any]:
        """Snapshot of the model (tree + metadata + hyperparameters)."""
        return {
            "tree": self.tree,
            "features": self.features,
            "target": self.target,
            "classes": self.classes_,
            "feature_importances": self.feature_importances_,
            "feature_values": self.feature_values,
            "n_samples": self._n_samples,
            "params": {
                "max_depth": self.max_depth,
                "min_samples_split": self.min_samples_split,
                "min_info_gain": self.min_info_gain,
                "use_gain_ratio": self.use_gain_ratio,
                "chi2_pruning": self.chi2_pruning,
            },
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "ID3Classifier":
        """Rebuild a trained classifier from a to_dict() snapshot.

        Note: raw per-feature gains (_total_gain) are not persisted; only
        the normalized feature_importances_ survive a round-trip.
        """
        p = d.get("params", {})
        obj = cls(
            max_depth=p.get("max_depth", 6),
            min_samples_split=p.get("min_samples_split", 5),
            min_info_gain=p.get("min_info_gain", 0.001),
            use_gain_ratio=p.get("use_gain_ratio", False),
            chi2_pruning=p.get("chi2_pruning", True),
        )
        obj.tree = d["tree"]
        obj.features = d["features"]
        obj.target = d["target"]
        obj.classes_ = d["classes"]
        obj.feature_importances_ = d.get("feature_importances", {})
        obj.feature_values = d.get("feature_values", {})
        obj._n_samples = d.get("n_samples", 0)
        return obj

    def to_json(self) -> str:
        """JSON form of to_dict()."""
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, s: str) -> "ID3Classifier":
        """Inverse of to_json()."""
        return cls.from_dict(json.loads(s))

    def summary(self) -> Dict[str, Any]:
        """Compact model overview for dashboards / health endpoints."""
        return {
            "n_samples": self._n_samples,
            "n_classes": len(self.classes_),
            "classes": self.classes_,
            "n_features": len(self.features),
            "feature_importances": self.feature_importances_,
            "feature_values": self.feature_values,
            "trained": self.tree is not None,
        }

    @property
    def classes(self) -> List[str]:
        """Alias for classes_ for compatibility."""
        return self.classes_

    def get_tree_rules(self) -> List[str]:
        """Extract human-readable if/then rules from the trained tree."""
        rules: List[str] = []
        if self.tree is None:
            return rules
        self._extract_rules(self.tree, [], rules)
        return rules

    def _extract_rules(self, node: Any, conditions: List[str], rules: List[str]) -> None:
        """Recursively walk the tree and collect decision paths as strings."""
        if not isinstance(node, dict):
            return
        if node.get("__leaf__"):
            label = node.get("__label__", "?")
            proba = node.get("__proba__", {})
            conf = proba.get(str(label), 0.0)
            prefix = " AND ".join(conditions) if conditions else "(root)"
            rules.append(f"{prefix} => {label} ({conf:.0%})")
            return
        feature = node.get("__feature__", "?")
        for val, child in node.get("__branches__", {}).items():
            self._extract_rules(child, conditions + [f"{feature}={val}"], rules)

    # --------------------------------------------------------- tree building

    def _build_tree(
        self,
        data: List[Dict[str, Any]],
        features: List[str],
        target: str,
        depth: int,
    ) -> Any:
        """Recursively grow the tree; returns a node dict (internal or leaf)."""
        counts = Counter(str(row.get(target)) for row in data)

        # Pure node
        if len(counts) == 1:
            return self._make_leaf(data, target)

        # Stopping criteria
        if not features or depth >= self.max_depth or len(data) < self.min_samples_split:
            return self._make_leaf(data, target)

        best_f, best_gain = self._best_split(data, features, target)
        if best_f is None or best_gain < self.min_info_gain:
            return self._make_leaf(data, target)

        self._total_gain[best_f] = self._total_gain.get(best_f, 0.0) + best_gain

        remaining = [f for f in features if f != best_f]
        node = {
            "__feature__": best_f,
            "__gain__": round(best_gain, 6),
            "__n__": len(data),
            "__branches__": {},
        }
        # FIX: partition rows by str(value), matching how _classify and
        # _prune compare branch keys. The previous raw-value partition could
        # silently overwrite a branch when two distinct raw values (e.g.
        # 1 and "1") shared the same string form. Bucketing in data order
        # also makes branch ordering deterministic (set iteration was not).
        buckets: Dict[str, List[Dict[str, Any]]] = {}
        for row in data:
            buckets.setdefault(str(row.get(best_f)), []).append(row)
        for val, subset in buckets.items():
            node["__branches__"][val] = self._build_tree(
                subset, remaining, target, depth + 1
            )
        return node

    def _make_leaf(self, data: List[Dict[str, Any]], target: str) -> Dict[str, Any]:
        """Build a leaf with Laplace-smoothed class probabilities."""
        counts = Counter(str(row.get(target)) for row in data)
        total = len(data)
        k = len(self.classes_) or 1
        # Laplace smoothing: +1 per class so no probability is ever 0.
        proba = {
            cls: round((counts.get(cls, 0) + 1) / (total + k), 4)
            for cls in self.classes_
        }
        label = max(proba, key=proba.get)
        return {"__leaf__": True, "__label__": label, "__proba__": proba, "__n__": total}

    # ---------------------------------------------------------- splitting

    def _best_split(
        self, data: List[Dict[str, Any]], features: List[str], target: str
    ) -> Tuple[Optional[str], float]:
        """Return (feature, gain) of the best split, or (None, -1.0)."""
        base_e = self._entropy(data, target)
        best_f, best_gain = None, -1.0
        for f in features:
            gain = self._info_gain(data, f, target, base_e)
            if self.use_gain_ratio:
                # Gain ratio penalizes high-cardinality features.
                si = self._split_info(data, f)
                gain = gain / si if si > 0 else 0.0
            if gain > best_gain:
                best_gain = gain
                best_f = f
        return best_f, best_gain

    # ----------------------------------------------------------- pruning

    def _prune(self, node: Any, data: List[Dict[str, Any]], target: str) -> Any:
        """Bottom-up chi-squared pruning: collapse insignificant splits."""
        if not isinstance(node, dict) or node.get("__leaf__"):
            return node
        feature = node["__feature__"]
        # Recurse children first
        for val in list(node["__branches__"].keys()):
            subset = [r for r in data if str(r.get(feature)) == str(val)]
            node["__branches__"][val] = self._prune(node["__branches__"][val], subset, target)
        # Chi-squared test: if split is not significant, collapse to leaf
        if not self._chi2_significant(data, feature, target):
            return self._make_leaf(data, target)
        return node

    def _chi2_significant(
        self, data: List[Dict[str, Any]], feature: str, target: str
    ) -> bool:
        """Pearson chi-squared independence test at p=0.05 for this split."""
        classes = self.classes_
        feature_vals = list({str(r.get(feature)) for r in data})
        if not classes or len(feature_vals) < 2:
            return False
        total = len(data)
        class_totals = Counter(str(r.get(target)) for r in data)
        chi2 = 0.0
        for val in feature_vals:
            subset = [r for r in data if str(r.get(feature)) == val]
            n_val = len(subset)
            val_counts = Counter(str(r.get(target)) for r in subset)
            for cls in classes:
                observed = val_counts.get(cls, 0)
                expected = (n_val * class_totals.get(cls, 0)) / total
                if expected > 0:
                    chi2 += (observed - expected) ** 2 / expected
        df = (len(feature_vals) - 1) * (len(classes) - 1)
        if df <= 0:
            return False
        # Critical values at p=0.05; crude linear extrapolation beyond df=6.
        crit_table = {1: 3.841, 2: 5.991, 3: 7.815, 4: 9.488, 5: 11.070, 6: 12.592}
        crit = crit_table.get(df, 3.841 * df)
        return chi2 > crit

    # ---------------------------------------------------------- classify

    def _classify(
        self, node: Any, row: Dict[str, Any], path: List[str]
    ) -> Tuple[Any, Any]:
        """Walk the tree for `row`, appending each step to `path`.

        Unseen feature values fall back to a sample-weighted vote over the
        node's leaf children (non-leaf children are ignored by design).
        """
        if not isinstance(node, dict):
            return node, {str(node): 1.0}
        if node.get("__leaf__"):
            label = node["__label__"]
            proba = node["__proba__"]
            path.append(f"predict={label} (p={proba.get(label, 0):.2f})")
            return label, proba

        feature = node["__feature__"]
        value = str(row.get(feature, ""))
        path.append(f"{feature}={value}")

        branches = node["__branches__"]
        if value in branches:
            return self._classify(branches[value], row, path)

        # Unseen value: weighted vote from all leaf children
        all_proba: Counter = Counter()
        total_n = 0
        for child in branches.values():
            if isinstance(child, dict) and child.get("__leaf__"):
                n = child.get("__n__", 1)
                total_n += n
                for cls, p in child.get("__proba__", {}).items():
                    all_proba[cls] += p * n

        if not total_n:
            fallback = self.classes_[0] if self.classes_ else "Unknown"
            path.append(f"unseen fallback: {fallback}")
            return fallback, {fallback: 1.0}

        proba = {cls: round(v / total_n, 4) for cls, v in all_proba.items()}
        label = max(proba, key=proba.get)
        path.append(f"weighted vote: {label}")
        return label, proba

    # ---------------------------------------------------------- entropy math

    def _entropy(self, data: List[Dict[str, Any]], target: str) -> float:
        """Shannon entropy (bits) of the target distribution in `data`."""
        if not data:
            return 0.0
        counts = Counter(str(row.get(target)) for row in data)
        total = len(data)
        return -sum((c / total) * math.log2(c / total) for c in counts.values() if c > 0)

    def _info_gain(
        self,
        data: List[Dict[str, Any]],
        feature: str,
        target: str,
        base_entropy: Optional[float] = None,
    ) -> float:
        """Information gain of splitting `data` on `feature`."""
        if base_entropy is None:
            base_entropy = self._entropy(data, target)
        total = len(data)
        # FIX: group by str(value) so gain is measured on the exact same
        # partition _build_tree actually creates.
        buckets: Dict[str, list] = {}
        for row in data:
            buckets.setdefault(str(row.get(feature)), []).append(row)
        weighted = sum(
            (len(sub) / total) * self._entropy(sub, target) for sub in buckets.values()
        )
        return base_entropy - weighted

    def _split_info(self, data: List[Dict[str, Any]], feature: str) -> float:
        """Split information (denominator of the gain-ratio criterion)."""
        total = len(data)
        # str() normalization keeps this consistent with _info_gain's buckets.
        counts = Counter(str(row.get(feature)) for row in data)
        return -sum((c / total) * math.log2(c / total) for c in counts.values() if c > 0)
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ factory
|
||||||
|
|
||||||
|
def get_behavior_model(
    max_depth: int = 5,
    min_samples_split: int = 8,
    min_info_gain: float = 0.005,
    use_gain_ratio: bool = True,
    chi2_pruning: bool = True,
) -> ID3Classifier:
    """Factory: an ID3Classifier configured with production defaults.

    The defaults deliberately differ from the class's own (shallower tree,
    larger split minimum, gain-ratio enabled) to favor generalization on
    live operational data.
    """
    config = {
        "max_depth": max_depth,
        "min_samples_split": min_samples_split,
        "min_info_gain": min_info_gain,
        "use_gain_ratio": use_gain_ratio,
        "chi2_pruning": chi2_pruning,
    }
    return ID3Classifier(**config)
|
||||||
539
app/services/ml/ml_data_collector.py
Normal file
539
app/services/ml/ml_data_collector.py
Normal file
@@ -0,0 +1,539 @@
|
|||||||
|
"""
|
||||||
|
ML Data Collector - Production Grade
|
||||||
|
======================================
|
||||||
|
Logs every assignment call (inputs + outcomes) to SQLite.
|
||||||
|
|
||||||
|
Key upgrades over the original
|
||||||
|
--------------------------------
|
||||||
|
1. FROZEN historical scores - quality_score is written ONCE at log time.
|
||||||
|
get_training_data() returns scores as-is from the DB (no retroactive mutation).
|
||||||
|
2. Rich schema - zone_id, city_id, is_peak, weather_code,
|
||||||
|
sla_breached, avg_delivery_time_min for richer features.
|
||||||
|
3. SLA tracking - logs whether delivery SLA was breached.
|
||||||
|
4. Analytics API - get_hourly_stats(), get_strategy_comparison(),
|
||||||
|
get_quality_histogram(), get_zone_stats() for dashboard consumption.
|
||||||
|
5. Thread-safe writes - connection-per-write pattern for FastAPI workers.
|
||||||
|
6. Indexed columns - timestamp, ml_strategy, zone_id for fast queries.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import threading
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_DB_PATH = os.getenv("ML_DB_PATH", "ml_data/ml_store.db")
|
||||||
|
_WRITE_LOCK = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def _std(values: List[float]) -> float:
|
||||||
|
if len(values) < 2:
|
||||||
|
return 0.0
|
||||||
|
mean = sum(values) / len(values)
|
||||||
|
return (sum((v - mean) ** 2 for v in values) / len(values)) ** 0.5
|
||||||
|
|
||||||
|
|
||||||
|
class MLDataCollector:
|
||||||
|
"""
|
||||||
|
Event logger for assignment service calls.
|
||||||
|
|
||||||
|
Each log_assignment_event() call writes one row capturing:
|
||||||
|
- Operating context (time, orders, riders, zone, city)
|
||||||
|
- Active hyperparams (exact config snapshot for this call)
|
||||||
|
- Measured outcomes (quality score, SLA, latency, distances)
|
||||||
|
|
||||||
|
quality_score is computed once and FROZEN - never retroactively changed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
    def __init__(self):
        # DB location comes from ML_DB_PATH env var (default ml_data/ml_store.db).
        self._db_path = _DB_PATH
        # Create the SQLite file/schema up front if missing (defined elsewhere
        # in this class).
        self._ensure_db()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Main logging API
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    def log_assignment_event(
        self,
        *,
        num_orders: int,
        num_riders: int,
        hyperparams: Dict[str, Any],
        assignments: Dict[int, List[Any]],
        unassigned_count: int,
        elapsed_ms: float,
        zone_id: str = "default",
        city_id: str = "default",
        weather_code: str = "CLEAR",
        sla_minutes: Optional[float] = None,
        avg_delivery_time_min: Optional[float] = None,
    ) -> None:
        """
        Log one assignment event.

        Call this at the END of AssignmentService.assign_orders() once
        outcomes are known.

        Writes a single row capturing the operating context (time, order and
        rider counts, zone/city/weather), the exact hyperparameter snapshot
        used for the call, and the measured outcomes. quality_score is
        computed here once and FROZEN - it is never recomputed later.

        Any failure is logged and swallowed: event logging must never break
        the assignment request itself.
        """
        try:
            now = datetime.utcnow()
            hour = now.hour
            day_of_week = now.weekday()
            # Peak = breakfast/lunch/dinner rush hours (hours are UTC here).
            is_peak = int(hour in (7, 8, 9, 12, 13, 18, 19, 20))

            # Per-rider order counts, counting only riders that received work.
            rider_loads = [len(orders) for orders in assignments.values() if orders]
            riders_used = len(rider_loads)
            total_assigned = sum(rider_loads)
            avg_load = total_assigned / riders_used if riders_used else 0.0
            load_std = _std(rider_loads) if rider_loads else 0.0

            all_orders = [o for orders in assignments.values() if orders for o in orders]
            # NOTE(review): _get_km is defined elsewhere in this class; assumed
            # to return one order's distance in km - confirm against its impl.
            total_distance_km = sum(self._get_km(o) for o in all_orders)
            ml_strategy = hyperparams.get("ml_strategy", "balanced")
            max_opr = hyperparams.get("max_orders_per_rider", 12)

            # SLA can only be judged when both figures were supplied (truthy
            # check also skips explicit 0 values).
            sla_breached = 0
            if sla_minutes and avg_delivery_time_min:
                sla_breached = int(avg_delivery_time_min > sla_minutes)

            # Quality score - FROZEN at log time
            quality_score = self._compute_quality_score(
                num_orders=num_orders,
                unassigned_count=unassigned_count,
                load_std=load_std,
                riders_used=riders_used,
                num_riders=num_riders,
                total_distance_km=total_distance_km,
                max_orders_per_rider=max_opr,
                ml_strategy=ml_strategy,
            )

            # Column values; keys must mirror the table schema.
            row = {
                "timestamp": now.isoformat(),
                "hour": hour,
                "day_of_week": day_of_week,
                "is_peak": is_peak,
                "zone_id": zone_id,
                "city_id": city_id,
                "weather_code": weather_code,
                "num_orders": num_orders,
                "num_riders": num_riders,
                "max_pickup_distance_km": hyperparams.get("max_pickup_distance_km", 10.0),
                "max_kitchen_distance_km": hyperparams.get("max_kitchen_distance_km", 3.0),
                "max_orders_per_rider": max_opr,
                "ideal_load": hyperparams.get("ideal_load", 6),
                "workload_balance_threshold": hyperparams.get("workload_balance_threshold", 0.7),
                "workload_penalty_weight": hyperparams.get("workload_penalty_weight", 100.0),
                "distance_penalty_weight": hyperparams.get("distance_penalty_weight", 2.0),
                "cluster_radius_km": hyperparams.get("cluster_radius_km", 3.0),
                "search_time_limit_seconds": hyperparams.get("search_time_limit_seconds", 5),
                "road_factor": hyperparams.get("road_factor", 1.3),
                "ml_strategy": ml_strategy,
                "riders_used": riders_used,
                "total_assigned": total_assigned,
                "unassigned_count": unassigned_count,
                "avg_load": round(avg_load, 3),
                "load_std": round(load_std, 3),
                "total_distance_km": round(total_distance_km, 2),
                "elapsed_ms": round(elapsed_ms, 1),
                "sla_breached": sla_breached,
                "avg_delivery_time_min": round(avg_delivery_time_min or 0.0, 2),
                "quality_score": round(quality_score, 2),
            }

            # Serialize writes across worker threads (SQLite single-writer).
            with _WRITE_LOCK:
                self._insert(row)

            logger.info(
                f"[MLCollector] zone={zone_id} orders={num_orders} "
                f"assigned={total_assigned} unassigned={unassigned_count} "
                f"quality={quality_score:.1f} elapsed={elapsed_ms:.0f}ms"
            )

        except Exception as e:
            logger.warning(f"[MLCollector] Logging failed (non-fatal): {e}")
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Data retrieval for training
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_training_data(
|
||||||
|
self,
|
||||||
|
min_records: int = 30,
|
||||||
|
strategy_filter: Optional[str] = None,
|
||||||
|
since_hours: Optional[int] = None,
|
||||||
|
) -> Optional[List[Dict[str, Any]]]:
|
||||||
|
"""
|
||||||
|
Return logged rows for model training.
|
||||||
|
quality_score is returned AS-IS (frozen at log time - no re-scoring).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(self._db_path)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
|
||||||
|
query = "SELECT * FROM assignment_ml_log"
|
||||||
|
params: list = []
|
||||||
|
clauses: list = []
|
||||||
|
|
||||||
|
if strategy_filter:
|
||||||
|
clauses.append("ml_strategy = ?")
|
||||||
|
params.append(strategy_filter)
|
||||||
|
if since_hours:
|
||||||
|
cutoff = (datetime.utcnow() - timedelta(hours=since_hours)).isoformat()
|
||||||
|
clauses.append("timestamp >= ?")
|
||||||
|
params.append(cutoff)
|
||||||
|
|
||||||
|
if clauses:
|
||||||
|
query += " WHERE " + " AND ".join(clauses)
|
||||||
|
query += " ORDER BY id ASC"
|
||||||
|
|
||||||
|
rows = conn.execute(query, params).fetchall()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
if len(rows) < min_records:
|
||||||
|
logger.info(f"[MLCollector] {len(rows)} records < {min_records} minimum.")
|
||||||
|
return None
|
||||||
|
|
||||||
|
return [dict(r) for r in rows]
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[MLCollector] get_training_data failed: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Analytics API
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    def get_recent_quality_trend(self, last_n: int = 50) -> Dict[str, Any]:
        """Recent quality scores + series for sparkline charts.

        Returns aggregate stats plus per-call series in chronological order
        (rows are fetched newest-first, then reversed). On any error a safe
        empty payload is returned.
        """
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute(
                "SELECT quality_score, timestamp, unassigned_count, elapsed_ms "
                "FROM assignment_ml_log ORDER BY id DESC LIMIT ?", (last_n,)
            ).fetchall()
            conn.close()
            if not rows:
                return {"avg_quality": 0.0, "sample_size": 0, "history": []}
            # Tuple indexes follow the SELECT column order above.
            scores = [r[0] for r in rows]
            return {
                "avg_quality": round(sum(scores) / len(scores), 2),
                "min_quality": round(min(scores), 2),
                "max_quality": round(max(scores), 2),
                "sample_size": len(scores),
                "history": list(reversed(scores)),
                "timestamps": list(reversed([r[1] for r in rows])),
                "unassigned_series": list(reversed([r[2] for r in rows])),
                "latency_series": list(reversed([r[3] for r in rows])),
            }
        # NOTE(review): this bare-Exception handler swallows errors without
        # logging (siblings log them) and the connection is not closed when
        # execute() raises - consider try/finally like get_training_data.
        except Exception:
            return {"avg_quality": 0.0, "sample_size": 0, "history": []}
|
||||||
|
|
||||||
|
    def get_hourly_stats(self, last_days: int = 7) -> List[Dict[str, Any]]:
        """Quality, SLA, and call volume aggregated by hour-of-day.

        Only rows from the last `last_days` days are considered; returns one
        dict per hour that had traffic, ordered by hour. [] on any error.
        """
        try:
            conn = sqlite3.connect(self._db_path)
            # ISO-8601 strings compare lexicographically, so a string >=
            # comparison works as the time filter.
            cutoff = (datetime.utcnow() - timedelta(days=last_days)).isoformat()
            rows = conn.execute(
                """
                SELECT hour,
                       COUNT(*) AS call_count,
                       AVG(quality_score) AS avg_quality,
                       AVG(unassigned_count) AS avg_unassigned,
                       AVG(elapsed_ms) AS avg_latency_ms,
                       SUM(CASE WHEN sla_breached=1 THEN 1 ELSE 0 END) AS sla_breaches
                FROM assignment_ml_log WHERE timestamp >= ?
                GROUP BY hour ORDER BY hour
                """, (cutoff,)
            ).fetchall()
            conn.close()
            # Tuple indexes follow the SELECT column order; SQL AVG returns
            # NULL (-> None) on empty groups, hence the `or 0.0` guards.
            return [
                {
                    "hour": r[0],
                    "call_count": r[1],
                    "avg_quality": round(r[2] or 0.0, 2),
                    "avg_unassigned": round(r[3] or 0.0, 2),
                    "avg_latency_ms": round(r[4] or 0.0, 1),
                    "sla_breaches": r[5],
                }
                for r in rows
            ]
        except Exception as e:
            logger.error(f"[MLCollector] get_hourly_stats: {e}")
            return []
|
||||||
|
|
||||||
|
    def get_strategy_comparison(self) -> List[Dict[str, Any]]:
        """Compare quality metrics across ml_strategy values.

        One dict per strategy, best average quality first. [] on any error.
        """
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute(
                """
                SELECT ml_strategy,
                       COUNT(*) AS call_count,
                       AVG(quality_score) AS avg_quality,
                       MIN(quality_score) AS min_quality,
                       MAX(quality_score) AS max_quality,
                       AVG(unassigned_count) AS avg_unassigned,
                       AVG(total_distance_km) AS avg_distance_km,
                       AVG(elapsed_ms) AS avg_latency_ms
                FROM assignment_ml_log
                GROUP BY ml_strategy ORDER BY avg_quality DESC
                """
            ).fetchall()
            conn.close()
            # Tuple indexes follow the SELECT column order; aggregates can be
            # NULL (-> None) hence the `or 0.0` guards.
            return [
                {
                    "strategy": r[0],
                    "call_count": r[1],
                    "avg_quality": round(r[2] or 0.0, 2),
                    "min_quality": round(r[3] or 0.0, 2),
                    "max_quality": round(r[4] or 0.0, 2),
                    "avg_unassigned": round(r[5] or 0.0, 2),
                    "avg_distance_km": round(r[6] or 0.0, 2),
                    "avg_latency_ms": round(r[7] or 0.0, 1),
                }
                for r in rows
            ]
        except Exception as e:
            logger.error(f"[MLCollector] get_strategy_comparison: {e}")
            return []
|
||||||
|
|
||||||
|
def get_quality_histogram(self, bins: int = 10) -> List[Dict[str, Any]]:
|
||||||
|
"""Quality score distribution for histogram chart."""
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(self._db_path)
|
||||||
|
rows = conn.execute("SELECT quality_score FROM assignment_ml_log").fetchall()
|
||||||
|
conn.close()
|
||||||
|
scores = [r[0] for r in rows if r[0] is not None]
|
||||||
|
if not scores:
|
||||||
|
return []
|
||||||
|
bin_width = 100.0 / bins
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"range": f"{i*bin_width:.0f}-{(i+1)*bin_width:.0f}",
|
||||||
|
"count": sum(1 for s in scores if i*bin_width <= s < (i+1)*bin_width)
|
||||||
|
}
|
||||||
|
for i in range(bins)
|
||||||
|
]
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[MLCollector] get_quality_histogram: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
    def get_zone_stats(self) -> List[Dict[str, Any]]:
        """Quality and SLA stats grouped by zone.

        One dict per zone_id, best average quality first. [] on any error.
        """
        try:
            conn = sqlite3.connect(self._db_path)
            rows = conn.execute(
                """
                SELECT zone_id, COUNT(*) AS call_count,
                       AVG(quality_score) AS avg_quality,
                       SUM(sla_breached) AS sla_breaches,
                       AVG(total_distance_km) AS avg_distance_km
                FROM assignment_ml_log
                GROUP BY zone_id ORDER BY avg_quality DESC
                """
            ).fetchall()
            conn.close()
            # Tuple indexes follow the SELECT column order above.
            return [
                {
                    "zone_id": r[0],
                    "call_count": r[1],
                    "avg_quality": round(r[2] or 0.0, 2),
                    "sla_breaches": r[3],
                    "avg_distance_km": round(r[4] or 0.0, 2),
                }
                for r in rows
            ]
        except Exception as e:
            logger.error(f"[MLCollector] get_zone_stats: {e}")
            return []
|
||||||
|
|
||||||
|
def count_records(self) -> int:
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(self._db_path)
|
||||||
|
count = conn.execute("SELECT COUNT(*) FROM assignment_ml_log").fetchone()[0]
|
||||||
|
conn.close()
|
||||||
|
return count
|
||||||
|
except Exception:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def count_by_strategy(self) -> Dict[str, int]:
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(self._db_path)
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT ml_strategy, COUNT(*) FROM assignment_ml_log GROUP BY ml_strategy"
|
||||||
|
).fetchall()
|
||||||
|
conn.close()
|
||||||
|
return {r[0]: r[1] for r in rows}
|
||||||
|
except Exception:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
    def export_csv(self) -> str:
        """Export all records as CSV string.

        Header row comes from the table's column names (sqlite3.Row.keys()).
        Returns "" when the table is empty or on any error.
        """
        try:
            conn = sqlite3.connect(self._db_path)
            conn.row_factory = sqlite3.Row
            rows = conn.execute("SELECT * FROM assignment_ml_log ORDER BY id ASC").fetchall()
            conn.close()
            if not rows:
                return ""
            # Build the CSV fully in memory; callers stream/attach the string.
            buf = io.StringIO()
            writer = csv.DictWriter(buf, fieldnames=rows[0].keys())
            writer.writeheader()
            writer.writerows([dict(r) for r in rows])
            return buf.getvalue()
        except Exception as e:
            logger.error(f"[MLCollector] export_csv failed: {e}")
            return ""
|
||||||
|
|
||||||
|
def purge_old_records(self, keep_days: int = 90) -> int:
|
||||||
|
"""Delete records older than keep_days. Returns count deleted."""
|
||||||
|
try:
|
||||||
|
cutoff = (datetime.utcnow() - timedelta(days=keep_days)).isoformat()
|
||||||
|
conn = sqlite3.connect(self._db_path)
|
||||||
|
cursor = conn.execute(
|
||||||
|
"DELETE FROM assignment_ml_log WHERE timestamp < ?", (cutoff,)
|
||||||
|
)
|
||||||
|
deleted = cursor.rowcount
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
logger.info(f"[MLCollector] Purged {deleted} records older than {keep_days} days.")
|
||||||
|
return deleted
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[MLCollector] purge failed: {e}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Quality Score Formula (frozen at log time - do not change behavior)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _compute_quality_score(
|
||||||
|
num_orders: int, unassigned_count: int, load_std: float,
|
||||||
|
riders_used: int, num_riders: int, total_distance_km: float,
|
||||||
|
max_orders_per_rider: int, ml_strategy: str = "balanced",
|
||||||
|
) -> float:
|
||||||
|
if num_orders == 0:
|
||||||
|
return 0.0
|
||||||
|
assigned_ratio = 1.0 - (unassigned_count / num_orders)
|
||||||
|
max_std = max(1.0, max_orders_per_rider / 2.0)
|
||||||
|
balance_ratio = max(0.0, 1.0 - (load_std / max_std))
|
||||||
|
max_dist = max(1.0, float((num_orders - unassigned_count) * 8.0))
|
||||||
|
distance_ratio = max(0.0, 1.0 - (total_distance_km / max_dist))
|
||||||
|
weights = {
|
||||||
|
"aggressive_speed": (80.0, 20.0, 0.0),
|
||||||
|
"fuel_saver": (30.0, 70.0, 0.0),
|
||||||
|
"zone_strict": (40.0, 30.0, 30.0),
|
||||||
|
"balanced": (50.0, 25.0, 25.0),
|
||||||
|
}
|
||||||
|
w_comp, w_dist, w_bal = weights.get(ml_strategy, (50.0, 25.0, 25.0))
|
||||||
|
return min(
|
||||||
|
assigned_ratio * w_comp + distance_ratio * w_dist + balance_ratio * w_bal,
|
||||||
|
100.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_km(order: Any) -> float:
|
||||||
|
try:
|
||||||
|
return float(order.get("kms") or order.get("calculationDistanceKm") or 0.0)
|
||||||
|
except Exception:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# DB Bootstrap
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    def _ensure_db(self) -> None:
        """Create the SQLite schema if missing and apply additive migrations.

        Idempotent and best-effort: every statement tolerates re-runs, and any
        failure is logged and swallowed so data collection never blocks the
        caller.
        """
        try:
            os.makedirs(os.path.dirname(self._db_path) or ".", exist_ok=True)
            conn = sqlite3.connect(self._db_path)
            # Base table: the full current schema, used for fresh installs.
            conn.execute("""
                CREATE TABLE IF NOT EXISTS assignment_ml_log (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp TEXT NOT NULL,
                    hour INTEGER,
                    day_of_week INTEGER,
                    is_peak INTEGER DEFAULT 0,
                    zone_id TEXT DEFAULT 'default',
                    city_id TEXT DEFAULT 'default',
                    weather_code TEXT DEFAULT 'CLEAR',
                    num_orders INTEGER,
                    num_riders INTEGER,
                    max_pickup_distance_km REAL,
                    max_kitchen_distance_km REAL,
                    max_orders_per_rider INTEGER,
                    ideal_load INTEGER,
                    workload_balance_threshold REAL,
                    workload_penalty_weight REAL,
                    distance_penalty_weight REAL,
                    cluster_radius_km REAL,
                    search_time_limit_seconds INTEGER,
                    road_factor REAL,
                    ml_strategy TEXT DEFAULT 'balanced',
                    riders_used INTEGER,
                    total_assigned INTEGER,
                    unassigned_count INTEGER,
                    avg_load REAL,
                    load_std REAL,
                    total_distance_km REAL DEFAULT 0.0,
                    elapsed_ms REAL,
                    sla_breached INTEGER DEFAULT 0,
                    avg_delivery_time_min REAL DEFAULT 0.0,
                    quality_score REAL
                )
            """)
            # Additive column migrations for databases created by older
            # versions. "duplicate column" errors are expected and ignored.
            migrations = [
                "ALTER TABLE assignment_ml_log ADD COLUMN is_peak INTEGER DEFAULT 0",
                "ALTER TABLE assignment_ml_log ADD COLUMN zone_id TEXT DEFAULT 'default'",
                "ALTER TABLE assignment_ml_log ADD COLUMN city_id TEXT DEFAULT 'default'",
                "ALTER TABLE assignment_ml_log ADD COLUMN weather_code TEXT DEFAULT 'CLEAR'",
                "ALTER TABLE assignment_ml_log ADD COLUMN sla_breached INTEGER DEFAULT 0",
                "ALTER TABLE assignment_ml_log ADD COLUMN avg_delivery_time_min REAL DEFAULT 0.0",
                "ALTER TABLE assignment_ml_log ADD COLUMN ml_strategy TEXT DEFAULT 'balanced'",
                "ALTER TABLE assignment_ml_log ADD COLUMN total_distance_km REAL DEFAULT 0.0",
            ]
            for ddl in migrations:
                try:
                    conn.execute(ddl)
                except Exception:
                    pass  # column already exists - safe to ignore
            # Indexes backing the timestamp/strategy/zone query paths.
            for idx in [
                "CREATE INDEX IF NOT EXISTS idx_timestamp ON assignment_ml_log(timestamp)",
                "CREATE INDEX IF NOT EXISTS idx_strategy ON assignment_ml_log(ml_strategy)",
                "CREATE INDEX IF NOT EXISTS idx_zone ON assignment_ml_log(zone_id)",
            ]:
                conn.execute(idx)
            conn.commit()
            conn.close()
        except Exception as e:
            logger.error(f"[MLCollector] DB init failed: {e}")
|
||||||
|
def _insert(self, row: Dict[str, Any]) -> None:
|
||||||
|
os.makedirs(os.path.dirname(self._db_path) or ".", exist_ok=True)
|
||||||
|
conn = sqlite3.connect(self._db_path)
|
||||||
|
cols = ", ".join(row.keys())
|
||||||
|
placeholders = ", ".join(["?"] * len(row))
|
||||||
|
conn.execute(
|
||||||
|
f"INSERT INTO assignment_ml_log ({cols}) VALUES ({placeholders})",
|
||||||
|
list(row.values()),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------

# Lazily-created process-wide collector instance; access via get_collector().
_collector: Optional[MLDataCollector] = None


def get_collector() -> MLDataCollector:
    """Return the shared MLDataCollector, constructing it on first call.

    Note: not thread-safe at first call; concurrent callers could construct
    two instances (last one wins).
    """
    global _collector
    if _collector is None:
        _collector = MLDataCollector()
    return _collector
||||||
610
app/services/ml/ml_hypertuner.py
Normal file
610
app/services/ml/ml_hypertuner.py
Normal file
@@ -0,0 +1,610 @@
|
|||||||
|
"""
|
||||||
|
ML Hypertuner - Production Grade
|
||||||
|
===================================
|
||||||
|
XGBoost surrogate model + Optuna TPE Bayesian optimization.
|
||||||
|
|
||||||
|
Key upgrades over the original
|
||||||
|
--------------------------------
|
||||||
|
1. Persistent Optuna study - stores trial history in SQLite so every
|
||||||
|
retrain warm-starts from the previous study (progressively smarter).
|
||||||
|
2. Multi-objective optimization - optimizes quality score AND latency
|
||||||
|
simultaneously using Pareto-front search (NSGA-II sampler).
|
||||||
|
3. Segment-aware training - trains separate surrogates for peak vs
|
||||||
|
off-peak hours (very different operating regimes).
|
||||||
|
4. Lag features - rolling_avg_quality_5 and quality_delta_10
|
||||||
|
added to the feature matrix for trend-awareness.
|
||||||
|
5. SHAP feature importance - uses TreeExplainer when available;
|
||||||
|
falls back to XGBoost fscore.
|
||||||
|
6. Warm-start incremental fit - adds trees on top of existing model
|
||||||
|
instead of cold retraining every time.
|
||||||
|
7. Staleness detection - warns if model is older than 24h.
|
||||||
|
8. Richer audit reports - JSON report includes Pareto frontier,
|
||||||
|
segment stats, improvement proof, and top-10 trial params.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import KFold
|
||||||
|
from sklearn.metrics import r2_score, mean_absolute_error
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Optional heavy dependencies: each import is probed once at module load and
# the corresponding *_AVAILABLE flag gates its use elsewhere, so the service
# degrades gracefully when a library is missing.
try:
    import xgboost as xgb
    XGB_AVAILABLE = True
except ImportError:
    XGB_AVAILABLE = False
    logger.warning("[Hypertuner] xgboost not installed.")

try:
    import optuna
    # Optuna is verbose by default; keep only warnings in service logs.
    optuna.logging.set_verbosity(optuna.logging.WARNING)
    OPTUNA_AVAILABLE = True
except ImportError:
    OPTUNA_AVAILABLE = False
    logger.warning("[Hypertuner] optuna not installed.")

try:
    # SHAP enables TreeExplainer-based feature importance; silently optional.
    import shap
    SHAP_AVAILABLE = True
except ImportError:
    SHAP_AVAILABLE = False
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Feature columns
# ---------------------------------------------------------------------------

# Context + tunable-parameter columns read straight from assignment_ml_log.
BASE_FEATURE_COLS = [
    "hour", "day_of_week", "is_peak",
    "num_orders", "num_riders",
    "max_pickup_distance_km", "max_kitchen_distance_km",
    "max_orders_per_rider", "ideal_load",
    "workload_balance_threshold", "workload_penalty_weight",
    "distance_penalty_weight", "cluster_radius_km",
    "search_time_limit_seconds", "road_factor",
]

# Engineered trend features, computed at training time (see _add_lag_features).
LAG_FEATURE_COLS = [
    "rolling_avg_quality_5",  # rolling mean of last 5 quality scores
    "quality_delta_10",       # quality[i] - quality[i-10]
]

ALL_FEATURE_COLS = BASE_FEATURE_COLS + LAG_FEATURE_COLS
LABEL_COL = "quality_score"

# Optuna search space: name -> (type, low, high). Bounds are inclusive.
SEARCH_SPACE = {
    "max_pickup_distance_km": ("float", 4.0, 15.0),
    "max_kitchen_distance_km": ("float", 1.0, 8.0),
    "max_orders_per_rider": ("int", 6, 20),
    "ideal_load": ("int", 2, 10),
    "workload_balance_threshold": ("float", 0.3, 0.95),
    "workload_penalty_weight": ("float", 20.0, 200.0),
    "distance_penalty_weight": ("float", 0.5, 10.0),
    "cluster_radius_km": ("float", 1.0, 8.0),
    "search_time_limit_seconds": ("int", 2, 15),
    "road_factor": ("float", 1.1, 1.6),
}

# Persistent Optuna study storage (shared SQLite file, env-overridable).
_STUDY_DB_PATH = os.getenv("ML_DB_PATH", "ml_data/ml_store.db")
# Directory where JSON tuning reports are written/restored.
_REPORT_DIR = "ml_data/reports"
# Staleness threshold (hours) referenced by the docstring's staleness note.
_MAX_MODEL_AGE_H = 24
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# MLHypertuner
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class MLHypertuner:
|
||||||
|
"""XGBoost surrogate + Optuna TPE / NSGA-II hyperparameter optimizer."""
|
||||||
|
|
||||||
|
    def __init__(self):
        """Initialize empty model slots and restore state from the latest report."""
        # Surrogate models: one global, plus optional peak / off-peak segment
        # models trained when enough segment data exists.
        self._model: Optional[Any] = None
        self._peak_model: Optional[Any] = None
        self._offpeak_model: Optional[Any] = None
        self._model_trained_at: Optional[datetime] = None
        self._training_rows: int = 0
        # Most recent CV metrics and baseline stats (also persisted in reports).
        self._latest_validation: Optional[Dict] = None
        self._latest_baseline: Optional[Dict] = None
        self._feature_importance: Optional[Dict[str, float]] = None
        self._top_trials: List[Dict] = []
        self._pareto_frontier: List[Dict] = []
        # Warm-start bookkeeping from the newest JSON report on disk, if any.
        self._load_latest_report()
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Main entry point
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    def run(
        self,
        n_trials: int = 150,
        min_training_records: int = 30,
        context_override: Optional[Dict] = None,
        multi_objective: bool = False,
        segment_aware: bool = True,
    ) -> Dict[str, Any]:
        """Full pipeline: load -> engineer -> validate -> train -> search -> write.

        Args:
            n_trials: number of Optuna trials for the parameter search.
            min_training_records: minimum logged records required to train.
            context_override: explicit prediction context; when None it is
                derived from the last 20 records and the current clock.
            multi_objective: use NSGA-II over (quality, latency proxy) instead
                of single-objective TPE.
            segment_aware: additionally train peak/off-peak surrogates when
                enough segment data exists (>=60 total, >=20 per segment).

        Returns:
            A status dict. "status" is one of: "error", "insufficient_data",
            "model_not_ready", or "ok" (with best_params, validation metrics,
            improvement proof, feature importance and top trials).
        """
        # Hard dependency check - both libraries are required for the search.
        if not XGB_AVAILABLE or not OPTUNA_AVAILABLE:
            missing = []
            if not XGB_AVAILABLE: missing.append("xgboost")
            if not OPTUNA_AVAILABLE: missing.append("optuna")
            return {"status": "error", "message": f"Missing: {', '.join(missing)}"}

        # Imported lazily to avoid a circular import at module load time.
        from app.services.ml.ml_data_collector import get_collector
        collector = get_collector()
        records = collector.get_training_data(min_records=min_training_records)

        # Collector returns None (not []) when below the record threshold.
        if records is None:
            count = collector.count_records()
            return {
                "status": "insufficient_data",
                "message": f"{count} records - need >={min_training_records}.",
                "records_available": count,
                "records_needed": min_training_records,
            }

        records = self._add_lag_features(records)
        X, y = self._prepare_data(records, ALL_FEATURE_COLS)
        if X is None or len(X) == 0:
            return {"status": "error", "message": "Data preparation failed."}

        # Validate BEFORE committing new params; result gates the write below.
        cv_results = self._cross_validate(X, y)
        logger.info(f"[Hypertuner] CV: R2={cv_results['r2_score']:.3f}, MAE={cv_results['mae']:.2f}")

        self._train_model(X, y, model_attr="_model")
        self._latest_validation = cv_results

        # Optional segment models: peak vs off-peak operate very differently.
        if segment_aware and len(records) >= 60:
            peak_recs = [r for r in records if r.get("is_peak", 0) == 1]
            offpeak_recs = [r for r in records if r.get("is_peak", 0) == 0]
            if len(peak_recs) >= 20:
                Xp, yp = self._prepare_data(peak_recs, ALL_FEATURE_COLS)
                self._train_model(Xp, yp, model_attr="_peak_model")
            if len(offpeak_recs) >= 20:
                Xo, yo = self._prepare_data(offpeak_recs, ALL_FEATURE_COLS)
                self._train_model(Xo, yo, model_attr="_offpeak_model")

        baseline_stats = self._compute_baseline_stats(records)
        self._latest_baseline = baseline_stats
        context = context_override or self._get_current_context(records)

        if multi_objective:
            best_params, best_score, pareto = self._optuna_search_multi(context, n_trials)
            self._pareto_frontier = pareto
        else:
            best_params, best_score = self._optuna_search_single(context, n_trials)

        if best_params is None:
            return {"status": "error", "message": "Optuna search failed."}

        improvement = round(best_score - baseline_stats["avg_quality"], 2)
        self._compute_feature_importance()

        # Guardrail: when the surrogate generalizes poorly, do NOT overwrite
        # the live config with its recommendations.
        if cv_results["r2_score"] < 0.5:
            return {
                "status": "model_not_ready",
                "message": f"R2={cv_results['r2_score']:.3f} too low.",
                "validation": cv_results,
                "training_rows": len(records),
                "action_taken": "none - existing config preserved",
            }

        # Write the winning params into the live config when available.
        try:
            from app.config.dynamic_config import get_config
            get_config().set_bulk(best_params, source="ml_hypertuner")
        except ImportError:
            logger.info("[Hypertuner] DynamicConfig not available - params not written to config.")

        self._save_report(best_params, best_score, len(records), n_trials, cv_results, baseline_stats)

        return {
            "status": "ok",
            "best_params": best_params,
            "best_predicted_quality": round(best_score, 2),
            "training_rows": len(records),
            "trials_run": n_trials,
            "context_used": context,
            "validation": cv_results,
            "improvement_proof": {
                "baseline_avg_quality": baseline_stats["avg_quality"],
                "baseline_worst": baseline_stats["worst_quality"],
                "baseline_best": baseline_stats["best_quality"],
                "ml_predicted_quality": round(best_score, 2),
                "predicted_improvement": improvement,
                "verdict": (
                    "ML params significantly better" if improvement > 5 else
                    "Marginal improvement - keep collecting data" if improvement > 0 else
                    "No improvement - defaults may be near-optimal"
                ),
            },
            "feature_importance": self._feature_importance,
            "top_trials": self._top_trials[:5],
            "message": "Hyperparameters updated successfully.",
        }
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Feature Engineering
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _add_lag_features(self, records: List[Dict]) -> List[Dict]:
|
||||||
|
scores = [float(r.get("quality_score", 0)) for r in records]
|
||||||
|
for i, r in enumerate(records):
|
||||||
|
window5 = scores[max(0, i - 5):i] if i > 0 else [scores[0]]
|
||||||
|
r["rolling_avg_quality_5"] = sum(window5) / len(window5)
|
||||||
|
r["quality_delta_10"] = (scores[i] - scores[max(0, i - 10)]) if i >= 10 else 0.0
|
||||||
|
return records
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Data Preparation
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _prepare_data(
|
||||||
|
self, records: List[Dict], feature_cols: List[str]
|
||||||
|
) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
|
||||||
|
try:
|
||||||
|
X_rows, y_vals = [], []
|
||||||
|
for rec in records:
|
||||||
|
row = []
|
||||||
|
for col in feature_cols:
|
||||||
|
try:
|
||||||
|
row.append(float(rec.get(col, 0) or 0))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
row.append(0.0)
|
||||||
|
X_rows.append(row)
|
||||||
|
y_vals.append(float(rec.get(LABEL_COL, 0)))
|
||||||
|
return (
|
||||||
|
np.array(X_rows, dtype=np.float32),
|
||||||
|
np.array(y_vals, dtype=np.float32),
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[Hypertuner] Data prep failed: {e}")
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Model Training (warm-start capable)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    def _train_model(self, X: np.ndarray, y: np.ndarray, model_attr: str = "_model") -> None:
        """Train (or warm-update) the XGBoost surrogate stored at model_attr.

        When a model already exists at model_attr, 50 extra trees are fitted
        on top of its booster (warm start); on any failure it silently falls
        back to a full cold retrain with the base hyperparameters below.
        Training metadata (timestamp, row count) is tracked for the global
        "_model" only.
        """
        # Base hyperparameters for a cold retrain.
        kwargs = {
            "n_estimators": 300, "max_depth": 5, "learning_rate": 0.04,
            "subsample": 0.8, "colsample_bytree": 0.8,
            "reg_alpha": 0.1, "reg_lambda": 1.0, "random_state": 42, "verbosity": 0,
        }
        existing = getattr(self, model_attr, None)
        if existing is not None:
            try:
                # Warm start: only 50 new estimators, continuing from the
                # existing booster instead of retraining from scratch.
                m = xgb.XGBRegressor(n_estimators=50, **{k: v for k, v in kwargs.items() if k != "n_estimators"})
                m.fit(X, y, xgb_model=existing.get_booster())
                setattr(self, model_attr, m)
                if model_attr == "_model":
                    self._model_trained_at = datetime.utcnow()
                    self._training_rows = len(X)
                logger.info(f"[Hypertuner] XGBoost warm-updated ({model_attr}) - {len(X)} rows.")
                return
            except Exception:
                pass  # warm start failed - fall through to cold retrain

        m = xgb.XGBRegressor(**kwargs)
        m.fit(X, y)
        setattr(self, model_attr, m)
        if model_attr == "_model":
            self._model_trained_at = datetime.utcnow()
            self._training_rows = len(X)
        logger.info(f"[Hypertuner] XGBoost trained ({model_attr}) - {len(X)} rows.")
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Cross Validation
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    def _cross_validate(self, X: np.ndarray, y: np.ndarray, k: int = 5) -> Dict:
        """Estimate surrogate generalization via k-fold CV (or a holdout).

        With fewer than 2*k rows, falls back to a single 80/20 ordered split;
        otherwise runs shuffled k-fold and averages R2/MAE across folds. The
        returned trust_level/trust_score buckets the R2 for human consumption.
        """
        if len(X) < k * 2:
            # Small-data path: one chronological 80/20 holdout.
            split = max(1, int(len(X) * 0.8))
            X_tr, X_te, y_tr, y_te = X[:split], X[split:], y[:split], y[split:]
            if len(X_te) == 0:
                # Not even one test row - report a sentinel "no trust" result.
                return {"r2_score": 0.0, "mae": 99.0, "trust_level": "insufficient_data",
                        "trust_score": 0, "folds": 0}
            m = xgb.XGBRegressor(n_estimators=100, max_depth=4, verbosity=0, random_state=42)
            m.fit(X_tr, y_tr)
            r2 = float(r2_score(y_te, m.predict(X_te)))
            mae = float(mean_absolute_error(y_te, m.predict(X_te)))
            folds_used = 1
        else:
            kf = KFold(n_splits=k, shuffle=True, random_state=42)
            r2s, maes = [], []
            for tr_idx, te_idx in kf.split(X):
                m = xgb.XGBRegressor(n_estimators=100, max_depth=4, verbosity=0, random_state=42)
                m.fit(X[tr_idx], y[tr_idx])
                preds = m.predict(X[te_idx])
                r2s.append(r2_score(y[te_idx], preds))
                maes.append(mean_absolute_error(y[te_idx], preds))
            r2, mae, folds_used = float(np.mean(r2s)), float(np.mean(maes)), k

        # Map mean R2 onto a descending (threshold, label, score) ladder;
        # first threshold met wins.
        trust_map = [(0.85, "excellent", 5), (0.75, "strong", 4),
                     (0.60, "good", 3), (0.50, "acceptable", 2)]
        trust_level, trust_score = "poor - need more data", 1
        for threshold, level, score in trust_map:
            if r2 >= threshold:
                trust_level, trust_score = level, score
                break

        return {
            "r2_score": round(r2, 4),
            "mae": round(mae, 3),
            "folds": folds_used,
            "trust_level": trust_level,
            "trust_score": trust_score,
            "interpretation": f"Predictions off by +/-{mae:.1f} pts (R2={r2:.2f}, trust={trust_level})",
        }
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Optuna - Single Objective (persistent SQLite storage)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    def _optuna_search_single(self, context: Dict, n_trials: int) -> Tuple[Optional[Dict], float]:
        """Run single-objective TPE search over SEARCH_SPACE.

        The study persists in SQLite (load_if_exists=True), so repeated runs
        warm-start from all prior trials. Returns (best_params, best_score),
        or (None, 0.0) on any failure. Also records the top-10 trials on self.
        """
        def objective(trial):
            params = self._sample_params(trial)
            # Infeasible combination: ideal load above the per-rider cap is
            # scored 0 rather than pruned, to keep the sampler informed.
            if params.get("ideal_load", 6) > params.get("max_orders_per_rider", 12):
                return 0.0
            return self._predict_quality(context, params)
        try:
            study = optuna.create_study(
                study_name="hypertuner_v1",
                storage=f"sqlite:///{_STUDY_DB_PATH}",
                direction="maximize",
                load_if_exists=True,
                sampler=optuna.samplers.TPESampler(seed=42),
            )
            study.optimize(objective, n_trials=n_trials, show_progress_bar=False)
            best = study.best_trial
            # Keep the 10 highest-scoring completed trials for reporting.
            self._top_trials = [
                {"params": t.params, "score": t.value}
                for t in sorted(study.trials, key=lambda x: x.value or 0, reverse=True)[:10]
                if t.value is not None
            ]
            # Filter to SEARCH_SPACE keys so stale study params don't leak out.
            return {k: best.params[k] for k in SEARCH_SPACE if k in best.params}, best.value
        except Exception as e:
            logger.error(f"[Hypertuner] Optuna single-obj failed: {e}", exc_info=True)
            return None, 0.0
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Optuna - Multi Objective (quality + latency, NSGA-II)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    def _optuna_search_multi(
        self, context: Dict, n_trials: int
    ) -> Tuple[Optional[Dict], float, List[Dict]]:
        """Run multi-objective NSGA-II search: maximize quality, minimize latency.

        Latency is a proxy (search_time_limit_seconds * 200 ms); no wall-clock
        measurement happens here. Returns (best_params, best_quality, pareto)
        where "best" is the highest-quality point on the Pareto frontier, or
        (None, 0.0, []) on failure.
        """
        def objective(trial):
            params = self._sample_params(trial)
            # Infeasible combination -> worst values on both objectives.
            if params.get("ideal_load", 6) > params.get("max_orders_per_rider", 12):
                return 0.0, 99.0
            quality = self._predict_quality(context, params)
            latency_proxy = float(params.get("search_time_limit_seconds", 5)) * 200.0
            return quality, latency_proxy
        try:
            study = optuna.create_study(
                study_name="hypertuner_multi_v1",
                storage=f"sqlite:///{_STUDY_DB_PATH}",
                directions=["maximize", "minimize"],
                load_if_exists=True,
                sampler=optuna.samplers.NSGAIISampler(seed=42),
            )
            study.optimize(objective, n_trials=n_trials, show_progress_bar=False)
            # best_trials is Optuna's Pareto-optimal set for multi-objective studies.
            pareto = [
                {"params": t.params, "quality": t.values[0], "latency_proxy": t.values[1]}
                for t in study.best_trials
            ]
            if not pareto:
                return None, 0.0, []
            # Pick the frontier point with the highest predicted quality.
            best_trial = max(pareto, key=lambda x: x["quality"])
            return (
                {k: best_trial["params"][k] for k in SEARCH_SPACE if k in best_trial["params"]},
                best_trial["quality"],
                pareto,
            )
        except Exception as e:
            logger.error(f"[Hypertuner] Optuna multi-obj failed: {e}", exc_info=True)
            return None, 0.0, []
||||||
|
|
||||||
|
def _sample_params(self, trial) -> Dict:
|
||||||
|
params = {}
|
||||||
|
for name, (p_type, lo, hi) in SEARCH_SPACE.items():
|
||||||
|
if p_type == "float":
|
||||||
|
params[name] = trial.suggest_float(name, lo, hi)
|
||||||
|
elif p_type == "int":
|
||||||
|
params[name] = trial.suggest_int(name, int(lo), int(hi))
|
||||||
|
return params
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Prediction
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _predict_quality(self, context: Dict, params: Dict) -> float:
|
||||||
|
if self._model is None:
|
||||||
|
return 0.0
|
||||||
|
combined = {
|
||||||
|
**context, **params,
|
||||||
|
"rolling_avg_quality_5": context.get("rolling_avg_quality_5", 50.0),
|
||||||
|
"quality_delta_10": context.get("quality_delta_10", 0.0),
|
||||||
|
}
|
||||||
|
row = []
|
||||||
|
for col in ALL_FEATURE_COLS:
|
||||||
|
try:
|
||||||
|
row.append(float(combined.get(col, 0) or 0))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
row.append(0.0)
|
||||||
|
is_peak = int(context.get("is_peak", 0))
|
||||||
|
model = (self._peak_model if is_peak else self._offpeak_model) or self._model
|
||||||
|
pred = float(model.predict(np.array([row], dtype=np.float32))[0])
|
||||||
|
return max(0.0, min(pred, 100.0))
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Feature Importance
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _compute_feature_importance(self) -> None:
|
||||||
|
if self._model is None:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
if SHAP_AVAILABLE:
|
||||||
|
from ml_data_collector import get_collector
|
||||||
|
records = get_collector().get_training_data(min_records=1) or []
|
||||||
|
records = self._add_lag_features(records[-200:])
|
||||||
|
X, _ = self._prepare_data(records, ALL_FEATURE_COLS)
|
||||||
|
if X is not None and len(X) > 0:
|
||||||
|
explainer = shap.TreeExplainer(self._model)
|
||||||
|
shap_values = np.abs(explainer.shap_values(X)).mean(axis=0)
|
||||||
|
total = max(shap_values.sum(), 1e-9)
|
||||||
|
self._feature_importance = dict(sorted(
|
||||||
|
{ALL_FEATURE_COLS[i]: round(float(shap_values[i] / total) * 100, 2)
|
||||||
|
for i in range(len(ALL_FEATURE_COLS))}.items(),
|
||||||
|
key=lambda x: x[1], reverse=True
|
||||||
|
))
|
||||||
|
return
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
scores = self._model.get_booster().get_fscore()
|
||||||
|
total = max(sum(scores.values()), 1)
|
||||||
|
self._feature_importance = dict(sorted(
|
||||||
|
{ALL_FEATURE_COLS[int(k[1:])]: round(v / total * 100, 2)
|
||||||
|
for k, v in scores.items()
|
||||||
|
if k.startswith("f") and k[1:].isdigit() and int(k[1:]) < len(ALL_FEATURE_COLS)
|
||||||
|
}.items(),
|
||||||
|
key=lambda x: x[1], reverse=True
|
||||||
|
))
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[Hypertuner] Feature importance failed: {e}")
|
||||||
|
|
||||||
|
    def get_feature_importance(self) -> Optional[Dict[str, float]]:
        """Return the latest {feature: percent} importance map, or None if never computed."""
        return self._feature_importance
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Context
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _get_current_context(self, records: List[Dict]) -> Dict:
|
||||||
|
now = datetime.utcnow()
|
||||||
|
recent = records[-20:]
|
||||||
|
avg_orders = sum(r.get("num_orders", 0) for r in recent) / max(len(recent), 1)
|
||||||
|
avg_riders = sum(r.get("num_riders", 0) for r in recent) / max(len(recent), 1)
|
||||||
|
recent_scores = [float(r.get("quality_score", 0)) for r in recent]
|
||||||
|
rolling_avg5 = sum(recent_scores[-5:]) / max(len(recent_scores[-5:]), 1)
|
||||||
|
delta10 = (recent_scores[-1] - recent_scores[-11]) if len(recent_scores) >= 11 else 0.0
|
||||||
|
return {
|
||||||
|
"hour": now.hour,
|
||||||
|
"day_of_week": now.weekday(),
|
||||||
|
"is_peak": int(now.hour in (7, 8, 9, 12, 13, 18, 19, 20)),
|
||||||
|
"num_orders": round(avg_orders),
|
||||||
|
"num_riders": round(avg_riders),
|
||||||
|
"rolling_avg_quality_5": round(rolling_avg5, 2),
|
||||||
|
"quality_delta_10": round(delta10, 2),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _compute_baseline_stats(self, records: List[Dict]) -> Dict:
|
||||||
|
scores = [float(r.get("quality_score", 0)) for r in records if r.get("quality_score")]
|
||||||
|
if not scores:
|
||||||
|
return {"avg_quality": 0.0, "best_quality": 0.0, "worst_quality": 0.0}
|
||||||
|
return {
|
||||||
|
"avg_quality": round(sum(scores) / len(scores), 2),
|
||||||
|
"best_quality": round(max(scores), 2),
|
||||||
|
"worst_quality": round(min(scores), 2),
|
||||||
|
"sample_size": len(scores),
|
||||||
|
}
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Model Info
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def get_model_info(self) -> Dict[str, Any]:
|
||||||
|
baseline = self._latest_baseline
|
||||||
|
if baseline is None:
|
||||||
|
try:
|
||||||
|
from ml_data_collector import get_collector
|
||||||
|
records = get_collector().get_training_data(min_records=1)
|
||||||
|
if records:
|
||||||
|
baseline = self._compute_baseline_stats(records)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return {
|
||||||
|
"model_trained": self._model is not None,
|
||||||
|
"trained_at": self._model_trained_at.isoformat() if self._model_trained_at else None,
|
||||||
|
"training_rows": self._training_rows,
|
||||||
|
"peak_model_trained": self._peak_model is not None,
|
||||||
|
"offpeak_model_trained": self._offpeak_model is not None,
|
||||||
|
"features": ALL_FEATURE_COLS,
|
||||||
|
"validation": self._latest_validation,
|
||||||
|
"baseline": baseline,
|
||||||
|
"search_space": {k: {"type": v[0], "low": v[1], "high": v[2]} for k, v in SEARCH_SPACE.items()},
|
||||||
|
"feature_importance": self._feature_importance,
|
||||||
|
"top_trials": self._top_trials[:10],
|
||||||
|
"pareto_frontier_size": len(self._pareto_frontier),
|
||||||
|
}
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Report I/O
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _save_report(self, best_params, best_score, training_rows,
|
||||||
|
n_trials, cv_results, baseline_stats) -> None:
|
||||||
|
try:
|
||||||
|
os.makedirs(_REPORT_DIR, exist_ok=True)
|
||||||
|
report = {
|
||||||
|
"timestamp": datetime.utcnow().isoformat(),
|
||||||
|
"training_rows": training_rows,
|
||||||
|
"n_trials": n_trials,
|
||||||
|
"best_predicted_quality": round(best_score, 2),
|
||||||
|
"best_params": best_params,
|
||||||
|
"validation": cv_results or {},
|
||||||
|
"baseline_stats": baseline_stats or {},
|
||||||
|
"feature_importance": self._feature_importance or {},
|
||||||
|
"top_trials": self._top_trials[:10],
|
||||||
|
"pareto_frontier": self._pareto_frontier[:20],
|
||||||
|
}
|
||||||
|
path = os.path.join(_REPORT_DIR, f"tuning_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}.json")
|
||||||
|
with open(path, "w") as f:
|
||||||
|
json.dump(report, f, indent=2)
|
||||||
|
logger.info(f"[Hypertuner] Report -> {path}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[Hypertuner] Report save failed: {e}")
|
||||||
|
|
||||||
|
def _load_latest_report(self) -> None:
|
||||||
|
try:
|
||||||
|
if not os.path.isdir(_REPORT_DIR):
|
||||||
|
return
|
||||||
|
files = sorted([f for f in os.listdir(_REPORT_DIR) if f.endswith(".json")], reverse=True)
|
||||||
|
if not files:
|
||||||
|
return
|
||||||
|
with open(os.path.join(_REPORT_DIR, files[0])) as f:
|
||||||
|
report = json.load(f)
|
||||||
|
self._latest_validation = report.get("validation")
|
||||||
|
self._latest_baseline = report.get("baseline_stats")
|
||||||
|
self._training_rows = report.get("training_rows", 0)
|
||||||
|
self._feature_importance = report.get("feature_importance")
|
||||||
|
self._top_trials = report.get("top_trials", [])
|
||||||
|
self._pareto_frontier = report.get("pareto_frontier", [])
|
||||||
|
ts = report.get("timestamp")
|
||||||
|
if ts:
|
||||||
|
self._model_trained_at = datetime.fromisoformat(ts)
|
||||||
|
logger.info(f"[Hypertuner] Restored state from {files[0]}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[Hypertuner] Load latest report failed: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Module-level singleton
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Lazily-created module-level singleton (one tuner per process).
_tuner: Optional[MLHypertuner] = None


def get_hypertuner() -> MLHypertuner:
    """Return the process-wide MLHypertuner, constructing it on first use."""
    global _tuner
    if _tuner is None:
        _tuner = MLHypertuner()
    return _tuner
|
||||||
99
app/services/rider/get_active_riders.py
Normal file
99
app/services/rider/get_active_riders.py
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
|
||||||
|
import httpx
|
||||||
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
from app.config.rider_preferences import RIDER_PREFERRED_KITCHENS
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
async def fetch_active_riders() -> List[Dict[str, Any]]:
    """
    Fetch active rider logs from the external API for the current date.
    Returns a list of rider log dictionaries (empty list on any failure).
    """
    try:
        today = datetime.now().strftime("%Y-%m-%d")
        endpoint = "https://jupiter.nearle.app/live/api/v2/partners/getriderlogs/"
        query = {
            "applocationid": 1,
            "partnerid": 44,
            "fromdate": today,
            "todate": today,
            "keyword": ""
        }

        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.get(endpoint, params=query)
            resp.raise_for_status()
            payload = resp.json()

        if payload and payload.get("code") == 200 and payload.get("details"):
            # Filter riders who are in our preferences list and are 'active' or 'idle' (assuming we want online riders)
            # The user's example showed "onduty": 1. We might want to filter by that.
            # For now, returning all logs, filtering can happen in assignment logic or here.
            # Let's return the raw list as requested, filtering logic will be applied during assignment.
            return payload.get("details", [])

        logger.warning(f"Fetch active riders returned no details: {payload}")
        return []

    except Exception as e:
        logger.error(f"Error fetching active riders: {e}", exc_info=True)
        return []
|
||||||
|
|
||||||
|
async def fetch_created_orders() -> List[Dict[str, Any]]:
    """
    Fetch all orders in 'created' state for the current date.
    Returns an empty list on any failure or when the API reports no details.
    """
    try:
        today = datetime.now().strftime("%Y-%m-%d")
        endpoint = "https://jupiter.nearle.app/live/api/v1/orders/tenant/getorders/"
        # Removed pagesize as per user request to fetch all
        query = {
            "applocationid": 0,
            "tenantid": 0,
            "locationid": 0,
            "status": "created",
            "fromdate": today,
            "todate": today,
            "keyword": "",
            "pageno": 1
            # "pagesize" intentionally omitted to fetch all
        }

        # Longer timeout than the rider-log call: the unpaged order list
        # can be large.
        async with httpx.AsyncClient(timeout=60.0) as client:
            resp = await client.get(endpoint, params=query)
            resp.raise_for_status()
            payload = resp.json()

        if payload and payload.get("code") == 200 and payload.get("details"):
            return payload.get("details", [])

        logger.warning(f"Fetch created orders returned no details: {payload}")
        return []

    except Exception as e:
        logger.error(f"Error fetching created orders: {e}", exc_info=True)
        return []
|
||||||
|
|
||||||
|
async def fetch_rider_pricing() -> List[Dict[str, Any]]:
    """
    Fetch rider pricing configuration from the external API.
    Returns an empty list on any failure or non-200 API code.
    """
    try:
        endpoint = "https://jupiter.nearle.app/live/api/v1/partners/getriderpricing"
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.get(endpoint)
            resp.raise_for_status()
            payload = resp.json()

        if payload and payload.get("code") == 200:
            return payload.get("details", [])

        logger.warning(f"Fetch rider pricing returned no details: {payload}")
        return []

    except Exception as e:
        logger.error(f"Error fetching rider pricing: {e}", exc_info=True)
        return []
|
||||||
78
app/services/rider/rider_history_service.py
Normal file
78
app/services/rider/rider_history_service.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
HISTORY_FILE = "rider_history.pkl"


class RiderHistoryService:
    """Tracks cumulative per-rider workload (total km / order count) on disk.

    State is a ``{rider_id: {"total_km", "total_orders", "last_updated"}}``
    mapping, persisted via pickle after every update.
    """

    def __init__(self, history_file: str = HISTORY_FILE):
        self.history_file = history_file
        self.history = self._load_history()

    def _load_history(self) -> Dict[int, Dict[str, float]]:
        """Load history from the pickle file; missing/corrupt file -> empty dict."""
        if not os.path.exists(self.history_file):
            return {}
        try:
            with open(self.history_file, 'rb') as fh:
                return pickle.load(fh)
        except Exception as e:
            logger.error(f"Failed to load rider history: {e}")
            return {}

    def _save_history(self):
        """Persist the in-memory history to the pickle file (best-effort)."""
        try:
            with open(self.history_file, 'wb') as fh:
                pickle.dump(self.history, fh)
        except Exception as e:
            logger.error(f"Failed to save rider history: {e}")

    def update_rider_stats(self, rider_id: int, distance_km: float, order_count: int):
        """Add distance/order counts to a rider's cumulative totals and persist."""
        rider_id = int(rider_id)
        entry = self.history.setdefault(rider_id, {
            "total_km": 0.0,
            "total_orders": 0,
            "last_updated": datetime.now().isoformat()
        })
        entry["total_km"] += distance_km
        entry["total_orders"] += order_count
        entry["last_updated"] = datetime.now().isoformat()
        # Auto-save on update
        self._save_history()

    def get_rider_score(self, rider_id: int) -> float:
        """
        Get a score representing the rider's historical 'load' (KMs).
        Higher Score = More KMs driven recently.
        """
        return self.history.get(int(rider_id), {}).get("total_km", 0.0)

    def get_preferred_assignment_type(self, rider_id: int, all_rider_scores: Dict[int, float]) -> str:
        """
        Determine if rider should get 'Long' or 'Short' routes based on population average.
        """
        if not all_rider_scores:
            return "ANY"
        mean_score = sum(all_rider_scores.values()) / len(all_rider_scores)
        # Below-average mileage -> give LONG routes (risky); at/above average
        # -> SHORT routes (economy), matching the original balancing policy.
        return "LONG" if self.get_rider_score(rider_id) < mean_score else "SHORT"
|
||||||
108
app/services/rider/rider_state_manager.py
Normal file
108
app/services/rider/rider_state_manager.py
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Dict, Any, List, Set
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
STATE_FILE = "rider_active_state.pkl"


class RiderStateManager:
    """
    Manages the 'Short-Term' Active State of Riders for session persistence.
    Tracks:
    - Minutes Committed (Remaining Workload)
    - Active Kitchens (Unique Pickups in current queue)
    - Last Planned Drop Location (for Daisy Chaining)
    - Timestamp of last update (for Time Decay)
    """

    def __init__(self, state_file: str = STATE_FILE):
        self.state_file = state_file
        self.states = self._load_states()

    def _load_states(self) -> Dict[str, Any]:
        """Load states from pickle; missing/corrupt file -> empty dict."""
        if not os.path.exists(self.state_file):
            return {}
        try:
            with open(self.state_file, 'rb') as fh:
                return pickle.load(fh)
        except Exception as e:
            logger.error(f"Failed to load rider active states: {e}")
            return {}

    def _save_states(self):
        """Persist states to pickle (best-effort)."""
        try:
            with open(self.state_file, 'wb') as fh:
                pickle.dump(self.states, fh)
        except Exception as e:
            logger.error(f"Failed to save rider active states: {e}")

    def get_rider_state(self, rider_id: int) -> Dict[str, Any]:
        """
        Get the current active state of a rider with TIME DECAY applied.
        If the server restarts after 30 mins, the 'minutes_committed' should reduce by 30.
        """
        stored = self.states.get(int(rider_id))
        now = time.time()
        if not stored:
            # Unknown rider: fully idle state.
            return {
                'minutes_remaining': 0.0,
                'last_drop_lat': None,
                'last_drop_lon': None,
                'active_kitchens': set(),
                'last_updated_ts': now,
            }

        # Decay the committed minutes by wall-clock time since last update.
        elapsed_mins = (now - stored.get('last_updated_ts', now)) / 60.0
        minutes_left = max(0.0, stored.get('minutes_remaining', 0.0) - elapsed_mins)

        # Buffer: when the rider is almost done (<= 5 min), free their kitchens.
        kitchens = stored.get('active_kitchens', set()) if minutes_left > 5.0 else set()

        return {
            'minutes_remaining': minutes_left,
            'last_drop_lat': stored.get('last_drop_lat'),
            'last_drop_lon': stored.get('last_drop_lon'),
            'active_kitchens': kitchens,
            'last_updated_ts': now,
        }

    def update_rider_state(self, rider_id: int, added_minutes: float, new_kitchens: Set[str], last_lat: float, last_lon: float):
        """
        Update the state after a new assignment.
        """
        rider_id = int(rider_id)
        # Start from the decayed view so old workload ages out correctly.
        decayed = self.get_rider_state(rider_id)
        self.states[rider_id] = {
            'minutes_remaining': decayed['minutes_remaining'] + added_minutes,
            'last_drop_lat': last_lat,
            'last_drop_lon': last_lon,
            'active_kitchens': decayed['active_kitchens'].union(new_kitchens),
            'last_updated_ts': time.time(),
        }
        self._save_states()

    def clear_state(self, rider_id: int):
        """Drop a rider's active state entirely and persist the change."""
        rider_id = int(rider_id)
        if rider_id in self.states:
            del self.states[rider_id]
            self._save_states()
|
||||||
133
app/services/routing/clustering_service.py
Normal file
133
app/services/routing/clustering_service.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
"""
|
||||||
|
Geographic Clustering Service for Order Assignment
|
||||||
|
Uses K-means clustering to group orders by kitchen location.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import numpy as np
|
||||||
|
from typing import List, Dict, Any, Tuple
|
||||||
|
from collections import defaultdict
|
||||||
|
from math import radians, cos, sin, asin, sqrt
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)


class ClusteringService:
    """Clusters orders geographically to enable balanced rider assignment."""

    def __init__(self):
        # Mean Earth radius used by the haversine distance, in kilometres.
        self.earth_radius_km = 6371

    def haversine(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Calculate distance between two points in km."""
        rlon1, rlat1, rlon2, rlat2 = (
            radians(float(v)) for v in (lon1, lat1, lon2, lat2)
        )
        half_dlat = (rlat2 - rlat1) / 2
        half_dlon = (rlon2 - rlon1) / 2
        a = sin(half_dlat) ** 2 + cos(rlat1) * cos(rlat2) * sin(half_dlon) ** 2
        # Clamp the sqrt argument to 1 so FP rounding can never break asin.
        return 2 * asin(min(1.0, sqrt(a))) * self.earth_radius_km

    def get_kitchen_location(self, order: Dict[str, Any]) -> Tuple[float, float]:
        """Extract kitchen coordinates from order; (0.0, 0.0) when missing/invalid."""
        try:
            lat = float(order.get("pickuplat", 0))
            # Accept either key spelling for pickup longitude.
            lon = float(order.get("pickuplon") or order.get("pickuplong", 0))
            if lat != 0 and lon != 0:
                return lat, lon
        except (ValueError, TypeError):
            pass
        return 0.0, 0.0

    def cluster_orders_by_kitchen(self, orders: List[Dict[str, Any]], max_cluster_radius_km: float = 3.0) -> List[Dict[str, Any]]:
        """
        Cluster orders by kitchen proximity.

        Returns list of clusters, each containing:
        - centroid: (lat, lon) of cluster center
        - orders: list of orders in this cluster
        - kitchen_names: set of kitchen names in cluster
        - total_orders: count
        """
        if not orders:
            return []

        # Bucket orders by kitchen name, remembering each kitchen's coords.
        by_kitchen = defaultdict(list)
        coords_of = {}
        for order in orders:
            name = self._get_kitchen_name(order)
            lat, lon = self.get_kitchen_location(order)
            if lat == 0:
                # Fallback: use delivery location if pickup missing
                lat = float(order.get("deliverylat", 0))
                lon = float(order.get("deliverylong", 0))
            if lat != 0:
                by_kitchen[name].append(order)
                coords_of[name] = (lat, lon)

        # Greedy single-pass merge: each unprocessed kitchen seeds a cluster
        # and absorbs every other unprocessed kitchen within the radius.
        clusters: List[Dict[str, Any]] = []
        merged = set()
        for name, bucket in by_kitchen.items():
            if name in merged:
                continue

            member_names = [name]
            member_orders = bucket[:]
            merged.add(name)
            base_lat, base_lon = coords_of[name]

            for other, (o_lat, o_lon) in coords_of.items():
                if other in merged:
                    continue
                if self.haversine(base_lat, base_lon, o_lat, o_lon) <= max_cluster_radius_km:
                    member_names.append(other)
                    member_orders.extend(by_kitchen[other])
                    merged.add(other)

            # Centroid = mean of member orders' valid pickup coordinates,
            # falling back to the seed kitchen when none are usable.
            pts = [self.get_kitchen_location(o) for o in member_orders]
            pts = [(la, lo) for la, lo in pts if la != 0]
            if pts:
                centroid = (
                    sum(p[0] for p in pts) / len(pts),
                    sum(p[1] for p in pts) / len(pts),
                )
            else:
                centroid = (base_lat, base_lon)

            clusters.append({
                'centroid': centroid,
                'orders': member_orders,
                'kitchen_names': set(member_names),
                'total_orders': len(member_orders),
            })

        # Largest clusters first.
        clusters.sort(key=lambda c: c['total_orders'], reverse=True)

        logger.info(f"Created {len(clusters)} clusters from {len(by_kitchen)} kitchens")
        return clusters

    def _get_kitchen_name(self, order: Dict[str, Any]) -> str:
        """Extract kitchen name from order."""
        for key in ('storename', 'restaurantname', 'kitchenname', 'partnername', 'store_name'):
            value = order.get(key)
            if value:
                return str(value).strip()
        return "Unknown"
|
||||||
326
app/services/routing/kalman_filter.py
Normal file
326
app/services/routing/kalman_filter.py
Normal file
@@ -0,0 +1,326 @@
|
|||||||
|
"""
|
||||||
|
GPS Kalman Filter \u2014 rider-api
|
||||||
|
|
||||||
|
A 1D Kalman filter applied independently to latitude and longitude
|
||||||
|
to smooth noisy GPS coordinates from riders and delivery points.
|
||||||
|
|
||||||
|
Why Kalman for GPS?
|
||||||
|
- GPS readings contain measurement noise (\u00b15\u201315m typical, \u00b150m poor signal)
|
||||||
|
- Rider location pings can "jump" due to bad signal or device error
|
||||||
|
- Kalman filter gives an optimal estimate by balancing:
|
||||||
|
(1) Previous predicted position (process model)
|
||||||
|
(2) New GPS measurement (observation model)
|
||||||
|
|
||||||
|
Design:
|
||||||
|
- Separate filter instance per rider (stateful \u2014 preserves history)
|
||||||
|
- `CoordinateKalmanFilter` \u2014 single lat/lon smoother
|
||||||
|
- `GPSKalmanFilter` \u2014 wraps two CoordinateKalmanFilters (lat + lon)
|
||||||
|
- `RiderKalmanRegistry` \u2014 manages per-rider filter instances
|
||||||
|
- `smooth_coordinates()` \u2014 stateless single-shot smoother for delivery coords
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Stateless (one-shot, no history \u2014 for delivery coords):
|
||||||
|
smooth_lat, smooth_lon = smooth_coordinates(raw_lat, raw_lon)
|
||||||
|
|
||||||
|
# Stateful (per-rider, preserves motion history):
|
||||||
|
registry = RiderKalmanRegistry()
|
||||||
|
lat, lon = registry.update(rider_id=1116, lat=11.0067, lon=76.9558)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Dict, Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
||||||
|
# CORE 1D KALMAN FILTER
|
||||||
|
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
||||||
|
|
||||||
|
class CoordinateKalmanFilter:
    """
    1-dimensional Kalman filter for a single GPS coordinate (lat or lon).

    State model: position only (constant position with random walk).

    Each update runs predict (uncertainty grows by Q) then correct
    (weighted fusion of prediction and measurement via the Kalman gain).

    Parameters:
        process_noise (Q): How much position can change between measurements.
                           Higher = filter trusts new measurements more (less smoothing).
        measurement_noise (R): GPS measurement uncertainty.
                           Higher = filter trusts history more (more smoothing).
        initial_uncertainty: Starting value for the error covariance P.
    """

    def __init__(
        self,
        process_noise: float = 1e-4,
        measurement_noise: float = 0.01,
        initial_uncertainty: float = 1.0,
    ):
        self.Q = process_noise
        self.R = measurement_noise
        self._x: Optional[float] = None   # current estimate (None until seeded)
        self._P: float = initial_uncertainty  # error covariance

    @property
    def initialized(self) -> bool:
        """True once at least one measurement has seeded the filter."""
        return self._x is not None

    def update(self, measurement: float) -> float:
        """Process one new measurement and return the filtered estimate."""
        if self._x is None:
            # First reading seeds the state directly.
            self._x = measurement
            return self._x

        # Predict: position unchanged, uncertainty grows.
        predicted_P = self._P + self.Q

        # Correct: blend prediction with the measurement by the Kalman gain.
        gain = predicted_P / (predicted_P + self.R)
        self._x = self._x + gain * (measurement - self._x)
        self._P = (1.0 - gain) * predicted_P

        return self._x

    def reset(self):
        """Forget all state so the next update re-seeds the filter."""
        self._x = None
        self._P = 1.0
|
||||||
|
|
||||||
|
|
||||||
|
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
||||||
|
# 2D GPS KALMAN FILTER (lat + lon)
|
||||||
|
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
||||||
|
|
||||||
|
class GPSKalmanFilter:
    """
    Two-dimensional GPS smoother built from two independent 1D Kalman
    filters, one for latitude and one for longitude.
    """

    def __init__(
        self,
        process_noise: float = 1e-4,
        measurement_noise: float = 0.01,
    ):
        self.lat_filter = CoordinateKalmanFilter(process_noise, measurement_noise)
        self.lon_filter = CoordinateKalmanFilter(process_noise, measurement_noise)
        self.last_updated: float = time.time()
        self.update_count: int = 0

    def update(self, lat: float, lon: float) -> Tuple[float, float]:
        """Feed a new GPS reading and get the smoothed (lat, lon)."""
        if not self._is_valid_coord(lat, lon):
            # Invalid reading: fall back to the last estimate when we have
            # one, otherwise echo the input unchanged.
            estimate = self.get_estimate()
            return estimate if estimate is not None else (lat, lon)

        smoothed = (self.lat_filter.update(lat), self.lon_filter.update(lon))
        self.last_updated = time.time()
        self.update_count += 1
        return smoothed

    def get_estimate(self) -> Optional[Tuple[float, float]]:
        """Last smoothed (lat, lon), or None before any valid update."""
        if not self.lat_filter.initialized:
            return None
        return self.lat_filter._x, self.lon_filter._x

    def reset(self):
        """Clear both axis filters and the update counter."""
        self.lat_filter.reset()
        self.lon_filter.reset()
        self.update_count = 0

    @staticmethod
    def _is_valid_coord(lat: float, lon: float) -> bool:
        """True for plausible coordinates; (0, 0) is treated as 'no fix'."""
        try:
            lat_f, lon_f = float(lat), float(lon)
        except (TypeError, ValueError):
            return False
        if lat_f == 0.0 and lon_f == 0.0:
            return False
        return -90.0 <= lat_f <= 90.0 and -180.0 <= lon_f <= 180.0
|
||||||
|
|
||||||
|
|
||||||
|
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
||||||
|
# PER-RIDER FILTER REGISTRY
|
||||||
|
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
||||||
|
|
||||||
|
class RiderKalmanRegistry:
    """
    Maintains per-rider Kalman filter instances across calls.
    Stale filters (> 30 min silence by default) are automatically reset
    the next time they are requested.
    """

    def __init__(
        self,
        process_noise: float = 1e-4,
        measurement_noise: float = 0.01,
        stale_seconds: float = 1800.0,
    ):
        self._filters: Dict[str, GPSKalmanFilter] = {}
        self._process_noise = process_noise
        self._measurement_noise = measurement_noise
        self._stale_seconds = stale_seconds

    def _get_or_create(self, rider_id) -> GPSKalmanFilter:
        """Return the rider's filter, resetting it if stale, creating if absent."""
        key = str(rider_id)
        existing = self._filters.get(key)
        if existing is not None:
            # A long-silent filter carries obsolete history — start fresh.
            if time.time() - existing.last_updated > self._stale_seconds:
                existing.reset()
            return existing

        fresh = GPSKalmanFilter(
            process_noise=self._process_noise,
            measurement_noise=self._measurement_noise,
        )
        self._filters[key] = fresh
        return fresh

    def update(self, rider_id, lat: float, lon: float) -> Tuple[float, float]:
        """Smooth one GPS reading for the given rider."""
        return self._get_or_create(rider_id).update(lat, lon)

    def get_estimate(self, rider_id) -> Optional[Tuple[float, float]]:
        """Last smoothed position for the rider, or None if unknown."""
        f = self._filters.get(str(rider_id))
        return f.get_estimate() if f is not None else None

    def reset_rider(self, rider_id):
        """Reset (but keep) the rider's filter."""
        f = self._filters.get(str(rider_id))
        if f is not None:
            f.reset()

    def clear_all(self):
        """Drop every rider filter."""
        self._filters.clear()
|
||||||
|
|
||||||
|
|
||||||
|
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
||||||
|
# GLOBAL REGISTRY (process-level singleton)
|
||||||
|
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
||||||
|
|
||||||
|
# Process-level singleton: rider filter history survives across calls within
# the same worker process (NOTE(review): not shared across processes — each
# worker keeps its own registry).
_global_registry = RiderKalmanRegistry()


def get_registry() -> RiderKalmanRegistry:
    """Get the process-level rider Kalman filter registry."""
    return _global_registry
|
||||||
|
|
||||||
|
|
||||||
|
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
||||||
|
# STATELESS COORDINATE SMOOTHER
|
||||||
|
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
||||||
|
|
||||||
|
def smooth_coordinates(
    lat: float,
    lon: float,
    *,
    prior_lat: Optional[float] = None,
    prior_lon: Optional[float] = None,
    process_noise: float = 1e-4,
    measurement_noise: float = 0.01,
) -> Tuple[float, float]:
    """
    Stateless single-shot GPS smoother.
    If a prior is provided, blends the new reading towards it.
    """
    kf = GPSKalmanFilter(process_noise=process_noise, measurement_noise=measurement_noise)

    # Seed the throwaway filter with the prior (when valid) so the actual
    # reading below is blended toward it instead of taken verbatim.
    if prior_lat is not None and prior_lon is not None:
        try:
            seed_lat, seed_lon = float(prior_lat), float(prior_lon)
        except (TypeError, ValueError):
            pass
        else:
            if GPSKalmanFilter._is_valid_coord(seed_lat, seed_lon):
                kf.update(seed_lat, seed_lon)

    return kf.update(lat, lon)
|
||||||
|
|
||||||
|
|
||||||
|
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
||||||
|
# BATCH SMOOTHERS
|
||||||
|
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
||||||
|
|
||||||
|
def smooth_rider_locations(riders: list) -> list:
    """
    Kalman-smooth a batch of rider dicts in-place.

    Uses the global per-rider filter registry, so each rider's filter
    history survives across calls. Reads/writes latitude and longitude
    (plus currentlat/currentlong when those keys exist) and tags every
    processed rider with _kalman_smoothed = True. Returns the same list.
    """
    registry = get_registry()
    for entry in riders:
        try:
            uid = (
                entry.get("userid") or entry.get("riderid") or
                entry.get("id") or "unknown"
            )
            lat_in = float(entry.get("latitude") or entry.get("currentlat") or 0)
            lon_in = float(entry.get("longitude") or entry.get("currentlong") or 0)
            # A (0, 0) pair means "no fix" -- leave the record untouched.
            if lat_in == 0.0 and lon_in == 0.0:
                continue
            new_lat, new_lon = registry.update(uid, lat_in, lon_in)
            # Cast back to string for Go compatibility
            lat_str = str(round(new_lat, 8))
            lon_str = str(round(new_lon, 8))
            entry["latitude"] = lat_str
            entry["longitude"] = lon_str
            if "currentlat" in entry:
                entry["currentlat"] = lat_str
            if "currentlong" in entry:
                entry["currentlong"] = lon_str
            entry["_kalman_smoothed"] = True
        except Exception as e:
            # Best-effort per rider: a bad record never breaks the batch.
            logger.debug(f"Kalman rider smoothing skipped: {e}")
    return riders
|
||||||
|
|
||||||
|
|
||||||
|
def smooth_order_coordinates(orders: list) -> list:
    """
    Apply stateless Kalman smoothing to each order's delivery coordinates.

    The pickup point, when parseable, seeds the filter as a prior.
    Modifies the order dicts in-place (deliverylat/deliverylong, plus
    droplat/droplon when present) and returns the same list.
    """
    for rec in orders:
        try:
            drop_lat = float(rec.get("deliverylat") or rec.get("droplat") or 0)
            drop_lon = float(rec.get("deliverylong") or rec.get("droplon") or 0)
            if not GPSKalmanFilter._is_valid_coord(drop_lat, drop_lon):
                continue

            # Pickup coords act as the Kalman prior when available.
            seed_lat_raw = rec.get("pickuplat")
            seed_lon_raw = rec.get("pickuplon") or rec.get("pickuplong")
            try:
                p_lat = float(seed_lat_raw) if seed_lat_raw else None
                p_lon = float(seed_lon_raw) if seed_lon_raw else None
            except (TypeError, ValueError):
                p_lat, p_lon = None, None

            out_lat, out_lon = smooth_coordinates(
                drop_lat, drop_lon,
                prior_lat=p_lat,
                prior_lon=p_lon,
            )
            # Cast back to string for Go compatibility (fixes unmarshal error)
            lat_str = str(round(out_lat, 8))
            lon_str = str(round(out_lon, 8))
            rec["deliverylat"] = lat_str
            rec["deliverylong"] = lon_str
            if "droplat" in rec:
                rec["droplat"] = lat_str
            if "droplon" in rec:
                rec["droplon"] = lon_str
            rec["_kalman_smoothed"] = True
        except Exception as e:
            # Best-effort per order: a bad record never breaks the batch.
            logger.debug(f"Kalman order smoothing skipped: {e}")
    return orders
|
||||||
158
app/services/routing/realistic_eta_calculator.py
Normal file
158
app/services/routing/realistic_eta_calculator.py
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
"""
|
||||||
|
Realistic ETA Calculator for Delivery Operations
|
||||||
|
|
||||||
|
Accounts for:
|
||||||
|
- City traffic conditions
|
||||||
|
- Stop time at pickup/delivery
|
||||||
|
- Navigation time
|
||||||
|
- Parking/finding address time
|
||||||
|
- Different speeds for different order types
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class RealisticETACalculator:
    """
    Calculates realistic ETAs accounting for real-world delivery
    conditions: traffic bands, pickup/drop handling time, distance-based
    speed scaling, and a navigation buffer.
    """

    def __init__(self):
        from app.config.dynamic_config import get_config
        cfg = get_config()

        # BASE SPEED (km/h) - Driven by the DB configuration
        base_speed = cfg.get("avg_speed_kmh", 18.0)

        # Traffic-band speeds derived from the configured base speed.
        self.CITY_SPEED_HEAVY_TRAFFIC = base_speed * 0.7  # Usually ~12 km/h
        self.CITY_SPEED_MODERATE = base_speed             # Usually ~18 km/h
        self.CITY_SPEED_LIGHT = base_speed * 1.2          # Usually ~21.6 km/h

        # Fixed handling buffers, in minutes.
        self.PICKUP_TIME = cfg.get("eta_pickup_time_min", 3.0)
        self.DELIVERY_TIME = cfg.get("eta_delivery_time_min", 4.0)
        self.NAVIGATION_BUFFER = cfg.get("eta_navigation_buffer_min", 1.5)

        # Distance-based speed multipliers: short hops (<2km) crawl
        # through stops/starts, long hauls (>8km) may include faster
        # stretches.
        self.SHORT_TRIP_FACTOR = cfg.get("eta_short_trip_factor", 0.8)
        self.LONG_TRIP_FACTOR = cfg.get("eta_long_trip_factor", 1.1)

    def calculate_eta(self,
                      distance_km: float,
                      is_first_order: bool = False,
                      order_type: str = "Economy",
                      time_of_day: str = "peak") -> int:
        """
        Calculate a realistic ETA in minutes.

        Args:
            distance_km: Distance to travel in kilometers
            is_first_order: If True, includes pickup time
            order_type: "Economy", "Premium", or "Risky"
                NOTE(review): accepted for interface compatibility but
                not used by the speed model yet.
            time_of_day: "peak", "normal", or "light" traffic

        Returns:
            ETA in minutes, padded by one minute as a safety margin.
        """
        if distance_km <= 0:
            return 0

        # 1. Pick the base speed for the traffic band.
        band_speeds = {
            "peak": self.CITY_SPEED_HEAVY_TRAFFIC,
            "light": self.CITY_SPEED_LIGHT,
        }
        chosen_speed = band_speeds.get(time_of_day, self.CITY_SPEED_MODERATE)

        # 2. Scale for trip length (short trips hit more lights; long
        # trips may include faster roads).
        if distance_km < 2.0:
            chosen_speed *= self.SHORT_TRIP_FACTOR
        elif distance_km > 8.0:
            chosen_speed *= self.LONG_TRIP_FACTOR

        # 3. Travel time in minutes.
        minutes = (distance_km / chosen_speed) * 60

        # 4. Handling buffers.
        if is_first_order:
            minutes += self.PICKUP_TIME
        minutes += self.DELIVERY_TIME
        # Navigation buffer only matters on non-trivial trips.
        if distance_km > 3.0:
            minutes += self.NAVIGATION_BUFFER

        # 5. Safety margin: truncate then add one minute -- riders prefer
        # to arrive early rather than late.
        return int(minutes) + 1

    def calculate_batch_eta(self, orders: list) -> list:
        """
        Calculate ETAs for a batch of orders in sequence.

        Args:
            orders: List of order dicts with 'previouskms' and 'step' fields

        Returns:
            Same list with updated 'eta' fields (stringified) and an
            'eta_realistic' flag set on each order.
        """
        for entry in orders:
            eta_val = self.calculate_eta(
                distance_km=float(entry.get('previouskms', 0)),
                # Only the first stop in a sequence pays the pickup cost.
                is_first_order=(entry.get('step', 1) == 1),
                order_type=entry.get('ordertype', 'Economy'),
                time_of_day="normal"  # Default to moderate traffic
            )
            entry['eta'] = str(eta_val)
            entry['eta_realistic'] = True  # Flag to indicate realistic calculation
        return orders
|
||||||
|
|
||||||
|
|
||||||
|
def get_time_of_day_category() -> str:
|
||||||
|
"""
|
||||||
|
Determine current traffic conditions based on time.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
"peak", "normal", or "light"
|
||||||
|
"""
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
current_hour = datetime.now().hour
|
||||||
|
|
||||||
|
# Peak hours: 8-10 AM, 12-2 PM, 5-8 PM
|
||||||
|
if (8 <= current_hour < 10) or (12 <= current_hour < 14) or (17 <= current_hour < 20):
|
||||||
|
return "peak"
|
||||||
|
# Light traffic: Late night/early morning
|
||||||
|
elif current_hour < 7 or current_hour >= 22:
|
||||||
|
return "light"
|
||||||
|
else:
|
||||||
|
return "normal"
|
||||||
425
app/services/routing/route_optimizer.py
Normal file
425
app/services/routing/route_optimizer.py
Normal file
@@ -0,0 +1,425 @@
|
|||||||
|
"""Production-grade route optimization using Google OR-Tools.
|
||||||
|
|
||||||
|
ALGORITHM: TSP / VRP with Google OR-Tools
|
||||||
|
- Industry-standard solver (same as used by major logistics companies)
|
||||||
|
- Constraint-based optimization
|
||||||
|
- Handles time windows (future proofing)
|
||||||
|
- Guaranteed optimal or near-optimal solution
|
||||||
|
|
||||||
|
FEATURES:
|
||||||
|
- Automatic outlier detection and coordinate correction
|
||||||
|
- Hybrid distance calculation (Google Maps + Haversine fallback)
|
||||||
|
- Robust error handling for invalid inputs
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
import asyncio
|
||||||
|
from typing import Dict, Any, List as _List, Optional, Tuple, Union
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import httpx
|
||||||
|
from app.services.routing.kalman_filter import smooth_order_coordinates
|
||||||
|
import numpy as np
|
||||||
|
from app.core.arrow_utils import calculate_haversine_matrix_vectorized
|
||||||
|
from app.config.dynamic_config import get_config
|
||||||
|
|
||||||
|
try:
|
||||||
|
from ortools.constraint_solver import routing_enums_pb2
|
||||||
|
from ortools.constraint_solver import pywrapcp
|
||||||
|
ORTOOLS_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
ORTOOLS_AVAILABLE = False
|
||||||
|
logging.warning("Google OR-Tools not found. Falling back to simple greedy solver.")
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class RouteOptimizer:
    """Route optimization using Google OR-Tools (Async).

    Pipeline (see optimize_provider_payload): Kalman-smooth delivery
    coordinates, repair obvious coordinate errors, build a road-distance
    matrix, apply ML risk penalties, solve an open TSP, then decorate
    each order with step/distance/ETA metadata.
    """

    def __init__(self):
        # All tunables come from DynamicConfig; imports of sibling
        # services are deferred to avoid circular imports at module load.
        self.earth_radius = 6371  # Earth radius in km
        _cfg = get_config()

        # Initialize Realistic ETA Calculator
        from app.services.routing.realistic_eta_calculator import RealisticETACalculator, get_time_of_day_category
        self.eta_calculator = RealisticETACalculator()
        self.get_traffic_condition = get_time_of_day_category

        # Speed settings (ML-tuned via DynamicConfig)
        self.avg_speed_kmh = float(_cfg.get("avg_speed_kmh"))

        # Road factor (haversine -> road distance multiplier, ML-tuned)
        self.road_factor = float(_cfg.get("road_factor"))

        # Google Maps API settings; Maps usage is enabled only when a key
        # is present in the environment.
        self.google_maps_api_key = os.getenv("GOOGLE_MAPS_API_KEY", "")
        self.use_google_maps = bool(self.google_maps_api_key)

        # Solver time limit (ML-tuned)
        self.search_time_limit_seconds = int(_cfg.get("search_time_limit_seconds"))

        # Initialize ID3 Behavior Analyzer
        from app.services.ml.behavior_analyzer import get_analyzer
        self.behavior_analyzer = get_analyzer()

    def haversine_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Calculate great circle distance between two points on Earth (in km).

        Returns 0.0 on any conversion/math failure rather than raising,
        so callers can treat bad coordinates as "no distance".
        """
        try:
            lat1, lon1, lat2, lon2 = map(math.radians, [float(lat1), float(lon1), float(lat2), float(lon2)])
            dlat = lat2 - lat1
            dlon = lon2 - lon1
            # Standard haversine formula.
            a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
            c = 2 * math.asin(math.sqrt(a))
            return self.earth_radius * c
        except Exception:
            return 0.0

    async def _get_google_maps_distances_batch(self, origin_lat: float, origin_lon: float,
                                               destinations: _List[tuple]) -> Dict[tuple, Dict[str, Optional[float]]]:
        """Get road distances for multiple destinations from Google Maps API. (Async, Parallel)

        Returns a mapping of destination (lat, lon) -> {'distance': km,
        'duration': minutes or None}. Destinations the API could not
        resolve are simply absent from the result. Returns {} when no API
        key is configured or there are no destinations.
        """
        if not self.use_google_maps or not destinations:
            return {}

        results = {}
        # Distance Matrix API caps the number of destinations per request,
        # so fan out in chunks of 25 and gather them concurrently.
        batch_size = 25
        chunks = [destinations[i:i + batch_size] for i in range(0, len(destinations), batch_size)]

        async def process_batch(batch):
            # One HTTP call for up to `batch_size` destinations; failures
            # are logged and yield an empty partial result (best-effort).
            batch_result = {}
            try:
                dest_str = "|".join([f"{lat},{lon}" for lat, lon in batch])
                url = "https://maps.googleapis.com/maps/api/distancematrix/json"
                params = {
                    "origins": f"{origin_lat},{origin_lon}",
                    "destinations": dest_str,
                    "key": self.google_maps_api_key,
                    "units": "metric"
                }
                async with httpx.AsyncClient(timeout=10.0) as client:
                    response = await client.get(url, params=params)
                    response.raise_for_status()
                    data = response.json()

                if data.get("status") == "OK":
                    rows = data.get("rows", [])
                    if rows:
                        # Single origin -> only the first row is relevant;
                        # elements are positionally aligned with `batch`.
                        elements = rows[0].get("elements", [])
                        for idx, element in enumerate(elements):
                            if idx < len(batch):
                                dest_coord = batch[idx]
                                if element.get("status") == "OK":
                                    dist = element.get("distance", {}).get("value")
                                    dur = element.get("duration", {}).get("value")
                                    if dist is not None:
                                        # API returns meters/seconds; convert to km/minutes.
                                        batch_result[dest_coord] = {
                                            'distance': dist / 1000.0,
                                            'duration': dur / 60.0 if dur else None
                                        }
            except Exception as e:
                logger.warning(f"Google Maps batch call failed: {e}")
            return batch_result

        batch_results_list = await asyncio.gather(*[process_batch(chunk) for chunk in chunks])
        for res in batch_results_list:
            results.update(res)
        return results

    def _solve_tsp_ortools(self, locations: _List[Tuple[float, float]], dist_matrix: _List[_List[float]]) -> _List[int]:
        """Solve TSP using Google OR-Tools.

        Returns the visiting order as a list of indices into `locations`,
        always starting with the depot index 0. Falls back to greedy
        nearest-neighbor when OR-Tools is missing or finds no solution.
        """
        if not ORTOOLS_AVAILABLE:
            # Fallback to simple Greedy NN if OR-Tools not installed
            return self._solve_greedy(locations, dist_matrix)

        if not locations or len(locations) <= 1:
            return [0]

        manager = pywrapcp.RoutingIndexManager(len(locations), 1, 0)  # num_nodes, num_vehicles, depot
        routing = pywrapcp.RoutingModel(manager)

        def distance_callback(from_index, to_index):
            # OR-Tools passes internal indices; map back to node ids.
            from_node = manager.IndexToNode(from_index)
            to_node = manager.IndexToNode(to_index)

            # Open TSP: Returning to the depot (index 0) has zero cost.
            # This ensures the solver optimizes for the path from start to last drop-off
            # rather than a closed circuit that might be reversed if the rider is on the "far" side.
            if to_node == 0:
                return 0

            # OR-Tools works with integers, so we scale by 1000 (meters)
            val = dist_matrix[from_node][to_node]
            return int(val * 1000)

        transit_callback_index = routing.RegisterTransitCallback(distance_callback)
        routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

        search_parameters = pywrapcp.DefaultRoutingSearchParameters()
        search_parameters.first_solution_strategy = (
            routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC
        )
        search_parameters.local_search_metaheuristic = (
            routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH
        )
        # Cap solver wall time (ML-tuned in __init__).
        search_parameters.time_limit.seconds = self.search_time_limit_seconds

        solution = routing.SolveWithParameters(search_parameters)

        if solution:
            # Walk the solution chain from the start node to the end.
            index = routing.Start(0)
            route = []
            while not routing.IsEnd(index):
                route.append(manager.IndexToNode(index))
                index = solution.Value(routing.NextVar(index))
            return route
        else:
            return self._solve_greedy(locations, dist_matrix)

    def _solve_greedy(self, locations, dist_matrix):
        """Simple Greedy Nearest Neighbor fallback.

        Starts at the depot (index 0) and repeatedly visits the closest
        unvisited node. Not optimal, but always returns a valid route.
        """
        unvisited = set(range(1, len(locations)))
        curr = 0
        route = [0]
        while unvisited:
            nearest = min(unvisited, key=lambda x: dist_matrix[curr][x])
            route.append(nearest)
            unvisited.remove(nearest)
            curr = nearest
        return route

    def _cleanup_coords(self, lat: Any, lon: Any, ref_lat: float, ref_lon: float) -> Tuple[float, float]:
        """
        Heuristic to fix bad coordinates.
        1. Fixes lat==lon typo.
        2. Fixes missing negative signs if needed (not needed for India).
        3. Projects outlier > 500km to reference (centroid).

        Returns (0.0, 0.0) when the inputs cannot be parsed at all, and
        passes zero coordinates through untouched so callers can treat
        them as "missing".
        """
        try:
            lat = float(lat)
            lon = float(lon)
        # NOTE(review): bare except also swallows KeyboardInterrupt;
        # (TypeError, ValueError) would be the targeted choice.
        except:
            return 0.0, 0.0

        if lat == 0 or lon == 0:
            return lat, lon

        # 1. Check strict equality (typo)
        if abs(lat - lon) < 0.0001:
            if ref_lon != 0:
                # If reference is available, assume lat is correct and fix lon
                # (Common error: copy lat to lon field)
                return lat, ref_lon

        # 2. Check general outlier (e.g. 500km away)
        if ref_lat != 0 and ref_lon != 0:
            dist = self.haversine_distance(lat, lon, ref_lat, ref_lon)
            if dist > 500:
                # Returning reference prevents map explosion
                return ref_lat, ref_lon

        return lat, lon

    async def optimize_provider_payload(self, orders: _List[Dict[str, Any]], start_coords: Optional[tuple] = None) -> _List[Dict[str, Any]]:
        """Optimize delivery route and add step metrics (OR-Tools).

        Input orders are not mutated (a shallow per-dict copy is taken).
        Returns the orders reordered into the optimized visiting sequence,
        each annotated with step, previouskms, cumulativekms, actualkms,
        kms, ordertype and eta (string-typed for Go compatibility).
        """
        if not orders:
            return []

        # Deep copy (per-dict shallow copies so input dicts stay untouched)
        orders = [dict(order) for order in orders]

        # 0. KALMAN FILTER - Smooth noisy delivery GPS coordinates
        orders = smooth_order_coordinates(orders)

        # Helpers
        def _to_float(v: Any) -> float:
            # Lenient conversion: anything unparseable becomes 0.0.
            try: return float(v)
            except: return 0.0

        def _normalize_dt(val: Any) -> str:
            # Normalize known datetime formats to "YYYY-MM-DD HH:MM:SS";
            # unknown formats are passed through unchanged.
            if val in (None, "", 0): return ""
            s = str(val).strip()
            for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%d %H:%M:%S"):
                try: return datetime.strptime(s, fmt).strftime("%Y-%m-%d %H:%M:%S")
                except: pass
            return s

        # 1. PREPARE COORDINATES & CENTROID
        # The centroid of valid delivery points serves as the reference
        # for outlier repair in _cleanup_coords.
        valid_lats = []
        valid_lons = []

        for o in orders:
            lat = _to_float(o.get("deliverylat"))
            lon = _to_float(o.get("deliverylong"))
            if lat != 0 and lon != 0:
                valid_lats.append(lat)
                valid_lons.append(lon)

        centroid_lat = sum(valid_lats)/len(valid_lats) if valid_lats else 0.0
        centroid_lon = sum(valid_lons)/len(valid_lons) if valid_lons else 0.0

        # 2. DETERMINE START LOCATION (With Fix)
        start_lat, start_lon = 0.0, 0.0

        # Try explicit start_coords first
        if start_coords and len(start_coords) == 2:
            try:
                start_lat, start_lon = float(start_coords[0]), float(start_coords[1])
            except: pass

        # Fallback to pickup location in orders
        if start_lat == 0:
            for o in orders:
                plat = _to_float(o.get("pickuplat"))
                plon = _to_float(o.get("pickuplon") or o.get("pickuplong"))
                if plat != 0:
                    start_lat, start_lon = plat, plon
                    break

        # Fallback to centroid
        if start_lat == 0:
            start_lat, start_lon = centroid_lat, centroid_lon

        # FIX BAD START COORDINATES
        start_lat, start_lon = self._cleanup_coords(start_lat, start_lon, centroid_lat, centroid_lon)

        # 3. BUILD LOCATIONS LIST FOR SOLVER
        # Index 0 is Start (Depot), 1..N are orders
        locations = [(start_lat, start_lon)]
        points_map = []  # Maps solver index 1..N back to original order index

        for idx, order in enumerate(orders):
            lat = _to_float(order.get("deliverylat"))
            lon = _to_float(order.get("deliverylong"))

            # Project coordinates and ensure they are strings for Go compatibility
            lat, lon = self._cleanup_coords(lat, lon, centroid_lat, centroid_lon)
            order_str_lat, order_str_lon = str(lat), str(lon)

            order["deliverylat"] = order_str_lat
            order["deliverylong"] = order_str_lon
            if "droplat" in order: order["droplat"] = order_str_lat
            if "droplon" in order: order["droplon"] = order_str_lon

            locations.append((lat, lon))
            points_map.append(idx)

        # 4. COMPUTE DISTANCE MATRIX (Vectorized with Arrow/NumPy)
        # road_factor is now ML-tuned (was hardcoded 1.3)
        lats = np.array([loc[0] for loc in locations])
        lons = np.array([loc[1] for loc in locations])
        dist_matrix = calculate_haversine_matrix_vectorized(lats, lons) * self.road_factor

        # 5. RISK-AWARE COST MATRIX (ID3 INTELLIGENCE)
        # Apply Risk Penalties to the matrix before solving.
        # NOTE(review): this is O(n^2) calls into the behavior analyzer;
        # fine for small batches, worth vectorizing for large ones.
        cost_matrix = dist_matrix.copy()
        traffic = self.get_traffic_condition()

        num_locs = len(locations)
        risk_penalty_count = 0

        for i in range(num_locs):
            for j in range(num_locs):
                if i == j: continue
                # Predict success risk for this specific leg
                dist_km = dist_matrix[i][j]
                prediction = self.behavior_analyzer.predict(
                    distance_km=dist_km,
                    timestamp_or_band=traffic,
                )

                if prediction.get("label") == "RISK":  # High Risk predicted by ID3
                    # Add 25% penalty to distance to discourage this leg
                    cost_matrix[i][j] *= 1.25
                    risk_penalty_count += 1

        if risk_penalty_count > 0:
            logger.info(f"ID3 Intelligence: Applied {risk_penalty_count} Risk Penalties to optimize for delivery safety.")

        # 6. SOLVE TSP (on the penalized cost matrix; reported distances
        # below still come from the unpenalized dist_matrix)
        route_indices = self._solve_tsp_ortools(locations, cost_matrix)

        # Remove 0 (depot)
        optimized_order_indices = [i for i in route_indices if i != 0]

        # 7. BUILD RESULT
        result = []
        cumulative_dist = 0.0

        # Track previous location (starts at 0)
        prev_idx = 0

        for step_num, solver_idx in enumerate(optimized_order_indices, start=1):
            order_idx = points_map[solver_idx - 1]
            order = dict(orders[order_idx])

            # Clean fields so stale metrics from upstream never leak through
            for k in ("step", "previouskms", "cumulativekms", "eta", "actualkms", "ordertype"):
                order.pop(k, None)

            # Normalize dates
            for field in ["orderdate", "deliverytime", "created"]:
                if field in order: order[field] = _normalize_dt(order.get(field))

            # Distance for this leg
            step_dist = dist_matrix[prev_idx][solver_idx]
            cumulative_dist += step_dist

            # Metadata (Step metrics are integers in the Go struct)
            order["step"] = int(step_num)
            order["previouskms"] = int(0 if step_num == 1 else int(round(step_dist)))
            order["cumulativekms"] = int(round(cumulative_dist))

            # 8. METRICS (Calculate actual distance, prioritize provider input)
            plat, plon = start_lat, start_lon
            if plat == 0: plat, plon = _to_float(order.get("pickuplat")), _to_float(order.get("pickuplon") or order.get("pickuplong"))
            dlat, dlon = locations[solver_idx]

            # Baseline: Haversine * 1.3 (estimated road factor)
            # NOTE(review): hardcoded 1.3 here, while the matrix above uses
            # the ML-tuned self.road_factor -- confirm this is intentional.
            true_dist = self.haversine_distance(plat, plon, dlat, dlon) * 1.3

            provided_kms = order.get("kms")
            if provided_kms not in (None, "", 0, "0"):
                try:
                    # If provider gave us a distance, respect it as the 'actual' distance
                    true_dist = float(provided_kms)
                except:
                    pass

            order["actualkms"] = str(round(true_dist, 2))
            order["kms"] = str(provided_kms) if provided_kms else str(int(round(true_dist)))

            # Financial metrics - keeping as numbers for calculations
            if "rider_charge" in order: order["rider_charge"] = round(float(order["rider_charge"]), 2)
            if "profit" in order: order["profit"] = round(float(order["profit"]), 2)

            # Type & ETA (tiered by actual delivery distance)
            order["ordertype"] = "Economy" if true_dist <= 5 else "Premium" if true_dist <= 12 else "Risky"

            traffic = self.get_traffic_condition()
            eta = self.eta_calculator.calculate_eta(
                distance_km=step_dist,
                is_first_order=(step_num == 1),
                order_type=order["ordertype"],
                time_of_day=traffic
            )
            order["eta"] = str(eta)

            result.append(order)
            prev_idx = solver_idx

        return result
|
||||||
|
|
||||||
|
def optimize_route(orders: _List[Dict[str, Any]]) -> _List[Dict[str, Any]]:
    """Synchronous wrapper around RouteOptimizer.optimize_provider_payload.

    Fix: the previous implementation used asyncio.get_event_loop(),
    which is deprecated outside a running loop since Python 3.10, and
    created an event loop it never closed. asyncio.run() creates a fresh
    loop, runs the coroutine, and tears the loop down cleanly.

    Returns:
        The optimized order list, or [] when called from inside an
        already-running event loop (matching the original guard) --
        async callers should await optimize_provider_payload directly.
    """
    optimizer = RouteOptimizer()
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop running in this thread: safe to spin one up and block.
        return asyncio.run(optimizer.optimize_provider_payload(orders))
    # A loop is already running here; run_until_complete would fail.
    return []
|
||||||
196
app/services/routing/zone_service.py
Normal file
196
app/services/routing/zone_service.py
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class ZoneService:
|
||||||
|
"""
|
||||||
|
Service to classify orders and riders into geographic zones.
|
||||||
|
Defaulting to Coimbatore logic as per user context.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Approximate Center of Coimbatore (Gandhipuram/Bus Stand area)
|
||||||
|
CENTER_LAT = 11.0168
|
||||||
|
CENTER_LON = 76.9558
|
||||||
|
|
||||||
|
def __init__(self):
    # Stateless service: zone boundaries come from the class-level
    # CENTER_LAT/CENTER_LON constants, so there is no per-instance
    # state to set up.
    pass
|
||||||
|
|
||||||
|
def determine_zone(self, lat: float, lon: float, pincode: Optional[str] = None) -> str:
|
||||||
|
"""
|
||||||
|
Determine the zone (North, South, East, West, etc.) based on coordinates.
|
||||||
|
"""
|
||||||
|
if lat == 0 or lon == 0:
|
||||||
|
return "Unknown"
|
||||||
|
|
||||||
|
lat_diff = lat - self.CENTER_LAT
|
||||||
|
lon_diff = lon - self.CENTER_LON
|
||||||
|
|
||||||
|
# Simple Quadrant Logic
|
||||||
|
# North: +Lat
|
||||||
|
# South: -Lat
|
||||||
|
# East: +Lon
|
||||||
|
# West: -Lon
|
||||||
|
|
||||||
|
# Define a small central buffer (0.01 degrees ~ 1.1km)
|
||||||
|
buffer = 0.010
|
||||||
|
|
||||||
|
is_north = lat_diff > buffer
|
||||||
|
is_south = lat_diff < -buffer
|
||||||
|
is_east = lon_diff > buffer
|
||||||
|
is_west = lon_diff < -buffer
|
||||||
|
|
||||||
|
zone_parts = []
|
||||||
|
|
||||||
|
if is_north: zone_parts.append("North")
|
||||||
|
elif is_south: zone_parts.append("South")
|
||||||
|
|
||||||
|
if is_east: zone_parts.append("East")
|
||||||
|
elif is_west: zone_parts.append("West")
|
||||||
|
|
||||||
|
if not zone_parts:
|
||||||
|
return "Central"
|
||||||
|
|
||||||
|
return " ".join(zone_parts)
|
||||||
|
|
||||||
|
def group_by_zones(self, flat_orders: List[Dict[str, Any]], unassigned_orders: List[Dict[str, Any]] = None, fuel_charge: float = 2.5, base_pay: float = 30.0) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Group a flat list of optimized orders into Zones -> Riders -> Orders.
|
||||||
|
Calculates profit per order and per zone.
|
||||||
|
"""
|
||||||
|
zones_map = {} # "North East": { "riders": { rider_id: [orders] } }
|
||||||
|
unassigned_orders = unassigned_orders or []
|
||||||
|
|
||||||
|
# Merge both for initial processing if you want everything zoned
|
||||||
|
all_to_process = []
|
||||||
|
for o in flat_orders:
|
||||||
|
all_to_process.append((o, True))
|
||||||
|
for o in unassigned_orders:
|
||||||
|
all_to_process.append((o, False))
|
||||||
|
|
||||||
|
for order, is_assigned in all_to_process:
|
||||||
|
# 1. Extract Coords
|
||||||
|
try:
|
||||||
|
# Prefer Delivery location for zoning (where the customer is)
|
||||||
|
lat = float(order.get("deliverylat") or order.get("droplat") or 0)
|
||||||
|
lon = float(order.get("deliverylong") or order.get("droplon") or 0)
|
||||||
|
pincode = str(order.get("deliveryzip") or "")
|
||||||
|
except:
|
||||||
|
lat, lon, pincode = 0, 0, ""
|
||||||
|
|
||||||
|
# 2. Get Zone
|
||||||
|
zone_name = self.determine_zone(lat, lon, pincode)
|
||||||
|
order["zone_name"] = zone_name
|
||||||
|
|
||||||
|
# 3. Initialize Zone Bucket
|
||||||
|
if zone_name not in zones_map:
|
||||||
|
zones_map[zone_name] = {
|
||||||
|
"riders_map": {},
|
||||||
|
"total_orders": 0,
|
||||||
|
"assigned_orders": 0,
|
||||||
|
"unassigned_orders": [],
|
||||||
|
"total_kms": 0.0,
|
||||||
|
"total_profit": 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
# 4. Add to Rider bucket within Zone
|
||||||
|
rider_id = order.get("userid") or order.get("_id")
|
||||||
|
|
||||||
|
# Track kms and profit for this zone
|
||||||
|
try:
|
||||||
|
# 'actualkms' is preferred for delivery distance
|
||||||
|
dist = float(order.get("actualkms", order.get("previouskms", 0)))
|
||||||
|
zones_map[zone_name]["total_kms"] += dist
|
||||||
|
|
||||||
|
# Individual charge for this order: Fixed Base + Variable Distance
|
||||||
|
order_amount = float(order.get("orderamount") or order.get("deliveryamount") or 0)
|
||||||
|
rider_payment = base_pay + (dist * fuel_charge)
|
||||||
|
profit = order_amount - rider_payment
|
||||||
|
|
||||||
|
order["rider_charge"] = round(rider_payment, 2)
|
||||||
|
order["profit"] = round(profit, 2)
|
||||||
|
|
||||||
|
# Profit-based classification (Order Type)
|
||||||
|
if profit <= 0:
|
||||||
|
order["ordertype"] = "Loss"
|
||||||
|
elif profit <= 5:
|
||||||
|
order["ordertype"] = "Risky"
|
||||||
|
elif profit <= 10:
|
||||||
|
order["ordertype"] = "Economy"
|
||||||
|
else:
|
||||||
|
order["ordertype"] = "Premium"
|
||||||
|
|
||||||
|
zones_map[zone_name]["total_profit"] += profit
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# If strictly unassigned order (no rider), put in unassigned
|
||||||
|
if not is_assigned:
|
||||||
|
zones_map[zone_name]["unassigned_orders"].append(order)
|
||||||
|
else:
|
||||||
|
str_rid = str(rider_id)
|
||||||
|
if str_rid not in zones_map[zone_name]["riders_map"]:
|
||||||
|
zones_map[zone_name]["riders_map"][str_rid] = {
|
||||||
|
"rider_details": {
|
||||||
|
"id": str_rid,
|
||||||
|
"name": order.get("username", "Unknown")
|
||||||
|
},
|
||||||
|
"orders": []
|
||||||
|
}
|
||||||
|
zones_map[zone_name]["riders_map"][str_rid]["orders"].append(order)
|
||||||
|
zones_map[zone_name]["assigned_orders"] += 1
|
||||||
|
|
||||||
|
zones_map[zone_name]["total_orders"] += 1
|
||||||
|
|
||||||
|
# 5. Restructure for API Response
|
||||||
|
output_zones = []
|
||||||
|
zone_metrics = []
|
||||||
|
|
||||||
|
sorted_zone_names = sorted(zones_map.keys())
|
||||||
|
|
||||||
|
for z_name in sorted_zone_names:
|
||||||
|
z_data = zones_map[z_name]
|
||||||
|
|
||||||
|
# Flatten riders map
|
||||||
|
riders_list = []
|
||||||
|
for r_id, r_data in z_data["riders_map"].items():
|
||||||
|
riders_list.append({
|
||||||
|
"rider_id": r_data["rider_details"]["id"],
|
||||||
|
"rider_name": r_data["rider_details"]["name"],
|
||||||
|
"orders_count": len(r_data["orders"]),
|
||||||
|
"orders": r_data["orders"]
|
||||||
|
})
|
||||||
|
|
||||||
|
# Create the flat metric summary
|
||||||
|
metrics = {
|
||||||
|
"zone_name": z_name,
|
||||||
|
"total_orders": z_data["total_orders"],
|
||||||
|
"assigned_orders": z_data["assigned_orders"],
|
||||||
|
"unassigned_orders_count": len(z_data["unassigned_orders"]),
|
||||||
|
"active_riders_count": len(riders_list),
|
||||||
|
"total_delivery_kms": round(z_data["total_kms"], 2),
|
||||||
|
"total_profit": round(z_data["total_profit"], 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
zone_metrics.append(metrics)
|
||||||
|
|
||||||
|
# Create the detailed zone object with flattened metrics
|
||||||
|
zone_obj = {
|
||||||
|
"zone_name": z_name,
|
||||||
|
"total_orders": metrics["total_orders"],
|
||||||
|
"active_riders_count": metrics["active_riders_count"],
|
||||||
|
"assigned_orders": metrics["assigned_orders"],
|
||||||
|
"unassigned_orders_count": metrics["unassigned_orders_count"],
|
||||||
|
"total_delivery_kms": metrics["total_delivery_kms"],
|
||||||
|
"total_profit": metrics["total_profit"],
|
||||||
|
"riders": riders_list,
|
||||||
|
"unassigned_orders": z_data["unassigned_orders"]
|
||||||
|
}
|
||||||
|
|
||||||
|
output_zones.append(zone_obj)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"detailed_zones": output_zones,
|
||||||
|
"zone_analysis": zone_metrics
|
||||||
|
}
|
||||||
1522
app/templates/ml_dashboard.html
Normal file
1522
app/templates/ml_dashboard.html
Normal file
File diff suppressed because it is too large
Load Diff
36
docker-compose.yml
Normal file
36
docker-compose.yml
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# Compose stack for the Route Optimization API, published through an
# externally-managed Traefik "frontend" network.
# NOTE(review): the top-level `version` key is obsolete in Compose v2 and is
# ignored by modern tooling — kept here for older docker-compose versions.
version: "3.9"

networks:
  frontend:
    # Network is created outside this file (shared with Traefik).
    external: true

services:
  routes_api:
    build:
      context: .
      dockerfile: Dockerfile
    image: routes-api:latest
    container_name: routes_api
    restart: unless-stopped
    environment:
      - UVICORN_WORKERS=2
      # Redis connection; REDIS_PASSWORD is expected from the .env file.
      - REDIS_URL=redis://:${REDIS_PASSWORD}@routes_redis:6379/0
      # Optional: Set cache TTL in seconds (default: 300 = 5 min, 86400 = 24h)
      # Uncomment and set in .env file: REDIS_CACHE_TTL_SECONDS=86400
      # - REDIS_CACHE_TTL_SECONDS=${REDIS_CACHE_TTL_SECONDS}
      # Google Maps API key for accurate road distance calculation (actualkms)
      # Set in .env file: GOOGLE_MAPS_API_KEY=your_api_key_here
      - GOOGLE_MAPS_API_KEY=${GOOGLE_MAPS_API_KEY}
    labels:
      # Traefik routing: HTTPS (Let's Encrypt) for routes.workolik.com,
      # forwarded to the app listening on container port 8002.
      - traefik.enable=true
      - traefik.http.routers.routes_api.rule=Host(`routes.workolik.com`)
      - traefik.http.routers.routes_api.entrypoints=websecure
      - traefik.http.routers.routes_api.tls.certresolver=letsencrypt
      - traefik.http.services.routes_api.loadbalancer.server.port=8002
      - traefik.docker.network=frontend
    volumes:
      # Persist ML artifacts and learned rider state across container restarts.
      - ./ml_data:/app/ml_data
      - ./rider_history.pkl:/app/rider_history.pkl
      - ./rider_active_state.pkl:/app/rider_active_state.pkl
    networks:
      - frontend
|
||||||
11
docker-entrypoint.sh
Normal file
11
docker-entrypoint.sh
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
#!/bin/sh
# Container entrypoint: starts the FastAPI app under uvicorn.
# Exit immediately if any command fails.
set -e

# Get number of workers from environment or default to 1
WORKERS=${UVICORN_WORKERS:-1}

echo "Starting Route Optimization API with ${WORKERS} worker(s)..."

# Start uvicorn
# `exec` replaces the shell so uvicorn runs as PID 1 and receives
# container stop signals directly.
exec uvicorn app.main:app --host 0.0.0.0 --port 8002 --workers ${WORKERS}
|
||||||
|
|
||||||
18
requirements.txt
Normal file
18
requirements.txt
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
fastapi
|
||||||
|
uvicorn
|
||||||
|
python-dotenv
|
||||||
|
requests
|
||||||
|
numpy
|
||||||
|
pandas
|
||||||
|
scikit-learn
|
||||||
|
scipy
|
||||||
|
openpyxl
|
||||||
|
xlsxwriter
|
||||||
|
httpx
|
||||||
|
ortools
|
||||||
|
pyarrow
|
||||||
|
# ML Hypertuning
|
||||||
|
xgboost>=2.0.0
|
||||||
|
optuna>=3.5.0
|
||||||
|
sqlalchemy>=2.0.0
|
||||||
|
apscheduler>=3.10.0
|
||||||
173
run_simulation.py
Normal file
173
run_simulation.py
Normal file
@@ -0,0 +1,173 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import asyncio
|
||||||
|
from app.services.core.assignment_service import AssignmentService
|
||||||
|
from app.services.routing.route_optimizer import RouteOptimizer
|
||||||
|
from app.core.arrow_utils import save_optimized_route_parquet
|
||||||
|
|
||||||
|
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load Environment Variables
# Best-effort: the simulation still runs if python-dotenv is not installed.
try:
    from dotenv import load_dotenv
    load_dotenv()
    print("✅ Loaded .env file")
except ImportError:
    print("⚠️ python-dotenv not installed, skipping .env load")
|
||||||
|
|
||||||
|
async def run_simulation():
    """Run the full assignment + routing pipeline locally against route.json.

    Steps: load orders, strip prior assignment fields, assign them to a
    mocked rider fleet, optimize each rider's route concurrently, group the
    results into zones, then write output.json (and output.parquet when the
    Arrow export succeeds).
    """
    print("🚀 Starting Logic Simulation (High Efficiency Mode + K-wMeans)...")

    # 1. Load Orders (using route.json as source)
    try:
        with open('route.json', 'r') as f:
            route_data = json.load(f)
    except FileNotFoundError:
        print("❌ route.json not found.")
        return

    raw_orders = route_data.get('details', [])
    # Strip assignment data to simulate fresh orders
    clean_orders = []
    for o in raw_orders:
        o_copy = o.copy()
        for key in ['userid', 'step', 'cumulativekms', 'eta']:
            o_copy.pop(key, None)
        clean_orders.append(o_copy)

    print(f"📦 Loaded {len(clean_orders)} orders.")

    # 2. Mock Riders
    # Full active rider fleet (10 riders).
    rider_ids = [753, 883, 1114, 1271, 1116, 1096, 897, 950, 1272, 1133]  # Full Active Riders List
    # Rider Starting Locations (Based on "Mostly Available Location")
    # Coordinates approximated for Coimbatore areas
    rider_locations = {
        1116: (11.0067, 76.9558),  # VIVEK ANANDAN: RS PURAM
        1096: (11.0450, 76.9000),  # NARAYANASAMY: VADAVALI
        897: (11.0430, 76.9380),   # VARUN EDWARD: KAVUNDAMPALAYAM
        950: (11.0330, 76.9800),   # JAYASABESH: GANAPATHY
        1114: (11.0450, 77.0000),  # TAMILAZHAGAN: GANDHIMA NAGAR
        883: (11.0200, 77.0000),   # RAJAN: PEELAMEDU
        1272: (10.9950, 77.0000),  # MUTHURAJA: RAMANATHAPURAM
        753: (11.0000, 77.0300),   # MANIKANDAN: SINGANALLUR
        1133: (11.0067, 76.9558),  # THATCHINAMOORTHI: RS PURAM (Covering Kavundampalayam to Kovaipudur)
        1271: (11.0067, 76.9558)   # Legacy ID for Thatchinamoorthi
    }

    riders = []
    for rid in rider_ids:
        lat, lon = rider_locations.get(rid, (11.0168, 76.9558))  # Default to Central if unknown
        riders.append({
            "userid": rid,
            "status": "idle",
            "onduty": 1,
            "latitude": str(lat),
            "longitude": str(lon)
        })

    # 3. Run Assignment
    assignment_service = AssignmentService()
    try:
        assignments, unassigned_orders = assignment_service.assign_orders(clean_orders, riders)
    except Exception as e:
        print(f"❌ Error during assignment: {e}")
        import traceback
        traceback.print_exc()
        return

    # 4. Generate Output (Mirroring API Logic)
    optimizer = RouteOptimizer()

    output_details = []
    distribution = {}
    assigned_count = 0

    # Prepare async tasks: one route-optimization task per rider with orders.
    tasks = []
    task_rids = []

    for rid, orders in assignments.items():
        if not orders: continue
        distribution[rid] = len(orders)
        assigned_count += len(orders)

        # Optimize Route & Add Metrics (Cumulative KMS, Step, etc.)
        mock_rider = next((r for r in riders if r["userid"] == rid), None)
        start_coords = None
        if mock_rider:
            start_coords = (float(mock_rider['latitude']), float(mock_rider['longitude']))

        tasks.append(optimizer.optimize_provider_payload(orders, start_coords=start_coords))
        task_rids.append(rid)

    # Run all route optimizations concurrently.
    if tasks:
        results = await asyncio.gather(*tasks)

        for rid, optimized_route in zip(task_rids, results):
            mock_rider = next((r for r in riders if r["userid"] == rid), {})
            # NOTE(review): mock riders above carry no "username"/"contactno"
            # keys, so these default to "" — confirm whether that is intended.
            r_name = mock_rider.get("username", "")
            r_contact = mock_rider.get("contactno", "")

            total_kms = 0
            if optimized_route:
                try:
                    # Preferred: cumulative kms of the final stop.
                    total_kms = max([float(o.get("cumulativekms", 0)) for o in optimized_route])
                except (TypeError, ValueError):
                    # Fallback: sum per-leg distances when cumulativekms is
                    # missing or non-numeric.
                    total_kms = sum([float(o.get("actualkms", o.get("kms", 0))) for o in optimized_route])

            for o in optimized_route:
                o['userid'] = rid
                o['username'] = r_name
                o['rider'] = r_name
                o['ridercontactno'] = r_contact
                o['riderkms'] = str(round(total_kms, 2))
                output_details.append(o)

    # 5. Zone Processing
    fuel_charge = 2.5
    base_pay = 30.0
    from app.services.routing.zone_service import ZoneService
    zone_service = ZoneService()
    zone_data = zone_service.group_by_zones(output_details, unassigned_orders, fuel_charge=fuel_charge, base_pay=base_pay)

    # 6. Save output.json
    output_data = {
        "message": "Success",
        "status": True,
        "details": output_details,
        "zone_summary": zone_data["zone_analysis"],
        "zones": zone_data["detailed_zones"],
        "meta": {
            "total_orders": len(clean_orders),
            "total_riders": len(rider_ids),
            "assigned_orders": assigned_count,
            "unassigned_orders": len(unassigned_orders),
            "total_profit": round(sum(z["total_profit"] for z in zone_data["zone_analysis"]), 2),
            "unassigned_details": [
                {"id": o.get("orderid") or o.get("_id"), "reason": o.get("unassigned_reason")}
                for o in unassigned_orders
            ],
            "distribution_summary": distribution
        }
    }

    with open('output.json', 'w') as f:
        json.dump(output_data, f, indent=4)

    # Apache Arrow / Parquet Export (best-effort; JSON output already saved).
    try:
        save_optimized_route_parquet(output_details, 'output.parquet')
        print("📊 Also saved results to output.parquet (Apache Arrow format)")
    except Exception as e:
        print(f"⚠️ Could not save Parquet: {e}")

    print("✅ Simulation Complete. Saved to output.json")
    print("📊 Distribution Summary:")
    print(json.dumps(distribution, indent=4))
|
||||||
|
|
||||||
|
# Entry point: run the async simulation only when executed as a script.
if __name__ == "__main__":
    asyncio.run(run_simulation())
|
||||||
24
start.py
Normal file
24
start.py
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Mobile-optimized startup script for the Delivery Route Optimization API."""
|
||||||
|
|
||||||
|
import uvicorn
|
||||||
|
|
||||||
|
def main():
    """Start the mobile-optimized API server."""
    # Startup banner, printed line by line.
    banner = (
        "📱 Starting Mobile Delivery Route Optimization API...",
        "⚡ Optimized for real-time mobile apps",
        "🎯 Default algorithm: GREEDY (ultra-fast)",
        "📚 Documentation: http://localhost:8002/docs",
        "=" * 60,
    )
    for line in banner:
        print(line)

    # Development-friendly server settings: auto-reload on code changes,
    # full access logging at info level.
    server_options = dict(
        host="0.0.0.0",
        port=8002,
        reload=True,
        access_log=True,
        log_level="info",
    )
    uvicorn.run("app.main:app", **server_options)
|
||||||
|
|
||||||
|
# Run the server only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user