Initial project setup with README and .gitignore

This commit is contained in:
2026-04-08 15:13:42 +05:30
commit 2d5688cb35
47 changed files with 7929 additions and 0 deletions

View File

@@ -0,0 +1,133 @@
"""
Geographic Clustering Service for Order Assignment
Uses K-means clustering to group orders by kitchen location.
"""
import logging
import numpy as np
from typing import List, Dict, Any, Tuple
from collections import defaultdict
from math import radians, cos, sin, asin, sqrt
logger = logging.getLogger(__name__)
class ClusteringService:
    """Clusters orders geographically to enable balanced rider assignment.

    Orders are first grouped by kitchen name; kitchens within
    ``max_cluster_radius_km`` of a seed kitchen are then merged greedily
    into one cluster.
    """

    def __init__(self):
        # Mean Earth radius in km, used by the haversine formula.
        self.earth_radius_km = 6371

    @staticmethod
    def _safe_float(value: Any) -> float:
        """Coerce a value to float, returning 0.0 on any failure.

        Provider payloads routinely carry None/empty/garbage strings; 0.0 is
        the sentinel for "no usable coordinate" throughout this service.
        """
        try:
            return float(value)
        except (ValueError, TypeError):
            return 0.0

    def haversine(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Calculate great-circle distance between two points in km."""
        lon1, lat1, lon2, lat2 = map(radians, [float(lon1), float(lat1), float(lon2), float(lat2)])
        dlon = lon2 - lon1
        dlat = lat2 - lat1
        a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
        # Clamp sqrt(a) at 1.0 to guard against floating-point overshoot
        # (asin would raise for arguments > 1).
        c = 2 * asin(min(1.0, sqrt(a)))
        return c * self.earth_radius_km

    def get_kitchen_location(self, order: Dict[str, Any]) -> Tuple[float, float]:
        """Extract kitchen (pickup) coordinates from an order.

        Returns:
            (lat, lon), or (0.0, 0.0) when either coordinate is missing,
            zero, or malformed.
        """
        lat = self._safe_float(order.get("pickuplat", 0))
        # Providers use either 'pickuplon' or 'pickuplong'.
        lon = self._safe_float(order.get("pickuplon") or order.get("pickuplong", 0))
        if lat != 0 and lon != 0:
            return lat, lon
        return 0.0, 0.0

    def cluster_orders_by_kitchen(self, orders: List[Dict[str, Any]], max_cluster_radius_km: float = 3.0) -> List[Dict[str, Any]]:
        """
        Cluster orders by kitchen proximity.
        Returns list of clusters, each containing:
        - centroid: (lat, lon) of cluster center
        - orders: list of orders in this cluster
        - kitchen_names: set of kitchen names in cluster
        - total_orders: count
        """
        if not orders:
            return []
        # Group by kitchen location (one representative coordinate per kitchen;
        # the last order seen for a kitchen wins).
        kitchen_groups = defaultdict(list)
        kitchen_coords = {}
        for order in orders:
            k_name = self._get_kitchen_name(order)
            k_lat, k_lon = self.get_kitchen_location(order)
            if k_lat == 0:
                # Fallback: use delivery location if pickup missing.
                # _safe_float (rather than bare float()) prevents a single
                # malformed deliverylat/deliverylong from crashing the batch.
                k_lat = self._safe_float(order.get("deliverylat", 0))
                k_lon = self._safe_float(order.get("deliverylong", 0))
            if k_lat != 0:
                kitchen_groups[k_name].append(order)
                kitchen_coords[k_name] = (k_lat, k_lon)
        # Now cluster kitchens that are close together (greedy merge: each
        # unprocessed kitchen seeds a cluster and absorbs all kitchens
        # within the radius of that seed).
        clusters = []
        processed_kitchens = set()
        for k_name, k_orders in kitchen_groups.items():
            if k_name in processed_kitchens:
                continue
            # Start a new cluster with this kitchen
            cluster_kitchens = [k_name]
            cluster_orders = k_orders[:]
            processed_kitchens.add(k_name)
            k_lat, k_lon = kitchen_coords[k_name]
            # Find nearby kitchens to merge into this cluster
            for other_name, other_coords in kitchen_coords.items():
                if other_name in processed_kitchens:
                    continue
                other_lat, other_lon = other_coords
                dist = self.haversine(k_lat, k_lon, other_lat, other_lon)
                if dist <= max_cluster_radius_km:
                    cluster_kitchens.append(other_name)
                    cluster_orders.extend(kitchen_groups[other_name])
                    processed_kitchens.add(other_name)
            # Cluster centroid = mean of member orders' valid pickup coords;
            # falls back to the seed kitchen's coordinate when none are valid.
            lats = []
            lons = []
            for order in cluster_orders:
                lat, lon = self.get_kitchen_location(order)
                if lat != 0:
                    lats.append(lat)
                    lons.append(lon)
            if lats:
                centroid_lat = sum(lats) / len(lats)
                centroid_lon = sum(lons) / len(lons)
            else:
                centroid_lat, centroid_lon = k_lat, k_lon
            clusters.append({
                'centroid': (centroid_lat, centroid_lon),
                'orders': cluster_orders,
                'kitchen_names': set(cluster_kitchens),
                'total_orders': len(cluster_orders)
            })
        # Sort clusters by order count (largest first)
        clusters.sort(key=lambda x: x['total_orders'], reverse=True)
        logger.info(f"Created {len(clusters)} clusters from {len(kitchen_groups)} kitchens")
        return clusters

    def _get_kitchen_name(self, order: Dict[str, Any]) -> str:
        """Extract kitchen name from order, trying known provider key aliases."""
        possible_keys = ['storename', 'restaurantname', 'kitchenname', 'partnername', 'store_name']
        for key in possible_keys:
            if key in order and order[key]:
                return str(order[key]).strip()
        return "Unknown"

View File

@@ -0,0 +1,326 @@
"""
GPS Kalman Filter \u2014 rider-api
A 1D Kalman filter applied independently to latitude and longitude
to smooth noisy GPS coordinates from riders and delivery points.
Why Kalman for GPS?
- GPS readings contain measurement noise (\u00b15\u201315m typical, \u00b150m poor signal)
- Rider location pings can "jump" due to bad signal or device error
- Kalman filter gives an optimal estimate by balancing:
(1) Previous predicted position (process model)
(2) New GPS measurement (observation model)
Design:
- Separate filter instance per rider (stateful \u2014 preserves history)
- `CoordinateKalmanFilter` \u2014 single lat/lon smoother
- `GPSKalmanFilter` \u2014 wraps two CoordinateKalmanFilters (lat + lon)
- `RiderKalmanRegistry` \u2014 manages per-rider filter instances
- `smooth_coordinates()` \u2014 stateless single-shot smoother for delivery coords
Usage:
# Stateless (one-shot, no history \u2014 for delivery coords):
smooth_lat, smooth_lon = smooth_coordinates(raw_lat, raw_lon)
# Stateful (per-rider, preserves motion history):
registry = RiderKalmanRegistry()
lat, lon = registry.update(rider_id=1116, lat=11.0067, lon=76.9558)
"""
import logging
import time
from typing import Dict, Optional, Tuple
logger = logging.getLogger(__name__)
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# CORE 1D KALMAN FILTER
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
class CoordinateKalmanFilter:
    """One-dimensional Kalman smoother for a single GPS axis (lat or lon).

    Models the coordinate as a random walk: between pings the position is
    assumed unchanged while its uncertainty inflates by the process noise Q.
    Each new measurement is then fused with that prediction using the
    standard scalar Kalman gain.

    Tuning:
        process_noise (Q): larger -> trust fresh measurements more
            (less smoothing).
        measurement_noise (R): larger -> lean harder on history
            (more smoothing).
    """

    def __init__(
        self,
        process_noise: float = 1e-4,
        measurement_noise: float = 0.01,
        initial_uncertainty: float = 1.0,
    ):
        self.Q = process_noise         # process (random-walk) variance
        self.R = measurement_noise     # GPS measurement variance
        self._x: Optional[float] = None  # state estimate; None until seeded
        self._P: float = initial_uncertainty  # estimate variance

    @property
    def initialized(self) -> bool:
        """True once at least one measurement has seeded the filter."""
        return self._x is not None

    def update(self, measurement: float) -> float:
        """Fuse one new measurement and return the filtered estimate."""
        if self._x is None:
            # The very first sample simply seeds the state.
            self._x = measurement
            return measurement
        # Predict: state carries over, variance grows by Q.
        predicted_var = self._P + self.Q
        # Update: gain balances the prediction against the measurement.
        gain = predicted_var / (predicted_var + self.R)
        self._x = self._x + gain * (measurement - self._x)
        self._P = (1.0 - gain) * predicted_var
        return self._x

    def reset(self):
        """Forget all history; the next update() re-seeds the filter."""
        self._x = None
        self._P = 1.0
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# 2D GPS KALMAN FILTER (lat + lon)
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
class GPSKalmanFilter:
    """Smooths a (lat, lon) stream with two independent 1D Kalman filters.

    Latitude and longitude are filtered separately; cross-axis correlation
    is ignored, which is adequate for ping-rate GPS smoothing.
    """

    def __init__(
        self,
        process_noise: float = 1e-4,
        measurement_noise: float = 0.01,
    ):
        self.lat_filter = CoordinateKalmanFilter(process_noise, measurement_noise)
        self.lon_filter = CoordinateKalmanFilter(process_noise, measurement_noise)
        self.last_updated: float = time.time()  # wall-clock of last accepted ping
        self.update_count: int = 0              # number of accepted pings

    def update(self, lat: float, lon: float) -> Tuple[float, float]:
        """Feed a new GPS reading and return the smoothed (lat, lon).

        Invalid readings are rejected: the previous estimate is returned
        when one exists, otherwise the raw input is echoed back unchanged.
        """
        if not self._is_valid_coord(lat, lon):
            if self.lat_filter.initialized:
                return self.lat_filter._x, self.lon_filter._x
            return lat, lon
        result = (self.lat_filter.update(lat), self.lon_filter.update(lon))
        self.last_updated = time.time()
        self.update_count += 1
        return result

    def get_estimate(self) -> Optional[Tuple[float, float]]:
        """Current smoothed position, or None before the first valid ping."""
        if not self.lat_filter.initialized:
            return None
        return self.lat_filter._x, self.lon_filter._x

    def reset(self):
        """Drop all history on both axes."""
        self.lat_filter.reset()
        self.lon_filter.reset()
        self.update_count = 0

    @staticmethod
    def _is_valid_coord(lat: float, lon: float) -> bool:
        """True for coercible, in-range, non-(0,0) coordinates."""
        try:
            lat_f, lon_f = float(lat), float(lon)
        except (TypeError, ValueError):
            return False
        if lat_f == 0.0 and lon_f == 0.0:
            # (0, 0) is the conventional "no fix" sentinel, never a real ping.
            return False
        return -90.0 <= lat_f <= 90.0 and -180.0 <= lon_f <= 180.0
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# PER-RIDER FILTER REGISTRY
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
class RiderKalmanRegistry:
    """Holds one stateful GPSKalmanFilter per rider across calls.

    A filter that has been silent for longer than ``stale_seconds`` is
    reset before reuse so ancient history cannot drag a fresh ping around.
    """

    def __init__(
        self,
        process_noise: float = 1e-4,
        measurement_noise: float = 0.01,
        stale_seconds: float = 1800.0,
    ):
        self._filters: Dict[str, GPSKalmanFilter] = {}
        self._process_noise = process_noise
        self._measurement_noise = measurement_noise
        self._stale_seconds = stale_seconds

    def _get_or_create(self, rider_id) -> GPSKalmanFilter:
        """Return the rider's filter, resetting a stale one, creating if new."""
        key = str(rider_id)
        existing = self._filters.get(key)
        if existing is not None:
            if time.time() - existing.last_updated > self._stale_seconds:
                existing.reset()  # too old to trust: start fresh
            return existing
        fresh = GPSKalmanFilter(
            process_noise=self._process_noise,
            measurement_noise=self._measurement_noise,
        )
        self._filters[key] = fresh
        return fresh

    def update(self, rider_id, lat: float, lon: float) -> Tuple[float, float]:
        """Smooth one ping for the given rider and return (lat, lon)."""
        return self._get_or_create(rider_id).update(lat, lon)

    def get_estimate(self, rider_id) -> Optional[Tuple[float, float]]:
        """Last smoothed position for the rider, or None if unknown."""
        flt = self._filters.get(str(rider_id))
        return flt.get_estimate() if flt is not None else None

    def reset_rider(self, rider_id):
        """Clear one rider's filter history (no-op if the rider is unknown)."""
        flt = self._filters.get(str(rider_id))
        if flt is not None:
            flt.reset()

    def clear_all(self):
        """Drop every rider's filter."""
        self._filters.clear()
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# GLOBAL REGISTRY (process-level singleton)
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# Process-level singleton: all callers share one registry so per-rider
# filter state survives across requests handled by this process.
_global_registry = RiderKalmanRegistry()


def get_registry() -> RiderKalmanRegistry:
    """Get the process-level rider Kalman filter registry."""
    return _global_registry
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# STATELESS COORDINATE SMOOTHER
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
def smooth_coordinates(
    lat: float,
    lon: float,
    *,
    prior_lat: Optional[float] = None,
    prior_lon: Optional[float] = None,
    process_noise: float = 1e-4,
    measurement_noise: float = 0.01,
) -> Tuple[float, float]:
    """
    Stateless single-shot GPS smoother.
    If a prior is provided, blends the new reading towards it.
    """
    # Throwaway filter: no history survives beyond this call.
    smoother = GPSKalmanFilter(
        process_noise=process_noise,
        measurement_noise=measurement_noise,
    )
    if prior_lat is not None and prior_lon is not None:
        # Seed the filter with the prior so the reading is pulled toward it;
        # malformed or invalid priors are silently ignored.
        try:
            seed_lat, seed_lon = float(prior_lat), float(prior_lon)
        except (TypeError, ValueError):
            seed_lat = seed_lon = None
        if seed_lat is not None and GPSKalmanFilter._is_valid_coord(seed_lat, seed_lon):
            smoother.update(seed_lat, seed_lon)
    return smoother.update(lat, lon)
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
# BATCH SMOOTHERS
# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
def smooth_rider_locations(riders: list) -> list:
    """
    Apply Kalman smoothing to a list of rider dicts in-place using
    the global per-rider registry (history preserved across calls).
    Reads/writes: latitude, longitude (and currentlat/currentlong if present).
    Adds: _kalman_smoothed = True on each processed rider.
    """
    registry = get_registry()
    for rider in riders:
        try:
            rider_id = (
                rider.get("userid")
                or rider.get("riderid")
                or rider.get("id")
                or "unknown"
            )
            raw_lat = float(rider.get("latitude") or rider.get("currentlat") or 0)
            raw_lon = float(rider.get("longitude") or rider.get("currentlong") or 0)
            if raw_lat == 0.0 and raw_lon == 0.0:
                # No usable position on this rider; leave the dict untouched.
                continue
            new_lat, new_lon = registry.update(rider_id, raw_lat, raw_lon)
            # Downstream Go service unmarshals these fields as strings.
            lat_str = str(round(new_lat, 8))
            lon_str = str(round(new_lon, 8))
            rider["latitude"] = lat_str
            rider["longitude"] = lon_str
            if "currentlat" in rider:
                rider["currentlat"] = lat_str
            if "currentlong" in rider:
                rider["currentlong"] = lon_str
            rider["_kalman_smoothed"] = True
        except Exception as e:
            # Best-effort: a bad rider record must not break the batch.
            logger.debug(f"Kalman rider smoothing skipped: {e}")
    return riders
def smooth_order_coordinates(orders: list) -> list:
    """
    Apply stateless Kalman smoothing to delivery coordinates in a list
    of order dicts. Uses pickup coords as a seed (prior) when available.
    Modifies orders in-place. Returns the same list.
    """
    for order in orders:
        try:
            drop_lat = float(order.get("deliverylat") or order.get("droplat") or 0)
            drop_lon = float(order.get("deliverylong") or order.get("droplon") or 0)
            if not GPSKalmanFilter._is_valid_coord(drop_lat, drop_lon):
                continue  # nothing sensible to smooth
            # Pickup (kitchen) location acts as the Kalman prior when present.
            raw_plat = order.get("pickuplat")
            raw_plon = order.get("pickuplon") or order.get("pickuplong")
            try:
                seed_lat = float(raw_plat) if raw_plat else None
                seed_lon = float(raw_plon) if raw_plon else None
            except (TypeError, ValueError):
                seed_lat = seed_lon = None
            new_lat, new_lon = smooth_coordinates(
                drop_lat,
                drop_lon,
                prior_lat=seed_lat,
                prior_lon=seed_lon,
            )
            # Downstream Go service unmarshals these fields as strings.
            lat_str = str(round(new_lat, 8))
            lon_str = str(round(new_lon, 8))
            order["deliverylat"] = lat_str
            order["deliverylong"] = lon_str
            if "droplat" in order:
                order["droplat"] = lat_str
            if "droplon" in order:
                order["droplon"] = lon_str
            order["_kalman_smoothed"] = True
        except Exception as e:
            # Best-effort: a bad order record must not break the batch.
            logger.debug(f"Kalman order smoothing skipped: {e}")
    return orders

View File

@@ -0,0 +1,158 @@
"""
Realistic ETA Calculator for Delivery Operations
Accounts for:
- City traffic conditions
- Stop time at pickup/delivery
- Navigation time
- Parking/finding address time
- Different speeds for different order types
"""
import logging
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
class RealisticETACalculator:
    """
    Calculates realistic ETAs accounting for real-world delivery conditions.

    Speeds and time buffers are seeded from the dynamic (DB-backed)
    configuration at construction time; traffic band and trip length then
    scale the effective speed per leg.
    """

    def __init__(self):
        from app.config.dynamic_config import get_config
        cfg = get_config()
        # Base speed (km/h), driven by the DB configuration.
        base_speed = cfg.get("avg_speed_kmh", 18.0)
        # Traffic-band speeds derived from the base speed.
        self.CITY_SPEED_HEAVY_TRAFFIC = base_speed * 0.7   # usually ~12 km/h
        self.CITY_SPEED_MODERATE = base_speed              # usually ~18 km/h
        self.CITY_SPEED_LIGHT = base_speed * 1.2           # usually ~21.6 km/h
        # Fixed time buffers (minutes).
        self.PICKUP_TIME = cfg.get("eta_pickup_time_min", 3.0)
        self.DELIVERY_TIME = cfg.get("eta_delivery_time_min", 4.0)
        self.NAVIGATION_BUFFER = cfg.get("eta_navigation_buffer_min", 1.5)
        # Distance-based speed factors: short hops (<2km) crawl through
        # stops and starts; long hauls (>8km) may include faster stretches.
        self.SHORT_TRIP_FACTOR = cfg.get("eta_short_trip_factor", 0.8)
        self.LONG_TRIP_FACTOR = cfg.get("eta_long_trip_factor", 1.1)

    def calculate_eta(self,
                      distance_km: float,
                      is_first_order: bool = False,
                      order_type: str = "Economy",
                      time_of_day: str = "peak") -> int:
        """
        Calculate realistic ETA in minutes.
        Args:
            distance_km: Distance to travel in kilometers
            is_first_order: If True, includes pickup time
            order_type: "Economy", "Premium", or "Risky" (accepted for API
                symmetry; not used in the current calculation)
            time_of_day: "peak", "normal", or "light" traffic
        Returns:
            ETA in minutes (rounded up for safety)
        """
        if distance_km <= 0:
            return 0
        # Pick the traffic-band speed; anything unrecognized means moderate.
        traffic_speed = {
            "peak": self.CITY_SPEED_HEAVY_TRAFFIC,
            "light": self.CITY_SPEED_LIGHT,
        }
        base_speed = traffic_speed.get(time_of_day, self.CITY_SPEED_MODERATE)
        # Scale for trip length: short trips hit more intersections,
        # long trips may include faster roads.
        if distance_km < 2.0:
            effective_speed = base_speed * self.SHORT_TRIP_FACTOR
        elif distance_km > 8.0:
            effective_speed = base_speed * self.LONG_TRIP_FACTOR
        else:
            effective_speed = base_speed
        # Travel time in minutes, then the fixed buffers on top.
        total_time = (distance_km / effective_speed) * 60
        if is_first_order:
            # Pickup time is charged once, on the first order of a run.
            total_time += self.PICKUP_TIME
        total_time += self.DELIVERY_TIME
        if distance_km > 3.0:
            # Longer legs need slack for navigation / finding the address.
            total_time += self.NAVIGATION_BUFFER
        # Round up to the next whole minute: arriving early beats arriving late.
        return int(total_time) + 1

    def calculate_batch_eta(self, orders: list) -> list:
        """
        Calculate ETAs for a batch of orders in sequence.
        Args:
            orders: List of order dicts with 'previouskms' and 'step' fields
        Returns:
            Same list with updated 'eta' fields
        """
        for order in orders:
            leg_km = float(order.get('previouskms', 0))
            # Only the first stop in the sequence absorbs the pickup time.
            first_stop = order.get('step', 1) == 1
            eta = self.calculate_eta(
                distance_km=leg_km,
                is_first_order=first_stop,
                order_type=order.get('ordertype', 'Economy'),
                time_of_day="normal"  # Default to moderate traffic
            )
            order['eta'] = str(eta)
            order['eta_realistic'] = True  # Flag to indicate realistic calculation
        return orders
def get_time_of_day_category(current_hour: Optional[int] = None) -> str:
    """
    Determine traffic conditions for an hour of the day.

    Args:
        current_hour: Hour in 0-23. Defaults to the current local hour,
            preserving the original parameterless behavior; passing an
            explicit hour makes the function pure and testable.

    Returns:
        "peak", "normal", or "light"
    """
    if current_hour is None:
        from datetime import datetime
        current_hour = datetime.now().hour
    # Peak hours: 8-10 AM, 12-2 PM, 5-8 PM
    if (8 <= current_hour < 10) or (12 <= current_hour < 14) or (17 <= current_hour < 20):
        return "peak"
    # Light traffic: late night / early morning
    if current_hour < 7 or current_hour >= 22:
        return "light"
    return "normal"

View File

@@ -0,0 +1,425 @@
"""Production-grade route optimization using Google OR-Tools.
ALGORITHM: TSP / VRP with Google OR-Tools
- Industry-standard solver (same as used by major logistics companies)
- Constraint-based optimization
- Handles time windows (future proofing)
- Guaranteed optimal or near-optimal solution
FEATURES:
- Automatic outlier detection and coordinate correction
- Hybrid distance calculation (Google Maps + Haversine fallback)
- Robust error handling for invalid inputs
"""
import math
import os
import logging
import asyncio
from typing import Dict, Any, List as _List, Optional, Tuple, Union
from datetime import datetime, timedelta
import httpx
from app.services.routing.kalman_filter import smooth_order_coordinates
import numpy as np
from app.core.arrow_utils import calculate_haversine_matrix_vectorized
from app.config.dynamic_config import get_config
try:
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp
ORTOOLS_AVAILABLE = True
except ImportError:
ORTOOLS_AVAILABLE = False
logging.warning("Google OR-Tools not found. Falling back to simple greedy solver.")
logger = logging.getLogger(__name__)
class RouteOptimizer:
    """Route optimization using Google OR-Tools (Async).

    Pipeline: Kalman-smooth delivery coordinates -> repair outliers against
    the delivery centroid -> build a risk-weighted distance matrix -> solve
    an open TSP from the rider's start point -> decorate each order with
    step metrics (step, previouskms, cumulativekms, actualkms, eta, ...).
    """

    def __init__(self):
        self.earth_radius = 6371  # Earth radius in km
        _cfg = get_config()
        # Realistic ETA calculator + traffic-band helper (imported inside the
        # method — presumably to avoid an import cycle; TODO confirm).
        from app.services.routing.realistic_eta_calculator import RealisticETACalculator, get_time_of_day_category
        self.eta_calculator = RealisticETACalculator()
        self.get_traffic_condition = get_time_of_day_category
        # Speed settings (ML-tuned via DynamicConfig)
        self.avg_speed_kmh = float(_cfg.get("avg_speed_kmh"))
        # Road factor (haversine -> road distance multiplier, ML-tuned)
        self.road_factor = float(_cfg.get("road_factor"))
        # Google Maps API settings; Maps calls are enabled only when a key exists.
        self.google_maps_api_key = os.getenv("GOOGLE_MAPS_API_KEY", "")
        self.use_google_maps = bool(self.google_maps_api_key)
        # Solver time limit (ML-tuned)
        self.search_time_limit_seconds = int(_cfg.get("search_time_limit_seconds"))
        # ID3 behavior analyzer; used below to penalize risky legs in the cost matrix.
        from app.services.ml.behavior_analyzer import get_analyzer
        self.behavior_analyzer = get_analyzer()

    def haversine_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Calculate great circle distance between two points on Earth (in km).

        Returns 0.0 for any malformed input instead of raising.
        """
        try:
            lat1, lon1, lat2, lon2 = map(math.radians, [float(lat1), float(lon1), float(lat2), float(lon2)])
            dlat = lat2 - lat1
            dlon = lon2 - lon1
            a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
            c = 2 * math.asin(math.sqrt(a))
            return self.earth_radius * c
        except Exception:
            return 0.0

    async def _get_google_maps_distances_batch(self, origin_lat: float, origin_lon: float,
                                               destinations: _List[tuple]) -> Dict[tuple, float]:
        """Get road distances for multiple destinations from Google Maps API. (Async, Parallel)

        NOTE(review): despite the return annotation, each stored value is a
        dict {'distance': km, 'duration': minutes-or-None}, not a bare float —
        confirm against callers before tightening the annotation.
        Returns {} when no API key is configured or destinations is empty.
        """
        if not self.use_google_maps or not destinations:
            return {}
        results = {}
        # Query in chunks of 25 destinations per request (Distance Matrix
        # element limits); chunks are fetched concurrently below.
        batch_size = 25
        chunks = [destinations[i:i + batch_size] for i in range(0, len(destinations), batch_size)]
        async def process_batch(batch):
            batch_result = {}
            try:
                dest_str = "|".join([f"{lat},{lon}" for lat, lon in batch])
                url = "https://maps.googleapis.com/maps/api/distancematrix/json"
                params = {
                    "origins": f"{origin_lat},{origin_lon}",
                    "destinations": dest_str,
                    "key": self.google_maps_api_key,
                    "units": "metric"
                }
                async with httpx.AsyncClient(timeout=10.0) as client:
                    response = await client.get(url, params=params)
                    response.raise_for_status()
                    data = response.json()
                if data.get("status") == "OK":
                    # Single origin -> only the first row is meaningful.
                    rows = data.get("rows", [])
                    if rows:
                        elements = rows[0].get("elements", [])
                        for idx, element in enumerate(elements):
                            if idx < len(batch):
                                dest_coord = batch[idx]
                                if element.get("status") == "OK":
                                    dist = element.get("distance", {}).get("value")
                                    dur = element.get("duration", {}).get("value")
                                    if dist is not None:
                                        # API reports meters/seconds; store km/minutes.
                                        batch_result[dest_coord] = {
                                            'distance': dist / 1000.0,
                                            'duration': dur / 60.0 if dur else None
                                        }
            except Exception as e:
                # A failed chunk just contributes nothing; callers fall back
                # to haversine-based distances.
                logger.warning(f"Google Maps batch call failed: {e}")
            return batch_result
        batch_results_list = await asyncio.gather(*[process_batch(chunk) for chunk in chunks])
        for res in batch_results_list:
            results.update(res)
        return results

    def _solve_tsp_ortools(self, locations: _List[Tuple[float, float]], dist_matrix: _List[_List[float]]) -> _List[int]:
        """Solve TSP using Google OR-Tools.

        Falls back to greedy nearest-neighbour when OR-Tools is missing or
        the solver yields no solution within the time limit. The returned
        route is a list of solver indices beginning with 0 (the depot).
        """
        if not ORTOOLS_AVAILABLE:
            # Fallback to simple Greedy NN if OR-Tools not installed
            return self._solve_greedy(locations, dist_matrix)
        if not locations or len(locations) <= 1:
            return [0]
        manager = pywrapcp.RoutingIndexManager(len(locations), 1, 0)  # num_nodes, num_vehicles, depot
        routing = pywrapcp.RoutingModel(manager)
        def distance_callback(from_index, to_index):
            from_node = manager.IndexToNode(from_index)
            to_node = manager.IndexToNode(to_index)
            # Open TSP: Returning to the depot (index 0) has zero cost.
            # This ensures the solver optimizes for the path from start to last drop-off
            # rather than a closed circuit that might be reversed if the rider is on the "far" side.
            if to_node == 0:
                return 0
            # OR-Tools works with integers, so we scale by 1000 (meters)
            val = dist_matrix[from_node][to_node]
            return int(val * 1000)
        transit_callback_index = routing.RegisterTransitCallback(distance_callback)
        routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
        search_parameters = pywrapcp.DefaultRoutingSearchParameters()
        search_parameters.first_solution_strategy = (
            routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC
        )
        search_parameters.local_search_metaheuristic = (
            routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH
        )
        search_parameters.time_limit.seconds = self.search_time_limit_seconds
        solution = routing.SolveWithParameters(search_parameters)
        if solution:
            # Walk the solved chain from the depot until the route ends.
            index = routing.Start(0)
            route = []
            while not routing.IsEnd(index):
                route.append(manager.IndexToNode(index))
                index = solution.Value(routing.NextVar(index))
            return route
        else:
            return self._solve_greedy(locations, dist_matrix)

    def _solve_greedy(self, locations, dist_matrix):
        """Simple Greedy Nearest Neighbor fallback."""
        unvisited = set(range(1, len(locations)))
        curr = 0
        route = [0]
        while unvisited:
            # Always hop to the closest still-unvisited node.
            nearest = min(unvisited, key=lambda x: dist_matrix[curr][x])
            route.append(nearest)
            unvisited.remove(nearest)
            curr = nearest
        return route

    def _cleanup_coords(self, lat: Any, lon: Any, ref_lat: float, ref_lon: float) -> Tuple[float, float]:
        """
        Heuristic to fix bad coordinates.
        1. Fixes lat==lon typo.
        2. Fixes missing negative signs if needed (not needed for India).
        3. Projects outlier > 500km to reference (centroid).

        Returns (0.0, 0.0) for unparseable input; otherwise a (lat, lon)
        pair that is either the original or the reference point.
        """
        try:
            lat = float(lat)
            lon = float(lon)
        except:
            return 0.0, 0.0
        if lat == 0 or lon == 0:
            # Zero component = "no fix"; pass through for the caller to handle.
            return lat, lon
        # 1. Check strict equality (typo)
        if abs(lat - lon) < 0.0001:
            if ref_lon != 0:
                # If reference is available, assume lat is correct and fix lon
                # (Common error: copy lat to lon field)
                return lat, ref_lon
        # 2. Check general outlier (e.g. 500km away)
        if ref_lat != 0 and ref_lon != 0:
            dist = self.haversine_distance(lat, lon, ref_lat, ref_lon)
            if dist > 500:
                # Returning reference prevents map explosion
                return ref_lat, ref_lon
        return lat, lon

    async def optimize_provider_payload(self, orders: _List[Dict[str, Any]], start_coords: Optional[tuple] = None) -> _List[Dict[str, Any]]:
        """Optimize delivery route and add step metrics (OR-Tools).

        Args:
            orders: provider order dicts; delivery/pickup coordinate fields
                are read, several metric fields are (re)written.
            start_coords: optional (lat, lon) rider start; falls back to the
                first order's pickup, then to the delivery centroid.

        Returns:
            A new list of order dicts in visit order, each annotated with
            step, previouskms, cumulativekms, actualkms, kms, ordertype, eta.
        """
        if not orders:
            return []
        # Shallow-copy each dict so the caller's payload is not mutated
        # (nested values, if any, are still shared).
        orders = [dict(order) for order in orders]
        # 0. KALMAN FILTER - Smooth noisy delivery GPS coordinates
        orders = smooth_order_coordinates(orders)
        # Helpers
        def _to_float(v: Any) -> float:
            # Lenient coercion: any failure maps to 0.0 ("no value").
            try: return float(v)
            except: return 0.0
        def _normalize_dt(val: Any) -> str:
            # Normalize the two known timestamp layouts to one canonical
            # format; unknown shapes are passed through unchanged.
            if val in (None, "", 0): return ""
            s = str(val).strip()
            for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%d %H:%M:%S"):
                try: return datetime.strptime(s, fmt).strftime("%Y-%m-%d %H:%M:%S")
                except: pass
            return s
        # 1. PREPARE COORDINATES & CENTROID
        # The centroid of valid delivery points anchors outlier repair below.
        valid_lats = []
        valid_lons = []
        for o in orders:
            lat = _to_float(o.get("deliverylat"))
            lon = _to_float(o.get("deliverylong"))
            if lat != 0 and lon != 0:
                valid_lats.append(lat)
                valid_lons.append(lon)
        centroid_lat = sum(valid_lats)/len(valid_lats) if valid_lats else 0.0
        centroid_lon = sum(valid_lons)/len(valid_lons) if valid_lons else 0.0
        # 2. DETERMINE START LOCATION
        start_lat, start_lon = 0.0, 0.0
        # Try explicit start_coords first
        if start_coords and len(start_coords) == 2:
            try:
                start_lat, start_lon = float(start_coords[0]), float(start_coords[1])
            except: pass
        # Fallback to pickup location in orders
        if start_lat == 0:
            for o in orders:
                plat = _to_float(o.get("pickuplat"))
                plon = _to_float(o.get("pickuplon") or o.get("pickuplong"))
                if plat != 0:
                    start_lat, start_lon = plat, plon
                    break
        # Fallback to centroid
        if start_lat == 0:
            start_lat, start_lon = centroid_lat, centroid_lon
        # Repair obviously-bad start coordinates against the centroid.
        start_lat, start_lon = self._cleanup_coords(start_lat, start_lon, centroid_lat, centroid_lon)
        # 3. BUILD LOCATIONS LIST FOR SOLVER
        # Index 0 is Start (Depot), 1..N are orders
        locations = [(start_lat, start_lon)]
        points_map = []  # Maps solver index 1..N back to original order index
        for idx, order in enumerate(orders):
            lat = _to_float(order.get("deliverylat"))
            lon = _to_float(order.get("deliverylong"))
            # Project coordinates and ensure they are strings for Go compatibility
            lat, lon = self._cleanup_coords(lat, lon, centroid_lat, centroid_lon)
            order_str_lat, order_str_lon = str(lat), str(lon)
            order["deliverylat"] = order_str_lat
            order["deliverylong"] = order_str_lon
            if "droplat" in order: order["droplat"] = order_str_lat
            if "droplon" in order: order["droplon"] = order_str_lon
            locations.append((lat, lon))
            points_map.append(idx)
        # 4. COMPUTE DISTANCE MATRIX (Vectorized with Arrow/NumPy)
        # road_factor converts great-circle km into approximate road km
        # (ML-tuned; was a hardcoded 1.3).
        lats = np.array([loc[0] for loc in locations])
        lons = np.array([loc[1] for loc in locations])
        dist_matrix = calculate_haversine_matrix_vectorized(lats, lons) * self.road_factor
        # 5. RISK-AWARE COST MATRIX (ID3 INTELLIGENCE)
        # Penalize legs the analyzer flags as risky, BEFORE solving, so the
        # solver is steered away from them. dist_matrix itself stays pristine
        # for the step-distance metrics computed later.
        cost_matrix = dist_matrix.copy()
        traffic = self.get_traffic_condition()
        num_locs = len(locations)
        risk_penalty_count = 0
        for i in range(num_locs):
            for j in range(num_locs):
                if i == j: continue
                # Predict success risk for this specific leg
                dist_km = dist_matrix[i][j]
                prediction = self.behavior_analyzer.predict(
                    distance_km=dist_km,
                    timestamp_or_band=traffic,
                )
                if prediction.get("label") == "RISK":  # High Risk predicted by ID3
                    # Add 25% penalty to distance to discourage this leg
                    cost_matrix[i][j] *= 1.25
                    risk_penalty_count += 1
        if risk_penalty_count > 0:
            logger.info(f"ID3 Intelligence: Applied {risk_penalty_count} Risk Penalties to optimize for delivery safety.")
        # 6. SOLVE TSP
        route_indices = self._solve_tsp_ortools(locations, cost_matrix)
        # Remove 0 (depot)
        optimized_order_indices = [i for i in route_indices if i != 0]
        # 7. BUILD RESULT
        result = []
        cumulative_dist = 0.0
        # Track previous location (starts at the depot, solver index 0)
        prev_idx = 0
        for step_num, solver_idx in enumerate(optimized_order_indices, start=1):
            order_idx = points_map[solver_idx - 1]
            order = dict(orders[order_idx])
            # Strip any stale metrics carried in from upstream.
            for k in ("step", "previouskms", "cumulativekms", "eta", "actualkms", "ordertype"):
                order.pop(k, None)
            # Normalize dates
            for field in ["orderdate", "deliverytime", "created"]:
                if field in order: order[field] = _normalize_dt(order.get(field))
            # Distance for this leg (un-penalized matrix)
            step_dist = dist_matrix[prev_idx][solver_idx]
            cumulative_dist += step_dist
            # Metadata (Step metrics are integers in the Go struct)
            order["step"] = int(step_num)
            order["previouskms"] = int(0 if step_num == 1 else int(round(step_dist)))
            order["cumulativekms"] = int(round(cumulative_dist))
            # 8. METRICS (Calculate actual distance, prioritize provider input)
            plat, plon = start_lat, start_lon
            if plat == 0: plat, plon = _to_float(order.get("pickuplat")), _to_float(order.get("pickuplon") or order.get("pickuplong"))
            dlat, dlon = locations[solver_idx]
            # Baseline: Haversine * 1.3 (estimated road factor)
            true_dist = self.haversine_distance(plat, plon, dlat, dlon) * 1.3
            provided_kms = order.get("kms")
            if provided_kms not in (None, "", 0, "0"):
                try:
                    # If provider gave us a distance, respect it as the 'actual' distance
                    true_dist = float(provided_kms)
                except:
                    pass
            order["actualkms"] = str(round(true_dist, 2))
            order["kms"] = str(provided_kms) if provided_kms else str(int(round(true_dist)))
            # Financial metrics - keeping as numbers for calculations
            if "rider_charge" in order: order["rider_charge"] = round(float(order["rider_charge"]), 2)
            if "profit" in order: order["profit"] = round(float(order["profit"]), 2)
            # Type & ETA (thresholds: <=5 km Economy, <=12 km Premium, else Risky)
            order["ordertype"] = "Economy" if true_dist <= 5 else "Premium" if true_dist <= 12 else "Risky"
            traffic = self.get_traffic_condition()
            eta = self.eta_calculator.calculate_eta(
                distance_km=step_dist,
                is_first_order=(step_num == 1),
                order_type=order["ordertype"],
                time_of_day=traffic
            )
            order["eta"] = str(eta)
            result.append(order)
            prev_idx = solver_idx
        return result
def optimize_route(orders: _List[Dict[str, Any]]) -> _List[Dict[str, Any]]:
    """Synchronous wrapper around RouteOptimizer.optimize_provider_payload.

    Drives the async optimizer to completion from synchronous code.

    Args:
        orders: flat list of provider order dicts to optimize.

    Returns:
        The optimized order list, or [] when invoked from inside an already
        running event loop (blocking there would deadlock the loop).
    """
    optimizer = RouteOptimizer()
    try:
        # Raises RuntimeError when no event loop is running in this thread.
        asyncio.get_running_loop()
    except RuntimeError:
        # No running loop: asyncio.run creates, drives and closes a fresh
        # loop. This replaces the deprecated get_event_loop() /
        # run_until_complete pattern (get_event_loop with no current loop
        # is deprecated since 3.10 and an error in 3.12+).
        return asyncio.run(optimizer.optimize_provider_payload(orders))
    # A loop is already running (async caller); we cannot block on
    # run_until_complete here without deadlocking. Shouldn't happen in
    # standard usage.
    return []

View File

@@ -0,0 +1,196 @@
import logging
from typing import List, Dict, Any, Optional
logger = logging.getLogger(__name__)
class ZoneService:
    """
    Service to classify orders and riders into geographic zones.

    Zoning is a simple compass-quadrant split around a fixed city center
    (defaulting to Coimbatore as per user context). While grouping, per-order
    rider economics (payment / profit / order type) are computed in place.
    """
    # Approximate Center of Coimbatore (Gandhipuram/Bus Stand area)
    CENTER_LAT = 11.0168
    CENTER_LON = 76.9558
    # Half-width in degrees (~1.1 km) of the band around the center that is
    # labelled "Central" instead of a compass quadrant.
    CENTRAL_BUFFER_DEG = 0.010

    def __init__(self):
        # Stateless service; nothing to initialize.
        pass

    def determine_zone(self, lat: float, lon: float, pincode: Optional[str] = None) -> str:
        """
        Determine the zone ("North", "South East", "Central", ...) for a point.

        Args:
            lat, lon: coordinates of the point; a 0 in either is treated as
                missing data.
            pincode: reserved for future pincode-based overrides — currently
                unused.

        Returns:
            Compass quadrant name relative to the city center, "Central" when
            inside the buffer band, or "Unknown" for missing coordinates.
        """
        if lat == 0 or lon == 0:
            return "Unknown"
        lat_diff = lat - self.CENTER_LAT
        lon_diff = lon - self.CENTER_LON
        # Simple quadrant logic: +lat => North, -lat => South,
        # +lon => East, -lon => West, within buffer => Central.
        buffer = self.CENTRAL_BUFFER_DEG
        zone_parts = []
        if lat_diff > buffer:
            zone_parts.append("North")
        elif lat_diff < -buffer:
            zone_parts.append("South")
        if lon_diff > buffer:
            zone_parts.append("East")
        elif lon_diff < -buffer:
            zone_parts.append("West")
        return " ".join(zone_parts) if zone_parts else "Central"

    @staticmethod
    def _extract_delivery_coords(order: Dict[str, Any]):
        """Best-effort (lat, lon, pincode) from an order; (0, 0, "") on bad data."""
        try:
            # Prefer the delivery location for zoning (where the customer is).
            lat = float(order.get("deliverylat") or order.get("droplat") or 0)
            lon = float(order.get("deliverylong") or order.get("droplon") or 0)
            pincode = str(order.get("deliveryzip") or "")
            return lat, lon, pincode
        except (TypeError, ValueError):
            return 0, 0, ""

    @staticmethod
    def _classify_by_profit(profit: float) -> str:
        """Map a per-order profit figure to an order-type label."""
        if profit <= 0:
            return "Loss"
        if profit <= 5:
            return "Risky"
        if profit <= 10:
            return "Economy"
        return "Premium"

    def _apply_order_economics(self, order: Dict[str, Any], zone_bucket: Dict[str, Any],
                               fuel_charge: float, base_pay: float) -> None:
        """
        Compute rider payment and profit for one order, mutating the order
        (rider_charge / profit / ordertype) and the zone totals in place.

        Best-effort: a malformed numeric field aborts the remaining updates
        for this order only (totals updated before the failure are kept),
        matching the tolerance of the original implementation.
        """
        try:
            # 'actualkms' is preferred for delivery distance.
            dist = float(order.get("actualkms", order.get("previouskms", 0)))
            zone_bucket["total_kms"] += dist
            # Individual charge for this order: fixed base + variable distance.
            order_amount = float(order.get("orderamount") or order.get("deliveryamount") or 0)
            rider_payment = base_pay + (dist * fuel_charge)
            profit = order_amount - rider_payment
            order["rider_charge"] = round(rider_payment, 2)
            order["profit"] = round(profit, 2)
            # Profit-based classification (order type).
            order["ordertype"] = self._classify_by_profit(profit)
            zone_bucket["total_profit"] += profit
        except (TypeError, ValueError):
            # NOTE(review): numeric fields arrive as strings from upstream and
            # are occasionally missing or garbled; skip economics for those.
            pass

    @staticmethod
    def _build_zone_payload(z_name: str, z_data: Dict[str, Any]):
        """Flatten one zone bucket into (detailed zone object, metric summary)."""
        riders_list = [
            {
                "rider_id": r_data["rider_details"]["id"],
                "rider_name": r_data["rider_details"]["name"],
                "orders_count": len(r_data["orders"]),
                "orders": r_data["orders"],
            }
            for r_data in z_data["riders_map"].values()
        ]
        # Flat metric summary (lightweight, for dashboards).
        metrics = {
            "zone_name": z_name,
            "total_orders": z_data["total_orders"],
            "assigned_orders": z_data["assigned_orders"],
            "unassigned_orders_count": len(z_data["unassigned_orders"]),
            "active_riders_count": len(riders_list),
            "total_delivery_kms": round(z_data["total_kms"], 2),
            "total_profit": round(z_data["total_profit"], 2),
        }
        # Detailed object: same metrics plus the full rider/order payloads.
        zone_obj = {
            **metrics,
            "riders": riders_list,
            "unassigned_orders": z_data["unassigned_orders"],
        }
        return zone_obj, metrics

    def group_by_zones(self, flat_orders: List[Dict[str, Any]],
                       unassigned_orders: Optional[List[Dict[str, Any]]] = None,
                       fuel_charge: float = 2.5, base_pay: float = 30.0) -> Dict[str, Any]:
        """
        Group a flat list of optimized orders into Zones -> Riders -> Orders.

        Args:
            flat_orders: orders already assigned to riders.
            unassigned_orders: orders without a rider; they are still zoned
                but bucketed separately inside each zone.
            fuel_charge: per-km rider compensation.
            base_pay: fixed per-order rider compensation.

        Returns:
            {"detailed_zones": [...], "zone_analysis": [...]} where
            detailed_zones carries full rider/order payloads and
            zone_analysis carries the flat per-zone metric summaries.
        """
        zones_map = {}  # zone name -> working bucket
        unassigned_orders = unassigned_orders or []
        # Process assigned and unassigned together so everything gets zoned.
        all_to_process = [(o, True) for o in flat_orders]
        all_to_process += [(o, False) for o in unassigned_orders]
        for order, is_assigned in all_to_process:
            # 1. Extract coords and classify the zone.
            lat, lon, pincode = self._extract_delivery_coords(order)
            zone_name = self.determine_zone(lat, lon, pincode)
            order["zone_name"] = zone_name
            # 2. Initialize the zone bucket on first sight.
            bucket = zones_map.setdefault(zone_name, {
                "riders_map": {},
                "total_orders": 0,
                "assigned_orders": 0,
                "unassigned_orders": [],
                "total_kms": 0.0,
                "total_profit": 0.0,
            })
            rider_id = order.get("userid") or order.get("_id")
            # 3. Fold kms / profit for this order into the zone totals.
            self._apply_order_economics(order, bucket, fuel_charge, base_pay)
            # 4. Place the order under its rider, or in the unassigned list.
            if not is_assigned:
                bucket["unassigned_orders"].append(order)
            else:
                str_rid = str(rider_id)
                rider_entry = bucket["riders_map"].setdefault(str_rid, {
                    "rider_details": {
                        "id": str_rid,
                        "name": order.get("username", "Unknown"),
                    },
                    "orders": [],
                })
                rider_entry["orders"].append(order)
                bucket["assigned_orders"] += 1
            bucket["total_orders"] += 1
        # 5. Restructure for the API response, zones sorted by name.
        output_zones = []
        zone_metrics = []
        for z_name in sorted(zones_map):
            zone_obj, metrics = self._build_zone_payload(z_name, zones_map[z_name])
            output_zones.append(zone_obj)
            zone_metrics.append(metrics)
        return {
            "detailed_zones": output_zones,
            "zone_analysis": zone_metrics,
        }