Files
routesapi/app/services/routing/route_optimizer.py

425 lines
17 KiB
Python

"""Production-grade route optimization using Google OR-Tools.
ALGORITHM: TSP / VRP with Google OR-Tools
- Industry-standard solver (same as used by major logistics companies)
- Constraint-based optimization
- Handles time windows (future proofing)
- Guaranteed optimal or near-optimal solution
FEATURES:
- Automatic outlier detection and coordinate correction
- Hybrid distance calculation (Google Maps + Haversine fallback)
- Robust error handling for invalid inputs
"""
import math
import os
import logging
import asyncio
from typing import Dict, Any, List as _List, Optional, Tuple, Union
from datetime import datetime, timedelta
import httpx
from app.services.routing.kalman_filter import smooth_order_coordinates
import numpy as np
from app.core.arrow_utils import calculate_haversine_matrix_vectorized
from app.config.dynamic_config import get_config
try:
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp
ORTOOLS_AVAILABLE = True
except ImportError:
ORTOOLS_AVAILABLE = False
logging.warning("Google OR-Tools not found. Falling back to simple greedy solver.")
logger = logging.getLogger(__name__)
class RouteOptimizer:
"""Route optimization using Google OR-Tools (Async)."""
def __init__(self):
self.earth_radius = 6371 # Earth radius in km
_cfg = get_config()
# Initialize Realistic ETA Calculator
from app.services.routing.realistic_eta_calculator import RealisticETACalculator, get_time_of_day_category
self.eta_calculator = RealisticETACalculator()
self.get_traffic_condition = get_time_of_day_category
# Speed settings (ML-tuned via DynamicConfig)
self.avg_speed_kmh = float(_cfg.get("avg_speed_kmh"))
# Road factor (haversine -> road distance multiplier, ML-tuned)
self.road_factor = float(_cfg.get("road_factor"))
# Google Maps API settings
self.google_maps_api_key = os.getenv("GOOGLE_MAPS_API_KEY", "")
self.use_google_maps = bool(self.google_maps_api_key)
# Solver time limit (ML-tuned)
self.search_time_limit_seconds = int(_cfg.get("search_time_limit_seconds"))
# Initialize ID3 Behavior Analyzer
from app.services.ml.behavior_analyzer import get_analyzer
self.behavior_analyzer = get_analyzer()
def haversine_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""Calculate great circle distance between two points on Earth (in km)."""
try:
lat1, lon1, lat2, lon2 = map(math.radians, [float(lat1), float(lon1), float(lat2), float(lon2)])
dlat = lat2 - lat1
dlon = lon2 - lon1
a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
c = 2 * math.asin(math.sqrt(a))
return self.earth_radius * c
except Exception:
return 0.0
async def _get_google_maps_distances_batch(self, origin_lat: float, origin_lon: float,
destinations: _List[tuple]) -> Dict[tuple, float]:
"""Get road distances for multiple destinations from Google Maps API. (Async, Parallel)"""
if not self.use_google_maps or not destinations:
return {}
results = {}
batch_size = 25
chunks = [destinations[i:i + batch_size] for i in range(0, len(destinations), batch_size)]
async def process_batch(batch):
batch_result = {}
try:
dest_str = "|".join([f"{lat},{lon}" for lat, lon in batch])
url = "https://maps.googleapis.com/maps/api/distancematrix/json"
params = {
"origins": f"{origin_lat},{origin_lon}",
"destinations": dest_str,
"key": self.google_maps_api_key,
"units": "metric"
}
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(url, params=params)
response.raise_for_status()
data = response.json()
if data.get("status") == "OK":
rows = data.get("rows", [])
if rows:
elements = rows[0].get("elements", [])
for idx, element in enumerate(elements):
if idx < len(batch):
dest_coord = batch[idx]
if element.get("status") == "OK":
dist = element.get("distance", {}).get("value")
dur = element.get("duration", {}).get("value")
if dist is not None:
batch_result[dest_coord] = {
'distance': dist / 1000.0,
'duration': dur / 60.0 if dur else None
}
except Exception as e:
logger.warning(f"Google Maps batch call failed: {e}")
return batch_result
batch_results_list = await asyncio.gather(*[process_batch(chunk) for chunk in chunks])
for res in batch_results_list:
results.update(res)
return results
def _solve_tsp_ortools(self, locations: _List[Tuple[float, float]], dist_matrix: _List[_List[float]]) -> _List[int]:
"""Solve TSP using Google OR-Tools."""
if not ORTOOLS_AVAILABLE:
# Fallback to simple Greedy NN if OR-Tools not installed
return self._solve_greedy(locations, dist_matrix)
if not locations or len(locations) <= 1:
return [0]
manager = pywrapcp.RoutingIndexManager(len(locations), 1, 0) # num_nodes, num_vehicles, depot
routing = pywrapcp.RoutingModel(manager)
def distance_callback(from_index, to_index):
from_node = manager.IndexToNode(from_index)
to_node = manager.IndexToNode(to_index)
# Open TSP: Returning to the depot (index 0) has zero cost.
# This ensures the solver optimizes for the path from start to last drop-off
# rather than a closed circuit that might be reversed if the rider is on the "far" side.
if to_node == 0:
return 0
# OR-Tools works with integers, so we scale by 1000 (meters)
val = dist_matrix[from_node][to_node]
return int(val * 1000)
transit_callback_index = routing.RegisterTransitCallback(distance_callback)
routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
search_parameters = pywrapcp.DefaultRoutingSearchParameters()
search_parameters.first_solution_strategy = (
routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC
)
search_parameters.local_search_metaheuristic = (
routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH
)
search_parameters.time_limit.seconds = self.search_time_limit_seconds
solution = routing.SolveWithParameters(search_parameters)
if solution:
index = routing.Start(0)
route = []
while not routing.IsEnd(index):
route.append(manager.IndexToNode(index))
index = solution.Value(routing.NextVar(index))
return route
else:
return self._solve_greedy(locations, dist_matrix)
def _solve_greedy(self, locations, dist_matrix):
"""Simple Greedy Nearest Neighbor fallback."""
unvisited = set(range(1, len(locations)))
curr = 0
route = [0]
while unvisited:
nearest = min(unvisited, key=lambda x: dist_matrix[curr][x])
route.append(nearest)
unvisited.remove(nearest)
curr = nearest
return route
def _cleanup_coords(self, lat: Any, lon: Any, ref_lat: float, ref_lon: float) -> Tuple[float, float]:
"""
Heuristic to fix bad coordinates.
1. Fixes lat==lon typo.
2. Fixes missing negative signs if needed (not needed for India).
3. Projects outlier > 500km to reference (centroid).
"""
try:
lat = float(lat)
lon = float(lon)
except:
return 0.0, 0.0
if lat == 0 or lon == 0:
return lat, lon
# 1. Check strict equality (typo)
if abs(lat - lon) < 0.0001:
if ref_lon != 0:
# If reference is available, assume lat is correct and fix lon
# (Common error: copy lat to lon field)
return lat, ref_lon
# 2. Check general outlier (e.g. 500km away)
if ref_lat != 0 and ref_lon != 0:
dist = self.haversine_distance(lat, lon, ref_lat, ref_lon)
if dist > 500:
# Returning reference prevents map explosion
return ref_lat, ref_lon
return lat, lon
async def optimize_provider_payload(self, orders: _List[Dict[str, Any]], start_coords: Optional[tuple] = None) -> _List[Dict[str, Any]]:
"""Optimize delivery route and add step metrics (OR-Tools)."""
if not orders:
return []
# Deep copy
orders = [dict(order) for order in orders]
# 0. KALMAN FILTER - Smooth noisy delivery GPS coordinates
orders = smooth_order_coordinates(orders)
# Helpers
def _to_float(v: Any) -> float:
try: return float(v)
except: return 0.0
def _normalize_dt(val: Any) -> str:
if val in (None, "", 0): return ""
s = str(val).strip()
for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%d %H:%M:%S"):
try: return datetime.strptime(s, fmt).strftime("%Y-%m-%d %H:%M:%S")
except: pass
return s
# 1. PREPARE COORDINATES & CENTROID
valid_lats = []
valid_lons = []
for o in orders:
lat = _to_float(o.get("deliverylat"))
lon = _to_float(o.get("deliverylong"))
if lat != 0 and lon != 0:
valid_lats.append(lat)
valid_lons.append(lon)
centroid_lat = sum(valid_lats)/len(valid_lats) if valid_lats else 0.0
centroid_lon = sum(valid_lons)/len(valid_lons) if valid_lons else 0.0
# 2. DETERMINE START LOCATION (With Fix)
start_lat, start_lon = 0.0, 0.0
# Try explicit start_coords first
if start_coords and len(start_coords) == 2:
try:
start_lat, start_lon = float(start_coords[0]), float(start_coords[1])
except: pass
# Fallback to pickup location in orders
if start_lat == 0:
for o in orders:
plat = _to_float(o.get("pickuplat"))
plon = _to_float(o.get("pickuplon") or o.get("pickuplong"))
if plat != 0:
start_lat, start_lon = plat, plon
break
# Fallback to centroid
if start_lat == 0:
start_lat, start_lon = centroid_lat, centroid_lon
# FIX BAD START COORDINATES
start_lat, start_lon = self._cleanup_coords(start_lat, start_lon, centroid_lat, centroid_lon)
# 3. BUILD LOCATIONS LIST FOR SOLVER
# Index 0 is Start (Depot), 1..N are orders
locations = [(start_lat, start_lon)]
points_map = [] # Maps solver index 1..N back to original order index
for idx, order in enumerate(orders):
lat = _to_float(order.get("deliverylat"))
lon = _to_float(order.get("deliverylong"))
# Project coordinates and ensure they are strings for Go compatibility
lat, lon = self._cleanup_coords(lat, lon, centroid_lat, centroid_lon)
order_str_lat, order_str_lon = str(lat), str(lon)
order["deliverylat"] = order_str_lat
order["deliverylong"] = order_str_lon
if "droplat" in order: order["droplat"] = order_str_lat
if "droplon" in order: order["droplon"] = order_str_lon
locations.append((lat, lon))
points_map.append(idx)
# 4. COMPUTE DISTANCE MATRIX (Vectorized with Arrow/NumPy)
# road_factor is now ML-tuned (was hardcoded 1.3)
lats = np.array([loc[0] for loc in locations])
lons = np.array([loc[1] for loc in locations])
dist_matrix = calculate_haversine_matrix_vectorized(lats, lons) * self.road_factor
# 5. RISK-AWARE COST MATRIX (ID3 INTELLIGENCE)
# Apply Risk Penalties to the matrix before solving
cost_matrix = dist_matrix.copy()
traffic = self.get_traffic_condition()
num_locs = len(locations)
risk_penalty_count = 0
for i in range(num_locs):
for j in range(num_locs):
if i == j: continue
# Predict success risk for this specific leg
dist_km = dist_matrix[i][j]
prediction = self.behavior_analyzer.predict(
distance_km=dist_km,
timestamp_or_band=traffic,
)
if prediction.get("label") == "RISK": # High Risk predicted by ID3
# Add 25% penalty to distance to discourage this leg
cost_matrix[i][j] *= 1.25
risk_penalty_count += 1
if risk_penalty_count > 0:
logger.info(f"ID3 Intelligence: Applied {risk_penalty_count} Risk Penalties to optimize for delivery safety.")
# 6. SOLVE TSP
route_indices = self._solve_tsp_ortools(locations, cost_matrix)
# Remove 0 (depot)
optimized_order_indices = [i for i in route_indices if i != 0]
# 6. BUILD RESULT
result = []
cumulative_dist = 0.0
# Track previous location (starts at 0)
prev_idx = 0
for step_num, solver_idx in enumerate(optimized_order_indices, start=1):
order_idx = points_map[solver_idx - 1]
order = dict(orders[order_idx])
# Clean fields
for k in ("step", "previouskms", "cumulativekms", "eta", "actualkms", "ordertype"):
order.pop(k, None)
# Normalize dates
for field in ["orderdate", "deliverytime", "created"]:
if field in order: order[field] = _normalize_dt(order.get(field))
# Distance for this leg
step_dist = dist_matrix[prev_idx][solver_idx]
cumulative_dist += step_dist
# Metadata (Step metrics are integers in the Go struct)
order["step"] = int(step_num)
order["previouskms"] = int(0 if step_num == 1 else int(round(step_dist)))
order["cumulativekms"] = int(round(cumulative_dist))
# 7. METRICS (Calculate actual distance, prioritize provider input)
plat, plon = start_lat, start_lon
if plat == 0: plat, plon = _to_float(order.get("pickuplat")), _to_float(order.get("pickuplon") or order.get("pickuplong"))
dlat, dlon = locations[solver_idx]
# Baseline: Haversine * 1.3 (estimated road factor)
true_dist = self.haversine_distance(plat, plon, dlat, dlon) * 1.3
provided_kms = order.get("kms")
if provided_kms not in (None, "", 0, "0"):
try:
# If provider gave us a distance, respect it as the 'actual' distance
true_dist = float(provided_kms)
except:
pass
order["actualkms"] = str(round(true_dist, 2))
order["kms"] = str(provided_kms) if provided_kms else str(int(round(true_dist)))
# Financial metrics - keeping as numbers for calculations
if "rider_charge" in order: order["rider_charge"] = round(float(order["rider_charge"]), 2)
if "profit" in order: order["profit"] = round(float(order["profit"]), 2)
# Type & ETA
order["ordertype"] = "Economy" if true_dist <= 5 else "Premium" if true_dist <= 12 else "Risky"
traffic = self.get_traffic_condition()
eta = self.eta_calculator.calculate_eta(
distance_km=step_dist,
is_first_order=(step_num == 1),
order_type=order["ordertype"],
time_of_day=traffic
)
order["eta"] = str(eta)
result.append(order)
prev_idx = solver_idx
return result
def optimize_route(orders: _List[Dict[str, Any]]) -> _List[Dict[str, Any]]:
"""Synchronous wrapper."""
optimizer = RouteOptimizer()
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
if loop.is_running():
# Fallback if loop is running (shouldn't happen in standard usage)
return []
return loop.run_until_complete(optimizer.optimize_provider_payload(orders))