initial project setup with README and ignore

This commit is contained in:
2026-04-08 15:13:42 +05:30
commit 2d5688cb35
47 changed files with 7929 additions and 0 deletions

View File

@@ -0,0 +1,515 @@
import logging
import random
import time
from math import radians, cos, sin, asin, sqrt
from typing import List, Dict, Any, Optional
from collections import defaultdict
from app.config.rider_preferences import RIDER_PREFERRED_KITCHENS
from app.services.routing.kalman_filter import smooth_rider_locations, smooth_order_coordinates
from app.config.dynamic_config import get_config
from app.services.ml.ml_data_collector import get_collector
logger = logging.getLogger(__name__)
class AssignmentService:
    """Cluster-based, load-balanced assignment of delivery orders to riders.

    The public entry point is :meth:`assign_orders`.  It smooths incoming
    coordinates, filters riders, clusters orders by kitchen proximity, scores
    riders per cluster (lower score wins), force-assigns whatever is left,
    rebalances skewed workloads and finally persists rider state/history.
    """

    # Rider IDs that must never receive orders (test / blocked accounts).
    BLOCKED_RIDERS = frozenset({1242, 1266, 1245, 1232, 1240, 1007})
    # Minutes of work one order stands for when converting between the
    # persisted "minutes_remaining" value and an order count.
    MINUTES_PER_ORDER = 15
    # Minimum pickup radius (km) granted when a rider prefers one of the
    # cluster's kitchens.
    PREFERRED_KITCHEN_RADIUS_KM = 10.0
    # Half-width of the uniform noise added to scores when reshuffle=True.
    RESHUFFLE_NOISE = 15.0

    def __init__(self):
        """Bind rider kitchen preferences and the shared config object."""
        self.rider_preferences = RIDER_PREFERRED_KITCHENS
        self.earth_radius_km = 6371
        self._cfg = get_config()

    def _load_config(self):
        """Copy tuning parameters from the config object onto the instance.

        Called at the start of every :meth:`assign_orders` call.  Values are
        read from ``self._cfg`` (the object returned by ``get_config()`` at
        construction time).
        NOTE(review): whether values actually change between calls depends on
        that config object being live-updated -- confirm in dynamic_config.
        """
        cfg = self._cfg
        self.MAX_PICKUP_DISTANCE_KM = cfg.get("max_pickup_distance_km")
        self.MAX_KITCHEN_DISTANCE_KM = cfg.get("max_kitchen_distance_km")
        self.MAX_ORDERS_PER_RIDER = int(cfg.get("max_orders_per_rider"))
        self.IDEAL_LOAD = int(cfg.get("ideal_load"))
        self.WORKLOAD_BALANCE_THRESHOLD = cfg.get("workload_balance_threshold")
        self.WORKLOAD_PENALTY_WEIGHT = cfg.get("workload_penalty_weight")
        self.DISTANCE_PENALTY_WEIGHT = cfg.get("distance_penalty_weight")
        self.PREFERENCE_BONUS = cfg.get("preference_bonus")
        self.HOME_ZONE_BONUS_4KM = cfg.get("home_zone_bonus_4km")
        self.HOME_ZONE_BONUS_2KM = cfg.get("home_zone_bonus_2km")
        self.EMERGENCY_LOAD_PENALTY = cfg.get("emergency_load_penalty")

    def haversine(self, lat1, lon1, lat2, lon2):
        """Return the great-circle distance between two points.

        Inputs are degrees (anything ``float()`` accepts); the result is in
        the unit of ``self.earth_radius_km``.
        """
        lon1, lat1, lon2, lat2 = map(
            radians, (float(lon1), float(lat1), float(lon2), float(lat2))
        )
        dlon = lon2 - lon1
        dlat = lat2 - lat1
        a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
        # Clamp to [0, 1]: rounding (or out-of-range latitudes) must not push
        # sqrt/asin outside their domains.
        a = min(1.0, max(0.0, a))
        return 2 * asin(sqrt(a)) * self.earth_radius_km

    def get_lat_lon(self, obj: Dict[str, Any], prefix: str = "") -> tuple[float, float]:
        """Extract ``(lat, lon)`` from a record with inconsistently named keys.

        Prefixed key pairs are tried first, then a list of well-known pairs,
        then a nested ``pickup_location`` dict.  A pair is used only when both
        values are truthy and convertible to float, so a literal 0 coordinate
        is treated as missing.  Returns ``(0.0, 0.0)`` when nothing usable is
        found.
        """
        candidates = [
            (f"{prefix}lat", f"{prefix}lon"),
            (f"{prefix}lat", f"{prefix}long"),
            (f"{prefix}latitude", f"{prefix}longitude"),
            # Standard keys, tried when the prefixed ones are absent.
            ("lat", "lon"), ("latitude", "longitude"),
            ("pickuplat", "pickuplon"), ("pickuplat", "pickuplong"),
            ("deliverylat", "deliverylong"), ("droplat", "droplon"),
        ]
        for lat_key, lon_key in candidates:
            if lat_key in obj and lon_key in obj and obj[lat_key] and obj[lon_key]:
                try:
                    return float(obj[lat_key]), float(obj[lon_key])
                except (TypeError, ValueError):
                    # Unparseable value: fall through to the next key pair.
                    continue
        # Special case: nested 'pickup_location'.  Only recurse into real
        # dicts so a None/str placeholder cannot raise.
        nested = obj.get("pickup_location") if "pickup_location" in obj else None
        if isinstance(nested, dict):
            return self.get_lat_lon(nested)
        return 0.0, 0.0

    def get_order_kitchen(self, order: Dict[str, Any]) -> str:
        """Return the order's kitchen/store name, or ``"Unknown"`` if absent."""
        for key in ('storename', 'restaurantname', 'kitchenname', 'partnername', 'store_name'):
            value = order.get(key)
            if value:
                return str(value).strip()
        return "Unknown"

    @staticmethod
    def _extract_rider_id(rider: Dict[str, Any]) -> Optional[int]:
        """Return the rider's integer ID from the first populated ID key, else None."""
        rid_raw = rider.get("userid") or rider.get("riderid") or rider.get("id") or rider.get("_id")
        try:
            return int(rid_raw)
        except (ValueError, TypeError):
            return None

    def _collect_on_duty_riders(self, riders, state_mgr, home_locations, rider_states) -> List[Dict[str, Any]]:
        """Return on-duty/active, non-blocked riders and seed ``rider_states`` for them."""
        valid_riders = []
        for r in riders:
            rid = self._extract_rider_id(r)
            if rid is None or rid in self.BLOCKED_RIDERS:
                continue
            # Keep if onduty is 1 / "1" / True / "True" OR status is active/idle/online.
            is_onduty = str(r.get("onduty")) in ("1", "True")
            is_active = r.get("status") in ("active", "idle", "online")
            if not (is_onduty or is_active):
                continue
            lat, lon = self.get_lat_lon(r)
            # Persisted state tells us whether the rider is already busy.
            p_state = state_mgr.get_rider_state(rid)
            if lat == 0 or lon == 0:
                # No usable GPS: fall back to last drop, then home location.
                last_lat = p_state.get('last_drop_lat')
                if last_lat:
                    lat, lon = last_lat, p_state.get('last_drop_lon', 0.0)
                else:
                    lat, lon = home_locations.get(rid, (0.0, 0.0))
            valid_riders.append({"id": rid, "lat": lat, "lon": lon, "obj": r})
            # Convert remaining minutes into an estimated number of orders.
            existing_load = (p_state.get('minutes_remaining') or 0) / self.MINUTES_PER_ORDER
            rider_states[rid] = {
                'lat': lat,
                'lon': lon,
                'kitchens': set(),
                'count': int(existing_load),      # start from existing workload
                'workload_score': existing_load,  # for prioritization
            }
        return valid_riders

    def _collect_rescue_riders(self, riders, home_locations, rider_states) -> List[Dict[str, Any]]:
        """Return any non-blocked, locatable rider, ignoring duty status (emergency rescue)."""
        valid_riders = []
        for r in riders:
            rid = self._extract_rider_id(r)
            if rid is None or rid in self.BLOCKED_RIDERS:
                continue
            lat, lon = self.get_lat_lon(r)
            if lat == 0 or lon == 0:
                lat, lon = home_locations.get(rid, (0.0, 0.0))
            if lat != 0:
                valid_riders.append({"id": rid, "lat": lat, "lon": lon, "obj": r})
                rider_states[rid] = {
                    'lat': lat, 'lon': lon, 'kitchens': set(),
                    'count': 0, 'workload_score': 0,
                }
        return valid_riders

    def _score_candidates(self, cluster, valid_riders, rider_states, home_locations, reshuffle) -> List[Dict[str, Any]]:
        """Score every rider within reach of the cluster; lower score is better."""
        centroid_lat, centroid_lon = cluster['centroid']
        kitchen_names = [k.lower() for k in cluster['kitchen_names']]
        candidates = []
        for r in valid_riders:
            rid = r["id"]
            r_state = rider_states[rid]
            dist = self.haversine(r_state['lat'], r_state['lon'], centroid_lat, centroid_lon)

            # A rider prefers the cluster when any preferred name and any
            # kitchen name contain one another (case-insensitive).
            prefs = [p.lower() for p in self.rider_preferences.get(rid, [])]
            has_preference = any(p in k or k in p for k in kitchen_names for p in prefs)

            # Preferred kitchens get at least PREFERRED_KITCHEN_RADIUS_KM of reach.
            allowed_dist = self.MAX_PICKUP_DISTANCE_KM
            if has_preference:
                allowed_dist = max(allowed_dist, self.PREFERRED_KITCHEN_RADIUS_KM)
            if dist > allowed_dist:
                continue

            utilization = r_state['count'] / self.MAX_ORDERS_PER_RIDER

            # NOTE(review): bonuses are *added* to a lower-is-better score, so
            # they only help if the configured values are negative -- confirm.
            preference_bonus = self.PREFERENCE_BONUS if has_preference else 0
            home_bonus = 0
            h_lat, h_lon = home_locations.get(rid, (0.0, 0.0))
            if h_lat != 0:
                home_dist = self.haversine(h_lat, h_lon, centroid_lat, centroid_lon)
                if home_dist <= 2.0:
                    home_bonus = self.HOME_ZONE_BONUS_2KM
                elif home_dist <= 4.0:
                    home_bonus = self.HOME_ZONE_BONUS_4KM

            # Everything except the workload term is fixed for this cluster,
            # so keep it separately and reuse it when the load changes.
            base_score = dist * self.DISTANCE_PENALTY_WEIGHT + preference_bonus + home_bonus
            if reshuffle:
                # Controlled noise so retries explore different riders.
                base_score += random.uniform(-self.RESHUFFLE_NOISE, self.RESHUFFLE_NOISE)

            candidates.append({
                'id': rid,
                'score': base_score + utilization * self.WORKLOAD_PENALTY_WEIGHT,
                'base_score': base_score,
                'distance': dist,
                'utilization': utilization,
                'current_load': r_state['count'],
            })
        candidates.sort(key=lambda c: c['score'])
        return candidates

    def _distribute_cluster(self, cluster, candidates, rider_states, assignments) -> List[Dict[str, Any]]:
        """Hand the cluster's orders to the best-scoring riders; return orders left over."""
        centroid_lat, centroid_lon = cluster['centroid']
        remaining = list(cluster['orders'])
        while remaining and candidates:
            best = candidates[0]
            rid = best['id']
            r_state = rider_states[rid]

            available_capacity = self.MAX_ORDERS_PER_RIDER - r_state['count']
            if available_capacity <= 0:
                # Rider is full: drop from the candidate list.
                candidates.pop(0)
                continue

            if best['utilization'] < self.WORKLOAD_BALANCE_THRESHOLD:
                # Under-utilised rider: fill up to capacity.
                batch_size = min(available_capacity, len(remaining))
            else:
                # Busy rider: top up to IDEAL_LOAD only, but always at least
                # one order so the loop makes progress.
                batch_size = max(1, min(self.IDEAL_LOAD - r_state['count'],
                                        len(remaining), available_capacity))

            batch, remaining = remaining[:batch_size], remaining[batch_size:]
            assignments[rid].extend(batch)

            # The rider is now assumed to be at the cluster centroid.
            r_state['count'] += len(batch)
            r_state['lat'] = centroid_lat
            r_state['lon'] = centroid_lon
            r_state['kitchens'].update(cluster['kitchen_names'])
            r_state['workload_score'] = r_state['count'] / self.MAX_ORDERS_PER_RIDER
            logger.info(f" -> Assigned {len(batch)} orders to Rider {rid} (load: {r_state['count']}/{self.MAX_ORDERS_PER_RIDER})")

            # Re-score with the same formula and configured weight used for
            # the initial score; only the workload term has changed.
            for candidate in candidates:
                if candidate['id'] == rid:
                    candidate['utilization'] = r_state['count'] / self.MAX_ORDERS_PER_RIDER
                    candidate['current_load'] = r_state['count']
                    candidate['score'] = (candidate['base_score']
                                          + candidate['utilization'] * self.WORKLOAD_PENALTY_WEIGHT)
            candidates.sort(key=lambda c: c['score'])
        return remaining

    def _force_assign(self, pool, valid_riders, rider_states, assignments) -> List[Dict[str, Any]]:
        """Assign each pooled order to the least-bad rider; return orders that still fail."""
        still_unassigned = []
        for o in pool:
            o_lat, o_lon = self.get_lat_lon(o, prefix="pickup")
            if o_lat == 0:
                o["unassigned_reason"] = "Could not geolocate order (0,0)."
                still_unassigned.append(o)
                continue

            # Score = distance + configured penalty per order already held,
            # so one rider does not absorb everything.
            best_rider = None
            best_score = float('inf')
            for r in valid_riders:
                rid = r["id"]
                r_state = rider_states[rid]
                dist = self.haversine(r_state['lat'], r_state['lon'], o_lat, o_lon)
                e_score = dist + (r_state['count'] * self.EMERGENCY_LOAD_PENALTY)
                if e_score < best_score:
                    best_score = e_score
                    best_rider = rid

            # Compare against None: a rider ID of 0 is still a valid rider.
            if best_rider is not None:
                # The order is assigned now; drop any reason set earlier.
                o.pop("unassigned_reason", None)
                assignments[best_rider].append(o)
                rider_states[best_rider]['count'] += 1
                logger.info(f" Force-Assigned order {o.get('orderid')} to Rider {best_rider} (Score: {best_score:.2f})")
            else:
                o.setdefault("unassigned_reason", "No eligible rider found during emergency assignment.")
                still_unassigned.append(o)
        return still_unassigned

    def assign_orders(self, orders: List[Dict[str, Any]], riders: List[Dict[str, Any]], reshuffle: bool = False) -> tuple[Dict[int, List[Dict[str, Any]]], List[Dict[str, Any]]]:
        """
        Cluster-based, load-balanced assignment.

        Strategy:
        1. Smooth coordinates, then filter riders (on-duty first; any
           locatable rider as an emergency rescue if none pass).
        2. Cluster orders by kitchen proximity.
        3. Give each cluster to the best-scoring riders (distance, workload,
           preference and home-zone terms), splitting clusters when needed.
        4. Force-assign leftovers to the closest / least-loaded rider.
        5. Rebalance skewed workloads, persist state, log for ML.

        If reshuffle=True, controlled randomness is injected into rider
        scoring so that retrying the same input can explore alternatives.

        Returns ``(assignments, unassigned_orders)`` where ``assignments``
        maps rider ID to its list of order dicts.  Order dicts that end up
        unassigned get an ``"unassigned_reason"`` key.
        """
        from app.config.rider_preferences import RIDER_HOME_LOCATIONS
        from app.services.rider.rider_state_manager import RiderStateManager
        from app.services.routing.clustering_service import ClusteringService

        self._load_config()
        call_start = time.perf_counter()

        assignments: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
        unassigned_orders: List[Dict[str, Any]] = []
        rider_states: Dict[int, Dict[str, Any]] = {}  # live load per rider

        # Kalman-smooth rider GPS and order coordinates before any scoring.
        riders = smooth_rider_locations(list(riders or []))
        orders = smooth_order_coordinates(list(orders or []))

        # 1. Parse and filter riders.
        state_mgr = RiderStateManager()
        valid_riders = self._collect_on_duty_riders(riders, state_mgr, RIDER_HOME_LOCATIONS, rider_states)
        if not valid_riders:
            logger.warning("No riders passed on-duty filter. Retrying with all available riders as emergency rescue...")
            valid_riders = self._collect_rescue_riders(riders, RIDER_HOME_LOCATIONS, rider_states)
        if not valid_riders:
            logger.error("DANGER: Absolutely no riders available for assignment.")
            for o in orders:
                o["unassigned_reason"] = "No riders found (check partner online status)."
                unassigned_orders.append(o)
            return assignments, unassigned_orders
        logger.info(f"Found {len(valid_riders)} active riders")

        # 2. Cluster orders by kitchen proximity.
        clusters = ClusteringService().cluster_orders_by_kitchen(
            orders, max_cluster_radius_km=self.MAX_KITCHEN_DISTANCE_KM
        )
        logger.info(f"Created {len(clusters)} order clusters")

        # 3. Assign clusters to riders (load-balanced).
        for cluster_idx, cluster in enumerate(clusters):
            centroid_lat, centroid_lon = cluster['centroid']
            cluster_orders = cluster['orders']
            cluster_size = len(cluster_orders)
            logger.info(f"Assigning cluster {cluster_idx+1}/{len(clusters)}: {cluster_size} orders at ({centroid_lat:.4f}, {centroid_lon:.4f})")

            candidates = self._score_candidates(cluster, valid_riders, rider_states, RIDER_HOME_LOCATIONS, reshuffle)
            if not candidates:
                logger.warning(f"No riders available for cluster {cluster_idx+1}")
                for o in cluster_orders:
                    o["unassigned_reason"] = f"No riders within {self.MAX_PICKUP_DISTANCE_KM}km radius of kitchen."
                    unassigned_orders.append(o)
                continue

            # Leftovers stay in the pool for mandatory assignment below.
            unassigned_orders.extend(
                self._distribute_cluster(cluster, candidates, rider_states, assignments)
            )

        # 4. Emergency mandatory assignment (ignores the per-rider cap).
        if unassigned_orders and valid_riders:
            logger.info(f"[ALERT] Starting Emergency Mandatory Assignment for {len(unassigned_orders)} orders...")
            unassigned_orders = self._force_assign(unassigned_orders, valid_riders, rider_states, assignments)

        # 5. Final rebalancing.
        self._rebalance_workload(assignments, rider_states, valid_riders)

        # 6. Commit state and history.
        self._post_process(assignments, rider_states)

        # 7. ML data collection -- best effort, must never fail the call.
        try:
            elapsed_ms = (time.perf_counter() - call_start) * 1000
            get_collector().log_assignment_event(
                num_orders=len(orders),
                num_riders=len(riders),
                hyperparams=self._cfg.get_all(),
                assignments=assignments,
                unassigned_count=len(unassigned_orders),
                elapsed_ms=elapsed_ms,
            )
        except Exception as _ml_err:
            logger.debug(f"ML logging skipped: {_ml_err}")

        # Log final distribution.
        logger.info("=" * 50)
        logger.info("FINAL ASSIGNMENT DISTRIBUTION:")
        for rid, rider_orders in sorted(assignments.items()):
            logger.info(f" Rider {rid}: {len(rider_orders)} orders")
        if unassigned_orders:
            logger.warning(f" [ALERT] STILL UNASSIGNED: {len(unassigned_orders)} (Reason: No riders online or invalid coords)")
        else:
            logger.info(" [OK] ALL ORDERS ASSIGNED SUCCESSFULLY")
        logger.info("=" * 50)
        return assignments, unassigned_orders

    def _rebalance_workload(self, assignments: Dict[int, List], rider_states: Dict, valid_riders: List):
        """
        Move orders from overloaded riders to under-utilised ones, in place.

        A rider is overloaded when its load exceeds both 1.5x the average and
        IDEAL_LOAD, and under-utilised below 0.5x the average.  An order is
        moved only if the receiving rider is within MAX_PICKUP_DISTANCE_KM of
        its pickup point and at most 20% farther from it than the current
        rider.  ``assignments`` and the ``count`` fields of ``rider_states``
        are mutated; nothing is returned.
        """
        if not assignments or not valid_riders:
            return

        total_orders = sum(len(rider_orders) for rider_orders in assignments.values())
        avg_load = total_orders / len(valid_riders)

        overloaded = []
        underutilized = []
        for r in valid_riders:
            rid = r['id']
            load = rider_states[rid]['count']
            if load > avg_load * 1.5 and load > self.IDEAL_LOAD:
                overloaded.append(rid)
            elif load < avg_load * 0.5:
                underutilized.append(rid)
        if not overloaded or not underutilized:
            return
        logger.info(f"Rebalancing: {len(overloaded)} overloaded, {len(underutilized)} underutilized riders")

        for over_rid in overloaded:
            # .get(): a rider can be overloaded purely from pre-existing work
            # and must not gain an empty entry in `assignments`.
            over_orders = assignments.get(over_rid)
            if not over_orders:
                continue
            over_state = rider_states[over_rid]

            for under_rid in underutilized:
                surplus = over_state['count'] - self.IDEAL_LOAD
                if surplus <= 0 or not over_orders:
                    break  # this rider is no longer overloaded
                under_state = rider_states[under_rid]
                under_capacity = self.MAX_ORDERS_PER_RIDER - under_state['count']
                if under_capacity <= 0:
                    continue

                transferable = []
                for order in over_orders:
                    o_lat, o_lon = self.get_lat_lon(order, prefix="pickup")
                    if o_lat == 0:
                        continue
                    dist_to_under = self.haversine(under_state['lat'], under_state['lon'], o_lat, o_lon)
                    dist_to_over = self.haversine(over_state['lat'], over_state['lon'], o_lat, o_lon)
                    if dist_to_under <= self.MAX_PICKUP_DISTANCE_KM and dist_to_under <= dist_to_over * 1.2:
                        transferable.append(order)

                transfer_count = min(len(transferable), under_capacity, surplus)
                if transfer_count <= 0:
                    continue
                transfer_batch = transferable[:transfer_count]

                for order in transfer_batch:
                    over_orders.remove(order)
                # setdefault keeps this working for plain dicts too.
                assignments.setdefault(under_rid, []).extend(transfer_batch)

                over_state['count'] -= len(transfer_batch)
                under_state['count'] += len(transfer_batch)
                logger.info(f" Rebalanced: {len(transfer_batch)} orders from Rider {over_rid} -> {under_rid}")

    def _post_process(self, assignments, rider_states):
        """Record history stats and persist rider state for every rider with orders."""
        from app.services.rider.rider_history_service import RiderHistoryService
        from app.services.rider.rider_state_manager import RiderStateManager

        history_service = RiderHistoryService()
        state_mgr = RiderStateManager()
        ts = time.time()
        for rid, rider_orders in assignments.items():
            if not rider_orders:
                continue
            # NOTE(review): 5.0 is a fixed second argument -- presumably a
            # placeholder distance; confirm against update_rider_stats.
            history_service.update_rider_stats(rid, 5.0, len(rider_orders))
            st = rider_states[rid]
            # NOTE(review): minutes_remaining is derived from this call's
            # orders only, and 'active_kitchens' is stored as a set -- verify
            # both against what RiderStateManager expects to persist.
            state_mgr.states[rid] = {
                'minutes_remaining': len(rider_orders) * self.MINUTES_PER_ORDER,
                'last_drop_lat': st['lat'],
                'last_drop_lon': st['lon'],
                'active_kitchens': st['kitchens'],
                'last_updated_ts': ts,
            }
        state_mgr._save_states()