import logging
import random
import time
from math import radians, cos, sin, asin, sqrt
from typing import List, Dict, Any, Optional
from collections import defaultdict

from app.config.rider_preferences import RIDER_PREFERRED_KITCHENS
from app.services.routing.kalman_filter import smooth_rider_locations, smooth_order_coordinates
from app.config.dynamic_config import get_config
from app.services.ml.ml_data_collector import get_collector

logger = logging.getLogger(__name__)


class AssignmentService:
    """Assign delivery orders to riders using kitchen clusters and load balancing.

    The entry point is :meth:`assign_orders`; every other method is a helper
    used by it.
    """

    # Rider IDs that must never receive assignments (test/blocked accounts).
    _BLOCKED_RIDER_IDS = frozenset({1242, 1266, 1245, 1232, 1240, 1007})

    def __init__(self):
        self.rider_preferences = RIDER_PREFERRED_KITCHENS
        self.earth_radius_km = 6371
        self._cfg = get_config()

    def _load_config(self):
        """Copy the tunable hyperparameters from the config object onto ``self``.

        Called at the start of every :meth:`assign_orders` run. The values are
        read from ``self._cfg``, which is obtained once in ``__init__``.
        NOTE(review): whether values can change between calls depends on
        ``get_config()`` returning a live object - confirm.
        """
        cfg = self._cfg
        self.MAX_PICKUP_DISTANCE_KM = cfg.get("max_pickup_distance_km")
        self.MAX_KITCHEN_DISTANCE_KM = cfg.get("max_kitchen_distance_km")
        self.MAX_ORDERS_PER_RIDER = int(cfg.get("max_orders_per_rider"))
        self.IDEAL_LOAD = int(cfg.get("ideal_load"))
        self.WORKLOAD_BALANCE_THRESHOLD = cfg.get("workload_balance_threshold")
        self.WORKLOAD_PENALTY_WEIGHT = cfg.get("workload_penalty_weight")
        self.DISTANCE_PENALTY_WEIGHT = cfg.get("distance_penalty_weight")
        self.PREFERENCE_BONUS = cfg.get("preference_bonus")
        self.HOME_ZONE_BONUS_4KM = cfg.get("home_zone_bonus_4km")
        self.HOME_ZONE_BONUS_2KM = cfg.get("home_zone_bonus_2km")
        self.EMERGENCY_LOAD_PENALTY = cfg.get("emergency_load_penalty")

    def haversine(self, lat1, lon1, lat2, lon2):
        """Return the great-circle distance in km between two lat/lon points.

        Inputs are degrees (anything ``float()`` accepts).
        """
        lon1, lat1, lon2, lat2 = map(radians, [float(lon1), float(lat1), float(lon2), float(lat2)])
        dlon = lon2 - lon1
        dlat = lat2 - lat1
        a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
        # Clamp to 1.0 so rounding error cannot push asin() out of its domain.
        c = 2 * asin(min(1.0, sqrt(a)))
        return c * self.earth_radius_km

    def get_lat_lon(self, obj: Dict[str, Any], prefix: str = "") -> tuple[float, float]:
        """Extract a ``(lat, lon)`` pair from a dict with inconsistently named keys.

        Prefixed key pairs are tried first, then a fixed list of common key
        pairs, then a nested ``pickup_location`` dict. A pair is used only when
        both values are truthy and convertible to ``float``.

        Returns:
            ``(lat, lon)`` as floats, or ``(0.0, 0.0)`` when nothing usable is
            found (callers treat 0 as "no location").
        """
        # Prefix-specific keys take priority over the generic ones.
        candidates = [
            (f"{prefix}lat", f"{prefix}lon"),
            (f"{prefix}lat", f"{prefix}long"),
            (f"{prefix}latitude", f"{prefix}longitude"),
            ("lat", "lon"),
            ("latitude", "longitude"),
            ("pickuplat", "pickuplon"),
            ("pickuplat", "pickuplong"),
            ("deliverylat", "deliverylong"),
            ("droplat", "droplon"),
        ]

        for lat_key, lon_key in candidates:
            if lat_key in obj and lon_key in obj and obj[lat_key] and obj[lon_key]:
                try:
                    return float(obj[lat_key]), float(obj[lon_key])
                except (TypeError, ValueError):
                    # Unparseable value: fall through to the next key pair.
                    continue

        # Special case: coordinates nested under 'pickup_location'. Non-dict
        # values (None, strings, ...) cannot be searched, so they are ignored.
        nested = obj.get("pickup_location") if "pickup_location" in obj else None
        if isinstance(nested, dict):
            return self.get_lat_lon(nested)

        return 0.0, 0.0

    def get_order_kitchen(self, order: Dict[str, Any]) -> str:
        """Return the order's kitchen/store name, or ``"Unknown"`` if none is set."""
        possible_keys = ['storename', 'restaurantname', 'kitchenname', 'partnername', 'store_name']
        for key in possible_keys:
            if key in order and order[key]:
                return str(order[key]).strip()
        return "Unknown"

    @staticmethod
    def _parse_rider_id(rider: Dict[str, Any]) -> Optional[int]:
        """Return the rider's integer ID from the first truthy ID key, or None."""
        rid_raw = rider.get("userid") or rider.get("riderid") or rider.get("id") or rider.get("_id")
        try:
            return int(rid_raw)
        except (ValueError, TypeError):
            return None

    def _candidate_score(self, utilization: float, distance_km: float, adjustment: float = 0.0) -> float:
        """Score a rider for a cluster (lower is better) using the config weights.

        ``adjustment`` carries the per-rider terms that do not change while a
        cluster is being split (preference bonus, home-zone bonus, reshuffle noise).
        """
        return (
            utilization * self.WORKLOAD_PENALTY_WEIGHT
            + distance_km * self.DISTANCE_PENALTY_WEIGHT
            + adjustment
        )

    def assign_orders(self, orders: List[Dict[str, Any]], riders: List[Dict[str, Any]], reshuffle: bool = False) -> tuple[Dict[int, List[Dict[str, Any]]], List[Dict[str, Any]]]:
        """
        Cluster-based, load-balanced assignment of orders to riders.

        Strategy:
        1. Cluster orders by kitchen proximity
        2. Calculate rider workload (current capacity usage)
        3. Assign clusters to best-fit riders (proximity + workload balance)
        4. Force-assign leftovers to the least-bad rider, then rebalance

        If reshuffle=True, controlled randomness is injected into rider scoring
        so that retrying the same input can explore alternative assignments.

        Returns:
            ``(assignments, unassigned_orders)`` where ``assignments`` maps
            rider id -> list of order dicts and ``unassigned_orders`` holds the
            orders that could not be placed (each tagged with
            ``"unassigned_reason"`` where a reason is known).
        """
        # Project-local imports are kept function-scoped, as in the original file.
        from app.services.rider.rider_state_manager import RiderStateManager
        from app.services.routing.clustering_service import ClusteringService
        from app.config.rider_preferences import RIDER_HOME_LOCATIONS

        # -- Load ML-tuned hyperparameters (or defaults on first run) ------
        self._load_config()
        _call_start = time.time()

        # 0. Prep
        assignments: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
        unassigned_orders: List[Dict[str, Any]] = []
        rider_states = {}  # Track live load

        # 0a. KALMAN FILTER - Smooth rider GPS locations before scoring
        riders = smooth_rider_locations(list(riders))

        # 0b. KALMAN FILTER - Smooth order delivery coordinates before clustering
        orders = smooth_order_coordinates(list(orders))

        # 1. Parse and Filter Riders
        valid_riders = []
        blocked = self._BLOCKED_RIDER_IDS

        # Load Existing State (Persistence)
        state_mgr = RiderStateManager()

        for r in riders:
            rid = self._parse_rider_id(r)
            if rid is None or rid in blocked:
                continue

            # Keep if: onduty (1, "1", True) OR status is active/idle/online
            is_onduty = str(r.get("onduty")) in ["1", "True"] or r.get("onduty") is True
            is_active = r.get("status") in ["active", "idle", "online"]
            if not (is_onduty or is_active):
                continue

            lat, lon = self.get_lat_lon(r)

            # Previous state tells us whether the rider is already busy.
            # A missing state is treated as empty instead of crashing.
            p_state = state_mgr.get_rider_state(rid) or {}

            # If rider has valid GPS, use it. If not, fallback to Last Drop or Home.
            if lat == 0 or lon == 0:
                if p_state.get('last_drop_lat'):
                    lat, lon = p_state['last_drop_lat'], p_state.get('last_drop_lon')
                else:
                    lat, lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))

            valid_riders.append({"id": rid, "lat": lat, "lon": lon, "obj": r})

            # Existing workload: 15 minutes remaining is counted as one order.
            existing_load = p_state.get('minutes_remaining', 0) / 15
            rider_states[rid] = {
                'lat': lat,
                'lon': lon,
                'kitchens': set(),
                'count': int(existing_load),  # Start with existing workload
                'workload_score': existing_load,  # For prioritization
            }

        if not valid_riders:
            logger.warning("No riders passed on-duty filter. Retrying with all available riders as emergency rescue...")
            # If no on-duty riders, take ANY rider provided by the API to ensure assignment.
            for r in riders:
                rid = self._parse_rider_id(r)
                if rid is None or rid in blocked:
                    continue
                lat, lon = self.get_lat_lon(r)
                if lat == 0 or lon == 0:
                    lat, lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))

                # Riders without any usable location cannot be scored.
                if lat != 0:
                    valid_riders.append({"id": rid, "lat": lat, "lon": lon, "obj": r})
                    rider_states[rid] = {
                        'lat': lat,
                        'lon': lon,
                        'kitchens': set(),
                        'count': 0,
                        'workload_score': 0,
                    }

        if not valid_riders:
            logger.error("DANGER: Absolutely no riders available for assignment.")
            # Mark all as unassigned
            for o in orders:
                o["unassigned_reason"] = "No riders found (check partner online status)."
                unassigned_orders.append(o)
            return assignments, unassigned_orders

        logger.info(f"Found {len(valid_riders)} active riders")

        # 2. CLUSTER ORDERS BY KITCHEN PROXIMITY
        clustering_service = ClusteringService()
        clusters = clustering_service.cluster_orders_by_kitchen(orders, max_cluster_radius_km=self.MAX_KITCHEN_DISTANCE_KM)  # radius from ML

        logger.info(f"Created {len(clusters)} order clusters")

        # 3. ASSIGN CLUSTERS TO RIDERS (Load-Balanced)
        for cluster_idx, cluster in enumerate(clusters):
            centroid_lat, centroid_lon = cluster['centroid']
            cluster_orders = cluster['orders']
            cluster_size = len(cluster_orders)

            logger.info(f"Assigning cluster {cluster_idx+1}/{len(clusters)}: {cluster_size} orders at ({centroid_lat:.4f}, {centroid_lon:.4f})")

            # Find best riders for this cluster
            candidate_riders = []
            for r in valid_riders:
                rid = r["id"]
                r_state = rider_states[rid]

                dist = self.haversine(r_state['lat'], r_state['lon'], centroid_lat, centroid_lon)

                # A rider "prefers" the cluster when any preferred kitchen name
                # and any cluster kitchen name contain one another (case-insensitive).
                prefs = self.rider_preferences.get(rid, [])
                has_preference = any(
                    p.lower() in k_name.lower() or k_name.lower() in p.lower()
                    for k_name in cluster['kitchen_names']
                    for p in prefs
                )

                # Dynamic limit: configured pickup radius, widened to at least
                # 10km for preferred kitchens.
                allowed_dist = self.MAX_PICKUP_DISTANCE_KM
                if has_preference:
                    allowed_dist = max(allowed_dist, 10.0)

                if dist > allowed_dist:
                    continue  # too far

                # Workload utilization relative to the per-rider cap.
                utilization = r_state['count'] / self.MAX_ORDERS_PER_RIDER

                preference_bonus = self.PREFERENCE_BONUS if has_preference else 0

                # Home zone bonus: the 2km value overrides the 4km value.
                home_bonus = 0
                h_lat, h_lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))
                if h_lat != 0:
                    home_dist = self.haversine(h_lat, h_lon, centroid_lat, centroid_lon)
                    if home_dist <= 4.0:
                        home_bonus = self.HOME_ZONE_BONUS_4KM
                    if home_dist <= 2.0:
                        home_bonus = self.HOME_ZONE_BONUS_2KM

                adjustment = preference_bonus + home_bonus
                # RESHUFFLE: Add controlled noise so retries explore different riders
                if reshuffle:
                    adjustment += random.uniform(-15.0, 15.0)

                candidate_riders.append({
                    'id': rid,
                    'score': self._candidate_score(utilization, dist, adjustment),
                    'distance': dist,
                    'utilization': utilization,
                    'current_load': r_state['count'],
                    # Kept so the score can be recomputed with the same formula
                    # after this rider receives a batch.
                    'adjustment': adjustment,
                })

            if not candidate_riders:
                logger.warning(f"No riders available for cluster {cluster_idx+1}")
                for o in cluster_orders:
                    o["unassigned_reason"] = f"No riders within {self.MAX_PICKUP_DISTANCE_KM}km radius of kitchen."
                    unassigned_orders.append(o)
                continue

            # Sort by score (best first)
            candidate_riders.sort(key=lambda x: x['score'])

            # SMART DISTRIBUTION: Split cluster if needed
            remaining_orders = cluster_orders[:]

            while remaining_orders and candidate_riders:
                best_rider = candidate_riders[0]
                rid = best_rider['id']
                r_state = rider_states[rid]

                available_capacity = self.MAX_ORDERS_PER_RIDER - r_state['count']
                if available_capacity <= 0:
                    # Rider is full, remove from candidates
                    candidate_riders.pop(0)
                    continue

                if best_rider['utilization'] < self.WORKLOAD_BALANCE_THRESHOLD:
                    # Rider has capacity: take as much of the cluster as fits.
                    batch_size = min(available_capacity, len(remaining_orders))
                else:
                    # Rider is getting busy: top up to IDEAL_LOAD only, but
                    # always hand over at least one order to make progress.
                    batch_size = min(self.IDEAL_LOAD - r_state['count'], len(remaining_orders), available_capacity)
                    batch_size = max(1, batch_size)

                batch = remaining_orders[:batch_size]
                remaining_orders = remaining_orders[batch_size:]
                assignments[rid].extend(batch)

                # Update rider state; the rider is now considered to be at the cluster.
                r_state['count'] += len(batch)
                r_state['lat'] = centroid_lat
                r_state['lon'] = centroid_lon
                r_state['kitchens'].update(cluster['kitchen_names'])
                r_state['workload_score'] = r_state['count'] / self.MAX_ORDERS_PER_RIDER

                logger.info(f" -> Assigned {len(batch)} orders to Rider {rid} (load: {r_state['count']}/{self.MAX_ORDERS_PER_RIDER})")

                # Re-score the rider that just received orders with the SAME
                # formula and weights used for the initial ranking, then re-sort.
                for candidate in candidate_riders:
                    if candidate['id'] == rid:
                        candidate['utilization'] = r_state['count'] / self.MAX_ORDERS_PER_RIDER
                        candidate['current_load'] = r_state['count']
                        candidate['score'] = self._candidate_score(
                            candidate['utilization'], candidate['distance'], candidate['adjustment']
                        )
                candidate_riders.sort(key=lambda x: x['score'])

            # Orders left after all candidates are exhausted go to the
            # mandatory-assignment pool instead of being dropped.
            if remaining_orders:
                unassigned_orders.extend(remaining_orders)

        # 4. EMERGENCY MANDATORY ASSIGNMENT (Ensures 0 unassigned if riders exist)
        if unassigned_orders and valid_riders:
            logger.info(f"[ALERT] Starting Emergency Mandatory Assignment for {len(unassigned_orders)} orders...")

            force_pool = unassigned_orders[:]
            unassigned_orders.clear()

            for o in force_pool:
                o_lat, o_lon = self.get_lat_lon(o, prefix="pickup")
                if o_lat == 0:
                    o["unassigned_reason"] = "Could not geolocate order (0,0)."
                    unassigned_orders.append(o)
                    continue

                # Find the 'least bad' rider: distance plus a per-order load
                # penalty so one rider does not absorb everything.
                best_emergency_rider = None
                best_emergency_score = float('inf')

                for r in valid_riders:
                    rid = r["id"]
                    r_state = rider_states[rid]
                    dist = self.haversine(r_state['lat'], r_state['lon'], o_lat, o_lon)
                    e_score = dist + (r_state['count'] * self.EMERGENCY_LOAD_PENALTY)
                    if e_score < best_emergency_score:
                        best_emergency_score = e_score
                        best_emergency_rider = rid

                # Compare against None: a rider id of 0 is a valid choice.
                if best_emergency_rider is not None:
                    # The order is assigned now, so drop any reason recorded
                    # when the cluster pass failed to place it.
                    o.pop("unassigned_reason", None)
                    assignments[best_emergency_rider].append(o)
                    rider_states[best_emergency_rider]['count'] += 1
                    logger.info(f" Force-Assigned order {o.get('orderid')} to Rider {best_emergency_rider} (Score: {best_emergency_score:.2f})")
                else:
                    unassigned_orders.append(o)

        # 5. FINAL REBALANCING (Optional)
        # Check if any rider is overloaded while others are idle
        self._rebalance_workload(assignments, rider_states, valid_riders)

        # 6. Commit State and History
        self._post_process(assignments, rider_states)

        # 7. -- ML DATA COLLECTION -----------------------------------------
        # Best-effort: a logging failure must never fail the assignment.
        try:
            elapsed_ms = (time.time() - _call_start) * 1000
            get_collector().log_assignment_event(
                num_orders=len(orders),
                num_riders=len(riders),
                hyperparams=self._cfg.get_all(),
                assignments=assignments,
                unassigned_count=len(unassigned_orders),
                elapsed_ms=elapsed_ms,
            )
        except Exception as _ml_err:
            logger.debug(f"ML logging skipped: {_ml_err}")

        # Log final distribution
        logger.info("=" * 50)
        logger.info("FINAL ASSIGNMENT DISTRIBUTION:")
        for rid, rider_orders in sorted(assignments.items()):
            logger.info(f" Rider {rid}: {len(rider_orders)} orders")

        if unassigned_orders:
            logger.warning(f" [ALERT] STILL UNASSIGNED: {len(unassigned_orders)} (Reason: No riders online or invalid coords)")
        else:
            logger.info(" [OK] ALL ORDERS ASSIGNED SUCCESSFULLY")
        logger.info("=" * 50)

        return assignments, unassigned_orders

    def _rebalance_workload(self, assignments: Dict[int, List], rider_states: Dict, valid_riders: List):
        """
        Rebalance if workload is heavily skewed.

        Moves orders from overloaded riders (load > 1.5x average and above
        IDEAL_LOAD) to underutilized ones (load < 0.5x average) when the
        receiving rider is within pickup range and not much farther from the
        order than the current rider. Mutates ``assignments`` and
        ``rider_states`` in place.
        """
        if not assignments:
            return

        # Average load over all valid riders (idle ones included).
        total_orders = sum(len(rider_orders) for rider_orders in assignments.values())
        avg_load = total_orders / len(valid_riders) if valid_riders else 0

        overloaded = []
        underutilized = []
        for r in valid_riders:
            rid = r['id']
            load = rider_states[rid]['count']
            if load > avg_load * 1.5 and load > self.IDEAL_LOAD:  # 50% above average
                overloaded.append(rid)
            elif load < avg_load * 0.5:  # 50% below average
                underutilized.append(rid)

        if not overloaded or not underutilized:
            return

        logger.info(f"Rebalancing: {len(overloaded)} overloaded, {len(underutilized)} underutilized riders")

        for over_rid in overloaded:
            # .get() avoids creating an empty entry for a rider whose load
            # comes only from previously persisted work.
            over_orders = assignments.get(over_rid)
            if not over_orders:
                continue
            over_state = rider_states[over_rid]

            for under_rid in underutilized:
                under_state = rider_states[under_rid]
                under_capacity = self.MAX_ORDERS_PER_RIDER - under_state['count']
                if under_capacity <= 0:
                    continue

                # Never push the overloaded rider below IDEAL_LOAD; once they
                # are at it there is nothing left to offload.
                surplus = over_state['count'] - self.IDEAL_LOAD
                if surplus <= 0:
                    break

                # Orders the underutilized rider can reach and is closer to
                # (or at most 20% farther from) than the overloaded rider.
                transferable = []
                for order in over_orders:
                    o_lat, o_lon = self.get_lat_lon(order, prefix="pickup")
                    if o_lat == 0:
                        continue
                    dist_to_under = self.haversine(under_state['lat'], under_state['lon'], o_lat, o_lon)
                    dist_to_over = self.haversine(over_state['lat'], over_state['lon'], o_lat, o_lon)
                    if dist_to_under <= self.MAX_PICKUP_DISTANCE_KM and dist_to_under <= dist_to_over * 1.2:
                        transferable.append(order)

                if not transferable:
                    continue

                transfer_count = min(len(transferable), under_capacity, surplus)
                transfer_batch = transferable[:transfer_count]

                for order in transfer_batch:
                    over_orders.remove(order)
                    assignments.setdefault(under_rid, []).append(order)

                over_state['count'] -= len(transfer_batch)
                under_state['count'] += len(transfer_batch)

                logger.info(f" Rebalanced: {len(transfer_batch)} orders from Rider {over_rid} -> {under_rid}")

    def _post_process(self, assignments, rider_states):
        """Record rider history and persist per-rider state for assigned riders.

        Riders with an empty order list are skipped. The persisted state
        estimates 15 minutes of work per assigned order and stores the rider's
        last tracked position as the last drop location.
        """
        from app.services.rider.rider_history_service import RiderHistoryService
        from app.services.rider.rider_state_manager import RiderStateManager

        history_service = RiderHistoryService()
        state_mgr = RiderStateManager()
        ts = time.time()

        for rid, rider_orders in assignments.items():
            if not rider_orders:
                continue

            # NOTE(review): the meaning of the constant 5.0 is defined by
            # RiderHistoryService.update_rider_stats - confirm against it.
            history_service.update_rider_stats(rid, 5.0, len(rider_orders))

            st = rider_states[rid]
            state_mgr.states[rid] = {
                'minutes_remaining': len(rider_orders) * 15,
                'last_drop_lat': st['lat'],
                'last_drop_lon': st['lon'],
                # NOTE(review): this is a set; verify the state manager can
                # serialise it when saving.
                'active_kitchens': st['kitchens'],
                'last_updated_ts': ts,
            }

        state_mgr._save_states()