initial project setup with README and ignore

2026-04-08 15:13:42 +05:30
commit 2d5688cb35
47 changed files with 7929 additions and 0 deletions
--- a/app/services/core/assignment_service.py
+++ b/app/services/core/assignment_service.py
@@ -0,0 +1,515 @@
+
+import logging
+import random
+import time
+from math import radians, cos, sin, asin, sqrt
+from typing import List, Dict, Any, Optional
+from collections import defaultdict
+from app.config.rider_preferences import RIDER_PREFERRED_KITCHENS
+from app.services.routing.kalman_filter import smooth_rider_locations, smooth_order_coordinates
+from app.config.dynamic_config import get_config
+from app.services.ml.ml_data_collector import get_collector
+
+logger = logging.getLogger(__name__)
+
+class AssignmentService:
+    def __init__(self):
+        self.rider_preferences = RIDER_PREFERRED_KITCHENS
+        self.earth_radius_km = 6371
+        self._cfg = get_config()
+
+    def _load_config(self):
+        """Load ML-tuned hyperparams fresh on every assignment call."""
+        cfg = self._cfg
+        self.MAX_PICKUP_DISTANCE_KM      = cfg.get("max_pickup_distance_km")
+        self.MAX_KITCHEN_DISTANCE_KM     = cfg.get("max_kitchen_distance_km")
+        self.MAX_ORDERS_PER_RIDER        = int(cfg.get("max_orders_per_rider"))
+        self.IDEAL_LOAD                  = int(cfg.get("ideal_load"))
+        self.WORKLOAD_BALANCE_THRESHOLD  = cfg.get("workload_balance_threshold")
+        self.WORKLOAD_PENALTY_WEIGHT     = cfg.get("workload_penalty_weight")
+        self.DISTANCE_PENALTY_WEIGHT     = cfg.get("distance_penalty_weight")
+        self.PREFERENCE_BONUS            = cfg.get("preference_bonus")
+        self.HOME_ZONE_BONUS_4KM         = cfg.get("home_zone_bonus_4km")
+        self.HOME_ZONE_BONUS_2KM         = cfg.get("home_zone_bonus_2km")
+        self.EMERGENCY_LOAD_PENALTY      = cfg.get("emergency_load_penalty")
+
+    def haversine(self, lat1, lon1, lat2, lon2):
+        """Calculate the great circle distance between two points."""
+        lon1, lat1, lon2, lat2 = map(radians, [float(lon1), float(lat1), float(lon2), float(lat2)])
+        dlon = lon2 - lon1 
+        dlat = lat2 - lat1 
+        a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
+        c = 2 * asin(min(1.0, sqrt(a)))  # Clamp to 1.0 to avoid domain errors 
+        return c * self.earth_radius_km
+
+    def get_lat_lon(self, obj: Dict[str, Any], prefix: str = "") -> tuple[float, float]:
+        """Generic helper to extract lat/lon from diversely named keys."""
+        # Try specific prefixes first
+        candidates = [
+            (f"{prefix}lat", f"{prefix}lon"),
+            (f"{prefix}lat", f"{prefix}long"),
+            (f"{prefix}latitude", f"{prefix}longitude"),
+        ]
+        # Also try standard keys if prefix fails
+        candidates.extend([
+            ("lat", "lon"), ("latitude", "longitude"),
+            ("pickuplat", "pickuplon"), ("pickuplat", "pickuplong"),
+            ("deliverylat", "deliverylong"), ("droplat", "droplon")
+        ])
+
+        for lat_key, lon_key in candidates:
+            if lat_key in obj and lon_key in obj and obj[lat_key] and obj[lon_key]:
+                 try:
+                     return float(obj[lat_key]), float(obj[lon_key])
+                 except: pass
+                 
+        # Special case: nested 'pickup_location'
+        if "pickup_location" in obj:
+            return self.get_lat_lon(obj["pickup_location"])
+            
+        return 0.0, 0.0
+
+    def get_order_kitchen(self, order: Dict[str, Any]) -> str:
+        possible_keys = ['storename', 'restaurantname', 'kitchenname', 'partnername', 'store_name']
+        for key in possible_keys:
+            if key in order and order[key]:
+                return str(order[key]).strip()
+        return "Unknown"
+
+    def assign_orders(self, orders: List[Dict[str, Any]], riders: List[Dict[str, Any]], reshuffle: bool = False) -> tuple[Dict[int, List[Dict[str, Any]]], List[Dict[str, Any]]]:
+        """
+        ENHANCED: Cluster-Based Load-Balanced Assignment.
+        
+        Strategy:
+        1. Cluster orders by kitchen proximity
+        2. Calculate rider workload (current capacity usage)
+        3. Assign clusters to best-fit riders (proximity + workload balance)
+        4. Rebalance if needed
+        
+        If reshuffle=True, controlled randomness is injected into rider scoring
+        so that retrying the same input can explore alternative assignments.
+        """
+        from app.services.rider.rider_history_service import RiderHistoryService
+        from app.services.rider.rider_state_manager import RiderStateManager
+        from app.services.routing.clustering_service import ClusteringService
+
+        # -- Load ML-tuned hyperparameters (or defaults on first run) ------
+        self._load_config()
+        _call_start = time.time()
+
+        # 0. Prep
+        assignments: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
+        unassigned_orders: List[Dict[str, Any]] = []
+        rider_states = {} # Track live load
+
+        # 0a. KALMAN FILTER - Smooth rider GPS locations before scoring
+        riders = smooth_rider_locations(list(riders))
+
+        # 0b. KALMAN FILTER - Smooth order delivery coordinates before clustering
+        orders = smooth_order_coordinates(list(orders))
+        
+        # 1. Parse and Filter Riders
+        valid_riders = []
+        BLOCKED_RIDERS = [1242, 1266, 1245, 1232, 1240, 1007] # Test/Blocked IDs
+
+        # Load Existing State (Persistence)
+        state_mgr = RiderStateManager()
+        
+        for r in riders:
+            # Robust ID Extraction
+            rid_raw = r.get("userid") or r.get("riderid") or r.get("id") or r.get("_id")
+            try:
+                rid = int(rid_raw)
+            except (ValueError, TypeError):
+                continue
+
+            if rid in BLOCKED_RIDERS: continue
+            
+            # Robust Status Check
+            # Keep if: onduty (1, "1", True) OR status is active/idle/online
+            is_onduty = str(r.get("onduty")) in ["1", "True"] or r.get("onduty") is True
+            is_active = r.get("status") in ["active", "idle", "online"]
+            
+            if not (is_onduty or is_active):
+                continue
+
+            # Location
+            lat, lon = self.get_lat_lon(r)
+            
+            # Fetch previous state to know if they are already busy
+            p_state = state_mgr.get_rider_state(rid)
+            
+            # If rider has valid GPS, use it. If not, fallback to Last Drop or Home.
+            if lat == 0 or lon == 0:
+                if p_state['last_drop_lat']:
+                    lat, lon = p_state['last_drop_lat'], p_state['last_drop_lon']
+                else:
+                    # Home Location Fallback
+                    from app.config.rider_preferences import RIDER_HOME_LOCATIONS
+                    lat, lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))
+
+            valid_riders.append({
+                "id": rid,
+                "lat": lat,
+                "lon": lon,
+                "obj": r
+            })
+            
+            # Initialize rider state with existing workload
+            existing_load = p_state.get('minutes_remaining', 0) / 15  # Convert minutes to order estimate
+            
+            rider_states[rid] = {
+                'lat': lat,
+                'lon': lon,
+                'kitchens': set(), 
+                'count': int(existing_load),  # Start with existing workload
+                'workload_score': existing_load  # For prioritization
+            }
+
+        if not valid_riders:
+            logger.warning("No riders passed on-duty filter. Retrying with all available riders as emergency rescue...")
+            # If no on-duty riders, we take ANY rider provided by the API to ensure assignment
+            for r in riders:
+                rid = int(r.get("userid", 0))
+                if rid in BLOCKED_RIDERS: continue
+                
+                lat, lon = self.get_lat_lon(r)
+                if lat == 0 or lon == 0:
+                    from app.config.rider_preferences import RIDER_HOME_LOCATIONS
+                    lat, lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))
+                
+                if lat != 0:
+                    valid_riders.append({"id": rid, "lat": lat, "lon": lon, "obj": r})
+                    rider_states[rid] = {
+                        'lat': lat, 'lon': lon, 'kitchens': set(), 
+                        'count': 0, 'workload_score': 0
+                    }
+
+        if not valid_riders:
+            logger.error("DANGER: Absolutely no riders available for assignment.")
+            # Mark all as unassigned
+            for o in orders:
+                o["unassigned_reason"] = "No riders found (check partner online status)."
+                unassigned_orders.append(o)
+            return assignments, unassigned_orders
+
+        logger.info(f"Found {len(valid_riders)} active riders")
+
+        # 2. CLUSTER ORDERS BY KITCHEN PROXIMITY
+        clustering_service = ClusteringService()
+        clusters = clustering_service.cluster_orders_by_kitchen(orders, max_cluster_radius_km=self.MAX_KITCHEN_DISTANCE_KM)  # radius from ML
+        
+        logger.info(f"Created {len(clusters)} order clusters")
+
+        # 3. ASSIGN CLUSTERS TO RIDERS (Load-Balanced)
+        for cluster_idx, cluster in enumerate(clusters):
+            centroid_lat, centroid_lon = cluster['centroid']
+            cluster_orders = cluster['orders']
+            cluster_size = len(cluster_orders)
+            
+            logger.info(f"Assigning cluster {cluster_idx+1}/{len(clusters)}: {cluster_size} orders at ({centroid_lat:.4f}, {centroid_lon:.4f})")
+            
+            # Find best riders for this cluster
+            candidate_riders = []
+            
+            for r in valid_riders:
+                rid = r["id"]
+                r_state = rider_states[rid]
+                
+                # Calculate distance to cluster centroid
+                dist = self.haversine(r_state['lat'], r_state['lon'], centroid_lat, centroid_lon)
+                
+                # Preference bonus & Distance Bypass
+                prefs = self.rider_preferences.get(rid, [])
+                has_preference = False
+                for k_name in cluster['kitchen_names']:
+                    if any(p.lower() in k_name.lower() or k_name.lower() in p.lower() for p in prefs):
+                        has_preference = True
+                        break
+                
+                # Dynamic Limit: 6km default, 10km for preferred kitchens
+                allowed_dist = self.MAX_PICKUP_DISTANCE_KM
+                if has_preference:
+                    allowed_dist = max(allowed_dist, 10.0) 
+
+                # Skip if too far
+                if dist > allowed_dist:
+                    continue
+                
+                # Calculate workload utilization (0.0 to 1.0)
+                utilization = r_state['count'] / self.MAX_ORDERS_PER_RIDER
+
+                # Calculate score (lower is better) - weights from DynamicConfig
+                workload_penalty  = utilization * self.WORKLOAD_PENALTY_WEIGHT
+                distance_penalty  = dist * self.DISTANCE_PENALTY_WEIGHT
+
+                # Preference bonus (ML-tuned)
+                preference_bonus = self.PREFERENCE_BONUS if has_preference else 0
+
+                # Home zone bonus (ML-tuned)
+                from app.config.rider_preferences import RIDER_HOME_LOCATIONS
+                h_lat, h_lon = RIDER_HOME_LOCATIONS.get(rid, (0.0, 0.0))
+                home_bonus = 0
+                if h_lat != 0:
+                    home_dist = self.haversine(h_lat, h_lon, centroid_lat, centroid_lon)
+                    if home_dist <= 4.0:
+                        home_bonus = self.HOME_ZONE_BONUS_4KM
+                    if home_dist <= 2.0:
+                        home_bonus = self.HOME_ZONE_BONUS_2KM
+
+                score = workload_penalty + distance_penalty + preference_bonus + home_bonus
+                
+                # RESHUFFLE: Add controlled noise so retries explore different riders
+                if reshuffle:
+                    noise = random.uniform(-15.0, 15.0)
+                    score += noise
+                
+                candidate_riders.append({
+                    'id': rid,
+                    'score': score,
+                    'distance': dist,
+                    'utilization': utilization,
+                    'current_load': r_state['count']
+                })
+            
+            if not candidate_riders:
+                logger.warning(f"No riders available for cluster {cluster_idx+1}")
+                for o in cluster_orders:
+                    o["unassigned_reason"] = f"No riders within {self.MAX_PICKUP_DISTANCE_KM}km radius of kitchen."
+                    unassigned_orders.append(o)
+                continue
+            
+            # Sort by score (best first)
+            candidate_riders.sort(key=lambda x: x['score'])
+            
+            # SMART DISTRIBUTION: Split cluster if needed
+            remaining_orders = cluster_orders[:]
+            
+            while remaining_orders and candidate_riders:
+                best_rider = candidate_riders[0]
+                rid = best_rider['id']
+                r_state = rider_states[rid]
+                
+                # How many orders can this rider take?
+                available_capacity = self.MAX_ORDERS_PER_RIDER - r_state['count']
+                
+                if available_capacity <= 0:
+                    # Rider is full, remove from candidates
+                    candidate_riders.pop(0)
+                    continue
+                
+                # Decide batch size
+                # If rider is underutilized and cluster is small, give all
+                # If rider is busy or cluster is large, split it
+                if best_rider['utilization'] < self.WORKLOAD_BALANCE_THRESHOLD:
+                    # Rider has capacity, can take more
+                    batch_size = min(available_capacity, len(remaining_orders))
+                else:
+                    # Rider is getting busy, be conservative (IDEAL_LOAD from ML)
+                    batch_size = min(self.IDEAL_LOAD - r_state['count'], len(remaining_orders), available_capacity)
+                    batch_size = max(1, batch_size)  # At least 1 order
+                
+                # Assign batch
+                batch = remaining_orders[:batch_size]
+                remaining_orders = remaining_orders[batch_size:]
+                
+                assignments[rid].extend(batch)
+                
+                # Update rider state
+                r_state['count'] += len(batch)
+                r_state['lat'] = centroid_lat
+                r_state['lon'] = centroid_lon
+                r_state['kitchens'].update(cluster['kitchen_names'])
+                r_state['workload_score'] = r_state['count'] / self.MAX_ORDERS_PER_RIDER
+                
+                logger.info(f"  -> Assigned {len(batch)} orders to Rider {rid} (load: {r_state['count']}/{self.MAX_ORDERS_PER_RIDER})")
+                
+                # Re-sort candidates by updated scores
+                for candidate in candidate_riders:
+                    if candidate['id'] == rid:
+                        candidate['utilization'] = r_state['count'] / self.MAX_ORDERS_PER_RIDER
+                        candidate['current_load'] = r_state['count']
+                        # Recalculate score
+                        workload_penalty = candidate['utilization'] * 100
+                        distance_penalty = candidate['distance'] * 2
+                        candidate['score'] = workload_penalty + distance_penalty
+                
+                candidate_riders.sort(key=lambda x: x['score'])
+
+            # If any orders left in the cluster after exhaustion of candidates
+            if remaining_orders:
+                # Instead of giving up, keep them in a pool for mandatory assignment
+                unassigned_orders.extend(remaining_orders)
+
+        # 4. EMERGENCY MANDATORY ASSIGNMENT (Ensures 0 unassigned if riders exist)
+        if unassigned_orders and valid_riders:
+            logger.info(f"[ALERT] Starting Emergency Mandatory Assignment for {len(unassigned_orders)} orders...")
+            force_pool = unassigned_orders[:]
+            unassigned_orders.clear()
+            
+            for o in force_pool:
+                # Determine pickup location
+                o_lat, o_lon = self.get_lat_lon(o, prefix="pickup")
+                if o_lat == 0:
+                    o["unassigned_reason"] = "Could not geolocate order (0,0)."
+                    unassigned_orders.append(o)
+                    continue
+
+                # Find the 'least bad' rider (Closest + Balanced Load)
+                best_emergency_rider = None
+                best_emergency_score = float('inf')
+
+                for r in valid_riders:
+                    rid = r["id"]
+                    r_state = rider_states[rid]
+                    
+                    dist = self.haversine(r_state['lat'], r_state['lon'], o_lat, o_lon)
+                    # For emergency: Distance is important, but load prevents one rider taking EVERYTHING
+                    # Score = distance + ML-tuned penalty per existing order
+                    e_score = dist + (r_state['count'] * self.EMERGENCY_LOAD_PENALTY)
+
+                    if e_score < best_emergency_score:
+                        best_emergency_score = e_score
+                        best_emergency_rider = rid
+                
+                if best_emergency_rider:
+                    assignments[best_emergency_rider].append(o)
+                    rider_states[best_emergency_rider]['count'] += 1
+                    logger.info(f"   Force-Assigned order {o.get('orderid')} to Rider {best_emergency_rider} (Score: {best_emergency_score:.2f})")
+                else:
+                    unassigned_orders.append(o)
+
+        # 5. FINAL REBALANCING (Optional)
+        # Check if any rider is overloaded while others are idle
+        self._rebalance_workload(assignments, rider_states, valid_riders)
+
+        # 6. Commit State and History
+        self._post_process(assignments, rider_states)
+
+        # 7. -- ML DATA COLLECTION -----------------------------------------
+        try:
+            elapsed_ms = (time.time() - _call_start) * 1000
+            get_collector().log_assignment_event(
+                num_orders=len(orders),
+                num_riders=len(riders),
+                hyperparams=self._cfg.get_all(),
+                assignments=assignments,
+                unassigned_count=len(unassigned_orders),
+                elapsed_ms=elapsed_ms,
+            )
+        except Exception as _ml_err:
+            logger.debug(f"ML logging skipped: {_ml_err}")
+
+        # Log final distribution
+        logger.info("=" * 50)
+        logger.info("FINAL ASSIGNMENT DISTRIBUTION:")
+        for rid, orders in sorted(assignments.items()):
+            logger.info(f"  Rider {rid}: {len(orders)} orders")
+
+        if unassigned_orders:
+            logger.warning(f"  [ALERT] STILL UNASSIGNED: {len(unassigned_orders)} (Reason: No riders online or invalid coords)")
+        else:
+            logger.info("  [OK] ALL ORDERS ASSIGNED SUCCESSFULLY")
+        logger.info("=" * 50)
+
+        return assignments, unassigned_orders
+
+    def _rebalance_workload(self, assignments: Dict[int, List], rider_states: Dict, valid_riders: List):
+        """
+        Rebalance if workload is heavily skewed.
+        Move orders from overloaded riders to idle ones if possible.
+        """
+        if not assignments:
+            return
+        
+        # Calculate average load
+        total_orders = sum(len(orders) for orders in assignments.values())
+        avg_load = total_orders / len(valid_riders) if valid_riders else 0
+        
+        # Find overloaded and underutilized riders
+        overloaded = []
+        underutilized = []
+        
+        for r in valid_riders:
+            rid = r['id']
+            load = rider_states[rid]['count']
+            
+            if load > avg_load * 1.5 and load > self.IDEAL_LOAD:  # 50% above average
+                overloaded.append(rid)
+            elif load < avg_load * 0.5:  # 50% below average
+                underutilized.append(rid)
+        
+        if not overloaded or not underutilized:
+            return
+        
+        logger.info(f"Rebalancing: {len(overloaded)} overloaded, {len(underutilized)} underutilized riders")
+        
+        # Try to move orders from overloaded to underutilized
+        for over_rid in overloaded:
+            over_orders = assignments[over_rid]
+            over_state = rider_states[over_rid]
+            
+            # Try to offload some orders
+            for under_rid in underutilized:
+                under_state = rider_states[under_rid]
+                under_capacity = self.MAX_ORDERS_PER_RIDER - under_state['count']
+                
+                if under_capacity <= 0:
+                    continue
+                
+                # Find orders that are closer to underutilized rider
+                transferable = []
+                for order in over_orders:
+                    o_lat, o_lon = self.get_lat_lon(order, prefix="pickup")
+                    if o_lat == 0:
+                        continue
+                    
+                    dist_to_under = self.haversine(under_state['lat'], under_state['lon'], o_lat, o_lon)
+                    dist_to_over = self.haversine(over_state['lat'], over_state['lon'], o_lat, o_lon)
+                    
+                    # Transfer if underutilized rider is closer or similar distance
+                    if dist_to_under <= self.MAX_PICKUP_DISTANCE_KM and dist_to_under <= dist_to_over * 1.2:
+                        transferable.append(order)
+                
+                if transferable:
+                    # Transfer up to capacity
+                    transfer_count = min(len(transferable), under_capacity, over_state['count'] - self.IDEAL_LOAD)
+                    transfer_batch = transferable[:transfer_count]
+                    
+                    # Move orders
+                    for order in transfer_batch:
+                        over_orders.remove(order)
+                        assignments[under_rid].append(order)
+                    
+                    # Update states
+                    over_state['count'] -= len(transfer_batch)
+                    under_state['count'] += len(transfer_batch)
+                    
+                    logger.info(f"  Rebalanced: {len(transfer_batch)} orders from Rider {over_rid} -> {under_rid}")
+
+    def _post_process(self, assignments, rider_states):
+        """Update History and Persistence."""
+        from app.services.rider.rider_history_service import RiderHistoryService
+        from app.services.rider.rider_state_manager import RiderStateManager
+        
+        history_service = RiderHistoryService()
+        state_mgr = RiderStateManager()
+        
+        import time
+        ts = time.time()
+        
+        for rid, orders in assignments.items():
+            if not orders: continue
+            
+            history_service.update_rider_stats(rid, 5.0, len(orders)) 
+            
+            st = rider_states[rid]
+            state_mgr.states[rid] = {
+                'minutes_remaining': len(orders) * 15, 
+                'last_drop_lat': st['lat'],
+                'last_drop_lon': st['lon'],
+                'active_kitchens': st['kitchens'],
+                'last_updated_ts': ts
+            }
+
+        state_mgr._save_states()