""" High-performance utilities using Apache Arrow and NumPy for geographic data. Provides vectorized operations for distances and coordinate processing. """ import numpy as np import pyarrow as pa import pyarrow.parquet as pq import logging from typing import List, Dict, Any, Tuple logger = logging.getLogger(__name__) def calculate_haversine_matrix_vectorized(lats: np.ndarray, lons: np.ndarray) -> np.ndarray: """ Calculate an N x N distance matrix using the Haversine formula. Fully vectorized using NumPy for O(N^2) speed improvement over Python loops. """ # Earth's radius in kilometers R = 6371.0 # Convert degrees to radians lats_rad = np.radians(lats) lons_rad = np.radians(lons) # Create meshgrids for pairwise differences # lats.reshape(-1, 1) creates a column vector # lats.reshape(1, -1) creates a row vector # Subtracting them creates an N x N matrix of differences dlat = lats_rad.reshape(-1, 1) - lats_rad.reshape(1, -1) dlon = lons_rad.reshape(-1, 1) - lons_rad.reshape(1, -1) # Haversine formula a = np.sin(dlat / 2)**2 + np.cos(lats_rad.reshape(-1, 1)) * np.cos(lats_rad.reshape(1, -1)) * np.sin(dlon / 2)**2 c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a)) return R * c def orders_to_arrow_table(orders: List[Dict[str, Any]]) -> pa.Table: """ Convert a list of order dictionaries to an Apache Arrow Table. This enables zero-copy operations and efficient columnar storage. """ return pa.Table.from_pylist(orders) def save_optimized_route_parquet(orders: List[Dict[str, Any]], filename: str): """ Save optimized route data to a Parquet file for high-speed analysis. Useful for logging and historical simulation replays. """ try: table = orders_to_arrow_table(orders) pq.write_table(table, filename) logger.info(f" Saved route data to Parquet: {filename}") except Exception as e: logger.error(f" Failed to save Parquet: {e}") def load_route_parquet(filename: str) -> List[Dict[str, Any]]: """ Load route data from a Parquet file and return as a list of dicts. """ table = pq.read_table(filename) return table.to_pylist()