64 lines
2.2 KiB
Python
64 lines
2.2 KiB
Python
"""
|
|
High-performance utilities using Apache Arrow and NumPy for geographic data.
|
|
Provides vectorized operations for distances and coordinate processing.
|
|
"""
|
|
|
|
import numpy as np
|
|
import pyarrow as pa
|
|
import pyarrow.parquet as pq
|
|
import logging
|
|
from typing import List, Dict, Any, Tuple
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
def calculate_haversine_matrix_vectorized(
    lats: np.ndarray, lons: np.ndarray, radius: float = 6371.0
) -> np.ndarray:
    """
    Calculate an N x N great-circle distance matrix using the Haversine formula.

    Pairwise differences are built with NumPy broadcasting, so the work runs
    as vectorized array operations instead of Python loops. The result still
    has N * N entries, so time and memory grow quadratically with N.

    Args:
        lats: Latitudes in degrees. Any array-like is accepted and is
            flattened to one dimension.
        lons: Longitudes in degrees, with the same number of elements as
            ``lats``.
        radius: Radius of the sphere; distances are returned in the same
            unit. Defaults to 6371.0 (Earth's radius in kilometers).

    Returns:
        An (N, N) array whose entry ``[i, j]`` is the distance between
        point ``i`` and point ``j``.

    Raises:
        ValueError: If ``lats`` and ``lons`` do not contain the same number
            of elements.
    """
    # Convert degrees to radians and flatten to 1-D vectors.
    lats_rad = np.radians(np.asarray(lats)).reshape(-1)
    lons_rad = np.radians(np.asarray(lons)).reshape(-1)

    # Without this check a single-element input would silently broadcast
    # against the other one and produce a meaningless matrix.
    if lats_rad.size != lons_rad.size:
        raise ValueError(
            "lats and lons must have the same number of elements, "
            f"got {lats_rad.size} and {lons_rad.size}"
        )

    # Column vector minus row vector broadcasts to an N x N difference matrix.
    dlat = lats_rad[:, np.newaxis] - lats_rad[np.newaxis, :]
    dlon = lons_rad[:, np.newaxis] - lons_rad[np.newaxis, :]

    # cos(latitude) is needed for both points of every pair; compute it once.
    cos_lat = np.cos(lats_rad)

    # Haversine formula
    a = (
        np.sin(dlat / 2) ** 2
        + cos_lat[:, np.newaxis] * cos_lat[np.newaxis, :] * np.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push `a` marginally outside [0, 1]
    # (e.g. for near-antipodal points), which would turn the square roots
    # below into NaN. Clamp it back into the valid range.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return radius * c
|
|
|
|
def orders_to_arrow_table(orders: List[Dict[str, Any]]) -> pa.Table:
    """
    Build an Apache Arrow Table from a list of order dictionaries.

    The orders end up in Arrow's columnar representation, which is what
    downstream code relies on for zero-copy operations and efficient storage.
    """
    arrow_table = pa.Table.from_pylist(orders)
    return arrow_table
|
|
|
|
def save_optimized_route_parquet(orders: List[Dict[str, Any]], filename: str) -> bool:
    """
    Save optimized route data to a Parquet file for high-speed analysis.

    Useful for logging and historical simulation replays. Saving is
    best-effort: a failure is logged with its traceback and reported through
    the return value instead of being raised to the caller.

    Args:
        orders: Order dictionaries, converted to an Arrow table with
            ``orders_to_arrow_table`` before being written.
        filename: Destination passed to ``pq.write_table``.

    Returns:
        True if the table was written, False if converting or writing failed.
    """
    try:
        table = orders_to_arrow_table(orders)
        pq.write_table(table, filename)
    except Exception as e:
        # Deliberately broad: persisting route data must never crash the
        # caller. logger.exception keeps the traceback so the failure can
        # still be diagnosed from the logs.
        logger.exception(" Failed to save Parquet: %s", e)
        return False

    logger.info(" Saved route data to Parquet: %s", filename)
    return True
|
|
|
|
def load_route_parquet(filename: str) -> List[Dict[str, Any]]:
    """
    Read route data from a Parquet file.

    The file is read into an Arrow table, which is then converted to a list
    of dictionaries and returned.
    """
    return pq.read_table(filename).to_pylist()
|