initial project setup with README and ignore
This commit is contained in:
63
app/core/arrow_utils.py
Normal file
63
app/core/arrow_utils.py
Normal file
@@ -0,0 +1,63 @@
|
||||
"""
|
||||
High-performance utilities using Apache Arrow and NumPy for geographic data.
|
||||
Provides vectorized operations for distances and coordinate processing.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
import logging
|
||||
from typing import List, Dict, Any, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def calculate_haversine_matrix_vectorized(lats: np.ndarray, lons: np.ndarray) -> np.ndarray:
    """
    Calculate an N x N great-circle distance matrix using the Haversine formula.

    The pairwise computation is expressed with NumPy broadcasting, so no
    Python-level loop runs over the point pairs. The work and the memory
    needed for the result are still O(N^2).

    Args:
        lats: Latitudes in decimal degrees (flattened to length N).
        lons: Longitudes in decimal degrees, with the same number of
            elements as ``lats``.

    Returns:
        An (N, N) array whose entry [i, j] is the distance in kilometers
        between point i and point j, using an Earth radius of 6371 km.

    Raises:
        ValueError: If ``lats`` and ``lons`` hold a different number of
            elements (otherwise broadcasting could silently produce a
            meaningless matrix).
    """
    # Earth's radius in kilometers
    R = 6371.0

    # Convert degrees to radians and flatten to 1-D
    lats_rad = np.radians(np.asarray(lats)).reshape(-1)
    lons_rad = np.radians(np.asarray(lons)).reshape(-1)

    if lats_rad.size != lons_rad.size:
        raise ValueError(
            "lats and lons must contain the same number of elements "
            f"(got {lats_rad.size} and {lons_rad.size})"
        )

    # Column vector minus row vector broadcasts to an N x N matrix of
    # pairwise differences.
    lat_col = lats_rad[:, np.newaxis]
    lat_row = lats_rad[np.newaxis, :]
    dlat = lat_col - lat_row
    dlon = lons_rad[:, np.newaxis] - lons_rad[np.newaxis, :]

    # Haversine formula
    a = np.sin(dlat / 2) ** 2 + np.cos(lat_col) * np.cos(lat_row) * np.sin(dlon / 2) ** 2

    # Floating-point rounding can push `a` marginally above 1 for
    # (near-)antipodal pairs, which would make sqrt(1 - a) NaN. Clamp it to
    # the mathematically valid range first.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return R * c
|
||||
|
||||
def orders_to_arrow_table(orders: List[Dict[str, Any]]) -> pa.Table:
    """
    Build an Apache Arrow Table from a list of order dictionaries.

    Each dictionary is treated as one row; the conversion is delegated
    entirely to ``pa.Table.from_pylist``.

    Args:
        orders: Order records, one dictionary per row.

    Returns:
        The resulting columnar ``pa.Table``.
    """
    arrow_table = pa.Table.from_pylist(orders)
    return arrow_table
|
||||
|
||||
def save_optimized_route_parquet(orders: List[Dict[str, Any]], filename: str) -> None:
    """
    Save optimized route data to a Parquet file.

    This is a best-effort operation: any failure while building the Arrow
    table or writing the file is logged (including the traceback) and then
    swallowed, so no exception propagates to the caller.

    Args:
        orders: Order dictionaries, converted with ``orders_to_arrow_table``.
        filename: Destination path handed to ``pq.write_table``.
    """
    try:
        table = orders_to_arrow_table(orders)
        pq.write_table(table, filename)
    except Exception as e:
        # Deliberately broad so that persisting the route never interrupts
        # the caller; logger.exception keeps the traceback that a plain
        # error message would lose.
        logger.exception(" Failed to save Parquet: %s", e)
    else:
        logger.info(" Saved route data to Parquet: %s", filename)
|
||||
|
||||
def load_route_parquet(filename: str) -> List[Dict[str, Any]]:
    """
    Read a Parquet file and return its rows as a list of dictionaries.

    Args:
        filename: Path of the Parquet file, passed to ``pq.read_table``.

    Returns:
        One dictionary per row, as produced by the table's ``to_pylist()``.
    """
    return pq.read_table(filename).to_pylist()
|
||||
Reference in New Issue
Block a user