Files
Analytical_engine_backend/peek_for_insights.py
2026-05-11 12:36:20 +05:30

44 lines
1.4 KiB
Python

import duckdb
def peek_data():
    """Print two sample aggregates over the deliveries Parquet files.

    Opens a DuckDB connection, loads the ``httpfs`` extension, points the
    S3 settings at the ``sgp1.digitaloceanspaces.com`` endpoint, exposes
    the Parquet files under ``s3://nearle/parquet/deliveries/`` as a view
    named ``deliveries`` and prints the result of two test queries:

    1. the 5 ``ridername`` groups with the lowest average ``ridertime``;
    2. the 5 ``ridername`` groups with the highest summed ``deliveryamt``.

    Both queries only consider rows where ``ridertime > 0`` and
    ``userid IS NOT NULL``.

    Returns:
        None. Results are printed to stdout.

    Any exception is caught and reported as ``Error: <message>`` instead
    of propagating; the connection is closed in every case.
    """
    s3_path = 's3://nearle/parquet/deliveries/*.parquet'

    conn = None
    try:
        conn = duckdb.connect()

        # httpfs is the extension that lets DuckDB read s3:// URLs.
        conn.execute("INSTALL httpfs; LOAD httpfs;")
        conn.execute("SET s3_region='sgp1';")
        conn.execute("SET s3_endpoint='sgp1.digitaloceanspaces.com';")
        conn.execute("SET s3_url_style='path';")
        # NOTE(review): no S3 key/secret is configured here -- presumably
        # the bucket is publicly readable or credentials come from
        # elsewhere; confirm.

        # Create view. union_by_name aligns columns by name across files,
        # so files whose column sets differ can still be read together.
        conn.execute(
            f"CREATE VIEW deliveries AS SELECT * FROM "
            f"read_parquet('{s3_path}', union_by_name = true)"
        )

        # Test Query 1
        print("Querying Avg Delivery Time...")
        df1 = conn.execute("""
            SELECT ridername, ROUND(AVG(ridertime), 2) AS avg_delivery_time
            FROM deliveries
            WHERE ridertime > 0 AND userid IS NOT NULL
            GROUP BY ridername
            ORDER BY avg_delivery_time ASC
            LIMIT 5
        """).df()
        print(df1)

        # Test Query 2
        print("\nQuerying Total Revenue...")
        df2 = conn.execute("""
            SELECT ridername, ROUND(SUM(deliveryamt), 2) AS total_revenue
            FROM deliveries
            WHERE ridertime > 0 AND userid IS NOT NULL
            GROUP BY ridername
            ORDER BY total_revenue DESC
            LIMIT 5
        """).df()
        print(df2)
    except Exception as e:
        # Best-effort diagnostic script: report the failure and carry on
        # rather than surfacing a traceback.
        print(f"Error: {e}")
    finally:
        # Release the connection whether or not the queries succeeded;
        # previously it was left open on every path.
        if conn is not None:
            conn.close()
if __name__ == "__main__":
    # Run the peek only when this file is executed as a script, so that
    # importing the module does not trigger any queries.
    peek_data()