# File listing metadata: 44 lines, 1.4 KiB, Python.
import duckdb


def peek_data():
    """Run two sample aggregate queries over the deliveries Parquet files.

    Opens a DuckDB connection (no database file is given), loads the
    ``httpfs`` extension, points the S3 settings at
    ``sgp1.digitaloceanspaces.com`` with path-style URLs, and exposes every
    file matching ``s3://nearle/parquet/deliveries/*.parquet`` as a view
    named ``deliveries``. Two result frames are then printed:

    1. the five riders with the lowest average ``ridertime``;
    2. the five riders with the highest summed ``deliveryamt``.

    Both queries only consider rows where ``ridertime > 0`` and ``userid``
    is not NULL.

    Returns:
        None. Results are written to stdout.

    Raises:
        Nothing. Any ``Exception`` is reported on stdout as
        ``Error: <message>`` instead of being propagated.
    """
    conn = None
    try:
        conn = duckdb.connect()
        conn.execute("INSTALL httpfs; LOAD httpfs;")
        conn.execute("SET s3_region='sgp1';")
        conn.execute("SET s3_endpoint='sgp1.digitaloceanspaces.com';")
        conn.execute("SET s3_url_style='path';")

        # Create view
        s3_path = 's3://nearle/parquet/deliveries/*.parquet'
        # union_by_name lets files with differing column sets be read together.
        conn.execute(
            f"CREATE VIEW deliveries AS "
            f"SELECT * FROM read_parquet('{s3_path}', union_by_name = true)"
        )

        # Test Query 1
        print("Querying Avg Delivery Time...")
        df1 = conn.execute("""
            SELECT ridername, ROUND(AVG(ridertime), 2) AS avg_delivery_time
            FROM deliveries
            WHERE ridertime > 0 AND userid IS NOT NULL
            GROUP BY ridername
            ORDER BY avg_delivery_time ASC
            LIMIT 5
        """).df()
        print(df1)

        # Test Query 2
        print("\nQuerying Total Revenue...")
        df2 = conn.execute("""
            SELECT ridername, ROUND(SUM(deliveryamt), 2) AS total_revenue
            FROM deliveries
            WHERE ridertime > 0 AND userid IS NOT NULL
            GROUP BY ridername
            ORDER BY total_revenue DESC
            LIMIT 5
        """).df()
        print(df2)
    except Exception as e:
        # Script-level boundary: report the failure and return normally.
        print(f"Error: {e}")
    finally:
        # Release the connection even when a query fails; conn is still None
        # if connect() itself raised.
        if conn is not None:
            conn.close()
# Run the sample queries only when this file is executed as a script.
if __name__ == "__main__":
    peek_data()