initial commit
This commit is contained in:
43
peek_for_insights.py
Normal file
43
peek_for_insights.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import duckdb
|
||||
|
||||
def peek_data():
    """Print two sample rider aggregates computed over the deliveries Parquet data.

    Opens a DuckDB connection with default settings, loads the ``httpfs``
    extension, applies the S3 region / endpoint / URL-style settings, and
    exposes every file matching ``s3://nearle/parquet/deliveries/*.parquet``
    as a ``deliveries`` view. Two queries are then run and their results
    printed as DataFrames:

    1. the five riders with the lowest average ``ridertime``;
    2. the five riders with the highest summed ``deliveryamt``.

    Both queries only consider rows where ``ridertime > 0`` and ``userid``
    is not NULL.

    Returns:
        None. Results are written to stdout.

    Raises:
        Nothing. Any exception is caught and reported on stdout as
        ``Error: <message>`` so the script stays a best-effort peek.
    """
    try:
        # The context manager closes the connection on success and on
        # failure; the original never released it.
        with duckdb.connect() as conn:
            conn.execute("INSTALL httpfs; LOAD httpfs;")
            conn.execute("SET s3_region='sgp1';")
            conn.execute("SET s3_endpoint='sgp1.digitaloceanspaces.com';")
            conn.execute("SET s3_url_style='path';")

            # Create view. union_by_name lets files with differing column
            # sets be read together (columns are matched by name).
            s3_path = 's3://nearle/parquet/deliveries/*.parquet'
            conn.execute(f"CREATE VIEW deliveries AS SELECT * FROM read_parquet('{s3_path}', union_by_name = true)")

            # Test Query 1
            print("Querying Avg Delivery Time...")
            df1 = conn.execute("""
SELECT ridername, ROUND(AVG(ridertime), 2) AS avg_delivery_time
FROM deliveries
WHERE ridertime > 0 AND userid IS NOT NULL
GROUP BY ridername
ORDER BY avg_delivery_time ASC
LIMIT 5
""").df()
            print(df1)

            # Test Query 2
            print("\nQuerying Total Revenue...")
            df2 = conn.execute("""
SELECT ridername, ROUND(SUM(deliveryamt), 2) AS total_revenue
FROM deliveries
WHERE ridertime > 0 AND userid IS NOT NULL
GROUP BY ridername
ORDER BY total_revenue DESC
LIMIT 5
""").df()
            print(df2)
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on
        # rather than crash with a traceback.
        print(f"Error: {e}")
|
||||
|
||||
if __name__ == "__main__":
    # Run the sample queries only when executed as a script, not on import.
    peek_data()
|
||||
Reference in New Issue
Block a user