import duckdb


def peek_data() -> None:
    """Print two sample aggregations over the deliveries Parquet files.

    Opens an in-memory DuckDB connection, loads the ``httpfs`` extension,
    points the S3 settings at the ``sgp1.digitaloceanspaces.com`` endpoint,
    and exposes ``s3://nearle/parquet/deliveries/*.parquet`` as a view named
    ``deliveries``. It then prints:

    1. The five riders with the lowest average ``ridertime``.
    2. The five riders with the highest summed ``deliveryamt``.

    Both queries only consider rows where ``ridertime > 0`` and ``userid``
    is not NULL.

    This is a best-effort diagnostic: any exception is caught and reported
    on stdout as ``Error: ...`` rather than propagated. The connection is
    always closed before returning.
    """
    try:
        conn = duckdb.connect()
        try:
            conn.execute("INSTALL httpfs; LOAD httpfs;")
            conn.execute("SET s3_region='sgp1';")
            conn.execute("SET s3_endpoint='sgp1.digitaloceanspaces.com';")
            conn.execute("SET s3_url_style='path';")

            # Create view. union_by_name lets files with differing column
            # sets be read together, matched by column name.
            s3_path = 's3://nearle/parquet/deliveries/*.parquet'
            conn.execute(
                f"CREATE VIEW deliveries AS SELECT * FROM "
                f"read_parquet('{s3_path}', union_by_name = true)"
            )

            # Test Query 1
            print("Querying Avg Delivery Time...")
            df1 = conn.execute("""
                SELECT
                    ridername,
                    ROUND(AVG(ridertime), 2) AS avg_delivery_time
                FROM deliveries
                WHERE ridertime > 0 AND userid IS NOT NULL
                GROUP BY ridername
                ORDER BY avg_delivery_time ASC
                LIMIT 5
            """).df()
            print(df1)

            # Test Query 2
            print("\nQuerying Total Revenue...")
            df2 = conn.execute("""
                SELECT
                    ridername,
                    ROUND(SUM(deliveryamt), 2) AS total_revenue
                FROM deliveries
                WHERE ridertime > 0 AND userid IS NOT NULL
                GROUP BY ridername
                ORDER BY total_revenue DESC
                LIMIT 5
            """).df()
            print(df2)
        finally:
            # Release the connection on both the success and failure paths;
            # the original never closed it.
            conn.close()
    except Exception as e:
        # Top-level boundary of a peek script: report and carry on.
        print(f"Error: {e}")


if __name__ == "__main__":
    peek_data()