34 lines
1.2 KiB
Python
34 lines
1.2 KiB
Python
import duckdb
|
|
|
|
def run_test():
|
|
try:
|
|
conn = duckdb.connect()
|
|
conn.execute("INSTALL httpfs; LOAD httpfs;")
|
|
conn.execute("SET s3_region='sgp1';")
|
|
conn.execute("SET s3_endpoint='sgp1.digitaloceanspaces.com';")
|
|
conn.execute("SET s3_url_style='path';")
|
|
|
|
print("Querying riders with ridertime > 0...")
|
|
sql = """
|
|
SELECT ridername, ROUND(AVG(ridertime), 2) AS avg_time
|
|
FROM read_parquet('s3://nearle/parquet/deliveries/*.parquet', union_by_name = true)
|
|
WHERE ridertime > 0 AND ridername IS NOT NULL
|
|
GROUP BY ridername
|
|
ORDER BY avg_time DESC
|
|
LIMIT 10
|
|
"""
|
|
df = conn.execute(sql).df()
|
|
if df.empty:
|
|
print("NO DATA RETURNED!")
|
|
# Check counts
|
|
print("Counts for ridertime > 0:")
|
|
print(conn.execute("SELECT COUNT(*) FROM read_parquet('s3://nearle/parquet/deliveries/*.parquet', union_by_name = true) WHERE ridertime > 0").fetchall())
|
|
else:
|
|
print(df)
|
|
|
|
except Exception as e:
|
|
print(f"Error: {e}")
|
|
|
|
if __name__ == "__main__":
|
|
run_test()
|