34 lines
1.1 KiB
Python
34 lines
1.1 KiB
Python
import duckdb
|
|
import os
|
|
import json
|
|
|
|
# Database path handed to duckdb.connect() inside verify_counts().
DUCKDB_PATH = 'analytics.duckdb'
|
|
|
|
def verify_counts():
    """Count rows in the deliveries parquet data and dump the result to JSON.

    Connects to the DuckDB database at ``DUCKDB_PATH``, applies the S3
    settings (region ``sgp1``, endpoint ``sgp1.digitaloceanspaces.com``,
    path-style URLs), and (re)creates a ``deliveries`` view over
    ``s3://nearle/parquet/deliveries/*.parquet``. It then records:

    * ``total_orders`` -- ``COUNT(*)`` over the view, as a Python ``int``.
    * ``top_tenants`` -- up to ten ``{tenantname, orders}`` records for
      non-NULL tenant names, ordered by row count descending.

    Any ``Exception`` raised while configuring or querying is stored as a
    string under the ``error`` key instead of propagating; values collected
    before the failure are kept. The connection is always closed, and the
    collected dict is then written to ``verification_results.json``.

    Errors from ``duckdb.connect`` itself, or from writing the JSON file,
    are not caught here.
    """
    connection = duckdb.connect(DUCKDB_PATH)
    report = {}

    # Same setup as service
    s3_settings = (
        "SET s3_region='sgp1';",
        "SET s3_endpoint='sgp1.digitaloceanspaces.com';",
        "SET s3_url_style='path';",
    )

    try:
        for statement in s3_settings:
            connection.execute(statement)

        # Expose every parquet file under the prefix as a single view;
        # union_by_name is passed so files are combined by column name.
        parquet_glob = 's3://nearle/parquet/deliveries/*.parquet'
        create_view_sql = f"CREATE OR REPLACE VIEW deliveries AS SELECT * FROM read_parquet('{parquet_glob}', union_by_name = true)"
        connection.execute(create_view_sql)

        count_row = connection.execute("SELECT COUNT(*) FROM deliveries").fetchone()
        report['total_orders'] = int(count_row[0])

        top_tenants_sql = "SELECT tenantname, COUNT(*) as orders FROM deliveries WHERE tenantname IS NOT NULL GROUP BY 1 ORDER BY 2 DESC LIMIT 10"
        tenants_frame = connection.execute(top_tenants_sql).df()
        report['top_tenants'] = tenants_frame.to_dict('records')
    except Exception as exc:
        # Record the failure in the report rather than aborting, so the
        # JSON file is still produced.
        report['error'] = str(exc)
    finally:
        connection.close()

    with open('verification_results.json', 'w') as out_file:
        json.dump(report, out_file, indent=4)
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the verification once and write the JSON report.
    verify_counts()
|