Files
Analytical_engine_backend/list_s3.py
2026-05-11 12:36:20 +05:30

25 lines
806 B
Python

import duckdb
def list_s3_files():
    """Print the row count of each known Parquet dataset under ``s3://nearle``.

    Opens a DuckDB connection, loads the ``httpfs`` extension, points it at
    the ``sgp1.digitaloceanspaces.com`` endpoint with path-style URLs, and
    runs ``count(*)`` over each hard-coded glob. One line is printed per
    glob: the row count on success, or ``Not found`` followed by the DuckDB
    error text when the query fails.

    Returns:
        None. Results are written to stdout.
    """
    # Globs to probe. This does not list the bucket; each pattern is simply
    # read with read_parquet to see whether it resolves to any data.
    paths = [
        "s3://nearle/parquet/tenants/*.parquet",
        "s3://nearle/parquet/riders/*.parquet",
        "s3://nearle/parquet/orders/*.parquet",
    ]

    conn = duckdb.connect()
    try:
        conn.execute("INSTALL httpfs; LOAD httpfs;")
        conn.execute("SET s3_region='sgp1';")
        conn.execute("SET s3_endpoint='sgp1.digitaloceanspaces.com';")
        conn.execute("SET s3_url_style='path';")

        for p in paths:
            try:
                row = conn.execute(
                    f"SELECT count(*) FROM read_parquet('{p}')"
                ).fetchone()
            except duckdb.Error as exc:
                # Only DuckDB failures are reported here; KeyboardInterrupt
                # and programming errors propagate. The reason is printed
                # because auth and network failures also land in this branch,
                # not just missing files.
                print(f"{p}: Not found ({exc})")
            else:
                print(f"{p}: {row[0]}")
    finally:
        # Release the connection even if extension setup or a query raised.
        conn.close()
# Script entry point: run the probe only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    list_s3_files()