initial commit
This commit is contained in:
24
list_s3.py
Normal file
24
list_s3.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import duckdb
|
||||
|
||||
def list_s3_files():
    """Probe a fixed set of Parquet globs on the Spaces bucket and print row counts.

    Opens a default DuckDB connection, loads the ``httpfs`` extension, points
    it at the ``sgp1`` DigitalOcean Spaces endpoint with path-style URLs, and
    runs ``SELECT count(*)`` over each glob. One line per glob is printed to
    stdout: ``<glob>: <row count>`` on success, or ``<glob>: Not found`` when
    DuckDB rejects the query.

    Returns:
        None. Results are only printed.

    Raises:
        duckdb.Error: If installing/loading ``httpfs`` or applying the S3
            settings fails. Only the per-glob queries are best-effort.
    """
    conn = duckdb.connect()
    try:
        conn.execute("INSTALL httpfs; LOAD httpfs;")
        conn.execute("SET s3_region='sgp1';")
        conn.execute("SET s3_endpoint='sgp1.digitaloceanspaces.com';")
        conn.execute("SET s3_url_style='path';")

        # The bucket itself is not listed here; instead, a row count is
        # attempted against each candidate prefix to see which ones resolve.
        paths = [
            "s3://nearle/parquet/tenants/*.parquet",
            "s3://nearle/parquet/riders/*.parquet",
            "s3://nearle/parquet/orders/*.parquet",
        ]
        for p in paths:
            try:
                res = conn.execute(
                    f"SELECT count(*) FROM read_parquet('{p}')"
                ).fetchall()
            except duckdb.Error:
                # Catch only DuckDB failures so Ctrl-C / SystemExit still
                # stop the script. NOTE(review): "Not found" is printed for
                # any DuckDB error, which presumably also covers auth and
                # network failures, not just a glob with no matches.
                print(f"{p}: Not found")
            else:
                print(f"{p}: {res[0][0]}")
    finally:
        # Release the connection even if setup or a query raises.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
    # Run the probe only when executed as a script, not when imported.
    list_s3_files()
|
||||
Reference in New Issue
Block a user