initial commit

2026-05-11 12:36:20 +05:30
commit 384cbe8019
15377 changed files with 2360544 additions and 0 deletions
--- a/list_s3.py
+++ b/list_s3.py
@@ -0,0 +1,24 @@
+import duckdb
+
+def list_s3_files():
+    """Print the row count of each candidate S3 Parquet glob, or why it could not be read."""
+    # Buckets are not listed directly; each candidate glob is probed with a count(*) query.
+    paths = [
+        "s3://nearle/parquet/tenants/*.parquet",
+        "s3://nearle/parquet/riders/*.parquet",
+        "s3://nearle/parquet/orders/*.parquet",
+    ]
+    with duckdb.connect() as conn:  # context manager closes the connection even on error
+        conn.execute("INSTALL httpfs; LOAD httpfs;")
+        conn.execute("SET s3_region='sgp1';")
+        conn.execute("SET s3_endpoint='sgp1.digitaloceanspaces.com';")
+        conn.execute("SET s3_url_style='path';")
+        for p in paths:
+            try:
+                row = conn.execute(f"SELECT count(*) FROM read_parquet('{p}')").fetchone()
+                print(f"{p}: {row[0]}")
+            except duckdb.Error as exc:  # narrow catch: Ctrl-C and real bugs still propagate
+                print(f"{p}: Not found ({exc})")
+
+if __name__ == "__main__":  # run the probe only when executed as a script, not on import
+    list_s3_files()