Files
Analytical_engine_backend/get_important_cols.py
2026-05-11 12:36:20 +05:30

17 lines
721 B
Python

import duckdb
def get_special_cols():
    """Print and return the deliveries parquet columns matching key terms.

    Opens a DuckDB connection (no database path given), loads the ``httpfs``
    extension, configures the S3 region/endpoint/URL style for the ``sgp1``
    DigitalOcean Spaces endpoint, and runs ``DESCRIBE`` over
    ``s3://nearle/parquet/deliveries/*.parquet``. The first field of each
    DESCRIBE row is taken as the column name.

    A column is kept when its lower-cased name contains any of: ``name``,
    ``amt``, ``amount``, ``rider``, ``tenant``, ``time``, ``status``.

    Returns:
        list: The matching column names, in the order DESCRIBE reported them.
        The same list is also printed to stdout.
    """
    # Substrings that mark a column as "special"; matching is case-insensitive.
    keywords = ("name", "amt", "amount", "rider", "tenant", "time", "status")

    conn = duckdb.connect()
    try:
        conn.execute("INSTALL httpfs; LOAD httpfs;")
        conn.execute("SET s3_region='sgp1';")
        conn.execute("SET s3_endpoint='sgp1.digitaloceanspaces.com';")
        conn.execute("SET s3_url_style='path';")
        rows = conn.execute(
            "DESCRIBE SELECT * FROM read_parquet('s3://nearle/parquet/deliveries/*.parquet')"
        ).fetchall()
    finally:
        # Release the connection even when the extension load or the remote
        # read fails; the original never closed it.
        conn.close()

    cols = [row[0] for row in rows]

    filtered = []
    for col in cols:
        lowered = col.lower()  # lower-case once instead of once per keyword
        if any(keyword in lowered for keyword in keywords):
            filtered.append(col)

    print("Filtered Columns:", filtered)
    # Returning the list lets callers reuse the result instead of parsing stdout.
    return filtered
if __name__ == "__main__":
    # Script entry point: run the column scan only when executed directly,
    # not when this module is imported.
    get_special_cols()