# Extracted from a file-viewer page (original listing: 16 lines, 461 B, Python).
import json
import os

import docx


def extract_docx_content(file_path):
    """Return the text of a .docx document as one newline-separated string.

    Args:
        file_path: Location of the document; handed unchanged to
            ``docx.Document``.

    Returns:
        The ``text`` of each entry in the document's ``paragraphs``
        collection, in order, joined with a newline character.
    """
    document = docx.Document(file_path)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)
# Script section: dump the document's text to a line-numbered UTF-8 file.
content = extract_docx_content('Nearle_Full_API_Documentation.docx')

output_path = 'scratch/docx_content_utf8.txt'
# open(..., 'w') does not create missing parent directories, so make sure
# the target directory exists instead of failing with FileNotFoundError.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

with open(output_path, 'w', encoding='utf-8') as f:
    lines = content.split('\n')
    # Each output line is "<zero-based index>: <original line>".
    f.writelines(f"{i}: {line}\n" for i, line in enumerate(lines))