"""Print the plain text of a .docx file, one paragraph per line.

Usage: python read_docx.py <file_path>
"""

import os
import sys
import xml.etree.ElementTree as ET
import zipfile
from typing import Optional

# WordprocessingML namespace; ElementTree spells qualified tags as "{uri}local".
_W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
_W_P = f"{{{_W_NS}}}p"
_W_T = f"{{{_W_NS}}}t"


def _paragraph_text(paragraph):
    """Return the concatenated text of *paragraph*'s own ``w:t`` nodes.

    Nested ``w:p`` elements (e.g. text-box content) are not descended into:
    the caller visits every paragraph separately, so descending here would
    emit the nested text twice.

    Returns:
        The joined text, or ``None`` when the paragraph has no ``w:t``
        element of its own.
    """
    pieces = []
    has_text_node = False
    # Explicit stack, children pushed in reverse, gives a document-order walk.
    pending = list(paragraph)[::-1]
    while pending:
        node = pending.pop()
        if node.tag == _W_P:
            continue
        if node.tag == _W_T:
            has_text_node = True
            if node.text:
                pieces.append(node.text)
        pending.extend(list(node)[::-1])
    return "".join(pieces) if has_text_node else None


def _extract_text(xml_content):
    """Return the text of a ``word/document.xml`` payload, one line per paragraph."""
    # NOTE: xml.etree is not hardened against maliciously constructed XML;
    # only feed this documents from sources you trust.
    root = ET.fromstring(xml_content)
    lines = []
    for paragraph in root.iter(_W_P):
        text = _paragraph_text(paragraph)
        # Paragraphs without any text element are skipped entirely.
        if text is not None:
            lines.append(text)
    return "\n".join(lines)


def read_docx(file_path: str) -> Optional[str]:
    """Print the text of the .docx at *file_path* to stdout.

    The text is taken from the ``word/document.xml`` member of the archive.
    Paragraphs are separated by newlines. Diagnostics are written to stderr
    so they never mix with the extracted text.

    Args:
        file_path: Path to the .docx file.

    Returns:
        The extracted text on success, or ``None`` if the file is missing
        or could not be read or parsed (a message is printed to stderr).
    """
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.", file=sys.stderr)
        return None

    try:
        # Close the archive as soon as the one member we need is in memory.
        with zipfile.ZipFile(file_path, "r") as docx:
            xml_content = docx.read("word/document.xml")
        text = _extract_text(xml_content)
        print(text)
    except Exception as e:
        # Script-level boundary: report any failure (bad zip, missing
        # member, malformed XML, output error) instead of a traceback.
        print(f"Error reading {file_path}: {e}", file=sys.stderr)
        return None
    return text


if __name__ == "__main__":
    if len(sys.argv) > 1:
        sys.exit(0 if read_docx(sys.argv[1]) is not None else 1)
    else:
        print("Usage: python read_docx.py <file_path>", file=sys.stderr)
        sys.exit(2)