"""Print the plain text of a .docx file to standard output.

Usage: python read_docx.py FILE.docx
"""

import os
import sys
import xml.etree.ElementTree as ET
import zipfile
from typing import Iterator, List

# WordprocessingML main namespace; ElementTree exposes tags as "{namespace}local".
_W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
_NAMESPACES = {"w": _W_NS}

_W_P = f"{{{_W_NS}}}p"
_W_T = f"{{{_W_NS}}}t"
_W_TAB = f"{{{_W_NS}}}tab"
_W_BR = f"{{{_W_NS}}}br"
_W_CR = f"{{{_W_NS}}}cr"
_W_BR_TYPE = f"{{{_W_NS}}}type"

# Subtrees that never contribute text:
#   * nested w:p  -- listed separately by the paragraph search, so descending
#                    into it here would print its text twice;
#   * w:pPr/w:rPr -- formatting properties; w:pPr may hold tab-stop definitions
#                    (w:tabs/w:tab) that reuse the w:tab element name.
_SKIPPED_SUBTREES = frozenset(
    {_W_P, f"{{{_W_NS}}}pPr", f"{{{_W_NS}}}rPr"}
)


def _iter_text_pieces(element: ET.Element) -> Iterator[str]:
    """Yield the text fragments below *element* in document order.

    ``w:t`` yields its text (``""`` when the element is empty), ``w:tab``
    yields a tab, and ``w:cr`` or a text-wrapping ``w:br`` yields a newline.
    Page and column breaks (``w:br`` with another ``w:type``) yield nothing.
    """
    for child in element:
        tag = child.tag
        if tag in _SKIPPED_SUBTREES:
            continue
        if tag == _W_T:
            yield child.text or ""
        elif tag == _W_TAB:
            yield "\t"
        elif tag == _W_CR:
            yield "\n"
        elif tag == _W_BR:
            # A missing w:type means a plain text-wrapping line break.
            if child.get(_W_BR_TYPE, "textWrapping") == "textWrapping":
                yield "\n"
        else:
            yield from _iter_text_pieces(child)


def _extract_paragraphs(xml_content: bytes) -> List[str]:
    """Return one string per paragraph of ``word/document.xml`` with content.

    Paragraphs containing no text, tab, or line-break element are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: if *xml_content* is not well-formed.
    """
    root = ET.fromstring(xml_content)
    paragraphs = []
    for paragraph in root.findall(".//w:p", _NAMESPACES):
        pieces = list(_iter_text_pieces(paragraph))
        # An empty list means "no text elements at all"; a list of empty
        # strings (empty w:t elements) still produces an (empty) output line.
        if pieces:
            paragraphs.append("".join(pieces))
    return paragraphs


def read_docx(file_path: str) -> None:
    """Print the text of the .docx file at *file_path*, one paragraph per line.

    Tabs and line breaks inside a paragraph are printed as ``\\t`` and
    ``\\n``. This is a best-effort command-line helper: a missing file, an
    invalid archive, or malformed XML is reported with a printed message
    instead of a raised exception.

    Args:
        file_path: Path to the .docx (zip) file to read.

    Returns:
        None. All output goes to standard output.
    """
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.")
        return

    try:
        with zipfile.ZipFile(file_path, "r") as docx:
            # The main text content is in word/document.xml.
            xml_content = docx.read("word/document.xml")
        print("\n".join(_extract_paragraphs(xml_content)))
    except Exception as e:
        # Script boundary: report any failure (bad zip, missing member,
        # malformed XML, output error) rather than crash with a traceback.
        print(f"Error reading {file_path}: {e}")


if __name__ == "__main__":
    if len(sys.argv) > 1:
        read_docx(sys.argv[1])
    else:
        print("Usage: python read_docx.py FILE.docx")