Skip to content

Commit e8f399d

Browse files
Merge pull request #1 from openize-com/muhammadumar-patch
Add Support for Batch Conversion via Directory Input
2 parents 3452bfc + e337872 commit e8f399d

File tree

2 files changed

+19
-7
lines changed

2 files changed

+19
-7
lines changed

packages/markitdown/src/openize/markitdown/core.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
import os
2+
13
from processor import DocumentProcessor
24

35

@@ -9,3 +11,10 @@ def convert_document(self, input_file, insert_into_llm=False):
911
"""Run the document conversion process."""
1012
processor = DocumentProcessor(self.output_dir)
1113
processor.process_document(input_file, insert_into_llm)
14+
15+
def convert_directory(self, input_dir: str, insert_into_llm: bool = False):
    """Convert every supported document found directly inside *input_dir*.

    Only regular files whose extension (compared case-insensitively) is one
    of ``.docx``, ``.pdf``, ``.xlsx`` or ``.pptx`` are converted; other
    entries, including subdirectories, are skipped. The directory is not
    traversed recursively.

    Each matching file is handed to ``self.convert_document`` together with
    *insert_into_llm*. An exception raised by that call is not caught here,
    so it stops the batch and propagates to the caller.

    Args:
        input_dir: Path of the directory to scan.
        insert_into_llm: Forwarded unchanged to ``convert_document``.

    Raises:
        OSError: If *input_dir* cannot be listed (for example it does not
            exist or is not a directory).
    """
    supported_exts = {".docx", ".pdf", ".xlsx", ".pptx"}

    # os.listdir() yields entries in arbitrary, platform-dependent order;
    # sorting makes the processing order (and log output) reproducible.
    for filename in sorted(os.listdir(input_dir)):
        filepath = os.path.join(input_dir, filename)
        if not os.path.isfile(filepath):
            continue
        if os.path.splitext(filename)[1].lower() not in supported_exts:
            continue
        self.convert_document(filepath, insert_into_llm)

packages/markitdown/src/openize/markitdown/main.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,10 @@ def main():
4848
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
4949

5050
parser = argparse.ArgumentParser(description="Convert documents to Markdown.")
51-
parser.add_argument("input_file", help="Path to the input document (PDF, Word, etc.)")
52-
parser.add_argument("-o", "--output-dir", required=True, help="Directory to save the converted Markdown file")
51+
input_group = parser.add_mutually_exclusive_group(required=True)
52+
input_group.add_argument("--input-file", help="Path to the input document (PDF, Word, etc.)")
53+
input_group.add_argument("--input-dir", help="Path to a directory containing supported documents")
54+
parser.add_argument("-o", "--output-dir", required=True, help="Directory to save the converted Markdown file(s)")
5355
parser.add_argument("--insert-into-llm", action="store_true", help="Insert output into LLM")
5456

5557
args = parser.parse_args()
@@ -66,14 +68,15 @@ def main():
6668
ensure_env_variable("OPENAI_API_KEY", "Enter your OpenAI API key: ")
6769
ensure_env_variable("OPENAI_MODEL", "Enter OpenAI model name (default: gpt-4): ", default="gpt-4")
6870

69-
# Run conversion
71+
# Run conversion for either a single file or a directory
7072
markitdown = MarkItDown(args.output_dir)
71-
markitdown.convert_document(args.input_file, args.insert_into_llm)
73+
74+
if args.input_file:
75+
markitdown.convert_document(args.input_file, args.insert_into_llm)
76+
elif args.input_dir:
77+
markitdown.convert_directory(args.input_dir, args.insert_into_llm)
7278

7379
except Exception as e:
7480
logging.error(f"Error: {e}", exc_info=True)
7581
sys.exit(1)
7682

77-
78-
if __name__ == "__main__":
79-
main()

0 commit comments

Comments
 (0)