Skip to content
This repository was archived by the owner on Jun 29, 2024. It is now read-only.

Commit 0ea86f8

Browse files
authored May 29, 2024
Add files via upload
1 parent a658dc6 commit 0ea86f8

File tree

1 file changed

+46
-0
lines changed

1 file changed

+46
-0
lines changed
 

‎pdf.py

+46
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,46 @@
1+
import PyPDF2
2+
import fitz # PyMuPDF
3+
from docx import Document
4+
from PIL import Image
5+
from docx.shared import Pt
6+
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
7+
8+
def pdf_to_image(pdf_path, image_path):
    """Render every page of a PDF to its own PNG file.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF.
        image_path: Where the images go. If it names an existing
            directory, files are written inside it as ``page<N>.png``;
            otherwise it is used as a literal file-name prefix, giving
            ``<image_path>page<N>.png``. ``N`` starts at 1.
    """
    import os  # local import so the block does not rely on a file-level one

    # A bare directory name must get a trailing separator; otherwise it is
    # glued straight onto the file name and the images land next to the
    # directory instead of inside it.
    if os.path.isdir(image_path):
        prefix = os.path.join(image_path, "")
    else:
        prefix = image_path

    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as pdf_document:
        for page_number, page in enumerate(pdf_document, start=1):
            pixmap = page.get_pixmap()
            pixmap.save(f"{prefix}page{page_number}.png")
14+
15+
def pdf_to_text(pdf_path, text_path):
    """Extract the text of every page of a PDF into a UTF-8 text file.

    Args:
        pdf_path: Path of the PDF to read with PyPDF2.
        text_path: Path of the text file to create or overwrite.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Page texts are concatenated back to back with no separator.
        # "or ''" keeps the join safe should a page yield no text at all.
        text = ''.join(page.extract_text() or '' for page in reader.pages)

    with open(text_path, 'w', encoding='utf-8') as text_file:
        text_file.write(text)
24+
25+
def text_to_document(text_path, doc_path):
    """Convert a UTF-8 text file into a .docx file, one paragraph per line.

    Each line is stripped of surrounding whitespace, added as a
    left-aligned paragraph, and its text set to 12pt. Blank lines become
    empty paragraphs.

    Args:
        text_path: Path of the UTF-8 text file to read.
        doc_path: Path of the .docx file to write.
    """
    document = Document()
    with open(text_path, 'r', encoding='utf-8') as text_file:
        for line in text_file:
            paragraph = document.add_paragraph(line.strip())
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
            # An empty line yields a paragraph with no runs, so indexing
            # runs[0] would raise IndexError; format whatever runs exist.
            for run in paragraph.runs:
                run.font.size = Pt(12)  # Set font size to 12pt (adjust as needed)
                # You can add more formatting options here

    document.save(doc_path)
36+
37+
# Example usage
# Input PDF and output locations for the three conversion steps.
pdf_file = r"C:\Users\DELL\Downloads\Roshini Khammam.pdf"
# Directory (or file-name prefix) handed to pdf_to_image for the page PNGs.
image_output_path = r"C:\Users\DELL\Downloads"
text_output_path = r"C:\Users\DELL\textfile.txt"
doc_output_path = r"C:\Users\DELL\document.docx"

# Run the conversions only when executed as a script, so importing this
# module for its functions does not touch the file system.
if __name__ == "__main__":
    pdf_to_image(pdf_file, image_output_path)
    pdf_to_text(pdf_file, text_output_path)
    # The .docx is built from the text file produced by the previous step.
    text_to_document(text_output_path, doc_output_path)
46+

0 commit comments

Comments
 (0)