File tree 1 file changed +28
-0
lines changed
1 file changed +28
-0
lines changed Original file line number Diff line number Diff line change @@ -22,3 +22,31 @@ for file_name in pdf_files:
22
22
with fitz.open(os.path.join(pdf_dir, file_name)) as doc:
23
23
print_annotation_details(doc)
24
24
print("=" * 50)
25
+
26
+
27
+ import fitz
28
+
29
def extract_text_and_images(doc, *, output_dir=".", save_images=True):
    """Print the text of every page in *doc* and report its embedded images.

    For each page the plain text is printed, followed by one line per
    embedded image giving its xref and pixel dimensions.  When
    *save_images* is true, each image's raw bytes are also written to
    ``image_<xref>.<ext>`` inside *output_dir*.

    Args:
        doc: An open PyMuPDF document (iterable over its pages and
            providing ``extract_image(xref)``).
        output_dir: Directory that receives the image files.  It is
            created if it does not exist.  Defaults to the current
            working directory.
        save_images: Set to ``False`` to only print image details
            without writing anything to disk.
    """
    # Local import so this function does not depend on the module header.
    import os

    if save_images:
        os.makedirs(output_dir, exist_ok=True)

    for page in doc:
        # Extract text
        print("Extracted text:")
        print(page.get_text("text"))

        # Extract images
        print("Found images:")
        for img in page.get_images(full=True):
            xref = img[0]  # XREF of the image
            base_image = doc.extract_image(xref)
            if not base_image:
                # Nothing could be extracted for this xref; skip it rather
                # than fail on the missing keys below.
                continue

            print(
                f"Image {xref} details: "
                f"{base_image['width']}x{base_image['height']} pixels"
            )

            if not save_images:
                continue

            # The bytes are stored in the image's own format (reported
            # under "ext"), so name the file accordingly instead of
            # labelling everything as PNG.
            extension = base_image.get("ext") or "png"
            image_filename = os.path.join(
                output_dir, f"image_{xref}.{extension}"
            )
            with open(image_filename, "wb") as imgfile:
                imgfile.write(base_image["image"])
47
+
48
# Open the PDF, run the extraction, and close the document when done.
# The context manager mirrors how documents are opened in the loop above
# and guarantees the handle is released even if extraction raises.
file_path = 'path_to_your_pdf.pdf'  # Specify the path to your PDF
with fitz.open(file_path) as doc:
    extract_text_and_images(doc)
52
+
You can’t perform that action at this time.
0 commit comments