Skip to content

Commit 1db97bf

Browse files
committed
add pdf_img
1 parent 6e7a317 commit 1db97bf

File tree

2 files changed

+54
-0
lines changed

2 files changed

+54
-0
lines changed

pdf/img/test.pdf_img1.png

31.2 KB
Loading

pdf/pdf_img.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/usr/bin/env python3
2+
3+
import fitz #pip install pymupdf
4+
import re
5+
import os
6+
7+
8+
def _save_png(pix, filename):
    """Write *pix* to *filename* as PNG using whichever save API exists."""
    # Newer PyMuPDF exposes Pixmap.save(); older releases only had writePNG().
    save = getattr(pix, "save", None) or pix.writePNG
    save(filename)


def find_imag(path, img_path):
    """Extract the image XObjects of a PDF and save each one as a PNG file.

    Every cross-reference entry of the document is scanned; an entry is
    treated as an image when its object definition contains both
    ``/Type /XObject`` and ``/Subtype /Image``.  Each image is written to
    ``<img_path>/<flattened path>_img<N>.png`` where N counts from 1.

    Args:
        path: Path of the PDF file to read.
        img_path: Directory that receives the PNG files.  It is created
            if it does not exist yet.

    Returns:
        The number of images that were extracted.
    """
    # Compile once instead of re-parsing the patterns for every xref entry.
    check_xobject = re.compile(r"/Type(?= */XObject)")
    check_image = re.compile(r"/Subtype(?= */Image)")

    # Flatten the source path into a plain file name.  Both separator styles
    # are replaced so os.path.join() can neither descend into a missing
    # sub-directory nor be overridden by an absolute path.
    base_name = path.replace("\\", "_").replace("/", "_").replace(":", "")

    img_count = 0
    pdf = fitz.open(path)
    try:
        # Prefer the current public API and fall back to the legacy private
        # names so the function works across PyMuPDF versions.
        xref_length = getattr(pdf, "xref_length", None) or pdf._getXrefLength
        xref_object = getattr(pdf, "xref_object", None) or pdf._getXrefString

        len_xref = xref_length()
        print("文件名:{}, 页数: {}, 对象: {}".format(path, len(pdf), len_xref - 1))

        # An empty img_path means "current directory"; nothing to create.
        if img_path:
            os.makedirs(img_path, exist_ok=True)

        # xref 0 is skipped; numbering of real objects starts at 1.
        for xref in range(1, len_xref):
            text = xref_object(xref)

            # Skip anything that is not both an XObject and an image.
            if not check_xobject.search(text) or not check_image.search(text):
                continue

            img_count += 1
            target = os.path.join(
                img_path, "{}_img{}.png".format(base_name, img_count)
            )

            # Build a pixmap from the image object at this xref.
            pix = fitz.Pixmap(pdf, xref)

            if pix.n - pix.alpha < 4:
                # Fewer than 4 colour components (alpha excluded): the
                # pixmap can be written as PNG directly.
                _save_png(pix, target)
            else:
                # 4 colour components (e.g. CMYK): convert to RGB first.
                rgb_pix = fitz.Pixmap(fitz.csRGB, pix)
                _save_png(rgb_pix, target)
                rgb_pix = None

            # Drop the reference so the pixmap memory can be released early.
            pix = None
    finally:
        # Always release the document handle, even if extraction failed.
        pdf.close()

    print("提取了{}张图片".format(img_count))
    return img_count
49+
50+
51+
if __name__ == '__main__':
    # Script entry point: run the extraction on test.pdf and write the
    # resulting images into the img directory (both relative paths).
    pdf_path, img_path = r'test.pdf', r'img'
    # m keeps whatever find_imag returns.
    m = find_imag(pdf_path, img_path)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy