import os
import fitz
from PIL import Image
def pdf_to_jpg(pdf_path, zoom_x=2.0, zoom_y=2.0, image_quality=90, keep_pdf=True):
    """Render every page of a PDF to a JPEG file next to the PDF.

    Page N (1-based) is written as ``<pdf stem>_page<N>.jpg`` in the
    directory that contains ``pdf_path``.

    Args:
        pdf_path: Path of the PDF to render.
        zoom_x: Horizontal scale factor passed to ``fitz.Matrix``.
        zoom_y: Vertical scale factor passed to ``fitz.Matrix``.
        image_quality: JPEG ``quality`` value handed to Pillow's ``save``.
        keep_pdf: When false, the source PDF is deleted after all pages
            have been written.

    Any exception from opening, rendering or saving propagates to the
    caller; in that case the PDF is never deleted.
    """
    # Everything below is the same for all pages, so compute it once.
    stem = os.path.splitext(os.path.basename(pdf_path))[0]
    out_dir = os.path.dirname(pdf_path)
    mat = fitz.Matrix(zoom_x, zoom_y)

    pdf_document = fitz.open(pdf_path)
    try:
        for page_number, page in enumerate(pdf_document, start=1):
            pix = page.get_pixmap(matrix=mat)
            image_path = os.path.join(out_dir, f"{stem}_page{page_number}.jpg")
            image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            image.save(image_path, "JPEG", quality=image_quality)
    finally:
        # Release the document even when a page fails, so the handle is not
        # leaked and the file is not left open.
        pdf_document.close()

    if not keep_pdf:
        os.remove(pdf_path)
def png_to_jpg(png_path, image_quality=90):
    """Convert a PNG to a JPEG in the same directory, then delete the PNG.

    The output is named ``<png stem>.jpg``. JPEG has no alpha channel, so
    images that carry transparency are flattened onto a white background
    instead of letting the hidden colour under transparent pixels show.

    Args:
        png_path: Path of the image to convert.
        image_quality: JPEG ``quality`` value handed to Pillow's ``save``.

    The source file is removed only when it is a different file from the
    JPEG that was just written.
    """
    jpg_name = os.path.splitext(os.path.basename(png_path))[0] + '.jpg'
    jpg_path = os.path.join(os.path.dirname(png_path), jpg_name)

    with Image.open(png_path) as img:
        has_alpha = img.mode in ('RGBA', 'LA', 'PA') or 'transparency' in img.info
        if has_alpha:
            rgba = img.convert('RGBA')
            rgb_img = Image.new('RGB', rgba.size, (255, 255, 255))
            # Use the alpha band as the paste mask so transparent areas
            # keep the white background.
            rgb_img.paste(rgba, mask=rgba.getchannel('A'))
        else:
            rgb_img = img.convert('RGB')

    # The pixel data is fully loaded by now, so the source handle is closed
    # before the output is written and the source is removed.
    rgb_img.save(jpg_path, 'JPEG', quality=image_quality)

    # Guard against deleting the result when the input path already resolves
    # to the output file.
    if not os.path.samefile(png_path, jpg_path):
        os.remove(png_path)
def jpeg_to_jpg(jpeg_path, image_quality=90):
    """Re-save a ``.jpeg`` image as ``<stem>.jpg`` and delete the source.

    The image is converted to RGB and re-encoded as JPEG in the directory
    that contains ``jpeg_path``.

    Args:
        jpeg_path: Path of the image to convert.
        image_quality: JPEG ``quality`` value handed to Pillow's ``save``.

    The source file is removed only when it is a different file from the
    JPEG that was just written, so calling this on a path that already
    ends in ``.jpg`` re-encodes it in place rather than deleting it.
    """
    jpg_name = os.path.splitext(os.path.basename(jpeg_path))[0] + '.jpg'
    jpg_path = os.path.join(os.path.dirname(jpeg_path), jpg_name)

    with Image.open(jpeg_path) as img:
        # convert() loads the pixel data, so rgb_img stays usable after the
        # source file handle is closed.
        rgb_img = img.convert('RGB')

    rgb_img.save(jpg_path, 'JPEG', quality=image_quality)

    # Guard against deleting the result when source and destination are the
    # same file (e.g. a ".jpg" input, or ".JPG" on a case-insensitive
    # filesystem).
    if not os.path.samefile(jpeg_path, jpg_path):
        os.remove(jpeg_path)
def convert_all_files_in_folder(folder_path, zoom_x=2.0, zoom_y=2.0, image_quality=90, keep_pdf=True):
    """Walk ``folder_path`` recursively and convert each supported file to JPEG.

    Files are dispatched on their case-insensitive name suffix:
    ``.pdf`` goes to ``pdf_to_jpg``, ``.png`` to ``png_to_jpg`` and
    ``.jpeg`` to ``jpeg_to_jpg``. Every other file is left alone.

    Args:
        folder_path: Root directory to scan.
        zoom_x: Forwarded to ``pdf_to_jpg``.
        zoom_y: Forwarded to ``pdf_to_jpg``.
        image_quality: Forwarded to whichever converter handles the file.
        keep_pdf: Forwarded to ``pdf_to_jpg``.
    """
    for current_dir, _subdirs, filenames in os.walk(folder_path):
        for filename in filenames:
            lowered = filename.lower()
            full_path = os.path.join(current_dir, filename)

            if lowered.endswith('.pdf'):
                pdf_to_jpg(full_path, zoom_x, zoom_y, image_quality, keep_pdf)
                continue
            if lowered.endswith('.png'):
                png_to_jpg(full_path, image_quality)
                continue
            if lowered.endswith('.jpeg'):
                jpeg_to_jpg(full_path, image_quality)
if __name__ == "__main__":
    # Script settings: folder to scan, render scale per axis, JPEG quality.
    target_folder = '识别专用'
    zoom_x_factor, zoom_y_factor = 2.0, 2.0
    save_quality = 90

    config = {
        # Comment out the next line and the original PDF will not be kept
        # (the lookup below then falls back to False).
        "keep_original_pdf": True,
    }
    keep_original_pdf = config.get("keep_original_pdf", False)

    convert_all_files_in_folder(
        folder_path=target_folder,
        zoom_x=zoom_x_factor,
        zoom_y=zoom_y_factor,
        image_quality=save_quality,
        keep_pdf=keep_original_pdf,
    )
|