import
os
import
concurrent.futures
from
pypdf
import
PdfReader, PdfWriter
from
tqdm
import
tqdm
from
PIL
import
Image, ImageEnhance
from
io
import
BytesIO
import
multiprocessing
def blacky(im):
    """Binarize an image and re-encode it as a Group 4 compressed TIFF.

    The image is converted to grayscale ('L'), run through the contrast,
    brightness, sharpness and color enhancers in that order, thresholded
    at 128 into a 1-bit image, and saved to an in-memory TIFF buffer.

    Args:
        im: A PIL image.

    Returns:
        The PIL image obtained by opening the in-memory TIFF buffer.
    """
    gray = im.convert('L')

    # (enhancer class, factor) pairs, applied in this exact order.
    enhancement_steps = (
        (ImageEnhance.Contrast, 3),
        (ImageEnhance.Brightness, 1.5),
        (ImageEnhance.Sharpness, 2),
        (ImageEnhance.Color, 1.5),
    )
    for enhancer_cls, factor in enhancement_steps:
        gray = enhancer_cls(gray).enhance(factor)

    # Lookup table: levels below the cutoff map to 0, all others to 1.
    cutoff = 128
    lookup = [int(level >= cutoff) for level in range(256)]
    bilevel = gray.point(lookup, '1')

    tiff_bytes = BytesIO()
    bilevel.save(
        tiff_bytes,
        format="TIFF",
        compression='group4',
        optimize=True,
        dpi=[300, 300],
    )
    return Image.open(tiff_bytes)
def get_page_number_from_indirect(reader, indirect_reference):
    """Return the zero-based index of the page with the given reference.

    Args:
        reader: Object exposing a ``pages`` iterable whose elements have an
            ``indirect_reference`` attribute.
        indirect_reference: The reference to look for (compared with ``==``).

    Returns:
        The index of the first matching page, or ``None`` if no page matches.
    """
    matching_positions = (
        position
        for position, candidate in enumerate(reader.pages)
        if candidate.indirect_reference == indirect_reference
    )
    return next(matching_positions, None)
def add_bookmarks_to_writer(writer, reader, outlines, parent=None):
    """Copy an outline (bookmark) tree into ``writer``, preserving nesting.

    ``outlines`` is walked in order. Every non-list entry is added to the
    writer as an outline item under ``parent``. A list entry is treated as
    the children of the entry added immediately before it, which is the
    nested-list layout pypdf uses for ``PdfReader.outline``. It is therefore
    processed recursively with that preceding item as its parent, instead of
    being flattened onto the current level.

    Args:
        writer: Target writer; must provide
            ``add_outline_item(title, page_number, parent=...)``.
        reader: Source reader, used to translate each entry's ``/Page``
            reference into a page index.
        outlines: Sequence of outline entries and nested lists of entries.
        parent: Outline item under which top-level entries of ``outlines``
            are attached; ``None`` attaches them at the root.
    """
    previous_item = None
    for item in outlines:
        if isinstance(item, list):
            # A nested list holds the children of the entry preceding it.
            # If a list appears before any entry, keep the current parent.
            child_parent = parent if previous_item is None else previous_item
            add_bookmarks_to_writer(writer, reader, item, parent=child_parent)
            continue

        title = item.get('/Title')
        page_num = get_page_number_from_indirect(reader, item.get('/Page'))
        # Remember the created item so a following nested list can hang
        # its entries underneath it.
        previous_item = writer.add_outline_item(title, page_num, parent=parent)
def process_pdf(file_index, choice, pdf_file, quality=None):
    """Rewrite one PDF with the selected size-reduction method.

    All pages of ``pdf_file`` are copied into a new writer, the operation
    selected by ``choice`` is applied, the source outline (if any) is copied
    over, and the result is written next to the source file as
    ``reduced_<name>.pdf``. Any exception is caught and reported with
    ``print`` so that one failing file does not abort the whole batch.

    Args:
        file_index: Index of the file in the caller's list (not used here).
        choice: Operation selector:
            1 - copy the document metadata into the writer,
            2 - remove images,
            3 - re-encode images with ``quality``,
            4 - compress page content streams,
            5 - replace images with their binarized TIFF version.
            Any other value only copies the pages.
        pdf_file: Path of the PDF to process.
        quality: Image quality for ``choice == 3``; required in that case.
    """
    try:
        reader = PdfReader(pdf_file)
        writer = PdfWriter()
        for page in tqdm(reader.pages, desc=f"Processing {pdf_file}"):
            writer.add_page(page)

        if choice == 1:
            if reader.metadata is not None:
                writer.add_metadata(reader.metadata)
            else:
                print("没有元数据可以添加。")
        elif choice == 2:
            writer.remove_images()
        elif choice == 3:
            if quality is None:
                raise ValueError("图片质量值未提供")
            for page in tqdm(writer.pages, desc=f"Compressing images {pdf_file}"):
                for img in page.images:
                    img.replace(img.image, quality=quality)
        elif choice == 4:
            for page in tqdm(writer.pages, desc=f"Applying lossless compression {pdf_file}"):
                page.compress_content_streams()
        elif choice == 5:
            for page in tqdm(writer.pages, desc=f"Binarizing images {pdf_file}"):
                for img in page.images:
                    img.replace(blacky(img.image))

        if reader.outline:
            add_bookmarks_to_writer(writer, reader, reader.outline)

        # Prefix only the file name, not the whole path, so that an input
        # such as "docs/a.pdf" is written to "docs/reduced_a.pdf" rather
        # than the non-existent "reduced_docs/a.pdf".
        directory, file_name = os.path.split(pdf_file)
        stem = os.path.splitext(file_name)[0]
        output_file = os.path.join(directory, f"reduced_{stem}.pdf")

        with open(output_file, "wb") as f:
            writer.write(f)
        print(f"Processed file saved as {output_file}")
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller
        # continue with the remaining files.
        print(f"处理文件 {pdf_file} 时发生错误:{e}")
def main():
    """Interactively pick PDFs in the current directory and process them.

    Lists the ``.pdf`` files of the current directory with their indices,
    asks which files to process, which method to use and (for method 3) the
    image quality, then runs ``process_pdf`` for every selected file in a
    process pool. Afterwards the directory's PDF files are listed again.
    """
    pdf_files = [name for name in os.listdir('.') if name.endswith('.pdf')]
    if not pdf_files:
        print("当前目录下没有找到PDF文件。")
        return

    # Show the index of every file before asking for indices; otherwise the
    # user has no way to know which number refers to which file.
    for idx, file_name in enumerate(pdf_files):
        print(f"{idx}: {file_name}")

    indices = input_indices(pdf_files)
    choice = input_choice()
    quality = input_quality(choice)
    if indices is None or choice is None or (choice == 3 and quality is None):
        return

    indices = list(indices)
    task_quality = quality if choice == 3 else None

    # No point in starting more workers than there are files; 61 is the
    # documented upper bound for ProcessPoolExecutor on Windows.
    max_workers = max(1, min(os.cpu_count() or 1, len(indices), 61))

    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Map each future to its file so failures can be reported by name.
        future_to_file = {
            executor.submit(
                process_pdf, idx, choice, pdf_files[idx], task_quality
            ): pdf_files[idx]
            for idx in indices
        }
        for future in concurrent.futures.as_completed(future_to_file):
            try:
                future.result()
            except Exception as exc:
                print(f"处理文件 {future_to_file[future]} 时发生错误:{exc}")

    # List the directory again so the newly written files are visible.
    pdf_files = [name for name in os.listdir('.') if name.endswith('.pdf')]
    for idx, file_name in enumerate(pdf_files):
        print(f"{idx}: {file_name}")
def input_indices(pdf_files):
    """Prompt until the user enters a valid selection of file indices.

    The user may type ``all`` (case-insensitive) or a comma-separated list
    of integers. Every integer must be a valid index into ``pdf_files``.
    Repeated indices are collapsed, keeping the first occurrence, so the
    same file is never scheduled twice and no two workers write the same
    output file.

    Args:
        pdf_files: The list of candidate file names.

    Returns:
        A list of distinct indices into ``pdf_files`` in the order entered,
        or all indices in ascending order for ``all``.
    """
    total = len(pdf_files)
    while True:
        selection = input("输入'all'以处理所有PDF文件,或者输入用逗号分隔的文件索引:").strip().lower()
        if selection == 'all':
            return list(range(total))

        try:
            indices = [int(part) for part in selection.split(',')]
        except ValueError:
            print("输入无效。请输入'all'或者用逗号分隔的索引。")
            continue

        if all(0 <= idx < total for idx in indices):
            # dict.fromkeys removes duplicates while preserving order.
            return list(dict.fromkeys(indices))
        print("所有索引必须在0到{}之间。".format(total - 1))
def input_choice():
    """Show the method menu and prompt until a number from 1 to 5 is entered.

    The menu is printed before every attempt. Non-numeric input and numbers
    outside 1-5 both produce the same error message and another attempt.

    Returns:
        The chosen method as an int in the range 1-5.
    """
    menu_lines = (
        "选择压缩PDF的方式",
        "1: 删除重复对象",
        "2: 删除图像",
        "3: 降低图片质量",
        "4: 使用无损压缩",
        "5: 二值化压缩为tif",
    )
    while True:
        for line in menu_lines:
            print(line)

        try:
            selected = int(input("输入选择 (1-5): "))
        except ValueError:
            selected = None

        if selected is not None and 1 <= selected <= 5:
            return selected
        print("输入无效。请输入1到5之间的数字。")
def input_quality(choice):
    """Prompt for an image quality when the chosen method needs one.

    Only method 3 uses a quality value. For it, the user is prompted until
    an integer from 1 to 100 is entered; surrounding whitespace is ignored.

    Args:
        choice: The selected processing method.

    Returns:
        The quality as an int in the range 1-100 when ``choice == 3``,
        otherwise ``None``.
    """
    if choice != 3:
        return None

    while True:
        text = input("请输入图片质量(1-100):").strip()
        # isdecimal() only accepts characters that int() can parse.
        # isdigit() also accepts e.g. superscript digits, for which int()
        # raises ValueError and would crash the prompt loop.
        if text.isdecimal() and 1 <= int(text) <= 100:
            return int(text)
        print("输入错误,请重新输入图片质量(1-100)。")
# Script entry point: only runs when the file is executed directly, not when
# it is imported (e.g. by worker processes started by the process pool).
if __name__ == "__main__":
    # Per the multiprocessing docs, freeze_support() must be called right
    # after the __main__ guard for frozen Windows executables; it has no
    # effect otherwise.
    multiprocessing.freeze_support()
    main()