[更新]python 试卷A3转A4的两种方式

bester · 发表于 2025-7-8 16:23

本帖最后由 bester 于 2025-7-9 16:16 编辑

临近暑期,公司好多人找我将A3的docx试卷转成A4大小

话说现在的老师太偷懒了,给出的试卷质量参差不齐,有的喜欢用A3,有的喜欢用8K

手动操作的思路是,直接打开word,将页面布局的分栏改成1栏,再将纸张大小改成A4,最后将纸张方向改成纵向

不支持双面打印的打印机,需要自己先打奇数页,再翻过来打偶数页

问题是9门功课,一门最少七八张试卷,这得干到啥时候?

代码的思路是,直接转成pdf再从中间切割,然后左右两边可能会有一点点留白,不影响使用

目前测试8K,A3能正常分割

库安装:
pip install docx2pdf
pip install PyMuPDF

代码:

[Python] 纯文本查看 复制代码

001

002

003

004

005

006

007

008

009

010

011

012

013

014

015

016

017

018

019

020

021

022

023

024

025

026

027

028

029

030

031

032

033

034

035

036

037

038

039

040

041

042

043

044

045

046

047

048

049

050

051

052

053

054

055

056

057

058

059

060

061

062

063

064

065

066

067

068

069

070

071

072

073

074

075

076

077

078

079

080

081

082

083

084

085

086

087

088

089

090

091

092

093

094

095

096

097

098

099

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

import os
import fitz  # PyMuPDF
from docx2pdf import convert
 
 
def list_docx_files(folder_path):
    """
    Recursively collect the .docx files found under a folder.

    The extension match is case-insensitive. Files whose name starts with
    ``~$`` are skipped: Word creates these lock files next to a document
    while it is open, and they are not convertible documents.

    Args:
        folder_path (str): Folder to search (sub-folders are included).

    Returns:
        list[dict]: One dict per document with the keys
            ``file_folder`` - full path of the .docx file (despite the key
            name, this is the file path, not its parent folder), and
            ``file_name`` - the file name without its extension.
        An empty list is returned when the folder does not exist or when
        an error occurs while scanning (the error is printed).
    """
    docx_files = []

    try:
        # A missing folder is reported and yields an empty result.
        if not os.path.exists(folder_path):
            print("错误：路径不存在")
            return docx_files

        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                # Skip Word's temporary lock files such as "~$paper.docx".
                if file.startswith('~$'):
                    continue
                if file.lower().endswith('.docx'):
                    docx_files.append({
                        'file_folder': os.path.join(root, file),
                        'file_name': os.path.splitext(file)[0],
                    })

    except Exception as e:
        # Best effort: report the problem and return what was collected.
        print(f"发生错误: {e}")

    return docx_files
 
def docx_to_pdf(input_docx, output_pdf):
    """
    Convert one Word document to PDF.

    Thin wrapper that forwards both arguments unchanged to ``convert``
    (imported from docx2pdf at the top of the file).

    Args:
        input_docx: Path of the source .docx file.
        output_pdf: Path handed to ``convert`` as the output location.
    """
    convert(input_docx, output_pdf)
     
def a3_to_a4(pdf_path, output_path, split_type='vertical'):
    """
    Cut every page of a PDF into two equal halves and save them as a new PDF.

    Each source page produces two consecutive output pages whose size is
    exactly the size of the half that was cut out. The paper size is not
    checked: any page is split through its middle.

    Args:
        pdf_path: Path of the PDF to read.
        output_path: Path of the PDF to write.
        split_type: ``'vertical'`` (default) cuts along a vertical line and
            emits the left half then the right half; ``'horizon'`` cuts
            along a horizontal line and emits the top half then the bottom
            half.

    Raises:
        ValueError: If ``split_type`` is not one of the two supported values.
    """
    # Validate before touching any file so a typo cannot produce an empty PDF.
    if split_type not in ('vertical', 'horizon'):
        raise ValueError(
            f"split_type must be 'vertical' or 'horizon', got {split_type!r}"
        )

    pdf = fitz.open(pdf_path)
    try:
        new_pdf = fitz.open()  # empty in-memory document receiving the halves
        try:
            for page_num in range(len(pdf)):
                rect = pdf[page_num].rect  # full rectangle of the source page

                if split_type == 'vertical':
                    mid_x = rect.x0 + rect.width / 2
                    parts = [
                        fitz.Rect(rect.x0, rect.y0, mid_x, rect.y1),  # left
                        fitz.Rect(mid_x, rect.y0, rect.x1, rect.y1),  # right
                    ]
                else:
                    mid_y = rect.y0 + rect.height / 2
                    parts = [
                        fitz.Rect(rect.x0, rect.y0, rect.x1, mid_y),  # top
                        fitz.Rect(rect.x0, mid_y, rect.x1, rect.y1),  # bottom
                    ]

                # Draw each half of the source page onto its own new page.
                for part_rect in parts:
                    new_page = new_pdf.new_page(
                        width=part_rect.width, height=part_rect.height
                    )
                    new_page.show_pdf_page(
                        new_page.rect, pdf, page_num, clip=part_rect
                    )

            new_pdf.save(output_path)
        finally:
            new_pdf.close()
    finally:
        # Close the source document even if splitting or saving failed.
        pdf.close()
 
if __name__ == "__main__":

    # Folder containing the source DOCX files; set this to your own folder.
    input_docx = r"C:\Users\administrator\Desktop\暑假作业\暑假作业学生版数学"

    # Folder for the intermediate DOCX->PDF files. Built with os.path.join
    # so no backslash escape sequence ends up inside a string literal.
    temp_pdf = os.path.join(input_docx, 'original_pdf')
    os.makedirs(temp_pdf, exist_ok=True)

    # Folder for the final split PDFs.
    output_pdf = os.path.join(input_docx, 'output_A4_pdf')
    os.makedirs(output_pdf, exist_ok=True)

    # Collect every .docx document under the input folder.
    docx_list = list_docx_files(input_docx)

    for docx in docx_list:
        # Step 1: convert the Word document to PDF.
        temp_pdf_file = os.path.join(temp_pdf, docx["file_name"] + '.pdf')
        docx_to_pdf(docx['file_folder'], temp_pdf_file)

        # Step 2: split each page of that PDF in half; the result is saved
        # in the output folder with an "A4_" file-name prefix.
        out_pdf_file = os.path.join(output_pdf, 'A4_' + docx["file_name"] + '.pdf')
        a3_to_a4(temp_pdf_file, out_pdf_file)

方式二(推荐)

1. 内容居中,不会出现不对称的情况
2. 效率高,直接操作了docx,6个文件实测不到1秒就处理完毕
3. 带UI界面,但仅支持win10以上
4. 实测支持wps 以及office 365

库安装
pip install python-docx

打包:
auto-py-to-exe

代码带UI界面版本

[Python] 纯文本查看 复制代码

001

002

003

004

005

006

007

008

009

010

011

012

013

014

015

016

017

018

019

020

021

022

023

024

025

026

027

028

029

030

031

032

033

034

035

036

037

038

039

040

041

042

043

044

045

046

047

048

049

050

051

052

053

054

055

056

057

058

059

060

061

062

063

064

065

066

067

068

069

070

071

072

073

074

075

076

077

078

079

080

081

082

083

084

085

086

087

088

089

090

091

092

093

094

095

096

097

098

099

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import os
 
from docx import Document
from docx.shared import Cm
from docx.oxml.ns import qn
from docx.enum.section import WD_ORIENT
 
class DirectoryConverterApp:
    """
    Tkinter window that batch-converts the .docx files of a chosen folder
    to a single-column, portrait, A4-sized page layout.

    Converted copies are saved in an ``A4_New_docx`` sub-folder of the
    chosen folder, each with an ``A4_`` file-name prefix.
    """

    def __init__(self, root):
        """
        Build the widgets inside the given Tk root window.

        Args:
            root: The ``tk.Tk`` window that hosts the UI.
        """
        self.root = root
        self.root.title("文件转换工具")
        self.root.geometry("500x300")
        self.root.resizable(True, True)

        # Best effort: ask for a CJK-capable default font. Only Tcl errors
        # are ignored so unrelated bugs are not silently hidden.
        try:
            self.root.option_add("*Font", "SimHei 10")
        except tk.TclError:
            pass

        # Main container frame.
        main_frame = ttk.Frame(root, padding="20")
        main_frame.pack(fill=tk.BOTH, expand=True)

        # Directory selection area, wrapped in a LabelFrame.
        dir_frame = ttk.LabelFrame(main_frame, text="选择目录", padding="10")
        dir_frame.pack(fill=tk.X, pady=10)

        # Holds the folder path shown in (and editable through) the entry.
        self.dir_path = tk.StringVar()

        # Label.
        ttk.Label(dir_frame, text="目录路径:").grid(
            row=0, column=0, padx=(5, 0), pady=5, sticky=tk.W
        )
        # Path entry.
        dir_entry = ttk.Entry(
            dir_frame, textvariable=self.dir_path, width=30
        )
        dir_entry.grid(row=0, column=1, padx=5, pady=5, sticky=tk.W)
        # Browse button; padx keeps it away from the frame border.
        browse_btn = ttk.Button(
            dir_frame, text="浏览...", command=self.browse_directory
        )
        browse_btn.grid(row=0, column=2, padx=(10, 5), pady=5)

        # Convert button.
        btn_frame = ttk.Frame(main_frame, padding=(0, 0, 50, 0))
        btn_frame.pack(pady=10)

        self.convert_btn = ttk.Button(
            btn_frame, text="开始转换", command=self.convert, width=20
        )
        self.convert_btn.pack(pady=10)

    def browse_directory(self):
        """Open a folder chooser and store the selection in the path entry."""
        directory = filedialog.askdirectory(title="选择目录")
        if directory:
            self.dir_path.set(directory)

    def convert(self):
        """
        Convert every .docx file directly inside the selected folder.

        Shows an error box when no folder is selected or the path is not an
        existing folder. Files that fail to convert are collected and
        reported at the end instead of aborting the whole batch, and the
        convert button is always re-enabled afterwards.
        """
        directory = self.dir_path.get()
        if not directory:
            messagebox.showerror("错误", "请先选择一个目录")
            return
        # The entry is editable, so guard against a mistyped path; otherwise
        # creating the output folder would silently create that path too.
        if not os.path.isdir(directory):
            messagebox.showerror("错误", "目录不存在")
            return

        output_A4_docx = os.path.join(directory, 'A4_New_docx')

        self.convert_btn.config(state=tk.DISABLED)  # grey out while working
        self.root.update_idletasks()  # let the disabled state be drawn
        failed = []
        try:
            os.makedirs(output_A4_docx, exist_ok=True)

            for docx in self.list_docx_files(directory):
                output_docx = os.path.join(
                    output_A4_docx, 'A4_' + docx['file_name'] + '.docx'
                )
                try:
                    self.convert_a3_to_a4(docx['file_folder'], output_docx)
                except Exception as e:
                    # UI boundary: remember the failure, keep converting.
                    failed.append(f"{docx['file_name']}: {e}")
        except OSError as e:
            messagebox.showerror("错误", f"发生错误: {e}")
            return
        finally:
            # Re-enable the button whatever happened above.
            self.convert_btn.config(state=tk.NORMAL)

        if failed:
            messagebox.showwarning(
                "提示", "以下文件转换失败:\n" + "\n".join(failed)
            )
        else:
            messagebox.showinfo("提示", "转换完成")

    def convert_a3_to_a4(self, input_docx, output_docx):
        """
        Save a copy of a Word document with a one-column portrait A4 layout.

        For every section: if a ``w:cols`` element exists its column count
        is set to 1 and an existing, non-empty ``w:space`` attribute is
        reset to ``'0'``; then the orientation is set to portrait and the
        page size to 21.0 cm x 29.7 cm.

        Args:
            input_docx: Path of the document to read.
            output_docx: Path where the modified copy is saved.
        """
        doc = Document(input_docx)

        for section in doc.sections:
            # Column settings live in the section's raw XML properties.
            found = section._sectPr.xpath('./w:cols')
            if found:
                cols = found[0]
                cols.set(qn('w:num'), '1')
                if cols.get(qn('w:space')):
                    cols.set(qn('w:space'), '0')

            # Portrait A4.
            section.orientation = WD_ORIENT.PORTRAIT
            section.page_height = Cm(29.7)
            section.page_width = Cm(21.0)

        doc.save(output_docx)

    def list_docx_files(self, path):
        """
        List the .docx files located directly in a folder (no recursion).

        Sub-folders and Word lock files (names starting with ``~$``, created
        while a document is open) are skipped. The extension match is
        case-insensitive.

        Args:
            path: Folder to scan.

        Returns:
            list[dict]: One dict per document with ``file_name`` (name
            without extension) and ``file_folder`` (full path of the file).
        """
        docx_files = []

        for file in os.listdir(path):
            full_path = os.path.join(path, file)

            if file.startswith('~$') or os.path.isdir(full_path):
                continue

            if file.lower().endswith('.docx'):
                docx_files.append({
                    'file_name': os.path.splitext(file)[0],
                    'file_folder': full_path,
                })

        return docx_files
if __name__ == "__main__":
    # Create the Tk root window, attach the converter UI to it, then hand
    # control to the Tk event loop.
    window = tk.Tk()
    converter_ui = DirectoryConverterApp(window)
    window.mainloop()

bester · 发表于 2025-7-9 15:08

lingqixzw 发表于 2025-7-9 14:23
不知道楼主这个是否对称分割呀？我遇到的A3是奇数页偏左一点、偶数页偏右一点分割，才能把竖行切割不带字。 ...

默认是居中分割,不过确实在测试中有发现过这个问题,这个好像是原档排版问题,我等会会更新代码,这个问题就解决了

qixia888 · 发表于 2025-7-8 21:04

老大，做个成品

fengwolf3 · 发表于 2025-7-8 16:26

感谢分享，有没有效果图哈？

sabotage · 发表于 2025-7-8 16:34

感谢分享

cyrilcao · 发表于 2025-7-8 16:34

感谢分享，十分好，老师偷懒导致我们编辑起来太复杂

Zoey_lihaha · 发表于 2025-7-8 16:35

感谢分享

wq3stone · 发表于 2025-7-8 16:35

不错不错

zcldp · 发表于 2025-7-8 16:35

很实用，挺好

unrapple · 发表于 2025-7-8 16:38

感谢分享

a4778006 · 发表于 2025-7-8 16:38

感谢分享

qweaxd · 发表于 2025-7-8 16:42

感谢分享！！

帐号		自动登录	找回密码
密码			注册[Register]

[Python 原创] [更新]python 试卷A3转A4的两种方式

免费评分

本帖被以下淘专辑推荐: