python-docx这个强大的库来实现。
pip install python-docx
pip install lxml # 依赖项
from docx import Document
from docx.shared import Pt, Inches, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.style import WD_STYLE_TYPE
def create_document_with_formatting():
    """Build a small demo document that exercises basic python-docx formatting.

    The document receives core properties (author and title), a centred
    level-0 heading, a centred level-1 heading, and one body paragraph made
    of three runs: the plain paragraph text, a bold run, and a red run that
    is rendered in 12 pt Microsoft YaHei.

    Returns:
        The in-memory ``Document``. Nothing is written to disk; the caller
        decides where to save it.
    """
    # Imported locally so this first snippet stays usable on its own.
    from docx.oxml.ns import qn

    doc = Document()

    # 1. Document properties
    doc.core_properties.author = "张三"
    doc.core_properties.title = "自动化排版示例文档"

    # 2. Title
    title = doc.add_heading('Python自动化排版报告', 0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # 3. Subtitle
    subtitle = doc.add_heading('技术文档 v1.0', 1)
    subtitle.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # 4. Body paragraph
    para = doc.add_paragraph('这是使用Python自动化生成的文档内容。')

    # 5. Character formatting: one bold run, one red run
    run = para.add_run('这是一段加粗的文字。')
    run.bold = True
    run = para.add_run(' 这是一段红色的文字。')
    run.font.color.rgb = RGBColor(255, 0, 0)

    # 6. Typeface and size (applies to the red run only, as `run` now
    #    refers to it)
    font = run.font
    font.name = '微软雅黑'
    # `font.name` fills only the ASCII / high-ANSI font slots. Chinese glyphs
    # are chosen from the East Asian slot, so it has to be set explicitly or
    # the text keeps the default CJK typeface.
    run._element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')
    font.size = Pt(12)

    return doc
from docx.shared import Cm, Mm
from docx.oxml.ns import qn
from docx.enum.table import WD_TABLE_ALIGNMENT
from datetime import datetime
import os
class WordAutoFormatter:
    """Build a consistently formatted Word document on top of python-docx.

    On construction the document (blank, or loaded from a template) is given
    fixed margins, A4 page size and three custom styles: ``CustomTitle1``,
    ``CustomBody`` and ``CodeStyle``. The ``add_*`` methods then append
    content to ``self.doc`` in the order they are called.
    """

    # Token inside footer text that add_footer() replaces with a live PAGE field.
    PAGE_NUMBER_PLACEHOLDER = "{{页码}}"

    # Entries used by add_table_of_contents() when the caller supplies none.
    DEFAULT_TOC_ITEMS = (
        ("1. 引言", 1),
        ("2. 技术实现", 1),
        (" 2.1 环境配置", 2),
        (" 2.2 代码实现", 2),
        ("3. 结果分析", 1),
        ("4. 结论", 1),
    )

    def __init__(self, template_path=None):
        """Create the working document and apply page settings and styles.

        :param template_path: optional path of a template document. When it
            is omitted or does not exist on disk, a blank document is used.
        """
        if template_path and os.path.exists(template_path):
            self.doc = Document(template_path)
        else:
            self.doc = Document()
        self._setup_page_settings()
        self._create_styles()

    # ------------------------------------------------------------------
    # Low-level helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _set_font_name(font, name):
        """Set *name* on a python-docx ``Font`` for Latin and East Asian text.

        ``font.name`` fills only the ASCII / high-ANSI slots of ``w:rFonts``;
        CJK glyphs are selected through ``w:eastAsia``, so it is set as well.
        """
        font.name = name
        # Setting `font.name` above guarantees that rPr and rFonts exist.
        font.element.rPr.rFonts.set(qn('w:eastAsia'), name)

    @staticmethod
    def _set_fill(properties, fill):
        """Give a ``w:tcPr`` / ``w:pPr`` element a solid background colour.

        :param properties: the properties element that owns the shading.
        :param fill: background colour as a hex RGB string such as 'F0F0F0'.
        """
        shd = properties.find(qn('w:shd'))
        if shd is None:
            shd = properties.makeelement(qn('w:shd'))
            shd.set(qn('w:val'), 'clear')
            shd.set(qn('w:color'), 'auto')
            # Callers pass properties of freshly created cells/paragraphs, so
            # appending keeps the children in schema order.
            properties.append(shd)
        shd.set(qn('w:fill'), fill)

    @staticmethod
    def _clear_paragraphs(story):
        """Remove every paragraph from a header or footer."""
        for paragraph in story.paragraphs:
            element = paragraph._element
            element.getparent().remove(element)

    @staticmethod
    def _add_page_field(paragraph):
        """Append a run holding a Word PAGE field to *paragraph*; return the run."""
        run = paragraph.add_run()
        r = run._r
        begin = r.makeelement(qn('w:fldChar'))
        begin.set(qn('w:fldCharType'), 'begin')
        instruction = r.makeelement(qn('w:instrText'))
        instruction.set(qn('xml:space'), 'preserve')
        instruction.text = 'PAGE'
        end = r.makeelement(qn('w:fldChar'))
        end.set(qn('w:fldCharType'), 'end')
        r.extend((begin, instruction, end))
        return run

    # ------------------------------------------------------------------
    # Document set-up
    # ------------------------------------------------------------------
    def _setup_page_settings(self):
        """Apply fixed margins and A4 page size to every section."""
        for section in self.doc.sections:
            # Margins
            section.top_margin = Cm(2.54)
            section.bottom_margin = Cm(2.54)
            section.left_margin = Cm(3.17)
            section.right_margin = Cm(3.17)
            # Page size (A4)
            section.page_width = Cm(21)
            section.page_height = Cm(29.7)

    def _create_styles(self):
        """Create the custom styles, skipping any name that already exists."""
        styles = self.doc.styles

        # Level-1 title style
        if 'CustomTitle1' not in styles:
            style = styles.add_style('CustomTitle1', WD_STYLE_TYPE.PARAGRAPH)
            self._set_font_name(style.font, '黑体')
            style.font.size = Pt(16)
            style.font.bold = True
            style.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
            style.paragraph_format.space_after = Pt(12)

        # Body text style
        if 'CustomBody' not in styles:
            style = styles.add_style('CustomBody', WD_STYLE_TYPE.PARAGRAPH)
            self._set_font_name(style.font, '宋体')
            style.font.size = Pt(12)
            style.paragraph_format.line_spacing = 1.5
            style.paragraph_format.first_line_indent = Cm(0.74)  # first-line indent
            style.paragraph_format.space_after = Pt(6)

        # Code style: a CHARACTER style, so it can only be applied to runs
        if 'CodeStyle' not in styles:
            style = styles.add_style('CodeStyle', WD_STYLE_TYPE.CHARACTER)
            style.font.name = 'Consolas'
            style.font.size = Pt(10)
            style.font.color.rgb = RGBColor(0, 128, 0)

    # ------------------------------------------------------------------
    # Content
    # ------------------------------------------------------------------
    def add_cover_page(self, title, subtitle=None, author=None, date=None):
        """Append a cover page and finish it with a page break.

        :param title: main title, centred in 28 pt bold.
        :param subtitle: optional subtitle, centred in 14 pt italic.
        :param author: optional author name, right-aligned near the bottom.
        :param date: optional date text, right-aligned below the author.
        """
        # Start on a new page only when something already precedes the cover;
        # in an empty document a leading break would leave a blank first page.
        if self.doc.paragraphs or self.doc.tables:
            self.doc.add_page_break()

        # Push the title down the page with spacing before the paragraph.
        para = self.doc.add_paragraph()
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        para.paragraph_format.space_before = Cm(10)

        # Main title
        run = para.add_run(title)
        self._set_font_name(run.font, '黑体')
        run.font.size = Pt(28)
        run.font.bold = True
        run.font.color.rgb = RGBColor(0, 51, 102)

        if subtitle:
            para = self.doc.add_paragraph()
            para.alignment = WD_ALIGN_PARAGRAPH.CENTER
            run = para.add_run(subtitle)
            run.font.size = Pt(14)
            run.font.italic = True

        # Empty paragraphs move the author block towards the page bottom.
        for _ in range(15):
            self.doc.add_paragraph()

        # Author / date block
        if author:
            para = self.doc.add_paragraph()
            para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
            run = para.add_run(f"作者:{author}")
            run.font.size = Pt(12)
        if date:
            para = self.doc.add_paragraph()
            para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
            run = para.add_run(f"日期:{date}")
            run.font.size = Pt(12)

        # The cover ends here.
        self.doc.add_page_break()

    def add_table_of_contents(self, toc_items=None):
        """Append a manually built table of contents followed by a page break.

        python-docx does not generate a table of contents by itself, so the
        entries are written as plain paragraphs.

        :param toc_items: optional iterable of ``(text, level)`` pairs. Level
            1 entries are flush left; deeper levels are indented 0.74 cm per
            level. Defaults to ``DEFAULT_TOC_ITEMS``.
        """
        if toc_items is None:
            toc_items = self.DEFAULT_TOC_ITEMS

        # Heading of the contents page
        para = self.doc.add_paragraph("目 录")
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        run = para.runs[0]
        run.font.size = Pt(16)
        run.font.bold = True
        self.doc.add_paragraph()

        for text, level in toc_items:
            para = self.doc.add_paragraph()
            if level >= 2:
                para.paragraph_format.left_indent = Cm(0.74 * (level - 1))
            run = para.add_run(text)
            run.font.size = Pt(12)

        self.doc.add_page_break()

    def add_formatted_table(self, headers, data,
                            table_style='LightGrid',
                            header_bg_color=None):
        """Append a centred table with a bold header row, then an empty line.

        :param headers: column titles; their count fixes the column count.
        :param data: iterable of rows; each value is converted with ``str``.
        :param table_style: table style to apply.
            NOTE(review): 'LightGrid' looks like a style id rather than the
            display name 'Light Grid'; python-docx appears to accept ids only
            through a deprecated fallback -- confirm before changing.
        :param header_bg_color: optional hex RGB string (e.g. 'D9EAD3') used
            as the header row background.
        """
        table = self.doc.add_table(rows=1, cols=len(headers))
        table.style = table_style
        table.alignment = WD_TABLE_ALIGNMENT.CENTER

        # Header row
        header_cells = table.rows[0].cells
        for i, header in enumerate(headers):
            cell = header_cells[i]
            cell.text = str(header)
            para = cell.paragraphs[0]
            para.runs[0].bold = True
            para.alignment = WD_ALIGN_PARAGRAPH.CENTER
            if header_bg_color:
                # A new cell carries no w:shd element, so it must be created
                # instead of looked up.
                self._set_fill(cell._element.get_or_add_tcPr(), header_bg_color)

        # Data rows
        for row_data in data:
            row_cells = table.add_row().cells
            for i, cell_data in enumerate(row_data):
                row_cells[i].text = str(cell_data)
                row_cells[i].paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

        self.doc.add_paragraph()  # spacer after the table

    def add_code_block(self, code, language='python'):
        """Append *code* as a grey-shaded paragraph, then an empty line.

        The text is wrapped in Markdown-style fences that name *language*.

        :param code: source text; newlines become line breaks in the run.
        :param language: label written after the opening fence.
        """
        para = self.doc.add_paragraph()
        run = para.add_run(f"```{language}\n{code}\n```")
        # 'CodeStyle' is a character style: it belongs on the run. Assigning
        # it to the paragraph is rejected by python-docx with a ValueError.
        run.style = 'CodeStyle'
        run.font.name = 'Consolas'
        run.font.size = Pt(10)
        run.font.color.rgb = RGBColor(0, 128, 0)

        # Paragraph shading has to live inside w:pPr, not directly under w:p.
        self._set_fill(para._element.get_or_add_pPr(), 'F0F0F0')  # light grey

        self.doc.add_paragraph()

    def add_footer(self, text="页脚内容"):
        """Replace the first section's footer with centred grey 9 pt text.

        Every occurrence of ``PAGE_NUMBER_PLACEHOLDER`` in *text* is replaced
        by a PAGE field, so "第 {{页码}} 页" shows the current page number.

        :param text: footer text, optionally containing the placeholder.
        """
        footer = self.doc.sections[0].footer
        self._clear_paragraphs(footer)

        para = footer.add_paragraph()
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER

        pieces = text.split(self.PAGE_NUMBER_PLACEHOLDER)
        for index, piece in enumerate(pieces):
            runs = []
            if index:
                # A placeholder sat between the previous piece and this one.
                runs.append(self._add_page_field(para))
            if piece:
                runs.append(para.add_run(piece))
            for run in runs:
                run.font.size = Pt(9)
                run.font.color.rgb = RGBColor(128, 128, 128)

    def add_header(self, text="页眉内容"):
        """Replace the first section's header with right-aligned 9 pt text.

        :param text: header text.
        """
        header = self.doc.sections[0].header
        self._clear_paragraphs(header)

        para = header.add_paragraph()
        para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        run = para.add_run(text)
        run.font.size = Pt(9)

    def save(self, filename):
        """Write the document to *filename* and print a confirmation."""
        self.doc.save(filename)
        print(f"文档已保存:{filename}")
# 使用示例
def create_complete_document():
    """Build the full demo document and save it as '自动化排版示例.docx'.

    Steps, in order: cover page, table of contents, header and footer, an
    introduction section, a sample table, a code block, an optional picture
    ('example.png' in the working directory), and finally the save.
    """
    formatter = WordAutoFormatter()

    # 1. Cover page
    formatter.add_cover_page(
        title="Python自动化排版系统",
        subtitle="技术文档与实现",
        author="智能助手",
        date=datetime.now().strftime("%Y年%m月%d日"),
    )

    # 2. Table of contents
    formatter.add_table_of_contents()

    # 3. Header and footer
    formatter.add_header("Python自动化排版系统")
    # Plain literal: the former f-string had nothing to interpolate and its
    # doubled braces produced exactly this text.
    formatter.add_footer("第 {{页码}} 页")

    # 4. Section with body text
    formatter.doc.add_heading("1. 引言", level=1)
    para = formatter.doc.add_paragraph(
        "本文档展示了使用Python实现Word文档自动化排版的完整流程。"
        "通过python-docx库,我们可以实现文档创建、格式设置、内容插入等"
        "自动化操作,大大提高文档处理的效率。"
    )
    para.style = 'CustomBody'

    # 5. Table
    headers = ['姓名', '年龄', '部门', '职位']
    data = [
        ['张三', '28', '技术部', '工程师'],
        ['李四', '32', '市场部', '经理'],
        ['王五', '25', '人事部', '专员'],
    ]
    formatter.add_formatted_table(headers, data, header_bg_color='D9EAD3')

    # 6. Code sample
    code_example = '''def hello_world():
"""简单的Python函数"""
print("Hello, World!")
return True'''
    formatter.add_code_block(code_example, 'python')

    # 7. Picture, only when the file is present. Only the "file missing" case
    #    is skipped; any other failure is a real error and is left to
    #    propagate instead of being hidden by a bare `except`.
    picture_path = 'example.png'
    try:
        formatter.doc.add_picture(picture_path, width=Cm(10))
    except FileNotFoundError:
        print(f"未找到图片 {picture_path},已跳过")
    else:
        para = formatter.doc.add_paragraph("图1:示例图片")
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        para.runs[0].font.size = Pt(10)
        para.runs[0].italic = True

    # 8. Save
    formatter.save('自动化排版示例.docx')
# Script entry point: build the demo document only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    create_complete_document()
import glob
from pathlib import Path
class BatchWordProcessor:
    """Batch operations over the .docx files found directly inside a folder.

    ``self.doc`` is the accumulator used by :meth:`merge_documents`; it is
    created once per instance, so repeated merges keep appending to it.
    """

    # Text inserted before the extension of files written by batch_format().
    FORMATTED_SUFFIX = "_formatted"

    def __init__(self):
        """Create the empty document that merged content is appended to."""
        self.doc = Document()

    @staticmethod
    def _list_documents(folder_path):
        """Return the .docx paths in *folder_path*, sorted, without lock files.

        Word creates ``~$name.docx`` owner files next to open documents; they
        match the glob but are not documents, so they are filtered out.
        Sorting makes the processing order deterministic.
        """
        return sorted(
            path
            for path in glob.glob(f"{folder_path}/*.docx")
            if not Path(path).name.startswith("~$")
        )

    @classmethod
    def _formatted_path(cls, file_path):
        """Return the output path for *file_path*: same folder, suffixed stem.

        Only the file name is rewritten, so a '.docx' occurring elsewhere in
        the path is left alone and any extension casing is handled.
        """
        source = Path(file_path)
        return str(source.with_name(f"{source.stem}{cls.FORMATTED_SUFFIX}.docx"))

    @staticmethod
    def _apply_font(run, name, size_pt):
        """Set typeface (Latin and East Asian slots) and point size on *run*."""
        run.font.name = name
        # `font.name` does not touch the East Asian slot used for CJK text.
        run._element.rPr.rFonts.set(qn('w:eastAsia'), name)
        run.font.size = Pt(size_pt)

    def merge_documents(self, folder_path, output_file='merged.docx'):
        """Append every document in *folder_path* to ``self.doc`` and save it.

        Each source contributes a level-1 heading with its file stem followed
        by its body paragraphs. Per run, text, bold, italic, underline and
        font size are copied; per paragraph, the alignment. Tables, images
        and styles are not copied. A file that cannot be processed is
        reported and skipped.

        :param folder_path: folder that is searched (not recursively).
        :param output_file: path the merged document is written to.
        """
        output_path = Path(output_file).resolve()
        # Never merge a previous output back into itself.
        word_files = [
            path
            for path in self._list_documents(folder_path)
            if Path(path).resolve() != output_path
        ]

        merged_count = 0
        for i, file_path in enumerate(word_files, 1):
            try:
                sub_doc = Document(file_path)
                # File name (without extension) as section heading
                self.doc.add_heading(Path(file_path).stem, level=1)
                for para in sub_doc.paragraphs:
                    new_para = self.doc.add_paragraph()
                    new_para.alignment = para.alignment
                    for run in para.runs:
                        new_run = new_para.add_run(run.text)
                        new_run.bold = run.bold
                        new_run.italic = run.italic
                        new_run.underline = run.underline
                        new_run.font.size = run.font.size
                # Separate documents with a page break, except after the last
                if i < len(word_files):
                    self.doc.add_page_break()
            except Exception as e:
                # Best effort: one unreadable file must not abort the batch.
                print(f"处理文件 {file_path} 时出错: {e}")
            else:
                merged_count += 1

        self.doc.save(output_file)
        print(f"已合并 {merged_count} 个文件到 {output_file}")

    def batch_format(self, folder_path, style_config):
        """Format every document in *folder_path* and save a suffixed copy.

        The sources are left untouched; each result is written next to its
        source as ``<stem>_formatted.docx``. Files that already carry that
        suffix are skipped so a second run does not format its own output.

        :param folder_path: folder that is searched (not recursively).
        :param style_config: mapping read by :meth:`_apply_formatting`.
        """
        for file_path in self._list_documents(folder_path):
            if Path(file_path).stem.endswith(self.FORMATTED_SUFFIX):
                continue
            try:
                doc = Document(file_path)
                self._apply_formatting(doc, style_config)
                new_path = self._formatted_path(file_path)
                doc.save(new_path)
                print(f"已格式化: {new_path}")
            except Exception as e:
                # Best effort: report the failure and continue with the rest.
                print(f"处理文件 {file_path} 时出错: {e}")

    def _apply_formatting(self, doc, config):
        """Apply heading/body fonts from *config* to every body paragraph run.

        Recognised keys and their defaults: ``heading_font`` ('黑体'),
        ``heading_size`` (16), ``body_font`` ('宋体'), ``body_size`` (12).
        Sizes are in points. A paragraph counts as a heading when its style
        name starts with 'Heading'.
        """
        heading_font = config.get('heading_font', '黑体')
        heading_size = config.get('heading_size', 16)
        body_font = config.get('body_font', '宋体')
        body_size = config.get('body_size', 12)

        for para in doc.paragraphs:
            if para.style.name.startswith('Heading'):
                font_name, font_size = heading_font, heading_size
            else:
                font_name, font_size = body_font, body_size
            for run in para.runs:
                self._apply_font(run, font_name, font_size)
def extract_document_info(doc_path):
    """Collect size statistics and core metadata for the document at *doc_path*.

    Returns:
        A dict with the keys 'paragraphs', 'tables' and 'images' (counts of
        body paragraphs, tables and inline shapes), 'pages' (the value
        returned by ``estimate_page_count``), and 'author', 'created' and
        'modified' taken from the document's core properties.
    """
    document = Document(doc_path)

    summary = {}
    summary['paragraphs'] = len(document.paragraphs)
    summary['tables'] = len(document.tables)
    summary['images'] = len(document.inline_shapes)
    summary['pages'] = estimate_page_count(document)

    # Metadata comes from the core-properties part of the package.
    properties = document.core_properties
    summary['author'] = properties.author
    summary['created'] = properties.created
    summary['modified'] = properties.modified
    return summary
def estimate_page_count(doc, chars_per_page=3000):
    """Estimate the page count of *doc* from its body text length.

    python-docx does not expose a page count, so this divides the number of
    characters in the body paragraphs by *chars_per_page* and rounds up: a
    partly filled page is still a page.

    Args:
        doc: object exposing ``paragraphs``, each with a ``text`` attribute.
        chars_per_page: assumed characters per page; must be positive.

    Returns:
        The estimated number of pages, never less than 1.

    Raises:
        ValueError: if *chars_per_page* is not positive.
    """
    if chars_per_page <= 0:
        raise ValueError("chars_per_page must be positive")
    total_chars = sum(len(para.text) for para in doc.paragraphs)
    # Ceiling division without floats: -(-a // b).
    return max(1, -(-total_chars // chars_per_page))
def replace_text_in_document(doc_path, replacements, output_path):
    """Replace text in the body paragraphs of a document and save a copy.

    Replacements are applied per paragraph in the mapping's iteration order.
    When every occurrence of a search string lies inside single runs, only
    those runs are edited, so character formatting is kept. When a match
    spans several runs, the whole paragraph text is rewritten, which merges
    the paragraph into one run. Text inside tables is not processed.

    Args:
        doc_path: path of the document to read.
        replacements: mapping of ``old_text`` to ``new_text``; empty keys
            are ignored.
        output_path: path the modified document is written to.
    """
    doc = Document(doc_path)
    for para in doc.paragraphs:
        for old_text, new_text in replacements.items():
            # An empty search string would match between every character.
            if not old_text:
                continue
            paragraph_hits = para.text.count(old_text)
            if not paragraph_hits:
                continue

            runs = para.runs
            run_hits = sum(run.text.count(old_text) for run in runs)
            if run_hits == paragraph_hits:
                # Every match sits inside one run: edit in place and keep
                # the formatting of the surrounding runs.
                for run in runs:
                    if old_text in run.text:
                        run.text = run.text.replace(old_text, new_text)
            else:
                # A match crosses run boundaries: fall back to rewriting the
                # whole paragraph text.
                para.text = para.text.replace(old_text, new_text)
    doc.save(output_path)
class TemplateSystem:
    """Generate documents by filling ``{{name}}`` placeholders in a template.

    Attributes are ``template`` (the loaded template document, never
    modified) and ``placeholders`` (names found in its body paragraphs).
    """

    def __init__(self, template_path):
        """Load the template and record the placeholder names it contains.

        :param template_path: path of the template document.
        """
        self.template = Document(template_path)
        self.placeholders = self._find_placeholders()

    def _find_placeholders(self):
        """Return the distinct placeholder names in first-seen order.

        Only body paragraphs are scanned. A name is whatever stands between
        ``{{`` and the next ``}}``, surrounding whitespace included.
        """
        import re

        pattern = re.compile(r'\{\{(.*?)\}\}')
        found = {}  # dict keys keep insertion order and drop duplicates
        for para in self.template.paragraphs:
            for name in pattern.findall(para.text):
                found.setdefault(name, None)
        return list(found)

    @staticmethod
    def _replace_in_paragraph(para, placeholder, value):
        """Replace *placeholder* by *value* in *para*, keeping formatting if possible."""
        runs = para.runs
        run_hits = sum(run.text.count(placeholder) for run in runs)
        if run_hits == para.text.count(placeholder):
            # Every placeholder sits inside a single run: edit those runs only.
            for run in runs:
                if placeholder in run.text:
                    run.text = run.text.replace(placeholder, value)
        else:
            # The placeholder is split across runs; rewriting the paragraph
            # text keeps paragraph-level formatting but merges the runs.
            para.text = para.text.replace(placeholder, value)

    def fill_template(self, data_dict, output_path):
        """Write a copy of the template with its placeholders filled in.

        The output starts as a full copy of the template, so tables, images,
        styles and page settings are carried over. Placeholders are replaced
        in body paragraphs only; a placeholder without an entry in
        *data_dict* is left as is.

        :param data_dict: mapping of placeholder name to value; values are
            converted with ``str``.
        :param output_path: path the filled document is written to.
        """
        import io

        # Clone through an in-memory save so the loaded template stays
        # untouched and can be filled again.
        buffer = io.BytesIO()
        self.template.save(buffer)
        buffer.seek(0)
        doc = Document(buffer)

        for para in doc.paragraphs:
            for key, value in data_dict.items():
                placeholder = f'{{{{{key}}}}}'
                if placeholder in para.text:
                    self._replace_in_paragraph(para, placeholder, str(value))

        doc.save(output_path)
        print(f"模板已填充并保存到: {output_path}")
# 使用模板系统
def use_template_system():
    """Demonstrate the template workflow end to end.

    A template with four placeholders is written to 'report_template.docx',
    then loaded through ``TemplateSystem`` and filled into
    '月度报告_202401.docx'. Both files are created in the working directory.
    """
    template_file = 'report_template.docx'

    # 1. Build the template: one heading plus three placeholder paragraphs.
    skeleton = Document()
    skeleton.add_heading('{{title}}', level=1)
    for line in ('日期: {{date}}', '作者: {{author}}', '{{content}}'):
        skeleton.add_paragraph(line)
    skeleton.save(template_file)

    # 2. Fill it with concrete values.
    values = {
        'title': '月度报告',
        'date': '2024-01-15',
        'author': '张三',
        'content': '这是本月的详细报告内容...',
    }
    system = TemplateSystem(template_file)
    system.fill_template(values, '月度报告_202401.docx')
这个完整的自动化排版系统可以用于生成报告、文档、合同等各种标准化文档,大大提高工作效率。你可以根据具体需求调整和扩展这些功能。