使用python删除word文档中的指定段落,顺便实现一下文档中的图片导出

【自取】最近整理的,有需要可以领取学习:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from pathlib import Path
from docx import Document
import os

# Export the images embedded in a Word document
def extract_img_word(filename='',doc_path=''):
    '''
    Unpack the embedded images of a .docx file into ``doc_path``.

    A .docx file is an ordinary zip archive (renaming it to .zip works as
    well), so it is opened directly with ``ZipFile``. Every archive member
    whose name starts with ``word/media/image`` is extracted; the archive's
    internal directory layout is kept, so the files end up under
    ``doc_path/word/media/``.
    '''
    from zipfile import ZipFile

    with ZipFile(filename) as archive:
        image_members = [
            member
            for member in archive.namelist()
            if member.startswith("word/media/image")
        ]
        archive.extractall(doc_path, members=image_members)

'''
pip install python-docx
https://python-docx.readthedocs.io/en/latest/
'''
# Create the sample document
def createWord(path='H:/temp/test.docx'):
    '''
    Build a small sample .docx and save it to ``path``.

    The document contains a title, a few headings and plain paragraphs, and
    one paragraph whose text includes the marker '需要删除'; that marker is
    what the paragraph-deletion demo in this file searches for.

    path: destination handed to ``Document.save``. Defaults to the location
        the original script hard-coded, so existing calls behave the same.
    '''
    document = Document()
    document.add_heading('Document Title', 0)
    document.add_paragraph('A plain paragraph having some')
    document.add_heading('Heading, level 1', level=1)
    document.add_heading('Heading, level 1', level=2)
    # Marker paragraph: this one and every paragraph after it get removed
    # by delWordContent.
    document.add_paragraph('以下段落需要删除')
    document.add_paragraph('A plain paragraph')
    document.add_paragraph('A plain paragraph 新段落')
    document.add_heading('Heading, level 2', level=2)
    document.save(path)

# Runs at module load: writes the sample document that the deletion demo
# further down in this file reads back.
createWord()

# Remove a single paragraph from its document
def delete_paragraph(paragraph):
    '''
    Detach ``paragraph`` from the document tree it belongs to.

    The paragraph's underlying XML element is removed from its parent
    element, then the wrapper's ``_p`` and ``_element`` references are
    cleared so the stale object no longer points at the detached node.
    '''
    node = paragraph._element
    container = node.getparent()
    container.remove(node)
    paragraph._p = None
    paragraph._element = None

def delWordContent(docx_file='',dest_file='',marker='需要删除'):
    '''
    Delete the first paragraph containing ``marker`` and everything after it.

    The document at ``docx_file`` is loaded and its paragraphs are scanned in
    order. From the first paragraph whose text contains ``marker`` onwards,
    every paragraph (the marker paragraph included) is removed. The result is
    saved to ``dest_file`` only when the marker was found; otherwise nothing
    is written.

    docx_file: path of the document to read.
    dest_file: path the trimmed document is saved to.
    marker: text that identifies where deletion starts. Defaults to the
        string the original script hard-coded.
    '''
    doc = Document(docx_file)
    found = False
    for para in doc.paragraphs:
        # Once the marker has been seen, the flag stays set so all remaining
        # paragraphs are deleted as well.
        if not found and marker in para.text:
            found = True
        if found:
            delete_paragraph(para)
    if found:
        # Save as a new file; the input document is left untouched.
        doc.save(dest_file)

# Runs at module load: trims the sample document and writes the result to a
# separate file next to it.
delWordContent(docx_file='H:/temp/test.docx',dest_file='H:/temp/test-new.docx')

def testDel(src_dir='H:/', dest_dir='words'):
    '''
    Batch-trim every .docx directly inside ``src_dir``.

    Each document is passed to ``delWordContent`` with its output placed in
    the ``dest_dir`` sub-directory of ``src_dir`` under the same file name.
    The source file is deleted only when a trimmed copy was actually written
    during this run; a document without the marker produces no output and is
    therefore kept instead of being lost.

    src_dir: directory scanned (non-recursively) for ``*.docx`` files.
    dest_dir: name of the sub-directory of ``src_dir`` that receives the
        trimmed copies; created on demand.
    '''
    source_root = Path(src_dir)
    target_root = source_root / dest_dir
    # Snapshot the listing first: files are removed from this directory
    # inside the loop.
    for filename in list(source_root.glob('*.docx')):
        print(str(filename))
        # Saving fails if the destination directory does not exist.
        target_root.mkdir(parents=True, exist_ok=True)
        dest_path = target_root / filename.name
        # Remember the mtime of a leftover output from an earlier run so a
        # stale file is not mistaken for a fresh save.
        stamp_before = dest_path.stat().st_mtime_ns if dest_path.exists() else None
        delWordContent(docx_file = str(filename), dest_file = str(dest_path))
        written = dest_path.exists() and dest_path.stat().st_mtime_ns != stamp_before
        if written:
            os.remove(str(filename))

Original: https://www.cnblogs.com/liangblog/p/16203382.html
Author: 凉城
Title: 使用python删除word文档中的指定段落,顺便实现一下文档中的图片导出

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/6618/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

发表回复

登录后才能评论
免费咨询
免费咨询
扫码关注
扫码关注
联系站长

站长Johngo!

大数据和算法重度研究者!

持续产出大数据、算法、LeetCode干货,以及业界好资源!

2022012703491714

微信来撩,免费咨询:xiaozhu_tec

分享本页
返回顶部