Python删除文件中的中文及中文符号


import os, sys
import re

# Root directory that is scanned for .cpp files. It is a relative path, so it
# is resolved against the working directory set by the chdir below.
soupath = "../src"

# Make relative paths resolve against the directory containing the script
# instead of whatever directory the script was launched from.
os.chdir(os.path.abspath(os.path.dirname(sys.argv[0])))

# One or more characters from the CJK Unified Ideographs block. The block ends
# at U+9FFF; stopping earlier lets ideographs such as U+9FA2 slip through.
ChinesePattern = re.compile('[\u4e00-\u9fff]+')

# One or more Chinese / full-width punctuation marks: ideographic full stop and
# comma (U+3002, U+3001), full-width semicolon, comma, colon, parentheses,
# question mark and exclamation mark (U+FF1B, U+FF0C, U+FF1A, U+FF08, U+FF09,
# U+FF1F, U+FF01), curly double quotes (U+201C, U+201D) and double angle
# brackets (U+300A, U+300B).
ChineseCharPattern = re.compile(
    '[\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b\uff01]+'
)

def _rewrite_cleaned(path):
    """Replace the file at *path* with the text returned by parse() for it.

    The cleaned text is first written to a sibling ``<path>.tmp`` file, which
    is then moved over *path*. The original therefore stays intact until the
    new content is completely on disk.

    Args:
        path: Path of the file to rewrite.

    Raises:
        OSError: If the file cannot be read, the temporary file already
            exists or cannot be written, or the final replace fails.
        UnicodeError: If the file is not valid UTF-8 (raised while parse()
            reads it).
    """
    # Parse before touching anything on disk: a read or decode error must not
    # leave the source file renamed or truncated.
    cleaned = parse(path)
    tmp_path = path + ".tmp"
    # Mode "x" fails rather than truncating a file that already carries the
    # temporary name, so unrelated data is never overwritten.
    out = open(tmp_path, "x", encoding="utf-8")
    try:
        with out:
            out.write(cleaned)
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave a half-written temporary file behind.
        os.remove(tmp_path)
        raise


def print_files(soupath):
    """Strip Chinese text from every ``.cpp`` file under *soupath*, in place.

    Sub-directories are processed recursively before the files of the
    directory itself. Every regular file whose name ends in ``.cpp`` is run
    through parse() and rewritten with the result.

    Args:
        soupath: Directory to scan.

    Raises:
        SystemExit: With status 1, after printing ``"<path> failure"``, when a
            file cannot be read, decoded or rewritten.
        OSError: If *soupath* itself cannot be listed.
    """
    entries = os.listdir(soupath)

    for name in entries:
        path = os.path.join(soupath, name)
        if os.path.isdir(path):
            print_files(path)

    for name in entries:
        path = os.path.join(soupath, name)
        if not name.endswith(".cpp") or not os.path.isfile(path):
            continue
        try:
            _rewrite_cleaned(path)
        except (OSError, UnicodeError):
            print(path + " failure")
            sys.exit(1)

def parse(filename):
    """Return the text of *filename* with Chinese characters removed.

    The file is read as UTF-8 and every character matched by the module-level
    ``ChinesePattern`` or ``ChineseCharPattern`` is dropped; all other
    characters are kept in their original order.

    Args:
        filename: Path of the text file to read.

    Returns:
        The filtered file content as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # The context manager closes the handle even when reading fails.
    with open(filename, "r", encoding="utf-8") as src:
        text = src.read()
    # Both patterns are plain character classes, so replacing their matches
    # with the empty string removes exactly the characters they cover.
    text = ChinesePattern.sub("", text)
    return ChineseCharPattern.sub("", text)

# Script entry point: process the configured source root only when this file
# is executed directly, not when it is imported.
if __name__ == "__main__":
    print_files(soupath)

Original: https://blog.csdn.net/gaoyuelon/article/details/127820481
Author: 高二的笔记
Title: Python删除文件中的中文及中文符号

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/658358/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球