import os, sys
import re
# Root directory handed to print_files() by the __main__ guard. It is a
# relative path, so it is resolved against the working directory that the
# os.chdir() call below establishes.
soupath = "../src"
# Switch the working directory to the directory part of sys.argv[0], so the
# relative soupath does not depend on where the script was launched from.
os.chdir(os.path.abspath(os.path.dirname(sys.argv[0])))
# Matches one or more consecutive characters in the range U+4E00..U+9FA0.
# NOTE(review): the Unicode "CJK Unified Ideographs" block extends to U+9FFF;
# code points above U+9FA0 are not matched here - confirm this is intended.
ChinesePattern = re.compile(u'[\u4e00-\u9FA0]+')
# Matches one or more consecutive characters from this punctuation set:
# ideographic full stop (U+3002), fullwidth semicolon (U+FF1B), fullwidth
# comma (U+FF0C), fullwidth colon (U+FF1A), left/right double quotation marks
# (U+201C, U+201D), fullwidth parentheses (U+FF08, U+FF09), ideographic comma
# (U+3001), fullwidth question mark (U+FF1F) and double angle brackets
# (U+300A, U+300B).
ChineseCharPattern = re.compile(u'[\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b]+')
def _rewrite_in_place(source_path):
    """Replace *source_path* with the text returned by ``parse(source_path)``.

    The filtered text is computed first and written to a staging file next
    to the original; only a fully written staging file is moved over the
    original, so a failure never leaves the original missing or truncated.

    Raises:
        OSError: if reading, writing or replacing fails. A partially
            written staging file is removed before re-raising.
    """
    filtered = parse(source_path)
    staging_path = source_path + ".tmp"
    try:
        with open(staging_path, "w", encoding="utf-8") as staging:
            staging.write(filtered)
        os.replace(staging_path, source_path)
    except OSError:
        try:
            os.remove(staging_path)
        except OSError:
            # Best-effort cleanup: the staging file may never have been
            # created, and the original error is the one worth reporting.
            pass
        raise


def print_files(soupath):
    """Rewrite every ``.cpp`` file under *soupath* through ``parse()``.

    Despite its name this function lists nothing. It recurses into every
    sub-directory of *soupath* first, then replaces each regular file in
    *soupath* whose name ends in ``.cpp`` with the text that ``parse()``
    returns for it.

    Args:
        soupath: Directory to process.

    Returns:
        None.

    On an ``OSError`` while rewriting a file, ``"<path> failure"`` is
    printed and the process exits with status 1. An ``OSError`` raised by
    ``os.listdir`` itself is not caught. ``parse()`` may also terminate the
    process on its own.
    """
    entries = os.listdir(soupath)

    # Sub-directories are handled before the files of this directory.
    for name in entries:
        child = os.path.join(soupath, name)
        if os.path.isdir(child):
            print_files(child)

    for name in entries:
        source_path = os.path.join(soupath, name)
        if not (name.endswith(".cpp") and os.path.isfile(source_path)):
            continue
        try:
            _rewrite_in_place(source_path)
        except OSError:
            print(source_path + " failure")
            sys.exit(1)
def parse(filename):
    """Return the text of *filename* with Chinese characters removed.

    The file is read as UTF-8 in one pass. Every character matched by the
    module-level ``ChinesePattern`` or ``ChineseCharPattern`` is dropped;
    all other characters are kept in their original order.

    Args:
        filename: Path of the file to read.

    Returns:
        The filtered text as a ``str``.

    Raises:
        OSError: if the file cannot be opened or read. This is left to
            propagate so the caller can handle it.

    If the file is not valid UTF-8, ``"<filename> error"`` is printed and
    the process exits with status 1.
    """
    try:
        # The context manager closes the handle even when decoding fails.
        with open(filename, "r", encoding="utf-8") as source:
            text = source.read()
    except UnicodeDecodeError:
        # Invalid UTF-8 is the realistic failure mode of the read above.
        print(filename + " error")
        sys.exit(1)
    # Both patterns are character classes, so substituting each with ""
    # removes exactly the characters matched by either of them.
    return ChineseCharPattern.sub("", ChinesePattern.sub("", text))
# Script entry point: process the tree rooted at the module-level soupath
# only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    print_files(soupath)
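# Original: https://blog.csdn.net/gaoyuelon/article/details/127820481
# Author: 高二的笔记
# Title: Python删除文件中的中文及中文符号 (Deleting Chinese characters and Chinese punctuation from files with Python)
# Original articles are protected by copyright. When reposting, please cite the source: https://www.johngo689.com/658358/
# Reposted articles are protected by the original author's copyright. When reposting, please credit the original author and source!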