2021-03-04 05:43:09 +08:00
|
|
|
|
#
|
|
|
|
|
# File : formatting.py
|
|
|
|
|
# This file is part of RT-Thread RTOS
|
|
|
|
|
# COPYRIGHT (C) 2006 - 2018, RT-Thread Development Team
|
|
|
|
|
#
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
# (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
#
|
|
|
|
|
# You should have received a copy of the GNU General Public License along
|
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
|
#
|
|
|
|
|
# Change Logs:
|
|
|
|
|
# Date Author Notes
|
|
|
|
|
# 2021-03-02 Meco Man The first version
|
|
|
|
|
# 2021-03-04 Meco Man 增加统一转换成UTF-8编码格式功能
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#本文件会自动对指定路径下的所有文件包括子文件夹的文件(仅针对.c和.h文件)进行扫描
|
|
|
|
|
# 1)将源文件编码统一为UTF-8;
|
|
|
|
|
# 2)将TAB键替换为空格;
|
|
|
|
|
# 3)将每行末尾多余的空格删除,并统一换行符为'\n';
|
|
|
|
|
#使用时只需要双击本文件,输入要扫描的文件夹路径即可
|
|
|
|
|
#不能保证100%全部成功转换为UTF-8,有一些编码特殊或识别不准确会在终端打印信息,需人工转换
|
|
|
|
|
|
2021-03-04 10:49:46 +08:00
|
|
|
|
#欢迎对本文件的功能继续做出补充,欢迎提交PR
|
|
|
|
|
|
2021-03-04 05:43:09 +08:00
|
|
|
|
import os
|
|
|
|
|
import chardet
|
|
|
|
|
|
|
|
|
|
#用空格代替TAB键
|
|
|
|
|
#这里并不是简单的将TAB替换成4个空格
|
|
|
|
|
#空格个数到底是多少需要计算,因为TAB制表本身有自动对齐的功能
|
|
|
|
|
def tab2spaces(line, tabsize=4):
    """Replace every TAB in *line* with spaces while keeping column alignment.

    A TAB is not blindly turned into ``tabsize`` spaces: it is padded up to
    the next tab stop, so a TAB at column 2 becomes 2 spaces and a TAB at
    column 4 becomes 4 spaces (for the default ``tabsize`` of 4).

    Args:
        line: Text of one line; a trailing newline is allowed.
        tabsize: Distance between tab stops. Defaults to 4, the value that
            was previously hard-coded.

    Returns:
        The text with all TAB characters expanded to spaces.

    Note:
        Columns are counted from the start of the string and restart after
        an embedded '\\n' or '\\r', which is how ``str.expandtabs`` defines
        tab stops. For a single line this matches the old behaviour.
    """
    # str.expandtabs does the column-aware padding in one pass; the previous
    # implementation rescanned the list with index() and shifted it with
    # insert() for every TAB, which was quadratic on TAB-heavy lines.
    return line.expandtabs(tabsize)
|
|
|
|
|
|
|
|
|
|
#删除每行末尾多余的空格 统一使用\n作为结尾
|
|
|
|
|
def formattail(line):
    """Strip trailing whitespace from *line* and terminate it with '\\n'.

    Any existing line ending ('\\n', '\\r\\n', ...) counts as trailing
    whitespace, so it is removed and replaced by a single '\\n'.

    Args:
        line: Text of one line, with or without a line ending.

    Returns:
        The right-stripped text followed by exactly one '\\n'.
    """
    stripped = line.rstrip()
    return ''.join((stripped, '\n'))
|
|
|
|
|
|
|
|
|
|
#对单个文件进行格式整理
|
|
|
|
|
def format_codes(filename):
    """Normalise the formatting of one UTF-8 text file in place.

    Every line is passed through ``tab2spaces`` and then ``formattail``.
    The result is written to a scratch file which replaces the original
    once the whole file has been processed.

    Args:
        filename: Path of the file to rewrite. It is read as UTF-8.

    Returns:
        None. If the file cannot be decoded as UTF-8, a message is printed
        and the original file is left untouched.

    Note:
        The scratch file is ``filename + '.tmp'``; an existing file with
        that name is overwritten.
    """
    # Keep the scratch file next to the target instead of a fixed 'temp' in
    # the current directory: that avoids name clashes and keeps the final
    # rename on the same filesystem.
    temp_name = filename + '.tmp'
    try:
        # newline='\n' stops text mode from translating '\n' into the
        # platform line ending (CRLF on Windows) when writing.
        with open(filename, 'r', encoding='utf-8') as src, \
                open(temp_name, 'w', encoding='utf-8', newline='\n') as dst:
            for line in src:
                dst.write(formattail(tab2spaces(line)))
    except UnicodeDecodeError:
        # Both files are already closed by the with-statement here.
        print("UnicodeDecodeError未处理,需人工确认" + filename)
        os.remove(temp_name)
        return

    # os.replace overwrites the destination in one step, so the separate
    # remove-then-rename sequence is no longer needed.
    os.replace(temp_name, filename)
|
|
|
|
|
|
|
|
|
|
def get_encode_info(file):
    """Detect the text encoding of *file* using chardet.

    Args:
        file: Path of the file to inspect.

    Returns:
        One of 'ascii', 'utf-8', 'GB2312' or 'Windows-1252' when the
        (possibly corrected) detection result is one of those names,
        otherwise None. When chardet reports some other encoding name, a
        message asking for manual confirmation is printed first.
    """
    with open(file, 'rb') as stream:
        raw = stream.read()
    detected = chardet.detect(raw)['encoding']

    # chardet sometimes misidentifies the encoding of a file:
    #  - EUC-JP: mostly-English documents containing a little Chinese tend
    #    to be reported as a Japanese encoding;
    #  - ISO-8859-1: some GB2312 files are reported as ISO-8859-1.
    # Both are treated as GB2312.
    if detected in ('EUC-JP', 'ISO-8859-1'):
        detected = 'GB2312'

    # Encodings accepted as correctly recognised. Per the original author's
    # note, Windows-1252 shows up because of the French-style apostrophe in
    # "'s" found in STMicroelectronics sources.
    if detected in ('ascii', 'utf-8', 'GB2312', 'Windows-1252'):
        return detected

    if detected is not None:
        # Anything else needs manual confirmation.
        print('未处理,需人工确认:'+detected+':'+file)
    return None
|
|
|
|
|
|
|
|
|
|
#将单个文件转为UTF-8编码
|
|
|
|
|
def conver_to_utf_8 (path):
    """Re-encode the file at *path* as UTF-8 in place.

    The encoding is looked up with ``get_encode_info``; the file content is
    decoded with it and written back encoded as UTF-8.

    Args:
        path: Path of the file to convert.

    Returns:
        1 on success, 0 on failure (no usable encoding was reported, or
        decoding / encoding raised a Unicode error).
    """
    try:
        info = get_encode_info(path)
        if info is None:
            return 0 # 0: failure

        # The with-statement closes the handle even when decoding fails;
        # previously the file stayed open on that path.
        with open(path, 'rb+') as file:
            data = file.read()
            # Decode and re-encode before writing anything, so a codec
            # error leaves the file content untouched.
            utf = data.decode(info).encode('utf-8')
            file.seek(0)
            file.write(utf)
            # Drop any leftover bytes if the new content is shorter.
            file.truncate()
        return 1 # 1: success
    except UnicodeDecodeError:
        print("UnicodeDecodeError未处理,需人工确认"+path)
        return 0
    except UnicodeEncodeError:
        print("UnicodeEncodeError未处理,需人工确认"+path)
        return 0
|
|
|
|
|
|
|
|
|
|
# 递归扫描目录下的所有文件
|
|
|
|
|
def traversalallfile(path):
    """Recursively process every .c and .h file below *path*.

    Sub-directories are descended into. Each regular file whose path ends
    with ".c" or ".h" is first converted to UTF-8 and, only if that
    conversion reports success, reformatted.

    Args:
        path: Directory to scan.
    """
    for entry in os.listdir(path):
        filepath = os.path.join(path, entry)

        if os.path.isdir(filepath):
            traversalallfile(filepath)
            continue
        if not os.path.isfile(filepath):
            continue
        # Only .c and .h files are handled.
        if not filepath.endswith((".c", ".h")):
            continue

        # Convert to UTF-8 first (1 means success), then tidy the formatting.
        if conver_to_utf_8(filepath) == 1:
            format_codes(filepath)
|
|
|
|
|
|
|
|
|
|
def formatfiles():
    """Ask for a folder path on stdin and process every .c/.h file in it."""
    traversalallfile(input('enter work path: '))
|
|
|
|
|
|
|
|
|
|
# Start the interactive formatter only when this file is run as a script.
if __name__ == '__main__':
    formatfiles()
|