From 881d4fda4b1d4f8e449540ea4211b11b1a1e3c2a Mon Sep 17 00:00:00 2001 From: Meco Man <920369182@qq.com> Date: Thu, 4 Mar 2021 05:43:09 +0800 Subject: [PATCH] =?UTF-8?q?[tools]=20=E5=A2=9E=E5=8A=A0formatting=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E5=8C=96=E6=A0=BC=E5=BC=8F=E8=B0=83=E6=95=B4=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=EF=BC=88=E5=88=9D=E7=89=88=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/{ => tools}/as.sh | 0 tools/tools/formatting.py | 131 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) rename tools/{ => tools}/as.sh (100%) mode change 100755 => 100644 create mode 100644 tools/tools/formatting.py diff --git a/tools/as.sh b/tools/tools/as.sh old mode 100755 new mode 100644 similarity index 100% rename from tools/as.sh rename to tools/tools/as.sh diff --git a/tools/tools/formatting.py b/tools/tools/formatting.py new file mode 100644 index 0000000000..35a98d587e --- /dev/null +++ b/tools/tools/formatting.py @@ -0,0 +1,131 @@ +# +# File : formatting.py +# This file is part of RT-Thread RTOS +# COPYRIGHT (C) 2006 - 2018, RT-Thread Development Team +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
#
# Change Logs:
# Date           Author       Notes
# 2021-03-02     Meco Man     The first version
# 2021-03-04     Meco Man     Add unified conversion of files to UTF-8 encoding

# This script scans every file under a given directory, including the files
# in its sub-directories (only .c and .h files are handled), and:
#   1) converts the source file encoding to UTF-8;
#   2) replaces TAB characters with spaces;
#   3) removes the trailing whitespace of every line and unifies the line
#      ending to '\n'.
# To use it, just run this file and enter the path of the directory to scan.
# Conversion to UTF-8 cannot be guaranteed for 100% of the files: files whose
# encoding is unusual or detected inaccurately are reported on the terminal
# and have to be converted by hand.

import os

try:
    import chardet
except ImportError:
    # chardet is a third-party package that is only needed for encoding
    # detection. Deferring the failure keeps the pure string helpers
    # (tab2spaces, formattail, format_codes) usable without it;
    # get_encode_info() raises a clear ImportError when it is really needed.
    chardet = None


def tab2spaces(line, tab_width=4):
    """Return *line* with every TAB replaced by spaces.

    A TAB is not simply replaced by ``tab_width`` spaces: because a TAB aligns
    to the next tab stop, the number of spaces is computed from the column at
    which the TAB occurs. The column is the number of characters already
    emitted for this line (earlier TABs count with their expanded width).

    line      -- the text to convert.
    tab_width -- distance between tab stops, 4 by default.
    """
    if '\t' not in line:
        return line

    pieces = []
    column = 0
    for ch in line:
        if ch == '\t':
            # Pad up to the next multiple of tab_width (always >= 1 space).
            pad = tab_width - (column % tab_width)
            pieces.append(' ' * pad)
            column += pad
        else:
            pieces.append(ch)
            column += 1
    return ''.join(pieces)


def formattail(line):
    """Strip the trailing whitespace of *line* and terminate it with '\\n'."""
    return line.rstrip() + '\n'


def format_codes(filename):
    """Reformat one UTF-8 text file in place.

    Every line gets its TABs expanded (tab2spaces) and its tail normalised
    (formattail). The whole file is read and formatted before anything is
    written, so a file that cannot be decoded as UTF-8 is left untouched and
    the UnicodeDecodeError propagates to the caller. No scratch file is
    created.
    """
    with open(filename, 'r', encoding='utf-8') as src:
        formatted = [formattail(tab2spaces(line)) for line in src]

    # newline='\n' disables the platform newline translation, so the file
    # really ends up with '\n' line endings on every operating system.
    with open(filename, 'w', encoding='utf-8', newline='\n') as dst:
        dst.writelines(formatted)


def get_encode_info(file):
    """Detect the encoding of *file* with chardet.

    Returns the encoding name when it is one of 'ascii', 'utf-8', 'GB2312' or
    'Windows-1252'. Returns None otherwise; when chardet reported some other
    encoding, a message asking for manual confirmation is printed first.

    Raises ImportError when the chardet package is not installed.
    """
    if chardet is None:
        raise ImportError('chardet is required to detect file encodings')

    with open(file, 'rb') as f:
        code = chardet.detect(f.read())['encoding']

    # Author's notes: chardet tends to identify English documents containing
    # a small amount of Chinese as a Japanese encoding (EUC-JP), and some
    # GB2312 files are identified as ISO-8859-1. Both are treated as GB2312.
    if code in ('EUC-JP', 'ISO-8859-1'):
        code = 'GB2312'

    # Author's note: Windows-1252 is accepted because of the apostrophe in
    # "'s" found in STMicroelectronics files.
    if code not in ('ascii', 'utf-8', 'GB2312', 'Windows-1252'):
        if code is not None:
            print('未处理,需人工确认:'+code+':'+file)  # needs manual confirmation
            code = None

    return code


def conver_to_utf_8 (path):
    """Convert the file at *path* to UTF-8 in place.

    Returns 1 on success and 0 on failure. Failure means the encoding was not
    accepted by get_encode_info, or decoding/encoding raised a Unicode error
    (a message is printed in that case).
    """
    try:
        info = get_encode_info(path)
        if info is None:
            return 0  # 0: failure

        with open(path, 'rb+') as file:
            data = file.read()
            utf = data.decode(info).encode('utf-8')
            if utf != data:  # nothing to rewrite when the bytes are unchanged
                file.seek(0)
                file.write(utf)
                # Drop any leftover bytes should the new content be shorter.
                file.truncate()
        return 1  # 1: success
    except UnicodeDecodeError:
        print("UnicodeDecodeError未处理,需人工确认"+path)
        return 0
    except UnicodeEncodeError:
        print("UnicodeEncodeError未处理,需人工确认"+path)
        return 0


def traversalallfile(path, extensions=('.c', '.h')):
    """Recursively process every matching file below the directory *path*.

    Each file whose name ends with one of *extensions* (only .c and .h by
    default) is first converted to UTF-8 and, only if that succeeded,
    reformatted with format_codes.
    """
    extensions = tuple(extensions)
    for entry in os.listdir(path):
        filepath = os.path.join(path, entry)
        if os.path.isdir(filepath):
            traversalallfile(filepath, extensions)
        elif os.path.isfile(filepath):
            if filepath.endswith(extensions):
                # Convert to UTF-8 first (1 means success), then format.
                if conver_to_utf_8(filepath) == 1:
                    format_codes(filepath)


def formatfiles():
    """Ask for the directory to scan on stdin and process it."""
    workpath = input('enter work path: ')
    traversalallfile(workpath)


if __name__ == '__main__':
    formatfiles()