restore_utf8 and utf8togbk

This post provides two Python scripts for Windows: utf8togbk.py batch-converts UTF-8 source files (*.h, *.cpp) to GBK encoding, keeping each original as a *.utf8 backup, and restore_utf8.py renames those *.h.utf8 backups back to the corresponding *.h files.
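Both scripts revolve around one core operation: read the raw bytes, strip the UTF-8 byte-order mark if present, and re-encode the text as GBK. A minimal sketch of that step for a single file, assuming a hypothetical demo.h exists in the current directory:

import codecs

# Minimal sketch: re-encode one UTF-8 file (with BOM) as GBK.
# 'demo.h' is a hypothetical file name used only for illustration.
data = open('demo.h', 'rb').read()
if data.startswith(codecs.BOM_UTF8):
    text = data[len(codecs.BOM_UTF8):].decode('utf-8')
    open('demo.h', 'wb').write(text.encode('gbk'))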
restore_utf8.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Run "python restore_utf8.py" to rename *.h.utf8 to *.h.
#

import os

def restore_utf8(dir):
    resultfn = ''
    for fn in os.listdir(dir):
        sfile = os.path.join(dir, fn)
        if os.path.isdir(sfile):
            resultfn += restore_utf8(sfile)
            continue
        if fn.endswith('.utf8'):
            orgfile = sfile[:-5]                # strip the '.utf8' suffix
            try:
                if os.path.exists(orgfile):
                    os.remove(orgfile)          # drop the converted GBK copy
                os.rename(sfile, orgfile)       # restore the UTF-8 original
                resultfn += fn[:-5] + ' '
            except OSError:
                print('except for %s' % (fn,))
    return resultfn

if __name__ == "__main__":
    resultfn = restore_utf8(os.path.abspath('.'))
    resultfn += restore_utf8(os.path.abspath('../core'))
    resultfn += restore_utf8(os.path.abspath('../android'))
    if resultfn != '':
        print('restore files: ' + resultfn)




utf8togbk.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Run "python utf8togbk.py" to convert source code files to the GBK format on Windows.
#

import os, codecs

def utf8togbk(dir):
    resultfn = ''
    for fn in os.listdir(dir):
        sfile = os.path.join(dir, fn)
        if os.path.isdir(sfile):
            resultfn += utf8togbk(sfile)
            continue
        if fn.endswith('.h') or fn.endswith('.cpp'):
            if os.path.exists(sfile + '.utf8'):
                continue                            # already converted, skip
            text = open(sfile, 'rb').read()
            oldtext = text
            try:
                if text[:3] == codecs.BOM_UTF8:     # UTF-8 file with a BOM
                    u = text[3:].decode('utf-8')
                    text = u.encode('gbk')
            except (UnicodeDecodeError, UnicodeEncodeError):
                continue                            # not valid UTF-8, or GBK cannot represent it
            try:
                text = text.replace(b'\r\n', b'\n')
                text = text.replace(b'\n', b'\r\n') # normalize line endings to CRLF
                if text != oldtext:
                    os.rename(sfile, sfile + '.utf8')    # keep the original as a backup
                    open(sfile, 'wb').write(text)
                    resultfn += fn + ' '
                    st = os.stat(sfile + '.utf8')
                    os.utime(sfile, (st.st_atime, st.st_mtime))  # preserve the timestamp
            except OSError:
                print('except for %s' % (fn,))
    return resultfn

if __name__ == "__main__":
    resultfn = utf8togbk(os.path.abspath('.'))
    resultfn += utf8togbk(os.path.abspath('../core'))
    resultfn += utf8togbk(os.path.abspath('../android'))
    if resultfn != '':
        print('utf8->gbk: ' + resultfn)
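A note on the design: utf8togbk.py renames each original to *.utf8 before writing the GBK copy, then copies the backup's timestamps onto the converted file with os.utime, so incremental builds do not treat the conversion as a source change. restore_utf8.py simply reverses that rename, which is why the round trip is lossless.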