def cn_to_unicode(in_str, need_str=True, debug=False):
    r"""Escape every character of ``in_str`` as a backslash-u sequence.

    Code points up to 0xFF are formatted by hand as ``\u00xx``, because the
    ``unicode_escape`` codec would leave printable ASCII unchanged and use
    short forms such as ``\xNN`` or ``\n`` for the rest of that range.
    All other characters are encoded with ``unicode_escape``, which yields
    ``\uXXXX`` for BMP characters and ``\UXXXXXXXX`` above the BMP.

    Args:
        in_str: Text to escape.
        need_str: When true (default) return one ``str``; otherwise return
            the list of per-character ``bytes`` escapes.
        debug: When true, print the escape list and a few lengths.

    Returns:
        The concatenated escapes as ``str``, or a ``list`` of ``bytes``
        (one item per input character) when ``need_str`` is false.
    """
    out = []
    for ch in in_str:
        code_point = ord(ch)
        if code_point <= 0xFF:
            # Build the escape manually and encode it as UTF-8; running it
            # through unicode_escape would escape the backslash a second time.
            escaped = "\\u{:04x}".format(code_point).encode("utf-8")
        else:
            escaped = ch.encode("unicode_escape")
        out.append(escaped)

    if debug:
        print(out)
        if out:
            print(len(out), len(out[0]), len(out[-1]))
        else:
            # Empty input: there is no first/last element to measure.
            print(len(out))

    if need_str:
        # Every escape is pure ASCII, so one join + decode is equivalent to
        # decoding and concatenating the pieces one by one.
        return b"".join(out).decode("utf-8")
    return out
Unicode转中文
def unicode_to_cn(in_str, debug=False):
    r"""Turn backslash escapes such as ``\u4f60`` back into real characters.

    Only the ASCII runs of the input are passed through the
    ``unicode_escape`` codec. That codec interprets its input bytes as
    Latin-1, so feeding it UTF-8 encoded non-ASCII text would corrupt any
    literal non-ASCII character; those characters are therefore copied
    through unchanged.

    Args:
        in_str: ``str`` or UTF-8 encoded ``bytes`` containing escapes.
        debug: Unused; accepted for interface compatibility.

    Returns:
        The decoded ``str``.

    Raises:
        UnicodeDecodeError: If ``in_str`` is ``bytes`` that are not valid
            UTF-8, or if an escape sequence is malformed (for example a
            truncated ``\u12``).
    """
    import re

    if isinstance(in_str, bytes):
        text = str(in_str, encoding='utf-8')
    else:
        text = in_str

    def _decode_ascii_run(match):
        # An ASCII-only run survives encode/decode byte-for-byte, so the
        # codec sees exactly the escape text the caller supplied.
        return match.group(0).encode('ascii').decode('unicode_escape')

    return re.sub(r'[\x00-\x7f]+', _decode_ascii_run, text)
测试代码
test.py
...  # placeholder kept from the listing; presumably stands for the converter definitions -- TODO confirm
if __name__ == "__main__":
    val = input("unicode to GBK or GBK to unicode? <enter 1 for the front, 2 for the end>: ")
    # Compare the typed text directly instead of eval()-ing it: eval would
    # execute arbitrary expressions entered at the prompt.
    choice = val.strip()
    if choice == "1":
        s1 = input("input unicode str(like '\\u4f60\\u597d'): ")
        s2 = unicode_to_cn(s1)
        print("result: ", s2)
    elif choice == "2":
        s1 = input("input GBK str(like '你好'): ")
        s2 = cn_to_unicode(s1)
        print("result: ", s2)
    else:
        print("input wrong choice! can only be 1 or 2!")
执行效果
unicode to 中文
jason@jason-vm:~/test$ python3 test.py
unicode to GBK or GBK to unicode? <enter 1 for the front, 2 for the end>: 1
input unicode str(like '\u4f60\u597d'): \u4f60\u597d
result: 你好
中文 to unicode
jason@jason-vm:~/test$ python3 test.py
unicode to GBK or GBK to unicode? <enter 1 for the front, 2 for the end>: 2
input GBK str(like '你好'): 你好123
result: \u4f60\u597d\u0031\u0032\u0033