# -*- coding: utf-8 -*-
"""Pull the Chinese and non-Chinese parts out of a string and compare names.

Restored from a snippet that had been collapsed onto one comment line and
written for Python 2 (``ur''`` literals, ``str.decode``, ``print``
statements).  Python 3 strings are already Unicode, so no decoding step is
needed before matching.
"""
import os
import sys
import re

# This pattern is the key to the whole approach: it matches exactly one
# character in the range U+4E00..U+9FA5.
CHINESE_CHAR = re.compile(r'[\u4e00-\u9fa5]')


def extract_chinese(text):
    """Return the characters of *text* matched by CHINESE_CHAR, in order.

    Args:
        text: The string to scan.

    Returns:
        A string made of the matched characters only; empty when there are
        none.
    """
    return ''.join(CHINESE_CHAR.findall(text))


def split_non_chinese(text):
    """Return the pieces of *text* that lie between matched characters.

    Every matched character acts as a separator, so adjacent matches (and a
    match at either end of *text*) produce empty strings in the result.
    """
    return CHINESE_CHAR.split(text)


def chinese_difference(first, second):
    """Return the matched characters that occur in only one of the strings.

    This is the symmetric difference of the two character sets; the original
    snippet used it to judge how much two car-model names overlap.
    """
    return set(extract_chinese(first)) ^ set(extract_chinese(second))


def main():
    """Run the demonstration from the original snippet."""
    test_str1 = '蒙派克E'
    test_str2 = '新ABC蒙派克'

    # Extract the Chinese characters of each name.
    s1_zhongwen = extract_chinese(test_str1)
    print(s1_zhongwen)
    s2_zhongwen = extract_chinese(test_str2)
    print(s2_zhongwen)

    # Characters that the two names do not share.
    for item in chinese_difference(test_str1, test_str2):
        print(item)

    # Extract the non-Chinese pieces.
    print(split_non_chinese(test_str2))

    # set() turns any iterable into a set of its elements -- for a string,
    # a set of its individual characters -- and "|" unions the sets.
    tl1 = [1, 3, 5, 'abc']
    tl2 = [3, 4, 5]
    tl3 = [5, 9]
    test_set = set(tl1) | set(tl2) | set(tl3)
    print(test_set)


if __name__ == '__main__':
    main()
# Python 中从字符串中截取中文和英文字符
# 最新推荐文章于 2023-08-22 17:28:54 发布