核心内容:
1. set 类型的 intersection()、union()、difference() 方法的使用
2. 文件格式:一行一词,UTF-8 编码
3. 解决中文以 str 读入并转成 list/set 后,打印时无法以中文形式显示的问题
<pre class="python" name="code">#!/usr/bin/python
# -*- coding: utf-8 -*-
import codecs  # opens the word lists and result files with an explicit UTF-8 codec
import sys

# Project root: inputs are read from <info>/data, results go to <info>/output.
info = "D:/python/project1"

# NOTE(review): both inputs are the same literal "*.txt" name. codecs.open()
# does not expand wildcards, so these look like placeholders -- point them at
# the two real word lists before running.
FIRST_INPUT = info + "/data/*.txt"
SECOND_INPUT = info + "/data/*.txt"
OUTPUT_DIR = info + "/output"


def _use_utf8_default_encoding():
    """Print the default encoding before and after forcing UTF-8 on Python 2.

    On Python 3 nothing is changed and the same value is printed twice.
    """
    print(sys.getdefaultencoding())
    if sys.version_info[0] == 2:
        # NOTE: discouraged interpreter-wide hack, kept so that printing
        # decoded text still works on Python 2 when stdout has no encoding
        # (e.g. when piped). `site` removes setdefaultencoding at startup,
        # which is why sys has to be reloaded first.
        reload(sys)  # noqa: F821 -- builtin on Python 2 only
        sys.setdefaultencoding('utf-8')
    print(sys.getdefaultencoding())


def _read_words(path):
    """Return the set of words in a one-word-per-line UTF-8 file.

    Surrounding whitespace and line terminators are stripped so that the same
    word compares equal whether or not its line ends in a newline (or CRLF).
    Blank lines are skipped, and a leading BOM is dropped by 'utf-8-sig'.
    """
    with codecs.open(path, 'r', 'utf-8-sig') as handle:
        stripped = (line.strip() for line in handle)
        return {word for word in stripped if word}


def _write_text(path, text):
    """Write *text* to *path* encoded as UTF-8, closing the file on error too."""
    with codecs.open(path, 'w', 'utf-8') as handle:
        handle.write(text)


def main(first_path=FIRST_INPUT, second_path=SECOND_INPUT,
         output_dir=OUTPUT_DIR):
    """Compare two word lists and report what they share and where they differ.

    Prints the intersection ("ins"), union ("uni") and symmetric difference
    ("dif") twice -- first as raw set reprs, then as readable space-joined
    text -- and writes the readable form to ins.txt, uni.txt and dif.txt
    inside *output_dir*.

    Args:
        first_path: path of the first one-word-per-line UTF-8 file.
        second_path: path of the second one-word-per-line UTF-8 file.
        output_dir: existing directory that receives the three result files.

    Raises:
        IOError/OSError: if an input cannot be read or an output cannot be
            written (for example because *output_dir* does not exist).
    """
    _use_utf8_default_encoding()

    first = _read_words(first_path)
    second = _read_words(second_path)

    common = first & second
    combined = first | second
    # "dif" is the symmetric difference: words present in exactly one list.
    only_one = first ^ second

    # Formatting a set with %s shows its repr(); on Python 2 the Chinese text
    # comes out as escape sequences, which is the problem being demonstrated.
    print('交集')
    print('ins: %s' % (common,))
    print('并集')
    print('uni: %s' % (combined,))
    print('差集')
    print('dif: %s' % (only_one,))
    print('交、并、差集计算 ok')

    # Joining the words yields plain text that displays as Chinese. Sorting
    # makes the output reproducible instead of following set iteration order.
    labelled = (('ins', common), ('uni', combined), ('dif', only_one))
    rendered = [(label, ' '.join(sorted(words))) for label, words in labelled]

    for label, text in rendered:
        print('\n' + label + ':')
        print(text)
    print('中文显示 ok')

    # Persist the readable form of each result.
    for label, text in rendered:
        _write_text(output_dir + '/' + label + '.txt', text)


if __name__ == "__main__":
    main()