通过正则表达式匹配网页中的指定字符串,提取相应内容,生成 m3u 格式的播放列表。
Python2.7代码如下:
# -*- coding: UTF-8 -*-
import httplib2
import re
import time
# NOTE: this script targets Python 2.7, where plain string literals and the
# HTTP response body are byte strings. The single-argument print(...) calls
# below produce the same output as the print statement there.

# Page whose links are scraped for playable streams.
url = 'http://tv.djtu.edu.cn'

# Captures two groups per line of HTML: the first quoted argument of a
# doGo('...') call, and the text sitting right before the closing </a>.
PLAY_LINK_RE = re.compile(r'doGo\(\'(\S+)\'.*>(.*)</a>')


def extract_play_list(page):
    """Return a list of ``(address, title)`` tuples found in *page*.

    *page* is the HTML text to scan. ``address`` is the first quoted
    argument of a ``doGo('...')`` call and ``title`` is the link text
    preceding ``</a>`` on the same line. An empty list is returned when
    nothing matches.
    """
    return PLAY_LINK_RE.findall(page)


def build_m3u(play_list):
    """Return the m3u playlist text for *play_list*.

    *play_list* is an iterable of ``(address, title)`` pairs. The result
    starts with the ``#EXTM3U`` header; each entry contributes an
    ``#EXTINF`` line carrying the title, the address itself, and a VLC
    ``#EXTVLCOPT`` option line. Every line, including the last one, ends
    with a newline.
    """
    lines = ['#EXTM3U']
    for address, title in play_list:
        lines.append('#EXTINF:0,' + title)
        lines.append(address)
        lines.append('#EXTVLCOPT:network-caching=1000')
    # Collect the pieces and join once instead of growing a string in a loop.
    return '\n'.join(lines) + '\n'


def main():
    """Download the page at ``url`` and save its streams as an .m3u file.

    Progress (URL, response headers, page body, file name and every
    extracted entry) is printed along the way. The playlist is written to
    the current working directory.
    """
    h = httplib2.Http()
    print(url)
    resp, content = h.request(url)
    print(resp)
    # The body is decoded as GB2312 and re-encoded so that the rest of the
    # script (and the written playlist) works with UTF-8 bytes.
    content = content.decode('gb2312').encode('utf-8')
    print(content)
    play_list = extract_play_list(content)

    # The file name carries the current local year and month, e.g. 201405.
    filename = '大连交大iptv播放列表' + time.strftime('%Y%m') + '.m3u'
    print(filename)
    for address, title in play_list:
        print(address)
        print(title)
    result = build_m3u(play_list)

    # The name is converted to GB2312 bytes before opening the file --
    # presumably so it shows up correctly on a Chinese-locale file system;
    # NOTE(review): confirm the target platform.
    filename = filename.decode('utf-8').encode('gb2312')
    # "with" guarantees the file is flushed and closed even if write() fails.
    with open(filename, 'w') as fileobj:
        fileobj.write(result)
    print('写入完成')


if __name__ == '__main__':
    main()