Original article: http://fc-lamp.blog.163.com/blog/static/17456668720127221363513/
Multithreading comes up all the time in software development. A download manager such as FlashGet uses it, and an online video player such as RealPlayer needs it because it has to download the media stream and play it at the same time. There are plenty of similar examples.
1) How do you create a thread object in Python?
import threading

class MyThread(threading.Thread):
    def __init__(self, threadname):
        threading.Thread.__init__(self, name=threadname)
The code a thread executes goes in its run() method:

import threading, time

class MyThread(threading.Thread):
    def __init__(self, threadname):
        threading.Thread.__init__(self, name=threadname)

    def run(self):
        '''
        the run method
        '''
        for i in xrange(10):
            print self.getName(), i
            time.sleep(1)
Once a thread object has been created it is in the "born" state. How do we make it start running? Just call the thread object's start() method:

my = MyThread('test')
my.start()
The thread is then in the "ready" (also called "runnable") state. Putting it all together:

import threading, time

class MyThread(threading.Thread):
    def __init__(self, threadname):
        threading.Thread.__init__(self, name=threadname)

    def run(self):
        for i in xrange(10):
            print self.getName(), i
            time.sleep(1)

my = MyThread('test')
my.start()
A thread can be marked as a daemon with setDaemon(True); the interpreter does not wait for daemon threads, so they are killed as soon as the main thread exits:

t1 = MyThread('t1')
print t1.getName(), t1.isDaemon()
t1.setDaemon(True)    # must be called before start()
print t1.getName(), t1.isDaemon()
t1.start()
print 'main thread exit'
Get a reference to the thread that is currently running:
running = threading.currentThread()
Get a list of all currently active thread objects (any thread whose run() method has started but has not yet terminated):
threadlist = threading.enumerate()
Get the number of threads in that list:
threadcount = threading.activeCount()
To check the state of a single thread object, call its isAlive() method; it returns True while the thread is "runnable" and has not yet "died". Note that isAlive() is a method of the thread object, not of the threading module:

threadflag = t1.isAlive()
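Putting these introspection calls together, here is a minimal sketch (assuming the MyThread class from the example above is already defined; the names a and b are just illustrative):

import threading

# assumes the MyThread class from the example above
a = MyThread('a')
b = MyThread('b')
a.start()
b.start()

print threading.currentThread().getName()   # 'MainThread'
for t in threading.enumerate():             # every live thread, the main thread included
    print t.getName(), t.isAlive()
print threading.activeCount()               # typically 3 here: the main thread plus a and b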
Threads usually exchange work through the thread-safe Queue module. Create a queue object (the optional maxsize argument limits how many items it can hold):

import Queue
myqueue = Queue.Queue(maxsize = 10)

Calling the queue object's put() method adds an item to the tail of the queue:

myqueue.put(10)
Calling the queue object's get() method removes and returns an item from the head of the queue. The optional block argument defaults to 1 (true): if the queue is empty and block is true, get() suspends the calling thread until an item becomes available; if block is 0, get() raises the Empty exception instead.

myqueue.get()
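To make the blocking behaviour concrete, here is a minimal producer/consumer sketch; the worker and producer functions are illustrative names, not something from the original post:

import Queue, threading, time

q = Queue.Queue(maxsize=5)

def worker():
    while True:
        item = q.get()        # blocks until an item is available
        print 'consumed', item
        q.task_done()         # tell the queue this item is finished

def producer():
    for i in xrange(10):
        q.put(i)              # blocks if the queue already holds 5 items
        print 'produced', i
        time.sleep(0.1)

t = threading.Thread(target=worker)
t.setDaemon(True)             # the worker dies with the main thread
t.start()
producer()
q.join()                      # wait until every item has been marked done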
Now a multithreaded crawler that puts these pieces together:

#coding:utf-8
'''
Today we look at multithreading in Python, using a multithreaded crawler as the experiment.
@author FC_LAMP
A few points worth noting:
1) Threads share state, memory and resources, and it is easy for them to communicate with each other.
'''
import threading, urllib2
import datetime, time
import Queue

hosts = ['http://www.baidu.com', 'http://news.163.com/', 'http://weibo.com/u/2043979614', 'http://fc-lamp.blog.163.com']

class ThreadClass(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        '''
        the run method holds the work to be executed
        '''
        # getName() returns the thread's name
        while True:
            # take a task from the queue
            host = self.queue.get()
            # fetch the page
            url = urllib2.urlopen(host)
            print url.read(500)
            # mark this queue task as done
            self.queue.task_done()

def main():
    # create the queue instance
    queue = Queue.Queue()
    # build a pool of worker threads
    for i in range(len(hosts)):
        t = ThreadClass(queue)
        # child threads exit immediately when the main program exits
        t.setDaemon(True)
        # start the thread
        t.start()
    # fill the queue with work
    for host in hosts:
        queue.put(host)
    # exit the main program only after every task is done
    queue.join()

if __name__ == '__main__':
    st = time.time()
    main()
    print '%f' % (time.time() - st)

Because the workers are daemon threads, the main program blocks on queue.join() until every host has been marked with task_done(), then exits and the workers die with it.

A multithreaded socket server:
#coding:utf-8
import socket
import sys
import time
import Queue
import threading

host = 'localhost'
port = 8000

# create the socket object
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

# bind to a specific address and port
try:
    s.bind((host, port))
except Exception as e:
    print 'Bind Failed:%s' % (str(e))
    sys.exit()
print 'Socket bind complete!!'

# listen for connections, with a backlog of 10
s.listen(10)

# queue of accepted connections
queue = Queue.Queue()

# worker thread: serves connections taken from the queue
class TaskThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        while 1:
            t = queue.get()
            t.send('welcome.....')
            # receive data and echo it back
            client_data = t.recv(1024)
            t.sendall(client_data)
            # mark the connection as handled (needed for queue.join())
            queue.task_done()
            # release the resources
            #t.close()

# accept connections
while 1:
    # put each new connection into the queue
    conn, addr = s.accept()
    print 'Connected from %s:%s' % (addr[0], str(addr[1]))
    queue.put(conn)
    # spawn a worker thread for it
    th = TaskThread()
    th.setDaemon(True)
    th.start()

# never reached, because the accept loop above runs forever
queue.join()
s.close()
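To try the server out, here is a minimal client sketch (not part of the original post; it assumes the server above is listening on localhost:8000):

#coding:utf-8
import socket

c = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
c.connect(('localhost', 8000))
print c.recv(1024)        # the welcome message sent by the server
c.sendall('hello server')
print c.recv(1024)        # the server echoes the data back
c.close()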
Finally, a multithreaded image scraper (the target site is masked as http://www.xxx.com in the original):

#coding:utf-8
'''
@author:FC_LAMP
'''
import urllib2, urllib, socket
import os, re, threading, Queue
import cookielib, time, Image as image
import StringIO

# 30-second timeout for requests
socket.setdefaulttimeout(30)

# detail-page spider
class spiderDetailThread(threading.Thread):
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2',
        'Referer': 'http://www.xxx.com'  # some image site
    }
    dir_path = 'D:/test/'

    def __init__(self, queue):
        threading.Thread.__init__(self)
        cookie = cookielib.CookieJar()
        cookieproc = urllib2.HTTPCookieProcessor(cookie)
        urllib2.install_opener(urllib2.build_opener(cookieproc))
        self.queue = queue
        self.dir_path = dir_address  # global entered by the user in __main__

    def run(self):
        while True:
            urls = self.queue.get()
            for url in urls:
                res = urllib2.urlopen(urllib2.Request(url=url, headers=self.header)).read()
                patt = re.compile(r'<title>([^<]+)<\/title>', re.I)
                patt = patt.search(res)
                if patt is None:
                    continue
                # get the title and strip characters that are illegal in file names
                title = patt.group(1).split('_')[0]
                for i in ['\\', '/', ':', '*', '?', '"', "'", '<', '>', '|']:
                    title = title.replace(i, '')
                title = unicode(title, 'utf-8').encode('gbk')
                print title
                # get the images
                cid = url.split('/')[-1].split('c')[-1].split('.')[0]
                patt = re.compile(r'new\s+Array\(".*?<div[^>]+>(.*?)<\/div>"\)', re.I | re.S)
                patt = patt.search(res)
                if not patt:
                    continue
                patt = patt.group(1)
                src_patt = re.compile(r'.*?src=\'(.*?)\'.*?', re.I | re.S)
                src_patt = src_patt.findall(patt)
                if not src_patt:
                    continue
                # create the target directory
                try:
                    path = os.path.join(self.dir_path, title)
                    if not os.path.exists(path):
                        os.makedirs(path)
                except Exception as e:
                    pass
                if not os.path.exists(path):
                    continue
                for src in src_patt:
                    name = src.split('/')[-1]
                    # small image
                    s_path = os.path.join(path, name)
                    img = urllib2.urlopen(src).read()
                    im = image.open(StringIO.StringIO(img))
                    im.save(s_path)
                    # medium image
                    src = src.replace('_s.', '_r.')
                    name = src.split('/')[-1]
                    m_path = os.path.join(path, name)
                    img = urllib2.urlopen(src).read()
                    im = image.open(StringIO.StringIO(img))
                    im.save(m_path)
                    # large image
                    src = src.replace('smallcase', 'case')
                    src = src.replace('_r.', '.')
                    name = src.split('/')[-1]
                    b_path = os.path.join(path, name)
                    img = urllib2.urlopen(src).read()
                    im = image.open(StringIO.StringIO(img))
                    im.save(b_path)
            self.queue.task_done()

# list-page spider
class spiderlistThread(threading.Thread):
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2',
        'Referer': 'http://www.xxx.com'  # some image site
    }

    def __init__(self, queue, url):
        threading.Thread.__init__(self)
        cookie = cookielib.CookieJar()
        cookieproc = urllib2.HTTPCookieProcessor(cookie)
        urllib2.install_opener(urllib2.build_opener(cookieproc))
        self.queue = queue
        self.url = url

    def run(self):
        i = 1
        while 1:
            url = '%slist0-%d.html' % (self.url, i)
            res = urllib2.urlopen(urllib2.Request(url=url, headers=self.header)).read()
            patt = re.compile(r'<ul\s+id="container"[^>]+>(.*?)<\/ul>', re.I | re.S)
            patt = patt.search(res)
            if not patt:
                break
            res = patt.group(1)
            patt = re.compile(r'<label\s+class="a">.*?href="(.*?)".*?<\/label>', re.I | re.S)
            patt = patt.findall(res)
            if not patt:
                break
            # hand this batch of detail-page URLs to the detail spider
            self.queue.put(patt)
            i += 1
            time.sleep(3)

'''
Multithreaded image scraping
'''
if __name__ == '__main__':
    print unicode('---======= Image scraper =====----\nPlease enter a directory to save the images (use a path like D:/xxx/, otherwise unexpected errors may occur).\nIf nothing is entered, images are saved to D:/test/ and the folder is created automatically', 'utf-8').encode('gbk')
    dir_address = raw_input(u'Path (press Enter to confirm): '.encode('gbk')).strip()
    print unicode('Starting the scrape.......', 'utf-8').encode('gbk')
    if not dir_address:
        dir_address = 'D:/test/'
    if not os.path.exists(dir_address):
        # try to create the (possibly multi-level) directory
        try:
            os.makedirs(dir_address)
        except Exception as e:
            raise Exception(u'Cannot create directory %s' % (dir_address))
    url = 'http://www.xxx.com/'  # some image site
    queue = Queue.Queue()

    t1 = spiderlistThread(queue, url)
    t1.setDaemon(True)
    t1.start()
    t2 = spiderDetailThread(queue)
    t2.setDaemon(True)
    t2.start()
    # keep the main thread alive; the spiders are daemon threads
    while 1:
        time.sleep(1)
