webpy的早期版本是通过继承Python的SimpleHTTPServer实现了一个服务器,后来里面出现了一个CherryPyWSGIServer,以及它的父类HTTPServer(这是一个直接继承自object的类)。我们可以通过阅读它的源码来了解多线程Server处理请求和应答的过程;同时其中还实现了一个ThreadPool,也很有学习的价值。
先介绍下各个类
application
也就是我们项目中创建的那个application类,他主要负责把我们写的mapping传入,设置自动刷新,还有添加钩子的功能,
webpy中添加钩子分两种,请求前和请求后
def loadhook(h):
    """Build a processor that runs ``h`` before the request handler.

    :param h: zero-argument callable to run before each request is handled.
    :returns: a processor, i.e. a function taking the zero-argument
        ``handler`` callable; it calls ``h()`` first and then returns
        whatever ``handler()`` returns.
    """
    def processor(handler):
        h()
        return handler()

    return processor
请求前比较简单,预先传入一个待执行的函数,在请求之前执行
def unloadhook(h):
    """Build a processor that runs ``h`` after the request handler finishes.

    If the handler returns an ordinary value, ``h`` runs right after the
    handler returns. If it returns an iterator/generator, the result is
    wrapped so that ``h`` runs only once the iterator is exhausted (or
    raises). ``h`` also runs when the handler itself raises; the exception
    is then re-raised unchanged.

    :param h: zero-argument callable to run after each request.
    :returns: a processor taking the zero-argument ``handler`` callable.
    """
    def processor(handler):
        try:
            result = handler()
            # Accept both iterator protocols: ``next`` (Python 2) and
            # ``__next__`` (Python 3).
            is_generator = result and (hasattr(result, '__next__')
                                       or hasattr(result, 'next'))
        except:
            # run the hook even when handler raises some exception
            h()
            raise

        if is_generator:
            return wrap(result)
        else:
            h()
            return result

    def wrap(result):
        def next_hook():
            try:
                # The ``next()`` builtin works on Python 2.6+ and Python 3.
                return next(result)
            except:
                # call the hook at the end of the iterator
                h()
                raise

        result = iter(result)
        while True:
            try:
                yield next_hook()
            except StopIteration:
                # End the generator explicitly; letting StopIteration escape
                # a generator body becomes RuntimeError under PEP 479.
                return

    return processor
请求后的钩子多了一点东西,我认为是因为处理的结果不一定是普通的返回值,还有可能是一个生成器,所以需要把生成器全部迭代完成之后才可以去执行请求后的钩子
CherryPyWSGIServer
这是一个继承自HTTPServer的类,他主要的功能在于传入wsgi处理的相关对象,毕竟HTTP那边并不只有一个wsgi,在这个类的__init__方法中创建了线程池,初始化相关的参数等等
class CherryPyWSGIServer(HTTPServer):
    """HTTPServer subclass that serves one WSGI application."""

    # WSGI version tuple; used as the key into ``wsgi_gateways`` below.
    wsgi_version = (1, 0)

    def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None,
                 max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5):
        """Store the WSGI app and server settings and create the thread pool.

        :param bind_addr: address to bind; stored as ``self.bind_addr``.
        :param wsgi_app: the WSGI application callable to serve.
        :param numthreads: passed to ThreadPool as ``min`` (1 is used if
            this value is falsy).
        :param server_name: stored as ``self.server_name``; defaults to
            ``socket.gethostname()`` when not given.
        :param max: passed to ThreadPool as ``max``.
        :param request_queue_size: stored; ``start()`` passes it to
            ``socket.listen()``.
        :param timeout: stored; ``tick()`` applies it to accepted sockets.
        :param shutdown_timeout: stored as ``self.shutdown_timeout``.
        """
        self.requests = ThreadPool(self, min=numthreads or 1, max=max)
        self.wsgi_app = wsgi_app
        # Select the gateway class matching this server's WSGI version.
        self.gateway = wsgi_gateways[self.wsgi_version]

        self.bind_addr = bind_addr
        if not server_name:
            server_name = socket.gethostname()
        self.server_name = server_name
        self.request_queue_size = request_queue_size

        self.timeout = timeout
        self.shutdown_timeout = shutdown_timeout
        self.clear_stats()
HTTPServer
HTTPServer是一个比较主要的类,他负责循环的接受socket,创建HTTPConnection对象,存入队列中等待线程池中的线程进行处理
def start(self):
    # Excerpt: resolve the bind address, bind a listening socket, start the
    # worker pool, then accept connections until self.ready becomes false.
    # "......" marks code the article elided.
    ......
    host, port = self.bind_addr
    try:
        info = socket.getaddrinfo(host, port, socket.AF_UNSPEC,
                                  socket.SOCK_STREAM, 0, socket.AI_PASSIVE)
    except socket.gaierror:
        # Name resolution failed: build one candidate entry by hand,
        # choosing IPv6 when the host string contains a colon.
        if ':' in self.bind_addr[0]:
            info = [(socket.AF_INET6, socket.SOCK_STREAM,
                     0, "", self.bind_addr + (0, 0))]
        else:
            info = [(socket.AF_INET, socket.SOCK_STREAM,
                     0, "", self.bind_addr)]

    self.socket = None
    msg = "No socket could be created"
    # Try each candidate address until one binds successfully.
    for res in info:
        af, socktype, proto, canonname, sa = res
        try:
            self.bind(af, socktype, proto)
        except socket.error:
            if self.socket:
                self.socket.close()
            self.socket = None
            continue
        break
    if not self.socket:
        raise socket.error(msg)

    # One-second timeout on the listening socket; tick() returns on
    # socket.timeout, so the loop below re-checks self.ready regularly.
    self.socket.settimeout(1)
    self.socket.listen(self.request_queue_size)

    # Start the worker threads before accepting connections.
    self.requests.start()

    while self.ready:
        self.tick()
        if self.interrupt:
            while self.interrupt is True:
                # Wait for self.stop() to complete. See _set_interrupt.
                time.sleep(0.1)
            if self.interrupt:
                # self.interrupt now holds an exception; re-raise it here.
                raise self.interrupt
start()方法里面启动了线程池,并进入循环。
主要的循环集中在self.tick()那边,就是循环接受socket的方法
def tick(self):
    # "......" marks code the article elided.
    ......
    """Accept a new connection and put it on the Queue."""
    try:
        s, addr = self.socket.accept()
        if self.stats['Enabled']:
            self.stats['Accepts'] += 1
        if not self.ready:
            return

        prevent_socket_inheritance(s)
        if hasattr(s, 'settimeout'):
            s.settimeout(self.timeout)

        makefile = CP_fileobject
        # Wrap the accepted socket in a connection object and hand it to
        # the thread pool; this method does not process the request itself.
        conn = self.ConnectionClass(self, s, makefile)
        self.requests.put(conn)
    except socket.timeout:
        # No connection arrived within the listening socket's timeout.
        return
    except socket.error, x:
        if self.stats['Enabled']:
            self.stats['Socket Errors'] += 1
        # Error codes in these three collections are ignored so the accept
        # loop keeps running; any other socket error is re-raised.
        if x.args[0] in socket_error_eintr:
            return
        if x.args[0] in socket_errors_nonblocking:
            return
        if x.args[0] in socket_errors_to_ignore:
            return
        raise
    ......
可以看到这个方法做了很多的异常处理,以防止我们的Server挂掉,每接收到一个socket他会生成一个HTTPConnection对象,放入线程池中,然后。。。。。。下次循环接受socket。为什么他没有处理?_?,我一开始疑惑了好一会,后来想到了线程池,多线程嘛,别的线程去处理他了,下面就可以看到了。
Gateway
网关……可以这么翻译吧。这是一个父类,子类有WSGIGateway、WSGIGateway_10、WSGIGateway_u0,目的也是为了方便对接wsgi接口,把HTTPServer独立出来,
def get_environ(self):
    """Return a new environ dict targeting the given wsgi.version"""
    req = self.req
    # Every value below is read from the request, its connection, or its
    # server; the multiprocess/multithread/run_once flags are constants.
    env = {
        # set a non-standard environ entry so the WSGI app can know what
        # the *real* server protocol is (and what features to support).
        # See http://www.faqs.org/rfcs/rfc2145.html.
        'ACTUAL_SERVER_PROTOCOL': req.server.protocol,
        'PATH_INFO': req.path,
        'QUERY_STRING': req.qs,
        'REMOTE_ADDR': req.conn.remote_addr or '',
        'REMOTE_PORT': str(req.conn.remote_port or ''),
        'REQUEST_METHOD': req.method,
        'REQUEST_URI': req.uri,
        'SCRIPT_NAME': '',
        'SERVER_NAME': req.server.server_name,
        # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol.
        'SERVER_PROTOCOL': req.request_protocol,
        'SERVER_SOFTWARE': req.server.software,
        'wsgi.errors': sys.stderr,
        'wsgi.input': req.rfile,
        'wsgi.multiprocess': False,
        'wsgi.multithread': True,
        'wsgi.run_once': False,
        'wsgi.url_scheme': req.scheme,
        'wsgi.version': (1, 0),
    }
    # NOTE(review): this excerpt ends before any return statement.
可以取得wsgi接口的相关参数
ThreadPool
这是我们可(you)爱(yong)的线程池同学,
def __init__(self, server, min=10, max=-1):
    """Set up an empty pool; no threads are created here.

    :param server: server object, later passed to each WorkerThread
        created by ``grow()``.
    :param min: stored as ``self.min``; ``shrink()`` does not reduce the
        pool below this size.
    :param max: stored as ``self.max``; ``grow()`` enforces it only when
        it is greater than zero.
    """
    self.server = server
    self.min = min
    self.max = max
    self._threads = []
    # Connections (and shutdown sentinels) waiting for a worker.
    self._queue = Queue.Queue()
    # Expose the queue's get() directly as pool.get().
    self.get = self._queue.get
self._threads里面就是处理用的线程,也就是下面的WorkerThread,不停的循环处理新来的连接。
self._queue里面就是待处理的任务,来一个消灭(处理)一个。
其中有个控制线程池中线程个数的方法
添加新的线程
def grow(self, amount):
    """Start up to ``amount`` new worker threads.

    Stops early once the pool holds ``self.max`` threads; the limit
    applies only when ``self.max`` is greater than zero.

    :param amount: number of worker threads to try to add.
    """
    for _ in range(amount):
        if self.max > 0 and len(self._threads) >= self.max:
            break
        worker = WorkerThread(self.server)
        worker.setName("CP Server " + worker.getName())
        self._threads.append(worker)
        worker.start()
直接添加即可。
假如要减少线程呢,可不能来硬的,线程同学还要和他好好相处呢
def shrink(self, amount):
    """Ask up to ``amount`` worker threads to exit.

    Dead threads are dropped from the pool first, and each one counts
    toward ``amount``. For the remainder, one shutdown request per thread
    is queued, never taking the pool below ``self.min``.

    :param amount: number of threads to remove.
    """
    # Build the survivor list instead of calling remove() while iterating:
    # removing during iteration skips the element after each removal, so
    # consecutive dead threads were left in the pool.
    alive = [t for t in self._threads if t.isAlive()]
    amount -= len(self._threads) - len(alive)
    # Slice-assign so the existing list object is updated in place.
    self._threads[:] = alive

    if amount > 0:
        for _ in range(min(amount, len(self._threads) - self.min)):
            # Put one shutdown request on the queue per thread to remove;
            # the worker that dequeues it returns from its run loop.
            self._queue.put(_SHUTDOWNREQUEST)
在任务队列中添加_SHUTDOWNREQUEST
# Sentinel that ThreadPool.shrink() puts on the queue; a worker that dequeues
# it returns from WorkerThread.run() instead of handling a connection.
_SHUTDOWNREQUEST = None
一个代表你可以卷铺盖走人了的东西,线程同学忙完了手中的活,接到一张炒鱿鱼通知单。return了,这样线程就减少了,而且也不会影响正在工作的其他同学。怎么感觉好像公司裁员,,希望我以后不会被这样^_^。学生党还是对以后的职业充满希望的呢
WorkerThread
这个是我们的线程同学,
def run(self):
    # Worker loop: block on the pool's queue, serve each connection taken
    # from it, and exit when the shutdown sentinel is received.
    self.server.stats['Worker Threads'][self.getName()] = self.stats
    try:
        self.ready = True
        while True:
            # Blocks until a connection (or the sentinel) is available.
            conn = self.server.requests.get()
            if conn is _SHUTDOWNREQUEST:
                return

            self.conn = conn
            if self.server.stats['Enabled']:
                self.start_time = time.time()
            try:
                conn.communicate()
            finally:
                # Always close the connection, then fold its counters into
                # this worker's totals when stats are enabled.
                conn.close()
                if self.server.stats['Enabled']:
                    self.requests_seen += self.conn.requests_seen
                    self.bytes_read += self.conn.rfile.bytes_read
                    self.bytes_written += self.conn.wfile.bytes_written
                    self.work_time += time.time() - self.start_time
                    self.start_time = None
                self.conn = None
    except (KeyboardInterrupt, SystemExit), exc:
        # Hand the exception to the server; HTTPServer.start() checks
        # self.interrupt in its accept loop and raises it there.
        self.server.interrupt = exc
run方法循环从队列中取出HTTPConnection处理,娶(取)不到?等着呗,单身狗没人权T^T。为了验证,我特意在self.server.requests.get()的上下都打了断点:只有一个请求来了之后,下面的断点才会断住,说明线程确实是阻塞在get()上等待任务的。当然,如果收到的任务是上面所说的通知书的话,就return吧;有任务需要处理就处理,调用communicate()
HTTPConnection
def __init__(self, server, sock, makefile=CP_fileobject):
    """Wrap an accepted socket in buffered read and write file objects.

    :param server: the server that accepted the connection.
    :param sock: the accepted socket.
    :param makefile: factory called as ``makefile(sock, mode, bufsize)``
        to build the read ("rb") and write ("wb") file objects.
    """
    self.server = server
    self.socket = sock
    self.rfile = makefile(sock, "rb", self.rbufsize)
    self.wfile = makefile(sock, "wb", self.wbufsize)
    # Incremented by communicate() when server stats are enabled.
    self.requests_seen = 0
连接对象初始化时,把socket进行了打包,方便处理
def communicate(self):
    # Serve requests on this connection in a loop: parse, respond, repeat,
    # until a request is not ready or asks to close the connection.
    # NOTE(review): the excerpt is truncated; the handlers belonging to
    # this ``try`` are not shown.
    request_seen = False
    try:
        while True:
            req = None
            req = self.RequestHandlerClass(self.server, self)

            req.parse_request()
            if self.server.stats['Enabled']:
                self.requests_seen += 1
            if not req.ready:
                # parse_request() did not mark the request ready; stop.
                return

            request_seen = True
            req.respond()
            if req.close_connection:
                return
此处的RequestHandlerClass即
# Handler class that communicate() instantiates for each request on a
# connection.
RequestHandlerClass = HTTPRequest
工作还是得交给request本神来做,这种面向对象的功能结构方法很有借鉴的价值~
HTTPRequest
def parse_request(self):
    """Read the request line and headers; set ``self.ready`` on success.

    The connection's read file is wrapped in a SizeCheckWrapper limited
    to ``server.max_request_header_size``. If that limit is exceeded, a
    414 (request line) or 413 (headers) response is sent and the method
    returns without setting ``self.ready``. It also returns early when
    ``read_request_headers()`` reports failure.
    """
    self.rfile = SizeCheckWrapper(self.conn.rfile,
                                  self.server.max_request_header_size)
    try:
        self.read_request_line()
    except MaxSizeExceeded:
        self.simple_response("414 Request-URI Too Long",
            "The Request-URI sent with the request exceeds the maximum "
            "allowed bytes.")
        return

    try:
        success = self.read_request_headers()
    except MaxSizeExceeded:
        self.simple_response("413 Request Entity Too Large",
            "The headers sent with the request exceed the maximum "
            "allowed bytes.")
        return
    else:
        if not success:
            return

    self.ready = True
这个方法读取HTTP相关信息,至于HTTP的结构可以看 http://www.blogjava.net/zjusuyong/articles/304788.html 这篇文章,博主图文并茂的展示,非常适合了解HTTP的结构
读取完信息,该处理了,到现在我们自己写的函数还没看到呢,下面就是了
def respond(self):
    """Prepare the request body reader, then let the gateway respond.

    If the declared Content-Length exceeds ``server.max_request_body_size``
    a 413 response is sent (unless headers were already sent) and the
    gateway is not invoked. After the gateway returns, headers are sent
    if still pending, and the terminating chunk is written for chunked
    responses.
    """
    mrbs = self.server.max_request_body_size
    if self.chunked_read:
        self.rfile = ChunkedRFile(self.conn.rfile, mrbs)
    else:
        cl = int(self.inheaders.get("Content-Length", 0))
        # A falsy limit disables the size check.
        if mrbs and mrbs < cl:
            if not self.sent_headers:
                self.simple_response("413 Request Entity Too Large",
                    "The entity sent with the request exceeds the maximum "
                    "allowed bytes.")
            return
        self.rfile = KnownLengthRFile(self.conn.rfile, cl)

    # Delegate to the gateway, which calls the WSGI application.
    self.server.gateway(self).respond()

    if (self.ready and not self.sent_headers):
        self.sent_headers = True
        self.send_headers()
    if self.chunked_write:
        # Zero-length chunk terminates a chunked response body.
        self.conn.wfile.sendall("0\r\n\r\n")
又交给别人去了-_-!,交给网关去了,我们去看看网关的respond()
def respond(self):
    """Call the WSGI application and write out every non-empty chunk.

    Unicode chunks are encoded as ISO-8859-1 before being written. The
    response's ``close()`` method, if it has one, is always called, even
    when iteration or writing raises.
    """
    response = self.req.server.wsgi_app(self.env, self.start_response)
    try:
        for chunk in response:
            # Empty chunks are skipped entirely.
            if chunk:
                if isinstance(chunk, unicode):
                    chunk = chunk.encode('ISO-8859-1')
                self.write(chunk)
    finally:
        if hasattr(response, "close"):
            response.close()
哇,终于看到app这三个字母了,wsgi_app(self.env, self.start_response)很明显的wsgi接口
注意,response可能是一个list(不是list的话他会被打包成list),也可能是一个生成器(generator),比如大文件的下载,所以下面用了for循环来处理,然后我们看看app那边是怎么个弄法
找到server.wsgi_app这个函数对象最初被赋值的地方
class CherryPyWSGIServer(HTTPServer):
    def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None,
                 max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5):
        # Abbreviated excerpt: only the line that stores the WSGI callable
        # passed in by the caller is repeated here.
        self.wsgi_app = wsgi_app
不是这里,往上
def runsimple(func, server_address=("0.0.0.0", 8080)):
    # Excerpt: wrap the WSGI callable in two middleware layers, then build
    # the server around the wrapped callable and keep it in a module global.
    global server
    func = StaticMiddleware(func)
    func = LogMiddleware(func)  # outermost layer; wraps StaticMiddleware
    server = WSGIServer(server_address, func)
也不是这里,但我们从这里可以看到中间件实现的方式,函数包装,得益于Python动态语法,非常的方便
def run(self, *middleware):
    # Build the WSGI callable (with any middleware applied) and pass it to
    # wsgi.runwsgi().
    return wsgi.runwsgi(self.wsgifunc(*middleware))
......
def runwsgi(func):
    # Abbreviated excerpt: forwards the WSGI callable to
    # httpserver.runsimple(); where server_addr comes from is not shown.
    return httpserver.runsimple(func, server_addr)
已经到了run()方法了,最顶层了
这个self.wsgifunc又是什么
def wsgifunc(self, *middleware):
    """Returns a WSGI-compatible function for this application."""
    def peep(iterator):
        """Peeps into an iterator by doing an iteration
        and returns an equivalent iterator.
        """
        # wsgi requires the headers first
        # so we need to do an iteration
        # and save the result for later
        try:
            firstchunk = iterator.next()
        except StopIteration:
            firstchunk = ''

        return itertools.chain([firstchunk], iterator)

    def is_generator(x): return x and hasattr(x, 'next')

    def wsgi(env, start_resp):
        # clear threadlocal to avoid interference of previous requests
        self._cleanup()

        self.load(env)
        try:
            # allow uppercase methods only
            if web.ctx.method.upper() != web.ctx.method:
                raise web.nomethod()

            result = self.handle_with_processors()
            if is_generator(result):
                # Pull the first chunk now, before the status and headers
                # are read from web.ctx below.
                result = peep(result)
            else:
                # Wrap a plain return value so it can be iterated.
                result = [result]
        except web.HTTPError, e:
            # An HTTPError's data becomes the response body.
            result = [e.data]

        result = web.safestr(iter(result))

        status, headers = web.ctx.status, web.ctx.headers
        start_resp(status, headers)

        def cleanup():
            # Chained after the body, so self._cleanup() runs only once the
            # body has been fully iterated.
            self._cleanup()
            yield '' # force this function to be a generator

        return itertools.chain(result, cleanup())

    # Each middleware wraps the callable built so far; the last one given
    # ends up outermost.
    for m in middleware:
        wsgi = m(wsgi)

    return wsgi
历经千辛万苦,我们终于找到了app的藏身之处,下面是,撒显影之尘!不是不是,我们要看看这个函数的包装的方式,
一看wsgi方法,果然参数是wsgi接口的两个东西,
最后几行,可以看到中间件的包装方式,下面我们要找到我们一开始传入的mappings的地方,
注意到
result = self.handle_with_processors()
这个方法,
def handle_with_processors(self):
    # Run the request through self.processors in order; each processor
    # receives a zero-argument callable that continues with the remaining
    # processors and finally calls self.handle().
    def process(processors):
        try:
            if processors:
                # Peel off the first processor and give it a continuation
                # covering the rest of the chain.
                p, processors = processors[0], processors[1:]
                return p(lambda: process(processors))
            else:
                return self.handle()
        except web.HTTPError:
            raise
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            # Any other exception: write the traceback to web.debug and
            # raise the application's internal-error response instead.
            print >> web.debug, traceback.format_exc()
            raise self.internalerror()

    return process(self.processors)
是handle()方法的调用处理了那个mapping
def handle(self):
    """Match the request path against the URL mapping and dispatch it.

    :returns: whatever ``self._delegate`` returns for the matched target.
    """
    fn, args = self._match(self.mapping, web.ctx.path)
    # The original snippet was missing the closing parenthesis here.
    return self._delegate(fn, self.fvars, args)
def _match(self, mapping, value):
    """Find the first mapping entry whose pattern matches all of ``value``.

    :param mapping: iterable of ``(pattern, target)`` pairs.
    :param value: the string to match (the request path in ``handle()``).
    :returns: ``(target, groups)`` for the first match, where ``groups``
        is the list of regex groups; ``(None, None)`` if nothing matches.
    """
    for pat, what in mapping:
        # Patterns are anchored at both ends, so they must match the
        # whole value.
        if isinstance(what, basestring):
            # String target: utils.re_subm returns both the substituted
            # target and the match object.
            what, result = utils.re_subm('^' + pat + '$', what, value)
        else:
            result = utils.re_compile('^' + pat + '$').match(value)

        if result:  # it's a match
            return what, list(result.groups())
    return None, None
这边循环地用正则去匹配url,提取url中的分组,作为参数传入
def _delegate(self, f, fvars, args=[]):
    # Excerpt: only the nested handle_class helper is shown; the code that
    # decides when to call it is omitted.
    # NOTE(review): ``args=[]`` is a mutable default shared across calls;
    # the visible code only reads it.
    def handle_class(cls):
        # Call the method of ``cls`` named after the HTTP verb, passing
        # the captured URL groups as positional arguments.
        meth = web.ctx.method
        # A HEAD request falls back to GET when the class has no HEAD.
        if meth == 'HEAD' and not hasattr(cls, meth):
            meth = 'GET'
        if not hasattr(cls, meth):
            raise web.nomethod(cls)
        tocall = getattr(cls(), meth)
        return tocall(*args)
tocall(*args)就是调用我们写的class进行处理,
over,分析源代码结束,谢谢观赏~