ex1
recv 函数会返回接收的数据长度,如果返回值小于你给它的参数,说明已经接收完了所有数据,否则说明仍然有数据需要接收,你应该再次调用它来接收数据,用循环把它改成能正确接收所有数据。
import socket
# Exercise 1: fetch http://g.cn/ over a raw TCP socket and print the complete
# response.
host = 'g.cn'
# "Connection: close" asks the server to close the connection once the
# response has been sent, so end-of-stream reliably marks the end of the data.
request = b'GET / HTTP/1.1\r\nhost:g.cn\r\nConnection: close\r\n\r\n'
size = 1024
chunks = []
# The with-block closes the socket even if connecting or reading fails.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    s.connect((host, 80))
    ip, port = s.getsockname()
    print('local ip and port {0} {1}'.format(ip, port))
    # sendall() keeps writing until the whole request is sent; send() may
    # transmit only part of it.
    s.sendall(request)
    # recv() may return fewer than `size` bytes while more data is still on
    # its way, so a short read is not an end marker. Only an empty result
    # means the peer has closed the connection.
    while True:
        part = s.recv(size)
        if not part:
            break
        chunks.append(part)
response = b''.join(chunks)
# The body is not guaranteed to be UTF-8; replace undecodable bytes instead of
# crashing while printing.
print(response.decode('utf-8', errors='replace'))
ex2
把向服务器发送 HTTP 请求并且获得数据这个过程封装成函数
定义如下
def get(url):
url 格式为 http://g.cn/
返回的数据类型为 bytes
测试代码
url = 'http://movie.douban.com/top250'
response = get(url)
r = response.decode('utf-8')
print(r)
import socket
def get(url):
    """Send an HTTP GET request for *url* over a raw TCP socket.

    Args:
        url: Absolute URL such as ``http://g.cn/``. The path may be omitted
            (``/`` is requested then) and an explicit port may be given
            (port 80 is used otherwise).

    Returns:
        The raw response (status line, headers and body) as ``bytes``.

    Raises:
        ValueError: If no host can be extracted from *url* or the port is
            not a valid number.
        OSError: If connecting, sending or receiving fails.
    """
    # Function-scope import keeps this definition self-contained.
    from urllib.parse import urlsplit

    parts = urlsplit(url)
    host = parts.hostname
    if not host:
        raise ValueError('no host found in url: {0!r}'.format(url))
    remote_port = parts.port or 80
    # Keep the path without its leading slash; the request line adds it back.
    # A URL with no path at all therefore requests "/".
    path = parts.path[1:]
    if parts.query:
        path = path + '?' + parts.query
    # A non-default port has to be repeated in the Host header.
    host_header = host if remote_port == 80 else '{0}:{1}'.format(host, remote_port)

    # The with-block closes the socket on every exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect((host, remote_port))
        ip, port = s.getsockname()
        print('local ip and port {0} {1}'.format(ip, port))
        # "Connection: close" makes the server close the connection after the
        # response, so end-of-stream marks the end of the data.
        request = ('GET /' + path + ' HTTP/1.1\r\nhost:' + host_header
                   + '\r\nConnection: close\r\n\r\n')
        print(request)
        # sendall() retries until the whole request has been written.
        s.sendall(request.encode('utf-8'))
        size = 4096
        chunks = []
        # A short read does not mean the response is complete; only an empty
        # result from recv() signals that the peer closed the connection.
        while True:
            part = s.recv(size)
            if not part:
                break
            chunks.append(part)
    return b''.join(chunks)
# Exercise 2 driver: download the page with get() and print it as text.
url = 'http://movie.douban.com/top250'
response = get(url)
# get() returns raw bytes; turn them into text before printing.
r = str(response, 'utf-8')
print(r)
ex3
get 函数接受以下这种参数
g.cn
没有协议名的情况下默认用HTTP
没有路径的情况下默认路径是 /
接收带端口的URL
http://g.cn:80/
用 URL 中指定的端口来进行 socket 连接
import socket
def get(url):
    """Send an HTTP GET request for *url* over a raw TCP socket.

    Args:
        url: Target address. The ``http://`` prefix is optional (plain HTTP
            is assumed), the path is optional (``/`` is requested) and the
            host may carry an explicit port, e.g. ``g.cn``,
            ``http://g.cn:80/`` or ``g.cn:8080/index.html``.

    Returns:
        The raw response (status line, headers and body) as ``bytes``.

    Raises:
        ValueError: If *url* uses ``https://``, contains no host, or has an
            invalid port.
        OSError: If connecting, sending or receiving fails.
    """
    # Function-scope import keeps this definition self-contained.
    from urllib.parse import urlsplit

    lowered = url.lower()
    if lowered.startswith('https://'):
        raise ValueError('only plain http is supported: {0!r}'.format(url))
    # Only a real prefix counts as the scheme; "http://" appearing later in
    # the URL (for example inside a query string) must not be stripped.
    if not lowered.startswith('http://'):
        url = 'http://' + url

    parts = urlsplit(url)
    host = parts.hostname
    if not host:
        raise ValueError('no host found in url: {0!r}'.format(url))
    port1 = parts.port or 80
    # Keep the path without its leading slash; the request line adds it back.
    path = parts.path[1:]
    if parts.query:
        path = path + '?' + parts.query
    print('host=%s,path=%s,port=%s' % (host, path, port1))
    # A non-default port has to be repeated in the Host header.
    host_header = host if port1 == 80 else '{0}:{1}'.format(host, port1)

    # The with-block closes the socket on every exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect((host, port1))
        ip, port = s.getsockname()
        print('local ip and port {0} {1}'.format(ip, port))
        # "Connection: close" makes the server close the connection after the
        # response, so end-of-stream marks the end of the data.
        request = ('GET /' + path + ' HTTP/1.1\r\nhost:' + host_header
                   + '\r\nConnection: close\r\n\r\n')
        print(request)
        # sendall() retries until the whole request has been written.
        s.sendall(request.encode('utf-8'))
        size = 4096
        chunks = []
        # A short read does not mean the response is complete; only an empty
        # result from recv() signals that the peer closed the connection.
        while True:
            part = s.recv(size)
            if not part:
                break
            chunks.append(part)
    return b''.join(chunks)
# Exercise 3 driver: request a URL that carries an explicit port and print the
# response as text.
url = 'http://g.cn:80/'
response = get(url)
# get() returns raw bytes; turn them into text before printing.
r = str(response, 'utf-8')
print(r)
# Alternative way to split `url` into host, port1 and path (a drop-in
# replacement for the parsing section of get()). The scheme, the ":port"
# suffix and the path are all optional.
head, sep, tail = url.partition('://')
# Treat the prefix as a scheme only when it comes before any '/'; a "://"
# that shows up later (e.g. inside a query string) belongs to the path.
if sep and '/' not in head:
    remainder = tail
else:
    remainder = url
# Everything before the first '/' is "host[:port]"; the rest is the path,
# kept without its leading slash and with its inner slashes intact.
authority, _, path = remainder.partition('/')
host, _, port_text = authority.partition(':')
# Fall back to the default HTTP port when none is given.
port1 = int(port_text) if port_text else 80