通过HTTP GET下载网页(socket编程)
2009-04-29 11:16
基于 Linux socket,通过 HTTP GET 方法获取网页信息
// Problem encountered and how it was solved: the client has no direct way to
// know when the HTTP server has finished sending, so a read timeout is used as
// the end-of-response signal. The timeout is implemented with select().
// Form of the GET request line: GET / HTTP/1.1
// The listing consists of two files, HttpConnection.h and HttpConnection.cpp.

// ============================ HttpConnection.h ============================
#ifndef HTTP_CONNECTION_H
#define HTTP_CONNECTION_H

#include <string>

// Nothing in this class uses Qt. The include is kept for sources that relied
// on getting QObject through this header, and is skipped when Qt is absent so
// the header also builds in a plain POSIX environment.
#if defined(__has_include)
#  if __has_include(<QtCore/QObject>)
#    include <QtCore/QObject>
#  endif
#endif

// Kept for backward compatibility with code that expects `string` to be
// visible after including this header.
using std::string;

#define MAX_BUFF_LEN 4096   // size in bytes of the receive buffer
#define SLEEP_TIME 2        // seconds of silence treated as "response finished"

namespace HTTP
{
    /// Minimal blocking HTTP/1.1 GET client on top of a POSIX TCP socket.
    ///
    /// Usage: construct, call http_connect(), call get_html() one or more
    /// times, then http_close(). The object does not close the socket on
    /// destruction; copies share the same descriptor, so close it exactly once.
    class HttpConnection
    {
    public:
        /// @param hostname  host name or dotted IPv4 address of the server
        /// @param port      TCP port of the server (1..65535)
        HttpConnection(string hostname, int port)
            : conn_sock(-1), hostname(hostname), port(port),
              status_code(0), total_bytes(0), current_bytes(0)
        {
        }

        /// Defaults to www.163.com on port 80.
        HttpConnection()
            : conn_sock(-1), hostname("www.163.com"), port(80),
              status_code(0), total_bytes(0), current_bytes(0)
        {
        }

        /// Resolves the host (IPv4 only) and opens a TCP connection.
        /// @return 0 on success, -1 on any failure.
        int http_connect();

        /// Sends "GET <path> HTTP/1.1" and returns everything the server sends
        /// back (status line, headers and body, unparsed) until the peer closes
        /// the connection or stays silent for SLEEP_TIME seconds.
        /// @return the raw response; empty if not connected or on send failure.
        string get_html(const string) const;

        /// Closes the connection if one is open; safe to call repeatedly.
        void http_close();

    private:
        int conn_sock;      // connected socket descriptor, -1 when not connected
        string hostname;
        int port;
        int status_code;    // currently unused
        int total_bytes;    // currently unused
        int current_bytes;  // currently unused
    };
}
#endif

// =========================== HttpConnection.cpp ===========================
#include <unistd.h>
#include <stdlib.h>
#include <iostream>
#include <string>
#include <strings.h>
#include <cstring>
#include <netdb.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sstream>
#include <errno.h>
#include <sys/select.h>
#include <sys/time.h>
// When this listing is compiled as a single file the declarations above are
// already visible; when it is split into two files the header is pulled in.
#ifndef HTTP_CONNECTION_H
#include "HttpConnection.h"
#endif

namespace
{
// Ask for EPIPE instead of a process-killing SIGPIPE where the platform
// offers the flag.
#ifdef MSG_NOSIGNAL
    const int kSendFlags = MSG_NOSIGNAL;
#else
    const int kSendFlags = 0;
#endif

    // Writes the whole buffer to fd, retrying on short writes and EINTR.
    bool send_all(int fd, const char *data, size_t len)
    {
        while (len > 0) {
            const ssize_t sent = send(fd, data, len, kSendFlags);
            if (sent < 0 && errno == EINTR) {
                continue;
            }
            if (sent <= 0) {
                return false;
            }
            data += sent;
            len -= static_cast<size_t>(sent);
        }
        return true;
    }
}

int HTTP::HttpConnection::http_connect()
{
    // Reconnecting must not leak the descriptor of the previous connection.
    if (conn_sock >= 0) {
        close(conn_sock);
        conn_sock = -1;
    }

    if (port <= 0 || port > 65535) {
#ifdef DEBUG
        std::cout << "invalid port " << port << std::endl;
#endif
        return -1;
    }

    // getaddrinfo() replaces the obsolete, non-reentrant gethostbyname().
    // Restricting the query to AF_INET keeps the client IPv4-only.
    struct addrinfo hints;
    std::memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;

    struct addrinfo *candidates = NULL;
    if (getaddrinfo(hostname.c_str(), NULL, &hints, &candidates) != 0 || candidates == NULL) {
#ifdef DEBUG
        std::cout << "getaddrinfo error for host " << hostname << std::endl;
#endif
        return -1;
    }

    // Try every resolved address until one accepts the connection.
    for (struct addrinfo *ai = candidates; ai != NULL; ai = ai->ai_next) {
        if (ai->ai_family != AF_INET || ai->ai_addrlen < sizeof(struct sockaddr_in)) {
            continue;
        }

        struct sockaddr_in servaddr;
        std::memcpy(&servaddr, ai->ai_addr, sizeof(servaddr));
        servaddr.sin_port = htons(static_cast<unsigned short>(port));

#ifdef DEBUG
        char ip_dot_num[INET_ADDRSTRLEN];
        if (inet_ntop(AF_INET, &servaddr.sin_addr, ip_dot_num, sizeof(ip_dot_num)) != NULL) {
            std::cout << "connect to " << ip_dot_num << " ..." << std::endl;
        }
#endif

        const int sock = socket(AF_INET, SOCK_STREAM, 0);
        if (sock < 0) {
            continue;
        }
        if (::connect(sock, reinterpret_cast<struct sockaddr *>(&servaddr), sizeof(servaddr)) == 0) {
            conn_sock = sock;
            break;
        }
        close(sock);  // failed attempt: release the descriptor before moving on
    }
    freeaddrinfo(candidates);

    if (conn_sock < 0) {
#ifdef DEBUG
        std::cout << "failed to connect" << std::endl;
#endif
        return -1;
    }
#ifdef DEBUG
    std::cout << "succeed to connect" << std::endl;
#endif
    return 0;
}

void HTTP::HttpConnection::http_close()
{
    if (conn_sock < 0) {
        return;  // nothing open
    }
    const int rc = close(conn_sock);
    conn_sock = -1;  // forget the descriptor so a second call cannot close it again
#ifdef DEBUG
    std::cout << (rc == 0 ? "succeed to close socket" : "failed to close socket") << std::endl;
#else
    (void)rc;
#endif
}

std::string HTTP::HttpConnection::get_html(std::string path) const
{
    std::string receive_content;

    // select() is only defined for descriptors in [0, FD_SETSIZE).
    if (conn_sock < 0 || conn_sock >= FD_SETSIZE) {
        return receive_content;
    }
    if (path.empty()) {
        path = "/";  // an empty request target is not valid HTTP
    }

    std::ostringstream strm;
    strm << "GET " << path << " HTTP/1.1\r\n";
    // HTTP/1.1 requires a Host header; the port is included when not the default.
    strm << "Host: " << hostname;
    if (port != 80) {
        strm << ':' << port;
    }
    strm << "\r\n";
    strm << "Connection:Keep-Alive\r\n";
    strm << "Cache-Control:no-cache\r\n";
    strm << "\r\n";

    // Keep the request in a named string: the pointer returned by
    // strm.str().c_str() would dangle once the temporary is destroyed.
    const std::string request = strm.str();

#ifdef DEBUG
    std::cout << "send request to server" << std::endl;
    std::cout << request << std::endl;
#endif

    if (!send_all(conn_sock, request.data(), request.size())) {
        return receive_content;
    }

    char receive_buff[MAX_BUFF_LEN];
    for (;;) {
        fd_set rset;
        FD_ZERO(&rset);
        FD_SET(conn_sock, &rset);

        // select() may modify the timeout, so it is rebuilt on every pass.
        struct timeval tv;
        tv.tv_sec = SLEEP_TIME;
        tv.tv_usec = 0;

        const int ready = select(conn_sock + 1, &rset, NULL, NULL, &tv);
        if (ready < 0) {
            if (errno == EINTR) {
                continue;
            }
            break;  // hard error: return what has been received so far
        }
        if (ready == 0) {
            break;  // silence for SLEEP_TIME seconds: treat the response as complete
        }

        const ssize_t nread = read(conn_sock, receive_buff, sizeof(receive_buff));
        if (nread > 0) {
            // Append by length: the data is not NUL-terminated and may itself
            // contain NUL bytes.
            receive_content.append(receive_buff, static_cast<std::string::size_type>(nread));
            continue;
        }
        if (nread < 0 && errno == EINTR) {
            continue;
        }
        break;  // 0 = peer closed the connection, <0 = read error
    }
    return receive_content;
}