最近在网上东找西找,才找到了几个在 Windows 下用 socket 实现 HTTP 请求的例子,改动一番之后,终于可以编译通过了。
代码实现的功能是:打开一个 URL,把该页面中 href 属性所包含的链接提取出来,保存到一个文件里。本人 C 和 C++ 都不精,只能这么混着用了,各位不要笑话我哦~~
废话不多说,上代码:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <winsock2.h>
#include <string>
#include <vector>
using namespace std;
// Ask the MSVC linker to pull in the Winsock 2 import library.
#pragma comment( lib , "ws2_32.lib")
// Size in bytes of the buffer that holds the request and then the response.
// Parenthesized so the macro expands safely inside larger expressions
// (e.g. `x / BUF_SIZE` or `x % BUF_SIZE`).
#define BUF_SIZE (1024*1024)
// Host that is contacted when none is given on the command line.
#define DEF_HOSTNAME "www.baidu.com"
// Text that immediately precedes every link target searched for by find_link().
#define HEAD_TAG "href=\""
// Links collected from the downloaded page; filled by find_link() and written
// to the output file by main().
vector<string> v_urls;
// Fallback so this function still builds if the file-level HEAD_TAG
// definition is not visible; it is a no-op when HEAD_TAG is already defined.
#ifndef HEAD_TAG
#define HEAD_TAG "href=\""
#endif

// Scans the NUL-terminated text in `buffer` for every occurrence of HEAD_TAG
// (`href="`) and appends the text between the tag and the next double quote
// to `v_urls`, in the order encountered.
//
// buffer  - NUL-terminated text to scan; a NULL pointer is treated as empty.
// v_urls  - receives the extracted link targets; existing entries are kept.
//
// Empty targets (`href=""`) are skipped. Scanning stops at the first tag that
// has no closing quote, because nothing after it can be delimited reliably.
void find_link(char *buffer, std::vector<std::string> &v_urls)
{
    if (buffer == NULL) {
        return;
    }

    const size_t tagLen = strlen(HEAD_TAG);
    const char *tag = strstr(buffer, HEAD_TAG);
    while (tag != NULL) {
        const char *urlBegin = tag + tagLen;
        const char *urlEnd = strchr(urlBegin, '"');
        if (urlEnd == NULL) {
            // Unterminated attribute value: stop instead of spinning on the
            // same position forever.
            break;
        }
        if (urlEnd != urlBegin) {
            v_urls.push_back(std::string(urlBegin, urlEnd));
        }
        // Resume after the closing quote so it is never mistaken for part of
        // another opening tag.
        tag = strstr(urlEnd + 1, HEAD_TAG);
    }
}
int main( int argc , char** argv )
{
WSADATA wsaData;
int len , maxLen;
char* bufMsg;
char* strHost;
char* ptr;
FILE* page;
HOSTENT* pHostent;
SOCKET sock;
SOCKADDR_IN addr;
TIMEVAL timeout;
fd_set fd;
//启动网络服务
if( WSAStartup( 0x101 , &wsaData ) != 0 ){
printf( "error:%d,启动网络服务失败" , WSAGetLastError() );
return 1;
}
//解析域名
if( argc > 1 ){
strHost = argv[1];
}
else{
strHost = DEF_HOSTNAME;
}
printf( "Host:%s\n" , strHost );
pHostent = gethostbyname( strHost);
if( pHostent == NULL ){
printf( "error:%d,解析域名失败" , WSAGetLastError() );
return 1;
}
memset( &addr , 0 , sizeof( SOCKADDR_IN ) );
addr.sin_family = AF_INET;
addr.sin_port = htons(80);
addr.sin_addr.s_addr = *(unsigned long*)pHostent->h_addr ;
printf( "IP:%s\n" , inet_ntoa(addr.sin_addr) );
//建立连接
sock = socket( AF_INET , SOCK_STREAM , 0 );
if( sock == INVALID_SOCKET ){
printf( "error:%d,建立socket失败" , WSAGetLastError() );
return 1;
}
if( connect( sock , (SOCKADDR*)&addr , sizeof( SOCKADDR ) ) == SOCKET_ERROR ){
printf( "error:%d,连接失败" , WSAGetLastError() );
return 1;
}
//发送请求
bufMsg= (char*)calloc( BUF_SIZE , sizeof( char ) );
if( bufMsg== NULL ) {
printf( "error:%d,分配内存失败" , errno );
return 1;
}
sprintf(bufMsg,"GET http://%s:%d HTTP/1.1\r\nHost:%s:%d\r\nAccept: */*\r\nConnection: close\r\n\r\n",strHost ,80,strHost ,80);
printf( "MSG:%s" , bufMsg );
len = send( sock , bufMsg , strlen( bufMsg ) , 0 );
if( len == strlen( bufMsg ) ){
printf( "请求成功\n" );
}
else{
printf( "请求失败\n" );
}
//下载页面
memset( bufMsg , 0 , len );
ptr = bufMsg ;
maxLen = BUF_SIZE - 1 ;
printf( "下载中...\n" );
do{
timeout.tv_sec = 2;
timeout.tv_usec = 0;
FD_ZERO( &fd );
FD_SET( sock , &fd );
if( select( sock+1 , &fd , NULL , NULL ,&timeout ) < 0 ){
printf( "error:%d,下载失败" , WSAGetLastError() );
break;
}
if( FD_ISSET( sock , &fd ) ){
len = recv( sock , ptr , maxLen , 0 );
if( len == 0 ){
printf( "下载完毕\n" );
break;
}
else if(len == -1 ){
printf( "error:%d,下载错误" , WSAGetLastError() );
break;
}
maxLen -= len;
if( maxLen <= 0 ) {
printf( "缓冲区满了n" );
break;
}
ptr += len;
}
}while( 1 );
closesocket( sock );
WSACleanup( );
//保存页面
ptr = strstr( bufMsg , "\r\n\r\n" );
ptr += 4;
page = fopen( "page.txt" , "w" );
if( page == NULL ) {
printf( "error:%d,打开文件失败" , errno ) ;
return 1;
}
find_link(ptr,v_urls);
for(int j=0; j<v_urls.size(); j++)
{
fwrite(v_urls.at(j).c_str(),sizeof(char),strlen(v_urls.at(j).c_str()),page);
fwrite("\n",1,1,page);
}
fclose( page );
free( bufMsg);
return 0;
}