C code for a simple HTTP client

本文介绍了一个用C语言编写的简易HTTP客户端实现。该客户端能够通过命令行参数接收目标主机名及页面路径,解析主机名,建立TCP连接,并发送HTTP GET请求获取指定网页的内容。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

  1. Linux provides a high-level socket API that allows programmers to easily connect to any TCP or UDP service.

    In this tutorial, we will see how this works by implementing a simple HTTP client which requests a web page given the hostname and the page name, then reads the server's response and outputs the HTML content of the reply.

    To be able to connect to a service built on top of TCP, we first need to create a socket for the TCP protocol, fill in a network address structure representing our destination and the port to connect to and use the latter to connect to the remote server.

    From there, we will be able to send and receive data over the network. Once we are done, we will close the connection.

    Below is the C code for a simple HTTP client that will get the host and the page to request from the command line arguments, resolve the hostname to an IP address, connect to this IP on port 80, build the HTTP query, send it and then retrieve the page content.

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  #include <arpa/inet.h>
  #include <netdb.h>
  #include <sys/socket.h>
  #include <unistd.h>
  /* Forward declarations for the helpers defined after main().
   * "(void)" is used instead of "()" so that the compiler rejects calls
   * that pass arguments; it is compatible with the "()" definitions below. */
  int create_tcp_socket(void);
  char *get_ip(char *host);
  char *build_get_query(char *host, char *page);
  void usage(void);

  /* Compile-time defaults. */
  #define HOST "coding.debuntu.org"  /* example host; not referenced by the code in this file */
  #define PAGE "/"                   /* page requested when none is given on the command line */
  #define PORT 80                    /* TCP port the client connects to */
  #define USERAGENT "HTMLGET 1.0"    /* value sent in the User-Agent request header */
  17.  
  18. int main(int argc, char **argv)
  19. {
  20.   struct sockaddr_in *remote;
  21.   int sock;
  22.   int tmpres;
  23.   char *ip;
  24.   char *get;
  25.   char buf[BUFSIZ+1];
  26.   char *host;
  27.   char *page;
  28.  
  29.   if(argc == 1){
  30.     usage();
  31.     exit(2);
  32.   }  
  33.   host = argv[1];
  34.   if(argc > 2){
  35.     page = argv[2];
  36.   }else{
  37.     page = PAGE;
  38.   }
  39.   sock = create_tcp_socket();
  40.   ip = get_ip(host);
  41.   fprintf(stderr, "IP is %s/n", ip);
  42.   remote = (struct sockaddr_in *)malloc(sizeof(struct sockaddr_in *));
  43.   remote->sin_family = AF_INET;
  44.   tmpres = inet_pton(AF_INET, ip, (void *)(&(remote->sin_addr.s_addr)));
  45.   if( tmpres < 0)  
  46.   {
  47.     perror("Can't set remote->sin_addr.s_addr");
  48.     exit(1);
  49.   }else if(tmpres == 0)
  50.   {
  51.     fprintf(stderr, "%s is not a valid IP address/n", ip);
  52.     exit(1);
  53.   }
  54.   remote->sin_port = htons(PORT);
  55.  
  56.   if(connect(sock, (struct sockaddr *)remote, sizeof(struct sockaddr)) < 0){
  57.     perror("Could not connect");
  58.     exit(1);
  59.   }
  60.   get = build_get_query(host, page);
  61.   fprintf(stderr, "Query is:/n<<START>>/n%s<<END>>/n", get);
  62.  
  63.   //Send the query to the server
  64.   int sent = 0;
  65.   while(sent < strlen(get))
  66.   {
  67.     tmpres = send(sock, get+sent, strlen(get)-sent, 0);
  68.     if(tmpres == -1){
  69.       perror("Can't send query");
  70.       exit(1);
  71.     }
  72.     sent += tmpres;
  73.   }
  74.   //now it is time to receive the page
  75.   memset(buf, 0, sizeof(buf));
  76.   int htmlstart = 0;
  77.   char * htmlcontent;
  78.   while((tmpres = recv(sock, buf, BUFSIZ, 0)) > 0){
  79.     if(htmlstart == 0)
  80.     {
  81.       /* Under certain conditions this will not work.
  82.       * If the /r/n/r/n part is splitted into two messages
  83.       * it will fail to detect the beginning of HTML content
  84.       */
  85.       htmlcontent = strstr(buf, "/r/n/r/n");
  86.       if(htmlcontent != NULL){
  87.         htmlstart = 1;
  88.         htmlcontent += 4;
  89.       }
  90.     }else{
  91.       htmlcontent = buf;
  92.     }
  93.     if(htmlstart){
  94.       fprintf(stdout, htmlcontent);
  95.     }
  96.  
  97.     memset(buf, 0, tmpres);
  98.   }
  99.   if(tmpres < 0)
  100.   {
  101.     perror("Error receiving data");
  102.   }
  103.   free(get);
  104.   free(remote);
  105.   free(ip);
  106.   close(sock);
  107.   return 0;
  108. }
  109.  
  /* Print the command-line synopsis to stderr. */
  void usage()
  {
    /* Adjacent string literals are concatenated by the compiler, which
     * avoids fragile backslash line continuations inside one literal. */
    fprintf(stderr,
            "USAGE: htmlget host [page]\n"
            "\thost: the website hostname. ex: coding.debuntu.org\n"
            "\tpage: the page to retrieve. ex: index.html, default: /\n");
  }
  116.  
  117.  
  /*
   * Open an IPv4 TCP stream socket.
   *
   * Returns the new socket descriptor. If socket() fails, the error is
   * reported with perror() and the process exits with status 1.
   */
  int create_tcp_socket()
  {
    int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);

    if (fd >= 0) {
      return fd;
    }

    perror("Can't create TCP socket");
    exit(1);
  }
  127.  
  128.  
  /*
   * Resolve host to the dotted-quad text form of its first IPv4 address.
   *
   * host: hostname or numeric IPv4 address.
   *
   * Returns a heap-allocated, NUL-terminated string that the caller must
   * free(). On any failure a message is printed to stderr and the process
   * exits with status 1.
   */
  char *get_ip(char *host)
  {
    struct hostent *hent = gethostbyname(host);
    if (hent == NULL) {
      /* gethostbyname() reports failures through h_errno, not errno. */
      fprintf(stderr, "Can't get IP: lookup of %s failed (h_errno %d)\n",
              host, h_errno);
      exit(1);
    }
    if (hent->h_addrtype != AF_INET || hent->h_addr_list[0] == NULL) {
      fprintf(stderr, "Can't get IP: no IPv4 address for %s\n", host);
      exit(1);
    }

    /* INET_ADDRSTRLEN (16) covers "XXX.XXX.XXX.XXX" plus the terminating
     * NUL; passing only 15 makes inet_ntop() fail with ENOSPC for any
     * address that needs all 15 characters. */
    char *ip = calloc(1, INET_ADDRSTRLEN);
    if (ip == NULL) {
      perror("Can't allocate IP buffer");
      exit(1);
    }
    if (inet_ntop(AF_INET, hent->h_addr_list[0], ip, INET_ADDRSTRLEN) == NULL) {
      perror("Can't resolve host");
      exit(1);
    }
    return ip;
  }
  147.  
  148. char *build_get_query(char *host, char *page)
  149. {
  150.   char *query;
  151.   char *getpage = page;
  152.   char *tpl = "GET /%s HTTP/1.0/r/nHost: %s/r/nUser-Agent: %s/r/n/r/n";
  153.   if(getpage[0] == '/'){
  154.     getpage = getpage + 1;
  155.     fprintf(stderr,"Removing leading /"//", converting %s to %s/n", page, getpage);
  156.   }
  157.   // -5 is to consider the %s %s %s in tpl and the ending /0
  158.   query = (char *)malloc(strlen(host)+strlen(getpage)+strlen(USERAGENT)+strlen(tpl)-5);
  159.   sprintf(query, tpl, getpage, host, USERAGENT);
  160.   return query;
  161. }

 

To compile it, run:

$ gcc -o htmlget htmlget.c
$ ./htmlget 
USAGE: htmlget host [page]
	host: the website hostname. ex: coding.debuntu.org
	page: the page to retrieve. ex: index.html, default: /

Informative messages and errors are printed to stderr. The content of the page is printed to stdout. Thus, to save the HTML content of a page to a file, you will need to run:

$ ./htmlget coding.debuntu.org category > /tmp/page.html
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值