workflow框架 wget 实现http请求

这篇博客介绍如何基于 workflow 框架的 wget 示例(tutorial-01-wget)并发发起多个 HTTP 请求:使用 WFFacilities::WaitGroup(类似于 Go 语言中的 sync.WaitGroup)对任务进行计数,每个请求完成时调用 done(),主线程通过 wait() 等待所有任务执行完毕。

类似于go的sync.WaitGroup
var wg sync.WaitGroup
wg.Add(num) // number of goroutines that will be launched
for i := 0; i < num; i++ {
	// One goroutine per counted unit, so that Done() is called exactly
	// num times and Wait() below can return.
	go func() {
		defer wg.Done()
		// ... do the work of one task here ...
	}()
}
wg.Wait() // blocks until every goroutine has called Done()

/*
  Copyright (c) 2019 Sogou, Inc.

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

	  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.

  Author: Xie Han (xiehan@sogou-inc.com;63350856@qq.com)
*/
//g++ -std=c++11 tutorial-01-wget_bak.cc -I/usr/local/include/workflow -lworkflow

#include <netdb.h>
#include <signal.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <string>

#include <assert.h>
#include <sys/time.h>
#include <vector>
#include <iostream>


#include "workflow/HttpMessage.h"
#include "workflow/HttpUtil.h"
#include "workflow/WFTaskFactory.h"
#include "workflow/WFFacilities.h"

#define REDIRECT_MAX    5
#define RETRY_MAX       2

/*
 * Returns the current time of day, as reported by gettimeofday(), expressed
 * in seconds as a double (the microsecond part becomes the fraction).
 */
double tick(void)
{
    struct timeval now;
    gettimeofday(&now, 0);

    const double fraction = 1E-6 * now.tv_usec;
    return now.tv_sec + fraction;
}


/* Number of HTTP tasks that main() starts; also used as the initial count
 * passed to wait_group below. */
static int task_num = 5;
/* Shared counter: main() blocks in wait_group.wait(), and wget_callback()
 * calls wait_group.done() from the task callback. */
static WFFacilities::WaitGroup wait_group(task_num);

void wget_callback(WFHttpTask *task)
{
	protocol::HttpRequest *req = task->get_req();
	protocol::HttpResponse *resp = task->get_resp();
	int state = task->get_state();
	int error = task->get_error();

	//get_request_uri
	//printf("url %s\n", req->get_request_uri());

	switch (state)
	{
	case WFT_STATE_SYS_ERROR:
		fprintf(stderr, "system error: %s\n", strerror(error));
		break;
	case WFT_STATE_DNS_ERROR:
		fprintf(stderr, "DNS error: %s\n", gai_strerror(error));
		break;
	case WFT_STATE_SSL_ERROR:
		fprintf(stderr, "SSL error: %d\n", error);
		break;
	case WFT_STATE_TASK_ERROR:
		fprintf(stderr, "Task error: %d\n", error);
		break;
	case WFT_STATE_SUCCESS:
		break;
	}

	if (state != WFT_STATE_SUCCESS)
	{
		fprintf(stderr, "Failed. Press Ctrl-C to exit.\n");
		return;
	}

	std::string name;
	std::string value;

	/* Print request. */
	fprintf(stderr, "%s %s %s\r\n", req->get_method(),
									req->get_http_version(),
									req->get_request_uri());

	protocol::HttpHeaderCursor req_cursor(req);

	while (req_cursor.next(name, value))
		fprintf(stderr, "%s: %s\r\n", name.c_str(), value.c_str());
	fprintf(stderr, "\r\n");

	/* Print response header. */
	fprintf(stderr, "%s %s %s\r\n", resp->get_http_version(),
									resp->get_status_code(),
									resp->get_reason_phrase());

	protocol::HttpHeaderCursor resp_cursor(resp);

	while (resp_cursor.next(name, value))
		fprintf(stderr, "%s: %s\r\n", name.c_str(), value.c_str());
	fprintf(stderr, "\r\n");

	/* Print response body. */
	const void *body;
	size_t body_len;

	resp->get_parsed_body(&body, &body_len);
	//fwrite(body, 1, body_len, stdout);
	//fflush(stdout);

	char *filename =NULL; 
	filename = (char*)(req->get_request_uri()) + strlen(req->get_request_uri()) - 7;
	assert(filename);


	FILE *fp = fopen(filename, "wb");
	if(fp == NULL) {
		fprintf(stderr, "fopen");
	}
	fwrite(body, 1, body_len, fp);
	fclose(fp);

	fprintf(stderr, "\nSuccess. Press Ctrl-C to exit.\n");
	wait_group.done();//
	std::cout<<"down: "<<req->get_request_uri()<<std::endl<<std::endl;
}



/*
 * Signal handler that counts wait_group down by one.
 *
 * signo is the number of the delivered signal; it is not used.
 * NOTE(review): the signal() call that would install this handler is
 * commented out in main(), so it appears to be unused in this listing.
 */
void sig_handler(int signo)
{
	(void)signo;
	wait_group.done();
}

int main(int argc, char *argv[])
{
	double t = tick();

	/*
	if (argc != 2)
	{
		fprintf(stderr, "USAGE: %s <http URL>\n", argv[0]);
		exit(1);
	}
	*/

	//signal(SIGINT, sig_handler);

	/*
	std::string url = argv[1];
	if (strncasecmp(argv[1], "http://", 7) != 0 &&
		strncasecmp(argv[1], "https://", 8) != 0)
	{
		url = "http://" + url;
	}
	*/

	std::vector<std::string> url{
		"https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2644.jpg",
		"https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2645.jpg",
		"https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2646.jpg",
		"https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2647.jpg",
		"https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2652.jpg",
	};

	for (int i = 0; i < task_num; i++) {
		WFHttpTask *task;
		task = WFTaskFactory::create_http_task(url[i], REDIRECT_MAX, RETRY_MAX,
										   wget_callback);
		protocol::HttpRequest *req = task->get_req();
		req->add_header_pair("Accept", "*/*");
		req->add_header_pair("User-Agent", "Wget/1.14 (linux-gnu)");
		req->add_header_pair("Connection", "close");
		task->start();

		std::cout<<"start: "<<url[i]<<std::endl;
	}

	

	wait_group.wait();
	t = tick() - t;
	printf("main end, Completed in %.6f secs \n", t);
	
	return 0;
}
g++ -std=c++11 tutorial-01-wget_bak.cc -I/usr/local/include/workflow -lworkflow
[gdut17@localhost tutorial]$ ./a.out 
start: https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2644.jpg
start: https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2645.jpg
start: https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2646.jpg
start: https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2647.jpg
start: https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2652.jpg
GET HTTP/1.1 /uploads/allimg/2019/04/02/92_2652.jpg
Host: meituba.jmsla.cn
Accept: */*
User-Agent: Wget/1.14 (linux-gnu)
Connection: close

HTTP/1.1 200 OK
Server: nginx
Date: Thu, 22 Oct 2020 12:58:52 GMT
Content-Type: image/jpeg
Content-Length: 15485
Last-Modified: Tue, 20 Aug 2019 03:39:11 GMT
Connection: close
ETag: "5d5b6b5f-3c7d"
Expires: Sat, 21 Nov 2020 12:58:52 GMT
Cache-Control: max-age=2592000
Accept-Ranges: bytes


Success. Press Ctrl-C to exit.
down: /uploads/allimg/2019/04/02/92_2652.jpg

GET HTTP/1.1 /uploads/allimg/2019/04/02/92_2647.jpg
Host: meituba.jmsla.cn
Accept: */*
User-Agent: Wget/1.14 (linux-gnu)
Connection: close

HTTP/1.1 200 OK
Server: nginx
Date: Thu, 22 Oct 2020 12:58:52 GMT
Content-Type: image/jpeg
Content-Length: 118955
Last-Modified: Tue, 20 Aug 2019 03:39:15 GMT
Connection: close
ETag: "5d5b6b63-1d0ab"
Expires: Sat, 21 Nov 2020 12:58:52 GMT
Cache-Control: max-age=2592000
Accept-Ranges: bytes


Success. Press Ctrl-C to exit.
down: /uploads/allimg/2019/04/02/92_2647.jpg

GET HTTP/1.1 /uploads/allimg/2019/04/02/92_2644.jpg
Host: meituba.jmsla.cn
Accept: */*
User-Agent: Wget/1.14 (linux-gnu)
Connection: close

HTTP/1.1 200 OK
Server: nginx
Date: Thu, 22 Oct 2020 12:58:52 GMT
Content-Type: image/jpeg
Content-Length: 190624
Last-Modified: Tue, 20 Aug 2019 03:39:06 GMT
Connection: close
ETag: "5d5b6b5a-2e8a0"
Expires: Sat, 21 Nov 2020 12:58:52 GMT
Cache-Control: max-age=2592000
Accept-Ranges: bytes


Success. Press Ctrl-C to exit.
down: /uploads/allimg/2019/04/02/92_2644.jpg

GET HTTP/1.1 /uploads/allimg/2019/04/02/92_2645.jpg
Host: meituba.jmsla.cn
Accept: */*
User-Agent: Wget/1.14 (linux-gnu)
Connection: close

HTTP/1.1 200 OK
Server: nginx
Date: Thu, 22 Oct 2020 12:58:52 GMT
Content-Type: image/jpeg
Content-Length: 200403
Last-Modified: Tue, 20 Aug 2019 03:39:07 GMT
Connection: close
ETag: "5d5b6b5b-30ed3"
Expires: Sat, 21 Nov 2020 12:58:52 GMT
Cache-Control: max-age=2592000
Accept-Ranges: bytes


Success. Press Ctrl-C to exit.
down: /uploads/allimg/2019/04/02/92_2645.jpg

GET HTTP/1.1 /uploads/allimg/2019/04/02/92_2646.jpg
Host: meituba.jmsla.cn
Accept: */*
User-Agent: Wget/1.14 (linux-gnu)
Connection: close

HTTP/1.1 200 OK
Server: nginx
Date: Thu, 22 Oct 2020 12:58:52 GMT
Content-Type: image/jpeg
Content-Length: 244893
Last-Modified: Tue, 20 Aug 2019 03:38:58 GMT
Connection: close
ETag: "5d5b6b52-3bc9d"
Expires: Sat, 21 Nov 2020 12:58:52 GMT
Cache-Control: max-age=2592000
Accept-Ranges: bytes


Success. Press Ctrl-C to exit.
down: /uploads/allimg/2019/04/02/92_2646.jpg

main end, Completed in 0.472164 secs 
[gdut17@localhost tutorial]$ 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值