类似于 Go 的 sync.WaitGroup:
var wg sync.WaitGroup
wg.Add(num) // 启动的协程个数
go func() {
defer wg.Done()
…
}()
wg.Wait()
/*
Copyright (c) 2019 Sogou, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Author: Xie Han (xiehan@sogou-inc.com;63350856@qq.com)
*/
//g++ -std=c++11 tutorial-01-wget_bak.cc -I/usr/local/include/workflow -lworkflow
#include <netdb.h>
#include <signal.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <string>
#include <assert.h>
#include <sys/time.h>
#include <vector>
#include <iostream>
#include "workflow/HttpMessage.h"
#include "workflow/HttpUtil.h"
#include "workflow/WFTaskFactory.h"
#include "workflow/WFFacilities.h"
#define REDIRECT_MAX 5
#define RETRY_MAX 2
/* Returns the current gettimeofday() reading as seconds, with the
 * microsecond field folded in as a fractional part. */
double tick(void)
{
    struct timeval now;
    gettimeofday(&now, 0);
    const double fraction = 1E-6 * now.tv_usec;
    return now.tv_sec + fraction;
}
/* Number of download tasks started by main(); also the initial count
 * passed to wait_group below. */
static int task_num = 5;
/* Shared counter used to hold main() until the tasks finish: main() calls
 * wait_group.wait(), and wget_callback() calls wait_group.done(). */
static WFFacilities::WaitGroup wait_group(task_num);
/*
 * Completion callback for one HTTP download task.
 *
 * On failure it reports the error on stderr. On success it echoes the
 * request line/headers and the response status line/headers to stderr, then
 * saves the response body to a file in the current directory named after
 * the last 7 characters of the request URI (e.g. "644.jpg").
 *
 * wait_group.done() is called exactly once on EVERY exit path (success,
 * task failure, file error); otherwise main() would block in
 * wait_group.wait() forever after a single failed download.
 */
void wget_callback(WFHttpTask *task)
{
    /* Equivalent of Go's `defer wg.Done()`: the destructor runs when the
     * callback returns, i.e. after the last use of req/resp, so main()
     * cannot be released while this callback is still working. */
    struct DoneGuard
    {
        ~DoneGuard() { wait_group.done(); }
    } done_guard;

    protocol::HttpRequest *req = task->get_req();
    protocol::HttpResponse *resp = task->get_resp();
    int state = task->get_state();
    int error = task->get_error();

    switch (state)
    {
    case WFT_STATE_SYS_ERROR:
        fprintf(stderr, "system error: %s\n", strerror(error));
        break;
    case WFT_STATE_DNS_ERROR:
        fprintf(stderr, "DNS error: %s\n", gai_strerror(error));
        break;
    case WFT_STATE_SSL_ERROR:
        fprintf(stderr, "SSL error: %d\n", error);
        break;
    case WFT_STATE_TASK_ERROR:
        fprintf(stderr, "Task error: %d\n", error);
        break;
    case WFT_STATE_SUCCESS:
        break;
    }

    if (state != WFT_STATE_SUCCESS)
    {
        fprintf(stderr, "Failed. Press Ctrl-C to exit.\n");
        return;
    }

    std::string name;
    std::string value;

    /* Print request. */
    fprintf(stderr, "%s %s %s\r\n", req->get_method(),
                                    req->get_http_version(),
                                    req->get_request_uri());
    protocol::HttpHeaderCursor req_cursor(req);
    while (req_cursor.next(name, value))
        fprintf(stderr, "%s: %s\r\n", name.c_str(), value.c_str());
    fprintf(stderr, "\r\n");

    /* Print response header. */
    fprintf(stderr, "%s %s %s\r\n", resp->get_http_version(),
                                    resp->get_status_code(),
                                    resp->get_reason_phrase());
    protocol::HttpHeaderCursor resp_cursor(resp);
    while (resp_cursor.next(name, value))
        fprintf(stderr, "%s: %s\r\n", name.c_str(), value.c_str());
    fprintf(stderr, "\r\n");

    /* Fetch the response body; initialised so a failed parse yields an
     * empty body instead of garbage. */
    const void *body = NULL;
    size_t body_len = 0;
    resp->get_parsed_body(&body, &body_len);

    /* File name = last 7 characters of the URI. Clamp the offset so a URI
     * shorter than 7 characters does not produce a pointer before the
     * start of the string. */
    const size_t suffix_len = 7;
    const char *uri = req->get_request_uri();
    const size_t uri_len = strlen(uri);
    const char *filename = uri + (uri_len > suffix_len ? uri_len - suffix_len : 0);

    FILE *fp = fopen(filename, "wb");
    if (fp == NULL)
    {
        /* Do not fall through to fwrite/fclose with a NULL stream. */
        perror("fopen");
        fprintf(stderr, "Failed to open %s for writing.\n", filename);
        return;
    }

    const size_t written = (body_len > 0) ? fwrite(body, 1, body_len, fp) : 0;
    const int close_rc = fclose(fp);
    if (written != body_len || close_rc != 0)
    {
        fprintf(stderr, "Failed to write %s.\n", filename);
        return;
    }

    fprintf(stderr, "\nSuccess. Press Ctrl-C to exit.\n");
    std::cout<<"down: "<<req->get_request_uri()<<std::endl<<std::endl;
}
/*
 * Signal-handler-shaped function: decrements wait_group by one.
 * The signal number is not inspected.
 * NOTE(review): a single done() only lowers the count by one; confirm
 * that is enough to release wait() before installing this handler.
 */
void sig_handler(int signo)
{
    (void)signo; /* unused */
    wait_group.done();
}
/*
 * Starts task_num HTTP download tasks concurrently, blocks until the wait
 * group has been released, then prints the elapsed wall-clock time.
 *
 * Returns 0 on normal completion, 1 if the built-in URL list does not
 * match task_num.
 */
int main(int argc, char *argv[])
{
    double t = tick();
    /*
    if (argc != 2)
    {
    fprintf(stderr, "USAGE: %s <http URL>\n", argv[0]);
    exit(1);
    }
    */
    //signal(SIGINT, sig_handler);
    /*
    std::string url = argv[1];
    if (strncasecmp(argv[1], "http://", 7) != 0 &&
    strncasecmp(argv[1], "https://", 8) != 0)
    {
    url = "http://" + url;
    }
    */
    std::vector<std::string> url{
        "https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2644.jpg",
        "https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2645.jpg",
        "https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2646.jpg",
        "https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2647.jpg",
        "https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2652.jpg",
    };

    /* wait_group was constructed with task_num, and the loop below indexes
     * url[0 .. task_num-1]. A mismatch would either read past the end of
     * the vector or leave wait() blocked forever, so refuse to start. */
    if (task_num < 0 || url.size() != static_cast<size_t>(task_num))
    {
        fprintf(stderr, "URL count (%zu) does not match task_num (%d)\n",
                url.size(), task_num);
        return 1;
    }

    for (int i = 0; i < task_num; i++) {
        const std::string &target = url[i];
        WFHttpTask *task = WFTaskFactory::create_http_task(target,
                                                           REDIRECT_MAX,
                                                           RETRY_MAX,
                                                           wget_callback);
        protocol::HttpRequest *req = task->get_req();
        req->add_header_pair("Accept", "*/*");
        req->add_header_pair("User-Agent", "Wget/1.14 (linux-gnu)");
        req->add_header_pair("Connection", "close");
        task->start();
        std::cout<<"start: "<<target<<std::endl;
    }

    /* Block here until the wait group count has been brought down by the
     * task callbacks. */
    wait_group.wait();
    t = tick() - t;
    printf("main end, Completed in %.6f secs \n", t);
    return 0;
}
g++ -std=c++11 tutorial-01-wget_bak.cc -I/usr/local/include/workflow -lworkflow
[gdut17@localhost tutorial]$ ./a.out
start: https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2644.jpg
start: https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2645.jpg
start: https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2646.jpg
start: https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2647.jpg
start: https://meituba.jmsla.cn/uploads/allimg/2019/04/02/92_2652.jpg
GET HTTP/1.1 /uploads/allimg/2019/04/02/92_2652.jpg
Host: meituba.jmsla.cn
Accept: */*
User-Agent: Wget/1.14 (linux-gnu)
Connection: close
HTTP/1.1 200 OK
Server: nginx
Date: Thu, 22 Oct 2020 12:58:52 GMT
Content-Type: image/jpeg
Content-Length: 15485
Last-Modified: Tue, 20 Aug 2019 03:39:11 GMT
Connection: close
ETag: "5d5b6b5f-3c7d"
Expires: Sat, 21 Nov 2020 12:58:52 GMT
Cache-Control: max-age=2592000
Accept-Ranges: bytes
Success. Press Ctrl-C to exit.
down: /uploads/allimg/2019/04/02/92_2652.jpg
GET HTTP/1.1 /uploads/allimg/2019/04/02/92_2647.jpg
Host: meituba.jmsla.cn
Accept: */*
User-Agent: Wget/1.14 (linux-gnu)
Connection: close
HTTP/1.1 200 OK
Server: nginx
Date: Thu, 22 Oct 2020 12:58:52 GMT
Content-Type: image/jpeg
Content-Length: 118955
Last-Modified: Tue, 20 Aug 2019 03:39:15 GMT
Connection: close
ETag: "5d5b6b63-1d0ab"
Expires: Sat, 21 Nov 2020 12:58:52 GMT
Cache-Control: max-age=2592000
Accept-Ranges: bytes
Success. Press Ctrl-C to exit.
down: /uploads/allimg/2019/04/02/92_2647.jpg
GET HTTP/1.1 /uploads/allimg/2019/04/02/92_2644.jpg
Host: meituba.jmsla.cn
Accept: */*
User-Agent: Wget/1.14 (linux-gnu)
Connection: close
HTTP/1.1 200 OK
Server: nginx
Date: Thu, 22 Oct 2020 12:58:52 GMT
Content-Type: image/jpeg
Content-Length: 190624
Last-Modified: Tue, 20 Aug 2019 03:39:06 GMT
Connection: close
ETag: "5d5b6b5a-2e8a0"
Expires: Sat, 21 Nov 2020 12:58:52 GMT
Cache-Control: max-age=2592000
Accept-Ranges: bytes
Success. Press Ctrl-C to exit.
down: /uploads/allimg/2019/04/02/92_2644.jpg
GET HTTP/1.1 /uploads/allimg/2019/04/02/92_2645.jpg
Host: meituba.jmsla.cn
Accept: */*
User-Agent: Wget/1.14 (linux-gnu)
Connection: close
HTTP/1.1 200 OK
Server: nginx
Date: Thu, 22 Oct 2020 12:58:52 GMT
Content-Type: image/jpeg
Content-Length: 200403
Last-Modified: Tue, 20 Aug 2019 03:39:07 GMT
Connection: close
ETag: "5d5b6b5b-30ed3"
Expires: Sat, 21 Nov 2020 12:58:52 GMT
Cache-Control: max-age=2592000
Accept-Ranges: bytes
Success. Press Ctrl-C to exit.
down: /uploads/allimg/2019/04/02/92_2645.jpg
GET HTTP/1.1 /uploads/allimg/2019/04/02/92_2646.jpg
Host: meituba.jmsla.cn
Accept: */*
User-Agent: Wget/1.14 (linux-gnu)
Connection: close
HTTP/1.1 200 OK
Server: nginx
Date: Thu, 22 Oct 2020 12:58:52 GMT
Content-Type: image/jpeg
Content-Length: 244893
Last-Modified: Tue, 20 Aug 2019 03:38:58 GMT
Connection: close
ETag: "5d5b6b52-3bc9d"
Expires: Sat, 21 Nov 2020 12:58:52 GMT
Cache-Control: max-age=2592000
Accept-Ranges: bytes
Success. Press Ctrl-C to exit.
down: /uploads/allimg/2019/04/02/92_2646.jpg
main end, Completed in 0.472164 secs
[gdut17@localhost tutorial]$

这篇博客探讨了如何在 workflow 框架中基于 wget 示例并发发起多个 HTTP 请求,并使用类似于 Go 语言 sync.WaitGroup 的 WFFacilities::WaitGroup,通过计数的设定与完成来同步这些并发请求,确保所有任务执行完毕后主线程才退出。
647

被折叠的 条评论
为什么被折叠?



