概要:
hadoop上以streaming方式运行map任务,map任务为socket client端,和在外部系统运行的server交互
代码:
server.cpp
client.cpp
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <netdb.h>
#include <unistd.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <iostream>

#define SERVPORT 9899
#define MAXDATASIZE 100 /* max bytes received per transfer (incl. NUL) */

using namespace std;

/*
 * Streaming map task / socket client.
 *
 * Usage: ./client <server_hostname>
 *
 * 1. Echoes every whitespace-delimited stdin token back to stdout
 *    (pass-through map behavior for Hadoop streaming).
 * 2. Connects to <server_hostname>:SERVPORT, receives the server's
 *    greeting, sends "hello server" (13 bytes, incl. the NUL), and
 *    prints the greeting.
 */
int main(int argc, char *argv[]) {
    int sock_fd, recvbytes;
    char buf[MAXDATASIZE];
    struct hostent *host;
    struct sockaddr_in serv_addr;

    /* Validate arguments BEFORE consuming stdin, so a missing hostname
     * fails fast instead of after draining the whole input split. */
    if (argc < 2) {
        fprintf(stderr, "Please enter the server's hostname!\n");
        exit(1);
    }

    /* Pass-through map: echo each input token on its own line. */
    string input;
    while (cin >> input) {
        cout << input << endl;
    }

    /* NOTE(review): gethostbyname is obsolete; getaddrinfo is the
     * modern replacement, kept here to match the original protocol. */
    if ((host = gethostbyname(argv[1])) == NULL) {
        herror("gethostbyname出错!");
        exit(1);
    }

    /* Create the TCP socket. */
    if ((sock_fd = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
        perror("socket创建出错!");
        exit(1);
    }

    /* Zero the whole address struct (replaces legacy bzero of sin_zero),
     * then fill in family, port and the resolved server address. */
    memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_port = htons(SERVPORT);
    serv_addr.sin_addr = *((struct in_addr *)host->h_addr);

    /* Connect to the server. */
    if (connect(sock_fd, (struct sockaddr *)&serv_addr,
                sizeof(struct sockaddr)) == -1) {
        perror("connect出错!");
        exit(1);
    }

    /* Receive the server greeting. Read at most MAXDATASIZE-1 bytes so
     * buf[recvbytes] = '\0' below never writes past the end of buf
     * (the original recv of MAXDATASIZE bytes made that an off-by-one
     * out-of-bounds write). */
    if ((recvbytes = recv(sock_fd, buf, MAXDATASIZE - 1, 0)) == -1) {
        perror("recv出错!");
        exit(1);
    }

    /* Send the fixed 13-byte greeting ("hello server" + NUL), matching
     * what the server expects to print. */
    if (send(sock_fd, "hello server", 13, 0) == -1) {
        perror("send出错!");
    }

    buf[recvbytes] = '\0';
    printf("Received: %s", buf);

    close(sock_fd);
    return 0;
}
编译程序:
$ g++ -o client client.cpp
$ g++ -o server server.cpp
测试:
测试机上运行server
$./server
hadoop上运行任务:
$ hadoop streaming -input /user/test.txt -output /user/result -mapper "./client <server_domain>" -reducer "cat" -file client -jobconf mapred.reduce.tasks=1 -jobconf mapred.map.tasks=5 -jobconf mapred.job.name="socket_test"
input文件 /user/test.txt 的内容为 1324
-jobconf mapred.reduce.tasks=1 -jobconf mapred.map.tasks=5 :mapper任务5个,reduce任务1个
./server输出:
received a connection from 10.*.*.*
hello server
received a connection from 10.*.*.*
hello server
received a connection from 10.*.*.*
hello server
received a connection from 10.*.*.*
hello server
received a connection from 10.*.*.*
hello server
运行结果:
$ hadoop fs -cat /user/result/part-00000
1324
Received: Hello, you are connected!
Received: Hello, you are connected!
Received: Hello, you are connected!
Received: Hello, you are connected!
Received: Hello, you are connected!