C Code
Map: Mapper.c
#include <stdio.h>
#include <string.h>
#include <strings.h>   /* for index() */
#include <stdlib.h>

#define BUF_SIZE 2048
#define DELIM "\n"

int main(int argc, char *argv[]) {
    char buffer[BUF_SIZE];

    /* Read one line at a time from stdin. */
    while (fgets(buffer, BUF_SIZE - 1, stdin)) {
        int len = strlen(buffer);
        /* Strip the trailing newline. */
        if (len > 0 && buffer[len - 1] == '\n')
            buffer[len - 1] = 0;

        /* Skip lines that contain no space at all. */
        char *querys = index(buffer, ' ');
        char *query = NULL;
        if (querys == NULL)
            continue;
        querys += 1; /* not to include '\t' */

        /* Emit "<word>\t1" for every space-separated word. */
        query = strtok(buffer, " ");
        while (query) {
            printf("%s\t1\n", query);
            query = strtok(NULL, " ");
        }
    }
    return 0;
}
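Before hooking the mapper up to Hadoop streaming, it can be sanity-checked locally by piping a line of text into the compiled binary. This is just a quick sketch, assuming it has already been built as mapper.o with the gcc command shown further below:
$echo "hello world hello" | ./mapper.o
hello	1
world	1
hello	1
Each word comes out on its own line, followed by a tab and the count 1.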
Reduce: Reducer.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define BUFFER_SIZE 1024
#define DELIM "\t"

int main(int argc, char *argv[]) {
    char strLastKey[BUFFER_SIZE];
    char strLine[BUFFER_SIZE];
    int count = 0;

    *strLastKey = '\0';
    *strLine = '\0';

    /* The streaming framework delivers map output sorted by key,
       so identical keys arrive on consecutive lines. */
    while (fgets(strLine, BUFFER_SIZE - 1, stdin)) {
        char *strCurrKey = NULL;
        char *strCurrNum = NULL;

        strCurrKey = strtok(strLine, DELIM);
        strCurrNum = strtok(NULL, DELIM);
        /* necessary to check error but.... */
        if (strCurrKey == NULL || strCurrNum == NULL)
            continue;

        /* First key seen: initialise the running key. */
        if (strLastKey[0] == '\0') {
            strcpy(strLastKey, strCurrKey);
        }

        if (strcmp(strCurrKey, strLastKey)) {
            /* Key changed: emit the finished count and start a new one. */
            printf("%s\t%d\n", strLastKey, count);
            count = atoi(strCurrNum);
        } else {
            /* Same key as before: keep accumulating. */
            count += atoi(strCurrNum);
        }
        strcpy(strLastKey, strCurrKey);
    }
    printf("%s\t%d\n", strLastKey, count); /* flush the last count */
    return 0;
}
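Because Hadoop streaming sorts the map output by key before it reaches the reducer, the whole word-count pipeline can be simulated locally with sort. Again just a sketch, assuming both binaries sit in the current directory:
$echo "hello world hello" | ./mapper.o | sort | ./reducer.o
hello	2
world	1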
First, compile the C code:
$gcc /home/user/mapred/Mapper.c -o mapper.o
$gcc /home/user/mapred/Reducer.c -o reducer.o
Create an input directory on HDFS:
$HADOOP_HOME/bin/hadoop fs -mkdir input
Then create a file /home/user/input/input.txt locally and upload it to HDFS:
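For example, the local file can be created with some sample text (the contents here are arbitrary):
$echo "hello world hello hadoop streaming" > /home/user/input/input.txt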
$HADOOP_HOME/bin/hadoop fs -put /home/user/input/input.txt input
The preparation is now complete; next, run the hadoop-streaming command:
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar -input input/* -output output -mapper "/home/user/mapred/mapper.o" -reducer "/home/user/mapred/reducer.o"
The log of a successful run is shown below:
packageJobJar: [/tmp/hadoop-cp/hadoop-unjar2910356701799592623/] [] /tmp/streamjob4550176904973722526.jar tmpDir=null
11/12/15 19:27:23 INFO mapred.FileInputFormat: Total input paths to process : 1
11/12/15 19:27:23 INFO streaming.StreamJob: getLocalDirs(): [/tmp/hadoop-cp/mapred/local]
11/12/15 19:27:23 INFO streaming.StreamJob: Running job: job_201112151707_0019
11/12/15 19:27:23 INFO streaming.StreamJob: To kill this job, run:
11/12/15 19:27:23 INFO streaming.StreamJob: /home//hadoop/bin/../bin/hadoop job -Dmapred.job.tracker=localhost:9001 -kill job_201112151707_0019
11/12/15 19:27:23 INFO streaming.StreamJob: Tracking URL: http://localhost:50030/jobdetails.jsp?jobid=job_201112151707_0019
11/12/15 19:27:24 INFO streaming.StreamJob: map 0% reduce 0%
11/12/15 19:27:34 INFO streaming.StreamJob: map 100% reduce 0%
11/12/15 19:27:46 INFO streaming.StreamJob: map 100% reduce 100%
11/12/15 19:27:49 INFO streaming.StreamJob: Job complete: job_201112151707_0019
11/12/15 19:27:49 INFO streaming.StreamJob: Output: output
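Once the job finishes, the word counts can be read back from HDFS (a quick check, assuming the default part-file naming in the output directory):
$HADOOP_HOME/bin/hadoop fs -cat output/part-*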