Hadoop Pipes is Hadoop's official C++ interface to the Map/Reduce framework; the C++ process communicates with the framework over a socket. The details of the protocol are not covered here; instead, the word-count example below demonstrates how to use it.
1. Code
#include "hadoop/Pipes.hh"
#include "hadoop/TemplateFactory.hh"
#include "hadoop/StringUtils.hh"

const std::string WORDCOUNT = "WORDCOUNT";
const std::string INPUT_WORDS = "INPUT_WORDS";
const std::string OUTPUT_WORDS = "OUTPUT_WORDS";

class WordCountMap: public HadoopPipes::Mapper {
public:
  HadoopPipes::TaskContext::Counter* inputWords;

  WordCountMap(HadoopPipes::TaskContext& context) {
    inputWords = context.getCounter(WORDCOUNT, INPUT_WORDS);
  }

  // Split each input value (one line of text) on spaces and emit
  // (word, "1") for every word found.
  void map(HadoopPipes::MapContext& context) {
    std::vector<std::string> words =
        HadoopUtils::splitString(context.getInputValue(), " ");
    for (unsigned int i = 0; i < words.size(); ++i) {
      context.emit(words[i], "1");
    }
    context.incrementCounter(inputWords, words.size());
  }
};

class WordCountReduce: public HadoopPipes::Reducer {
public:
  HadoopPipes::TaskContext::Counter* outputWords;

  WordCountReduce(HadoopPipes::TaskContext& context) {
    outputWords = context.getCounter(WORDCOUNT, OUTPUT_WORDS);
  }

  // Sum the "1" values collected for each word and emit (word, total).
  void reduce(HadoopPipes::ReduceContext& context) {
    int sum = 0;
    while (context.nextValue()) {
      sum += HadoopUtils::toInt(context.getInputValue());
    }
    context.emit(context.getInputKey(), HadoopUtils::toString(sum));
    context.incrementCounter(outputWords, 1);
  }
};

int main(int argc, char *argv[]) {
  // Hand control to the Pipes runtime; TemplateFactory instantiates
  // the mapper and reducer declared above.
  return HadoopPipes::runTask(
      HadoopPipes::TemplateFactory<WordCountMap, WordCountReduce>());
}
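Before wiring everything into a job, it can help to see what the mapper's tokenization step does on its own. The following standalone sketch is not part of the job; it assumes only HadoopUtils::splitString from the listing above (linked against libhadooputils) and prints the same (word, "1") pairs the mapper would emit for one input line:

#include <iostream>
#include <string>
#include <vector>
#include "hadoop/StringUtils.hh"

int main() {
  // One "input value" as the mapper would receive it: a single line of text.
  std::string line = "hello world hello";
  std::vector<std::string> words = HadoopUtils::splitString(line, " ");
  for (unsigned int i = 0; i < words.size(); ++i) {
    // The same key/value pairs WordCountMap::map emits via context.emit().
    std::cout << words[i] << "\t1" << std::endl;
  }
  return 0;
}

This prints hello, world, and hello, each paired with 1; after the shuffle, WordCountReduce turns them into "hello 2" and "world 1".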
2. Compilation
The makefile is as follows:
CC = g++
HADOOP_INSTALL = /home/keke/hadoop-0.20.2-cdh3u4
PLATFORM = Linux-i386-32
CPPFLAGS = -m32 -I$(HADOOP_INSTALL)/c++/$(PLATFORM)/include

wordcount: wordcount.cpp
	$(CC) $(CPPFLAGS) $< -Wall -L$(HADOOP_INSTALL)/c++/$(PLATFORM)/lib \
	-lhadooppipes -lhadooputils -lpthread -lcrypto -g -O2 -o $@
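With this makefile saved next to wordcount.cpp, building the executable is just (adjust HADOOP_INSTALL and PLATFORM to match your own installation):

make wordcount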
3. Running
First copy the executable to HDFS, for example into a bin directory on HDFS, then run:
hadoop pipes -D hadoop.pipes.java.recordreader=true \
    -D hadoop.pipes.java.recordwriter=true \
    -input /user/keke/input -output output -program bin/wordcount
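For reference, copying the binary to HDFS and inspecting the result can be done with the standard HDFS shell commands; the paths below simply reuse the examples above, and the output file name part-00000 is what a single-reducer job typically produces:

hadoop fs -mkdir bin
hadoop fs -put wordcount bin/wordcount
hadoop fs -cat output/part-00000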