ChainMapper和ChainReducer

本文介绍 ChainMapper 和 ChainReducer 的使用方法,并通过两个示例详细展示了如何利用 ChainMapper 将多个 Map 函数串联起来,实现复杂的数据处理流程。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

ChainMapper和ChainReducer

The ChainMapper class allows to use multiple Mapper classes within a single Map task. 

The ChainReducer class allows to chain multiple Mapper classes after a Reducer within the Reducer task.

 

 http://www.oratea.net/?p=371

 通过ChainMapper可以将多个map类合并成一个map任务。

下面这个例子没有什么实际意义,但是很好地演示了 ChainMapper 的作用。

源文件
100 tom 90
101 mary 85
102 kate 60

map00的结果,过滤掉100的记录
101 mary 85
102 kate 60

map01的结果,过滤掉101的记录
102 kate 60

reduce结果
102 kate 60

 package org.myorg;

复制代码

import  java.io.IOException;
import  java.util. * ;
import  java.lang.String;

import  org.apache.hadoop.fs.Path;
import  org.apache.hadoop.conf. * ;
import  org.apache.hadoop.io. * ;
import  org.apache.hadoop.mapred. * ;
import  org.apache.hadoop.util. * ;
import  org.apache.hadoop.mapred.lib. * ;

public   class  WordCount
{

    
public   static   class  Map00  extends  MapReduceBase  implements  Mapper
    {

        
public   void  map(Text key, Text value, OutputCollector output, Reporter reporter)  throws  IOException
        {

            Text ft 
=   new  Text(“ 100 ″);

            
if ( ! key.equals(ft))
            {
                output.collect(key, value);
            }
        }
    }

    
public   static   class  Map01  extends  MapReduceBase  implements  Mapper
    {

        
public   void  map(Text key, Text value, OutputCollector output, Reporter reporter)  throws  IOException
        {

            Text ft 
=   new  Text(“ 101 ″);

            
if ( ! key.equals(ft))
            {
                output.collect(key, value);
            }
        }
    }

    
public   static   class  Reduce  extends  MapReduceBase  implements  Reducer
    {
        
public   void  reduce(Text key, Iterator values, OutputCollector output, Reporter reporter)  throws  IOException
        {

            
while (values.hasNext())
            {
                output.collect(key, values.next());
            }

        }
    }

    
public   static   void  main(String[] args)  throws  Exception
    {

        JobConf conf 
=   new  JobConf(WordCount. class );
        conf.setJobName(“wordcount00″);

        conf.setInputFormat(KeyValueTextInputFormat.
class );
        conf.setOutputFormat(TextOutputFormat.
class );

        ChainMapper cm 
=   new  ChainMapper();

        JobConf mapAConf 
=   new  JobConf( false );
        cm.addMapper(conf, Map00.
class , Text. class , Text. class , Text. class , Text. class true , mapAConf);

        JobConf mapBConf 
=   new  JobConf( false );
        cm.addMapper(conf, Map01.
class , Text. class , Text. class , Text. class , Text. class true , mapBConf);

        conf.setReducerClass(Reduce.
class );

        conf00.setOutputKeyClass(Text.
class );
        conf00.setOutputValueClass(Text.
class );

        FileInputFormat.setInputPaths(conf, 
new  Path(args[ 0 ]));
        FileOutputFormat.setOutputPath(conf, 
new  Path(args[ 1 ]));

        JobClient.runJob(conf);

    }
}
复制代码
 

 

另外一个例子,代码很多,其实很简单,Conn几个类都是相同的

http://yixiaohuamax.iteye.com/blog/684244 

复制代码
package  com.oncedq.code;

import  java.io.DataInput;
import  java.io.DataOutput;
import  java.io.IOException;
import  java.text.SimpleDateFormat;

import  org.apache.hadoop.fs.Path;
import  org.apache.hadoop.io.LongWritable;
import  org.apache.hadoop.io.Text;
import  org.apache.hadoop.io.WritableComparable;
import  org.apache.hadoop.mapred.FileInputFormat;
import  org.apache.hadoop.mapred.FileOutputFormat;
import  org.apache.hadoop.mapred.JobConf;
import  org.apache.hadoop.mapred.MapReduceBase;
import  org.apache.hadoop.mapred.Mapper;
import  org.apache.hadoop.mapred.OutputCollector;
import  org.apache.hadoop.mapred.Reporter;
import  org.apache.hadoop.mapred.TextInputFormat;
import  org.apache.hadoop.mapred.TextOutputFormat;
import  org.apache.hadoop.mapred.jobcontrol.Job;
import  org.apache.hadoop.mapred.jobcontrol.JobControl;
import  org.apache.hadoop.mapred.lib.ChainMapper;

import  com.oncedq.code.util.DateUtil;

public   class  ProcessSample {
    
public   static   class  ExtractMappper  extends  MapReduceBase  implements
            Mapper
< LongWritable, Text, LongWritable, Conn1 >  {

        @Override
        
public   void  map(LongWritable arg0, Text arg1,
                OutputCollector
< LongWritable, Conn1 >  arg2, Reporter arg3)
                
throws  IOException {
            String line 
=  arg1.toString();
            String[] strs 
=  line.split( " ; " );
            Conn1 conn1 
=   new  Conn1();
            conn1.orderKey 
=  Long.parseLong(strs[ 0 ]);
            conn1.customer 
=  Long.parseLong(strs[ 1 ]);
            conn1.state 
=  strs[ 2 ];
            conn1.price 
=  Double.parseDouble(strs[ 3 ]);
            conn1.orderDate 
=  DateUtil.getDateFromString(strs[ 4 ],  " yyyy-MM-dd " );
            LongWritable lw 
=   new  LongWritable(conn1.orderKey);
            arg2.collect(lw, conn1);
        }

    }

    
private   static   class  Conn1  implements  WritableComparable < Conn1 >  {
        
public   long  orderKey;
        
public   long  customer;
        
public  String state;
        
public   double  price;
        
public  java.util.Date orderDate;

        @Override
        
public   void  readFields(DataInput in)  throws  IOException {
            orderKey 
=  in.readLong();
            customer 
=  in.readLong();
            state 
=  Text.readString(in);
            price 
=  in.readDouble();
            orderDate 
=  DateUtil.getDateFromString(Text.readString(in),
                    
" yyyy-MM-dd " );
        }

        @Override
        
public   void  write(DataOutput out)  throws  IOException {
            out.writeLong(orderKey);
            out.writeLong(customer);
            Text.writeString(out, state);
            out.writeDouble(price);
            Text.writeString(out, DateUtil.getDateStr(orderDate, 
" yyyy-MM-dd " ));
        }

        @Override
        
public   int  compareTo(Conn1 arg0) {
            
//  TODO Auto-generated method stub
             return   0 ;
        }

    }

    
public   static   class  Filter1Mapper  extends  MapReduceBase  implements
            Mapper
< LongWritable, Conn1, LongWritable, Conn2 >  {

        @Override
        
public   void  map(LongWritable inKey, Conn1 c2,
                OutputCollector
< LongWritable, Conn2 >  collector, Reporter report)
                
throws  IOException {
            
if  (c2.state.equals( " F " )) {
                Conn2 inValue 
=   new  Conn2();
                inValue.customer 
=  c2.customer;
                inValue.orderDate 
=  c2.orderDate;
                inValue.orderKey 
=  c2.orderKey;
                inValue.price 
=  c2.price;
                inValue.state 
=  c2.state;
                collector.collect(inKey, inValue);
            }
        }

    }

    
private   static   class  Conn2  implements  WritableComparable < Conn1 >  {
        
public   long  orderKey;
        
public   long  customer;
        
public  String state;
        
public   double  price;
        
public  java.util.Date orderDate;

        @Override
        
public   void  readFields(DataInput in)  throws  IOException {
            orderKey 
=  in.readLong();
            customer 
=  in.readLong();
            state 
=  Text.readString(in);
            price 
=  in.readDouble();
            orderDate 
=  DateUtil.getDateFromString(Text.readString(in),
                    
" yyyy-MM-dd " );
        }

        @Override
        
public   void  write(DataOutput out)  throws  IOException {
            out.writeLong(orderKey);
            out.writeLong(customer);
            Text.writeString(out, state);
            out.writeDouble(price);
            Text.writeString(out, DateUtil.getDateStr(orderDate, 
" yyyy-MM-dd " ));
        }

        @Override
        
public   int  compareTo(Conn1 arg0) {
            
//  TODO Auto-generated method stub
             return   0 ;
        }

    }

    
public   static   class  RegexMapper  extends  MapReduceBase  implements
            Mapper
< LongWritable, Conn2, LongWritable, Conn3 >  {

        @Override
        
public   void  map(LongWritable inKey, Conn2 c3,
                OutputCollector
< LongWritable, Conn3 >  collector, Reporter report)
                
throws  IOException {
            c3.state 
=  c3.state.replaceAll( " F " " Find " );
            Conn3 c2 
=   new  Conn3();
            c2.customer 
=  c3.customer;
            c2.orderDate 
=  c3.orderDate;
            c2.orderKey 
=  c3.orderKey;
            c2.price 
=  c3.price;
            c2.state 
=  c3.state;
            collector.collect(inKey, c2);
        }
    }

    
private   static   class  Conn3  implements  WritableComparable < Conn1 >  {
        
public   long  orderKey;
        
public   long  customer;
        
public  String state;
        
public   double  price;
        
public  java.util.Date orderDate;

        @Override
        
public   void  readFields(DataInput in)  throws  IOException {
            orderKey 
=  in.readLong();
            customer 
=  in.readLong();
            state 
=  Text.readString(in);
            price 
=  in.readDouble();
            orderDate 
=  DateUtil.getDateFromString(Text.readString(in),
                    
" yyyy-MM-dd " );
        }

        @Override
        
public   void  write(DataOutput out)  throws  IOException {
            out.writeLong(orderKey);
            out.writeLong(customer);
            Text.writeString(out, state);
            out.writeDouble(price);
            Text.writeString(out, DateUtil.getDateStr(orderDate, 
" yyyy-MM-dd " ));
        }

        @Override
        
public   int  compareTo(Conn1 arg0) {
            
//  TODO Auto-generated method stub
             return   0 ;
        }

    }

    
public   static   class  LoadMapper  extends  MapReduceBase  implements
            Mapper
< LongWritable, Conn3, LongWritable, Conn3 >  {

        @Override
        
public   void  map(LongWritable arg0, Conn3 arg1,
                OutputCollector
< LongWritable, Conn3 >  arg2, Reporter arg3)
                
throws  IOException {
            arg2.collect(arg0, arg1);
        }

    }

    
public   static   void  main(String[] args) {
        JobConf job 
=   new  JobConf(ProcessSample. class );
        job.setJobName(
" ProcessSample " );
        job.setNumReduceTasks(
0 );
        job.setInputFormat(TextInputFormat.
class );
        job.setOutputFormat(TextOutputFormat.
class );
        JobConf mapper1 
=   new  JobConf();
        JobConf mapper2 
=   new  JobConf();
        JobConf mapper3 
=   new  JobConf();
        JobConf mapper4 
=   new  JobConf();
        ChainMapper cm 
=   new  ChainMapper();
        cm.addMapper(job, ExtractMappper.
class , LongWritable. class , Text. class ,
                LongWritable.
class , Conn1. class true , mapper1);
        cm.addMapper(job, Filter1Mapper.
class , LongWritable. class , Conn1. class ,
                LongWritable.
class , Conn2. class true , mapper2);
        cm.addMapper(job, RegexMapper.
class , LongWritable. class , Conn2. class ,
                LongWritable.
class , Conn3. class true , mapper3);
        cm.addMapper(job, LoadMapper.
class , LongWritable. class , Conn3. class ,
                LongWritable.
class , Conn3. class true , mapper4);
        FileInputFormat.setInputPaths(job, 
new  Path( " orderData " ));
        FileOutputFormat.setOutputPath(job, 
new  Path( " orderDataOutput " ));
        Job job1;
        
try  {
            job1 
=   new  Job(job);
            JobControl jc 
=   new  JobControl( " test " );
            jc.addJob(job1);
            jc.run();
        } 
catch  (IOException e) {
            
//  TODO Auto-generated catch block
            e.printStackTrace();
        }

    }
复制代码
资源下载链接为: https://pan.quark.cn/s/22ca96b7bd39 在当今的软件开发领域,自动化构建与发布是提升开发效率项目质量的关键环节。Jenkins Pipeline作为一种强大的自动化工具,能够有效助力Java项目的快速构建、测试及部署。本文将详细介绍如何利用Jenkins Pipeline实现Java项目的自动化构建与发布。 Jenkins Pipeline简介 Jenkins Pipeline是运行在Jenkins上的一套工作流框架,它将原本分散在单个或多个节点上独立运行的任务串联起来,实现复杂流程的编排与可视化。它是Jenkins 2.X的核心特性之一,推动了Jenkins从持续集成(CI)向持续交付(CD)及DevOps的转变。 创建Pipeline项目 要使用Jenkins Pipeline自动化构建发布Java项目,首先需要创建Pipeline项目。具体步骤如下: 登录Jenkins,点击“新建项”,选择“Pipeline”。 输入项目名称描述,点击“确定”。 在Pipeline脚本中定义项目字典、发版脚本预发布脚本。 编写Pipeline脚本 Pipeline脚本是Jenkins Pipeline的核心,用于定义自动化构建发布的流程。以下是一个简单的Pipeline脚本示例: 在上述脚本中,定义了四个阶段:Checkout、Build、Push packageDeploy/Rollback。每个阶段都可以根据实际需求进行配置调整。 通过Jenkins Pipeline自动化构建发布Java项目,可以显著提升开发效率项目质量。借助Pipeline,我们能够轻松实现自动化构建、测试部署,从而提高项目的整体质量可靠性。
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值