package com.yc.test3;

import java.io.IOException;
import java.util.Scanner;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.filecache.DistributedCache; // deprecated in Hadoop 2, but still functional

/**
 * @author 张志刚 (2015-09-02)
 */
public class MapClass extends Mapper<LongWritable, Text, Text, IntWritable> {

    private Path[] localFiles = null;

    @Override
    public void map(LongWritable ikey, Text ivalue, Context context)
            throws IOException, InterruptedException {
        System.out.println("Inside map()");
        String val = ivalue.toString();
        String[] str = val.split(" ");
        for (String s : str) {
            context.write(new Text(s), new IntWritable(1));
        }
    }

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        // Locate the local copies of the files registered with DistributedCache.
        // setup() already declares IOException, so no try/catch is needed here;
        // swallowing the exception would only lead to an NPE on localFiles below.
        localFiles = DistributedCache.getLocalCacheFiles(conf);
        System.out.println("Local cache path: " + localFiles[0].toString());
        // The cached copy lives on this node's local file system.
        FileSystem fsopen = FileSystem.getLocal(conf);
        FSDataInputStream in = fsopen.open(localFiles[0]);
        Scanner scan = new Scanner(in);
        while (scan.hasNext()) {
            System.out.println(Thread.currentThread().getName() + " scanned: " + scan.next());
        }
        scan.close();
    }
}
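DistributedCache.getLocalCacheFiles() is deprecated in Hadoop 2. As a minimal sketch of the non-deprecated route (this variant is not part of the original post): register the file with job.addCacheFile(in.toUri()) in the driver, then fetch the URIs in setup() via context.getCacheFiles(). It assumes the Hadoop 2 behavior of symlinking each cache file into the task's working directory under its base name:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical variant (MapClass2 is not a class from the original post).
public class MapClass2 extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        URI[] cacheFiles = context.getCacheFiles(); // non-deprecated replacement
        if (cacheFiles != null && cacheFiles.length > 0) {
            // Assumption: the cache file is symlinked into the task's working
            // directory under its base name, so it reads as a plain local file.
            String linkName = new Path(cacheFiles[0].getPath()).getName();
            try (BufferedReader reader = new BufferedReader(new FileReader(linkName))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println("cached line: " + line);
                }
            }
        }
    }
}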
package com.yc.test3;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache; // deprecated in Hadoop 2, but still functional
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @author 张志刚 (2015-09-02)
 */
public class Drive2 {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path in = new Path("hdfs://localhost:9000/user/input/README.txt");
        Path out = new Path("hdfs://localhost:9000/user/output/test1");
        Job job = Job.getInstance(conf, "JobName");
        // Register the file to distribute. In this demo the job input itself is
        // cached, so that setup() can show it being read from the local cache.
        DistributedCache.addCacheFile(in.toUri(), job.getConfiguration());
        // Delete the output directory if it already exists, or the job will fail.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(out)) {
            fs.delete(out, true);
            System.out.println("Output path already exists; deleted it!");
        }
        job.setJarByClass(com.yc.test3.Drive2.class);
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);
        // These output types cover both the map and reduce output here
        // (the map output classes default to the job output classes).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Input path and output directory (the output directory must not exist yet).
        FileInputFormat.setInputPaths(job, in);
        FileOutputFormat.setOutputPath(job, out);
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }
}
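For reference, Hadoop 2 exposes the same registration directly on Job, so the deprecated class can be avoided entirely; a one-line sketch:

// Non-deprecated equivalent of DistributedCache.addCacheFile(in.toUri(), job.getConfiguration()):
job.addCacheFile(in.toUri());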
package com.yc.test3;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * @author 张志刚 (2015-09-02)
 */
public class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum up the counts for this word.
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
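Because this reducer only sums integers (an associative, commutative operation), the same class could also be set as a combiner to pre-aggregate on the map side and shrink the shuffle. A sketch of the extra driver line (not in the original job: its "Combine input records" counter below is 0):

// Optional: reuse Reduce as a combiner to pre-aggregate before the shuffle.
job.setCombinerClass(Reduce.class);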
WARN [main] - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Output path already exists; deleted it!
INFO [main] - session.id is deprecated. Instead, use dfs.metrics.session-id
INFO [main] - Initializing JVM Metrics with processName=JobTracker, sessionId=
WARN [main] - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
WARN [main] - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
INFO [main] - Total input paths to process : 1
INFO [main] - number of splits:1
INFO [main] - Submitting tokens for job: job_local159353495_0001
INFO [main] - Creating symlink: /usr/local/application/hadoop-2.6.0/tmp/mapred/local/1441131372887/README.txt <- /home/a/workspace/Hadoop2/README.txt
INFO [main] - Localized hdfs://localhost:9000/user/input/README.txt as file:/usr/local/application/hadoop-2.6.0/tmp/mapred/local/1441131372887/README.txt
INFO [main] - The url to track the job: http://localhost:8080/
INFO [main] - Running job: job_local159353495_0001
INFO [Thread-12] - OutputCommitter set in config null
INFO [Thread-12] - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
INFO [Thread-12] - Waiting for map tasks
INFO [LocalJobRunner Map Task Executor #0] - Starting task: attempt_local159353495_0001_m_000000_0
INFO [LocalJobRunner Map Task Executor #0] - Using ResourceCalculatorProcessTree : [ ]
INFO [LocalJobRunner Map Task Executor #0] - Processing split: hdfs://localhost:9000/user/input/README.txt:0+1366
INFO [main] - Job job_local159353495_0001 running in uber mode : false
INFO [LocalJobRunner Map Task Executor #0] - (EQUATOR) 0 kvi 26214396(104857584)
INFO [LocalJobRunner Map Task Executor #0] - mapreduce.task.io.sort.mb: 100
INFO [LocalJobRunner Map Task Executor #0] - soft limit at 83886080
INFO [LocalJobRunner Map Task Executor #0] - bufstart = 0; bufvoid = 104857600
INFO [LocalJobRunner Map Task Executor #0] - kvstart = 26214396; length = 6553600
INFO [main] - map 0% reduce 0%
INFO [LocalJobRunner Map Task Executor #0] - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
Local cache path: file:/usr/local/application/hadoop-2.6.0/tmp/mapred/local/1441131372887/README.txt
LocalJobRunner Map Task Executor #0 scanned: For
LocalJobRunner Map Task Executor #0 scanned: the
LocalJobRunner Map Task Executor #0 scanned: latest
LocalJobRunner Map Task Executor #0 scanned: information
LocalJobRunner Map Task Executor #0 scanned: about
LocalJobRunner Map Task Executor #0 scanned: Hadoop,
LocalJobRunner Map Task Executor #0 scanned: please
LocalJobRunner Map Task Executor #0 scanned: visit
LocalJobRunner Map Task Executor #0 scanned: our
LocalJobRunner Map Task Executor #0 scanned: website
LocalJobRunner Map Task Executor #0 scanned: at:
LocalJobRunner Map Task Executor #0 scanned: http://hadoop.apache.org/core/
LocalJobRunner Map Task Executor #0 scanned: and
LocalJobRunner Map Task Executor #0 scanned: our
LocalJobRunner Map Task Executor #0 scanned: wiki,
LocalJobRunner Map Task Executor #0 scanned: at:
LocalJobRunner Map Task Executor #0 scanned: http://wiki.apache.org/hadoop/
LocalJobRunner Map Task Executor #0 scanned: This
LocalJobRunner Map Task Executor #0 scanned: distribution
LocalJobRunner Map Task Executor #0 scanned: includes
LocalJobRunner Map Task Executor #0 scanned: cryptographic
LocalJobRunner Map Task Executor #0 scanned: software.
LocalJobRunner Map Task Executor #0 scanned: The
LocalJobRunner Map Task Executor #0 scanned: country
LocalJobRunner Map Task Executor #0 scanned: in
LocalJobRunner Map Task Executor #0 scanned: which
LocalJobRunner Map Task Executor #0 scanned: you
LocalJobRunner Map Task Executor #0 scanned: currently
LocalJobRunner Map Task Executor #0 scanned: reside
LocalJobRunner Map Task Executor #0 scanned: may
LocalJobRunner Map Task Executor #0 scanned: have
LocalJobRunner Map Task Executor #0 scanned: restrictions
LocalJobRunner Map Task Executor #0 scanned: on
LocalJobRunner Map Task Executor #0 scanned: the
LocalJobRunner Map Task Executor #0 scanned: import,
LocalJobRunner Map Task Executor #0 scanned: possession,
LocalJobRunner Map Task Executor #0 scanned: use,
LocalJobRunner Map Task Executor #0 scanned: and/or
LocalJobRunner Map Task Executor #0 scanned: re-export
LocalJobRunner Map Task Executor #0 scanned: to
LocalJobRunner Map Task Executor #0 scanned: another
LocalJobRunner Map Task Executor #0 scanned: country,
LocalJobRunner Map Task Executor #0 scanned: of
LocalJobRunner Map Task Executor #0 scanned: encryption
LocalJobRunner Map Task Executor #0 scanned: software.
LocalJobRunner Map Task Executor #0 scanned: BEFORE
LocalJobRunner Map Task Executor #0 scanned: using
LocalJobRunner Map Task Executor #0 scanned: any
LocalJobRunner Map Task Executor #0 scanned: encryption
LocalJobRunner Map Task Executor #0 scanned: software,
LocalJobRunner Map Task Executor #0 scanned: please
LocalJobRunner Map Task Executor #0 scanned: check
LocalJobRunner Map Task Executor #0 scanned: your
LocalJobRunner Map Task Executor #0 scanned: country's
LocalJobRunner Map Task Executor #0 scanned: laws,
LocalJobRunner Map Task Executor #0 scanned: regulations
LocalJobRunner Map Task Executor #0 scanned: and
LocalJobRunner Map Task Executor #0 scanned: policies
LocalJobRunner Map Task Executor #0 scanned: concerning
LocalJobRunner Map Task Executor #0 scanned: the
LocalJobRunner Map Task Executor #0 scanned: import,
LocalJobRunner Map Task Executor #0 scanned: possession,
LocalJobRunner Map Task Executor #0 scanned: or
LocalJobRunner Map Task Executor #0 scanned: use,
LocalJobRunner Map Task Executor #0 scanned: and
LocalJobRunner Map Task Executor #0 scanned: re-export
LocalJobRunner Map Task Executor #0 scanned: of
LocalJobRunner Map Task Executor #0 scanned: encryption
LocalJobRunner Map Task Executor #0 scanned: software,
LocalJobRunner Map Task Executor #0 scanned: to
LocalJobRunner Map Task Executor #0 scanned: see
LocalJobRunner Map Task Executor #0 scanned: if
LocalJobRunner Map Task Executor #0 scanned: this
LocalJobRunner Map Task Executor #0 scanned: is
LocalJobRunner Map Task Executor #0 scanned: permitted.
LocalJobRunner Map Task Executor #0 scanned: See
LocalJobRunner Map Task Executor #0 scanned: <http://www.wassenaar.org/>
LocalJobRunner Map Task Executor #0 scanned: for
LocalJobRunner Map Task Executor #0 scanned: more
LocalJobRunner Map Task Executor #0 scanned: information.
LocalJobRunner Map Task Executor #0 scanned: The
LocalJobRunner Map Task Executor #0 scanned: U.S.
LocalJobRunner Map Task Executor #0 scanned: Government
LocalJobRunner Map Task Executor #0 scanned: Department
LocalJobRunner Map Task Executor #0 scanned: of
LocalJobRunner Map Task Executor #0 scanned: Commerce,
LocalJobRunner Map Task Executor #0 scanned: Bureau
LocalJobRunner Map Task Executor #0 scanned: of
LocalJobRunner Map Task Executor #0 scanned: Industry
LocalJobRunner Map Task Executor #0 scanned: and
LocalJobRunner Map Task Executor #0 scanned: Security
LocalJobRunner Map Task Executor #0 scanned: (BIS),
LocalJobRunner Map Task Executor #0 scanned: has
LocalJobRunner Map Task Executor #0 scanned: classified
LocalJobRunner Map Task Executor #0 scanned: this
LocalJobRunner Map Task Executor #0 scanned: software
LocalJobRunner Map Task Executor #0 scanned: as
LocalJobRunner Map Task Executor #0 scanned: Export
LocalJobRunner Map Task Executor #0 scanned: Commodity
LocalJobRunner Map Task Executor #0 scanned: Control
LocalJobRunner Map Task Executor #0 scanned: Number
LocalJobRunner Map Task Executor #0 scanned: (ECCN)
LocalJobRunner Map Task Executor #0 scanned: 5D002.C.1,
LocalJobRunner Map Task Executor #0 scanned: which
LocalJobRunner Map Task Executor #0 scanned: includes
LocalJobRunner Map Task Executor #0 scanned: information
LocalJobRunner Map Task Executor #0 scanned: security
LocalJobRunner Map Task Executor #0 scanned: software
LocalJobRunner Map Task Executor #0 scanned: using
LocalJobRunner Map Task Executor #0 scanned: or
LocalJobRunner Map Task Executor #0 scanned: performing
LocalJobRunner Map Task Executor #0 scanned: cryptographic
LocalJobRunner Map Task Executor #0 scanned: functions
LocalJobRunner Map Task Executor #0 scanned: with
LocalJobRunner Map Task Executor #0 scanned: asymmetric
LocalJobRunner Map Task Executor #0 scanned: algorithms.
LocalJobRunner Map Task Executor #0 scanned: The
LocalJobRunner Map Task Executor #0 scanned: form
LocalJobRunner Map Task Executor #0 scanned: and
LocalJobRunner Map Task Executor #0 scanned: manner
LocalJobRunner Map Task Executor #0 scanned: of
LocalJobRunner Map Task Executor #0 scanned: this
LocalJobRunner Map Task Executor #0 scanned: Apache
LocalJobRunner Map Task Executor #0 scanned: Software
LocalJobRunner Map Task Executor #0 scanned: Foundation
LocalJobRunner Map Task Executor #0 scanned: distribution
LocalJobRunner Map Task Executor #0 scanned: makes
LocalJobRunner Map Task Executor #0 scanned: it
LocalJobRunner Map Task Executor #0 scanned: eligible
LocalJobRunner Map Task Executor #0 scanned: for
LocalJobRunner Map Task Executor #0 scanned: export
LocalJobRunner Map Task Executor #0 scanned: under
LocalJobRunner Map Task Executor #0 scanned: the
LocalJobRunner Map Task Executor #0 scanned: License
LocalJobRunner Map Task Executor #0 scanned: Exception
LocalJobRunner Map Task Executor #0 scanned: ENC
LocalJobRunner Map Task Executor #0 scanned: Technology
LocalJobRunner Map Task Executor #0 scanned: Software
LocalJobRunner Map Task Executor #0 scanned: Unrestricted
LocalJobRunner Map Task Executor #0 scanned: (TSU)
LocalJobRunner Map Task Executor #0 scanned: exception
LocalJobRunner Map Task Executor #0 scanned: (see
LocalJobRunner Map Task Executor #0 scanned: the
LocalJobRunner Map Task Executor #0 scanned: BIS
LocalJobRunner Map Task Executor #0 scanned: Export
LocalJobRunner Map Task Executor #0 scanned: Administration
LocalJobRunner Map Task Executor #0 scanned: Regulations,
LocalJobRunner Map Task Executor #0 scanned: Section
LocalJobRunner Map Task Executor #0 scanned: 740.13)
LocalJobRunner Map Task Executor #0 scanned: for
LocalJobRunner Map Task Executor #0 scanned: both
LocalJobRunner Map Task Executor #0 scanned: object
LocalJobRunner Map Task Executor #0 scanned: code
LocalJobRunner Map Task Executor #0 scanned: and
LocalJobRunner Map Task Executor #0 scanned: source
LocalJobRunner Map Task Executor #0 scanned: code.
LocalJobRunner Map Task Executor #0 scanned: The
LocalJobRunner Map Task Executor #0 scanned: following
LocalJobRunner Map Task Executor #0 scanned: provides
LocalJobRunner Map Task Executor #0 scanned: more
LocalJobRunner Map Task Executor #0 scanned: details
LocalJobRunner Map Task Executor #0 scanned: on
LocalJobRunner Map Task Executor #0 scanned: the
LocalJobRunner Map Task Executor #0 scanned: included
LocalJobRunner Map Task Executor #0 scanned: cryptographic
LocalJobRunner Map Task Executor #0 scanned: software:
LocalJobRunner Map Task Executor #0 scanned: Hadoop
LocalJobRunner Map Task Executor #0 scanned: Core
LocalJobRunner Map Task Executor #0 scanned: uses
LocalJobRunner Map Task Executor #0 scanned: the
LocalJobRunner Map Task Executor #0 scanned: SSL
LocalJobRunner Map Task Executor #0 scanned: libraries
LocalJobRunner Map Task Executor #0 scanned: from
LocalJobRunner Map Task Executor #0 scanned: the
LocalJobRunner Map Task Executor #0 scanned: Jetty
LocalJobRunner Map Task Executor #0 scanned: project
LocalJobRunner Map Task Executor #0 scanned: written
LocalJobRunner Map Task Executor #0 scanned: by
LocalJobRunner Map Task Executor #0 scanned: mortbay.org.
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
Inside map()
INFO [LocalJobRunner Map Task Executor #0] -
INFO [LocalJobRunner Map Task Executor #0] - Starting flush of map output
INFO [LocalJobRunner Map Task Executor #0] - Spilling map output
INFO [LocalJobRunner Map Task Executor #0] - bufstart = 0; bufend = 2145; bufvoid = 104857600
INFO [LocalJobRunner Map Task Executor #0] - kvstart = 26214396(104857584); kvend = 26213612(104854448); length = 785/6553600
INFO [LocalJobRunner Map Task Executor #0] - Finished spill 0
INFO [LocalJobRunner Map Task Executor #0] - Task:attempt_local159353495_0001_m_000000_0 is done. And is in the process of committing
INFO [LocalJobRunner Map Task Executor #0] - map
INFO [LocalJobRunner Map Task Executor #0] - Task 'attempt_local159353495_0001_m_000000_0' done.
INFO [LocalJobRunner Map Task Executor #0] - Finishing task: attempt_local159353495_0001_m_000000_0
INFO [Thread-12] - map task executor complete.
INFO [Thread-12] - Waiting for reduce tasks
INFO [pool-6-thread-1] - Starting task: attempt_local159353495_0001_r_000000_0
INFO [pool-6-thread-1] - Using ResourceCalculatorProcessTree : [ ]
INFO [pool-6-thread-1] - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@363cef1d
INFO [pool-6-thread-1] - MergerManager: memoryLimit=503893184, maxSingleShuffleLimit=125973296, mergeThreshold=332569504, ioSortFactor=10, memToMemMergeOutputsThreshold=10
INFO [EventFetcher for fetching Map Completion Events] - attempt_local159353495_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
INFO [localfetcher#1] - localfetcher#1 about to shuffle output of map attempt_local159353495_0001_m_000000_0 decomp: 2541 len: 2545 to MEMORY
INFO [localfetcher#1] - Read 2541 bytes from map-output for attempt_local159353495_0001_m_000000_0
INFO [localfetcher#1] - closeInMemoryFile -> map-output of size: 2541, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->2541
INFO [EventFetcher for fetching Map Completion Events] - EventFetcher is interrupted.. Returning
INFO [pool-6-thread-1] - 1 / 1 copied.
INFO [pool-6-thread-1] - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
INFO [pool-6-thread-1] - Merging 1 sorted segments
INFO [pool-6-thread-1] - Down to the last merge-pass, with 1 segments left of total size: 2538 bytes
INFO [pool-6-thread-1] - Merged 1 segments, 2541 bytes to disk to satisfy reduce memory limit
INFO [pool-6-thread-1] - Merging 1 files, 2545 bytes from disk
INFO [pool-6-thread-1] - Merging 0 segments, 0 bytes from memory into reduce
INFO [pool-6-thread-1] - Merging 1 sorted segments
INFO [pool-6-thread-1] - Down to the last merge-pass, with 1 segments left of total size: 2538 bytes
INFO [pool-6-thread-1] - 1 / 1 copied.
INFO [pool-6-thread-1] - mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
INFO [main] - map 100% reduce 0%
INFO [pool-6-thread-1] - Task:attempt_local159353495_0001_r_000000_0 is done. And is in the process of committing
INFO [pool-6-thread-1] - 1 / 1 copied.
INFO [pool-6-thread-1] - Task attempt_local159353495_0001_r_000000_0 is allowed to commit now
INFO [pool-6-thread-1] - Saved output of task 'attempt_local159353495_0001_r_000000_0' to hdfs://localhost:9000/user/output/test1/_temporary/0/task_local159353495_0001_r_000000
INFO [pool-6-thread-1] - reduce > reduce
INFO [pool-6-thread-1] - Task 'attempt_local159353495_0001_r_000000_0' done.
INFO [pool-6-thread-1] - Finishing task: attempt_local159353495_0001_r_000000_0
INFO [Thread-12] - reduce task executor complete.
INFO [main] - map 100% reduce 100%
INFO [main] - Job job_local159353495_0001 completed successfully
INFO [main] - Counters: 38
File System Counters
    FILE: Number of bytes read=8166
    FILE: Number of bytes written=535401
    FILE: Number of read operations=0
    FILE: Number of large read operations=0
    FILE: Number of write operations=0
    HDFS: Number of bytes read=5464
    HDFS: Number of bytes written=1310
    HDFS: Number of read operations=37
    HDFS: Number of large read operations=0
    HDFS: Number of write operations=6
Map-Reduce Framework
    Map input records=31
    Map output records=197
    Map output bytes=2145
    Map output materialized bytes=2545
    Input split bytes=108
    Combine input records=0
    Combine output records=0
    Reduce input groups=132
    Reduce shuffle bytes=2545
    Reduce input records=197
    Reduce output records=132
    Spilled Records=394
    Shuffled Maps =1
    Failed Shuffles=0
    Merged Map outputs=1
    GC time elapsed (ms)=81
    CPU time spent (ms)=0
    Physical memory (bytes) snapshot=0
    Virtual memory (bytes) snapshot=0
    Total committed heap usage (bytes)=249561088
Shuffle Errors
    BAD_ID=0
    CONNECTION=0
    IO_ERROR=0
    WRONG_LENGTH=0
    WRONG_MAP=0
    WRONG_REDUCE=0
File Input Format Counters
    Bytes Read=1366
File Output Format Counters
    Bytes Written=1310
What is a replicated join with DistributedCache, and how does it work?
DistributedCache works by copying the smaller file to every node. We call DistributedCache.addCacheFile() to register the file to be distributed, and then, in the mapper's initialization method setup(), we call DistributedCache.getLocalCacheFiles() to locate the local copy and load it into memory.
Why use it?
The previous section discussed the reduce-side join: the Mapper wraps each record before emitting it, the intermediate results are shuffled across the network to the Reducers, and the combine() step then filters out and discards large amounts of that data. A reduce-side join therefore wastes precious network bandwidth and processes data inefficiently. That consideration is what motivates the replicated join based on DistributedCache.
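To make the idea concrete, here is a minimal sketch of what such a map-side replicated join might look like. Everything in it is hypothetical (not from the original post): it assumes a small table users.txt of "userId,userName" lines has been distributed via the cache and symlinked into the task's working directory, and that the job input is "userId,orderInfo" lines:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical map-side replicated join: load the small table once per task,
// then join each input record against it in memory, with no shuffle needed.
public class ReplicatedJoinMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    private final Map<String, String> users = new HashMap<>();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Load the whole small table into memory once per task.
        try (BufferedReader reader = new BufferedReader(new FileReader("users.txt"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.split(",", 2);
                if (parts.length == 2) {
                    users.put(parts[0], parts[1]);
                }
            }
        }
    }

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] parts = value.toString().split(",", 2);
        if (parts.length == 2) {
            String name = users.get(parts[0]);
            if (name != null) { // inner join: drop records with no match
                context.write(new Text(name + "," + parts[1]), NullWritable.get());
            }
        }
    }
}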
You can store the scanned results in an instance-level collection and then pull the data out of that collection whenever you need it: setup() runs once per task, while map() runs once per record, as the framework's Mapper.run() method shows:
public void run(Context context) throws IOException, InterruptedException {
    setup(context);
    try {
        while (context.nextKeyValue()) {
            map(context.getCurrentKey(), context.getCurrentValue(), context);
        }
    } finally {
        cleanup(context);
    }
}
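Applied to this post's example, a sketch (under the same assumptions as the original MapClass; the class name and the filtering behavior are illustrative, not from the original) that keeps the scanned words in an instance field instead of just printing them, so every map() call can use them:

import java.io.IOException;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;

// Hypothetical variant: setup() loads the cached words into a Set once,
// and every later map() call in the same task can consult that Set for free.
public class CachingMapClass extends Mapper<LongWritable, Text, Text, IntWritable> {

    private final Set<String> cachedWords = new HashSet<>();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
        try (Scanner scan = new Scanner(FileSystem.getLocal(conf).open(localFiles[0]))) {
            while (scan.hasNext()) {
                cachedWords.add(scan.next());
            }
        }
    }

    @Override
    public void map(LongWritable ikey, Text ivalue, Context context)
            throws IOException, InterruptedException {
        for (String s : ivalue.toString().split(" ")) {
            if (cachedWords.contains(s)) { // only count words present in the cached file
                context.write(new Text(s), new IntWritable(1));
            }
        }
    }
}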