hadoop-java：表与表的左连接

最新推荐文章于 2025-01-03 09:52:35 发布

原创最新推荐文章于 2025-01-03 09:52:35 发布 · 403 阅读

0 ·

CC 4.0 BY-SA版权

MapReduce编程(java) 专栏收录该内容

4 篇文章

订阅专栏

本文介绍了一个用纯Java实现的单表自连接示例程序：程序不依赖Hadoop运行环境，而是在本地文本文件上模拟MapReduce的map、shuffle、reduce三个阶段。它读取“star user”形式的关系数据，以user为连接字段，输出同一用户下所有star的两两配对组合（包括与自身的配对）。

程序如下：

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;


/**
 * Single-process imitation of a MapReduce job that runs on local text files.
 *
 * <p>Flow: {@link #IterMapper()} feeds every non-blank input line to
 * {@link #Mapper(String, String)}; mappers emit pairs with
 * {@link #WriteMaper(String, String)}; {@link #shuff()} groups the emitted values by key;
 * {@link #IterReducer()} hands every group to {@link #Reducer(String, Iterator)}; reducers
 * emit results with {@link #WriteReducer(String, String)}.
 *
 * <p>{@code Mapper} and {@code Reducer} are empty here; subclasses override them.
 */
public class MapperReduce {
	// One definition per path, so the constructor cleans exactly the files
	// that the readers and writers below use.
	private static final String INPUT_PATH="F:\\input.txt";
	private static final String MAP_RESULT_PATH="F:\\map_result.txt";
	private static final String OUTPUT_PATH="F:\\output.txt";

	/**
	 * Removes the intermediate and the output file of a previous run, if present.
	 * Both files are written in append mode, so stale content would otherwise be kept.
	 */
	public MapperReduce(){
		deleteIfExists(MAP_RESULT_PATH);
		deleteIfExists(OUTPUT_PATH);
	}

	/** Deletes the file at {@code path} when it exists. */
	private static void deleteIfExists(String path){
		File file=new File(path);
		if(file.exists()){
			file.delete();
		}
	}

	/**
	 * Shuffle step: reads the mapper output file and merges the values of equal keys.
	 *
	 * <p>Each line is split at its first space into key and value. A line without a
	 * space contributes the placeholder value {@code " "}. The entry whose key is the
	 * empty string (blank lines, or lines starting with a space) is dropped.
	 *
	 * @return key mapped to its values in file order
	 * @throws IOException if the mapper output file cannot be read
	 */
	public Map<String,List<String>> shuff() throws IOException{
		Map<String,List<String>> map=new HashMap<String,List<String>>();
		BufferedReader br=new BufferedReader(new FileReader(MAP_RESULT_PATH));
		try{
			String line;
			while((line=br.readLine())!=null){
				String[] content=line.split(" ",2);
				List<String> list=map.get(content[0]);
				if(list==null){
					list=new ArrayList<String>();
					map.put(content[0],list);
				}
				if(content.length==2){
					list.add(content[1]);
				}else{
					list.add(" ");
				}
			}
		}finally{
			// Close the reader even when reading fails.
			br.close();
		}
		map.remove("");
		return map;
	}

	/**
	 * Reads the input file line by line and calls {@link #Mapper(String, String)} for
	 * every line that is not blank. The key passed to the mapper is the 1-based line
	 * number; blank lines are skipped but still counted.
	 *
	 * @throws IOException if the input file cannot be read or the mapper fails
	 */
	public void IterMapper() throws IOException{
		BufferedReader br=new BufferedReader(new FileReader(INPUT_PATH));
		try{
			String line;
			int count=1;
			while((line=br.readLine())!=null){
				if(!line.trim().equals("")){
					Mapper(String.valueOf(count),line);
				}
				count +=1;
			}
		}finally{
			br.close();
		}
	}

	/**
	 * Runs the shuffle step and calls {@link #Reducer(String, Iterator)} once per key
	 * with an iterator over that key's values.
	 *
	 * @throws IOException if the shuffle or a reducer call fails
	 */
	public void IterReducer() throws IOException{
		Map<String,List<String>> map=shuff();
		for(Map.Entry<String,List<String>> entry : map.entrySet()){
			Reducer(entry.getKey(),entry.getValue().iterator());
		}
	}

	/**
	 * Appends one line {@code new_key + " " + new_value} to the mapper output file.
	 *
	 * @throws IOException if the file cannot be written
	 */
	public void WriteMaper(String new_key,String new_value) throws IOException{
		appendLine(MAP_RESULT_PATH,new_key,new_value);
	}

	/**
	 * Appends one line {@code new_key + " " + new_value} to the reducer output file.
	 *
	 * @throws IOException if the file cannot be written
	 */
	public void WriteReducer(String new_key,String new_value) throws IOException{
		appendLine(OUTPUT_PATH,new_key,new_value);
	}

	/** Appends "key value\r\n" to the file at {@code path}, always closing the writer. */
	private static void appendLine(String path,String new_key,String new_value) throws IOException{
		FileWriter fw=new FileWriter(path,true);
		try{
			fw.write(new_key+" "+new_value+"\r\n");
		}finally{
			fw.close();
		}
	}

// map function begins
	/**
	 * Map hook, empty in this base class.
	 *
	 * @param key 1-based line number of the input line, as a string
	 * @param value content of the input line
	 */
	public void Mapper(String key,String value) throws IOException{

	}
// map function ends

// reduce function begins
	/**
	 * Reduce hook, empty in this base class.
	 *
	 * @param key key produced by the shuffle step
	 * @param value iterator over all values grouped under {@code key}
	 */
	public void Reducer(String key,Iterator<String> value) throws IOException{

	}
// reduce function ends
}










import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;


public class MapReduce extends MapperReduce{

	public MapReduce(){
		super();
	}
	
	public void printf(List<String> list1,List<String> list2) throws IOException{
		if((!list1.isEmpty()) && (!list2.isEmpty())){
			Iterator<String> Iter1=list1.iterator();
			while(Iter1.hasNext()){
				String tmp1=Iter1.next();
				Iterator<String> Iter2=list2.iterator();
				while(Iter2.hasNext()){
					String tmp2=Iter2.next();
					WriteReducer(tmp1,tmp2);
				}
			}
		}
	}
	
	//map函数开始
		public void Mapper(String key,String value) throws IOException{
			//key为行偏移量，value为每一行的值
			String[] count=value.split(" ");
			if(!count[1].trim().equals("")){
				WriteMaper(count[1].trim(),count[0].trim()+" "+"1");
				WriteMaper(count[1].trim(),count[0].trim()+" "+"2");
			}
		}
	//map函数结束
		
	//reduce函数开始	
		public void Reducer(String key,Iterator<String> value) throws IOException{
			List<String> list1=new ArrayList<String>();
			List<String> list2=new ArrayList<String>();
			while(value.hasNext()){
				String tmp=value.next();
				String[] tmp1=tmp.split(" ");
				if(tmp1[1].equals("1")){
					list1.add(tmp1[0]);
				}else{
					list2.add(tmp1[0]);
				}
			}
			printf(list1,list2);
		}
	//reduce函数结束
	public static void main(String[] args) throws IOException {
		MapReduce a=new MapReduce();
		a.IterMapper();
		a.IterReducer();
	}
}

本程序实例是：单表自连接，连接字段是 user

输入：

star1 user1
star2 user1
star3 user1
star3 user2

输出：

star1 star1
star1 star2
star1 star3
star2 star1
star2 star2
star2 star3
star3 star1
star3 star2
star3 star3
star3 star3