并行文件对比代码

本文介绍了一种通过并行处理和文件位置定位来优化大量数据比对速度的方法。程序按 id 区间对数据进行分区,每个分区由一个独立线程处理,针对源文件和改进后的文件进行逐行对比,确保数据一致性。然而,当前实现仍存在性能瓶颈,需要进一步优化。

还需要继续优化,目前运行有点慢,当然也可能是因为我的机器性能不好……

package computeResultCheck;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

/**
 * Compares two merged result files record by record, using several threads.
 *
 * <p>Because task partitioning of the original computation is not predictable, all
 * part files are first merged into one file per side (via {@code ResultCombine}), and
 * the merged files are then compared in parallel by id range.
 *
 * <p>Each record is expected to be a line of the form {@code id<TAB>value}. The
 * comparison assumes that ids increase monotonically in both files (so an id
 * effectively acts as a line number); this lets every worker walk both files in a
 * single forward pass.
 */
public class ImproveResultCheck {
    /** Number of id ranges, and therefore worker threads, used for the comparison. */
    public static Integer MAX_PARTITION_NUMBER = 5;
    /** Root directory that holds the collected computation results. */
    public static String COMPARE_RESULTS_DIR =
        "D:\\计算结果汇总\\" ;
    /** Sub-directory of the algorithm/data set under test. */
    public static String ALGORITHM = "CC-BerkStan\\" ;
    /** Directory containing the part files of the reference ("source") run. */
    public static String sourceResultFile = COMPARE_RESULTS_DIR + ALGORITHM  + "range\\source\\" ;
    /** Directory containing the part files of the improved run. */
    public static String improveResultFile = COMPARE_RESULTS_DIR + ALGORITHM  + "range\\improve\\" ;

    /**
     * Merges the part files of both runs, splits the id space into
     * {@link #MAX_PARTITION_NUMBER} ranges and compares each range on its own thread.
     * Mismatching values are printed by the workers; the elapsed time is printed at the end.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String sourceFile = sourceResultFile + "sourceResult.txt";
        String improveFile = improveResultFile + "improveResult.txt";

        // NOTE(review): combinePartfile presumably merges the part files into the given
        // target file and returns the number of records written - confirm in ResultCombine.
        Integer keySourceSize = ResultCombine.combinePartfile(sourceResultFile, sourceFile);
        Integer keyImproveSize = ResultCombine.combinePartfile(improveResultFile, improveFile);

        if (!keyImproveSize.equals(keySourceSize)) {
            System.out.println("Size not equal! ");
        }

        long startTime = System.currentTimeMillis();
        ImproveResultCheck irc = new ImproveResultCheck();
        int partitions = MAX_PARTITION_NUMBER;
        Thread[] paralFind = new Thread[partitions];
        // Width of one id range; derived from the configured partition count rather
        // than a hard-coded 5 so that changing MAX_PARTITION_NUMBER keeps full coverage.
        int range = partitions > 0 ? Math.abs(keyImproveSize / partitions) : 0;
        int endId = 0;

        for (int p = 0; p < partitions; p++) {
            // Ranges are inclusive on both ends and start at id 1.
            int startId = endId + 1;
            endId = startId + range;
            paralFind[p] = new Thread(irc.new FindSmallIdRunnable(startId, endId, sourceFile, improveFile));
            paralFind[p].start();
        }

        try {
            for (Thread t : paralFind) {
                t.join();
            }
        } catch (InterruptedException e1) {
            // Restore the interrupt status so callers further up can observe it.
            Thread.currentThread().interrupt();
            e1.printStackTrace();
        }
        System.out.println("Congratulations ! last " + (System.currentTimeMillis() - startTime) + "ms");
    }

    /**
     * Returns the next line of {@code reader} that is not the empty string.
     *
     * @param reader the reader to advance
     * @return the next non-empty line, or {@code null} at end of input
     * @throws IOException if reading fails
     */
    private static String readNonBlankLine(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (!"".equals(line)) {
                return line;
            }
        }
        return null;
    }

    /**
     * Worker that compares the records whose id lies in {@code [startId, endId]}.
     *
     * <p>Both files are opened once and read forward in lockstep: for every source
     * record inside the range, the improve file is advanced until its id catches up.
     * This relies on ids being in ascending order in both files, and replaces the
     * earlier approach of rescanning the improve file from the top for every source
     * record.
     */
    class FindSmallIdRunnable implements Runnable {
        public String sourceFileName = "";
        public String improveFileName = "";
        Integer startId = -1 ;
        Integer endId = -1 ;

        /**
         * @param startId         first id (inclusive) this worker is responsible for
         * @param endId           last id (inclusive) this worker is responsible for
         * @param sourceFileName  path of the merged reference result file
         * @param improveFileName path of the merged improved result file
         */
        public FindSmallIdRunnable(Integer startId, Integer endId, String sourceFileName, String improveFileName) {
            this.improveFileName = improveFileName ;
            this.sourceFileName = sourceFileName ;
            this.startId = startId ;
            this.endId = endId ;
        }

        /**
         * Prints one line for every id in the range whose value differs between the
         * two files. Any I/O or parse failure is reported on stderr and ends this
         * worker; the readers are closed in every case.
         */
        @Override
        public void run() {
            try (BufferedReader sourceReader = new BufferedReader(new FileReader(sourceFileName));
                 BufferedReader improveReader = new BufferedReader(new FileReader(improveFileName))) {

                // The improve record that has been read but not yet passed by the source side.
                String pendingImprove = readNonBlankLine(improveReader);
                String sourceLine;

                while ((sourceLine = sourceReader.readLine()) != null) {
                    if ("".equals(sourceLine)) {
                        continue;
                    }
                    String[] sourceFields = sourceLine.split("\t");
                    int keySource = Integer.parseInt(sourceFields[0]);
                    int valueSource = Integer.parseInt(sourceFields[1]);

                    if (keySource > endId) {
                        break; // past this worker's range; ids only grow from here
                    }
                    if (keySource < startId) {
                        continue; // not yet at the start of this worker's range
                    }

                    // Advance the improve side up to (and through) the current source id.
                    while (pendingImprove != null) {
                        String[] improveFields = pendingImprove.split("\t");
                        int keyImprove = Integer.parseInt(improveFields[0]);

                        if (keyImprove > keySource) {
                            break; // keep this record pending for a later source id
                        }
                        if (keyImprove == keySource) {
                            int valueImprove = Integer.parseInt(improveFields[1]);
                            if (valueSource != valueImprove) { // key equal, value not equal
                                System.out.println(Thread.currentThread().getId() + " key: " + keySource
                                        + ", Source Value: " + valueSource + ", valueImrpove: " + valueImprove);
                            }
                        }
                        pendingImprove = readNonBlankLine(improveReader);
                    }
                }
            } catch (NumberFormatException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } catch (Exception e1) {
                // Covers e.g. records with fewer than two tab-separated fields.
                e1.printStackTrace();
            }
        }
    }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值