MappedByteBuffer共享内存

本文介绍了一种通过本地共享内存来模拟并加速节点间消息传递的方法。通过在共享内存中直接写入序列化的消息,避免了网络通信延迟,进而提高了通信效率。文中详细展示了消息的生成、序列化、写入及读取过程。

写入

package shareMemory;

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.channels.FileLock;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hama.bsp.Combiner;
import org.apache.hama.graph.GraphJobMessage;
import org.apache.hama.util.ReflectionUtils;
import org.apache.hama.util.WritableUtils;

/**
 * Simulates delivering messages between tasks on the same node through a
 * memory-mapped file instead of the network.
 *
 * <p>Layout written to the mapped file: a 4-byte int at offset 0 holding the
 * end offset of the data (the header itself included), followed by records of
 * the form {@code [int length][length bytes of a serialized GraphJobMessage]}.
 *
 * @author qiangliu
 */
public class ShareMemory {

	/** Mapped view of {@link #shareFile}; stays {@code null} until the sender has mapped the file. */
	public MappedByteBuffer mbb;
	/** Number of bytes mapped for writing (50 MB); the needed size is not known before writing. */
	public static int bufferSize = 50 * 1024 * 1024;
	/** File backing the shared memory; stays {@code null} if it could not be opened. */
	RandomAccessFile raf;
//	public static String shareFile = "/opt/hama-0.7.1/shareMemory/shareMemory.txt";
	public static String shareFile = "F:\\test\\shareMemory\\sharememory.txt";

	/** Pending messages keyed by destination vertex id. */
	ConcurrentHashMap<IntWritable, VertexMessage> vertexMessages = new ConcurrentHashMap<IntWritable, VertexMessage>();
	/** Combiner used when two messages target the same vertex; {@code null} if it could not be loaded. */
	private Combiner<Writable> combiner;

	/**
	 * Opens the backing file and instantiates the PageRank combiner by name.
	 * Failures are printed and leave the corresponding field {@code null};
	 * the other methods tolerate that state.
	 */
	@SuppressWarnings("unchecked")
	ShareMemory() {
		try {
			raf = new RandomAccessFile(shareFile, "rw");
			final String combinerName = "org.apache.hama.examples.PageRank$PagerankCombiner";
			combiner = (Combiner<Writable>) ReflectionUtils.newInstance(combinerName);
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ClassNotFoundException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Fills {@link #vertexMessages} with mock PageRank messages: two passes
	 * (j = 1, 2) over vertex ids 0..999. When a message for the same vertex id is
	 * already queued and a combiner is available, the queued value and the new
	 * value are combined into one message; otherwise the new message replaces
	 * the queued one.
	 */
	public void generateMessages() {
		for (int j = 1; j < 3; j++) {
			for (int i = 0; i < 1000; i++) {
				// Floating-point division: i / j on two ints would truncate before widening.
				DoubleWritable value = new DoubleWritable((double) i / j);
				VertexMessage vMessage = new VertexMessage(new IntWritable(i), value);

				VertexMessage previous = vertexMessages.get(vMessage.getVertexId());
				if (combiner != null && previous != null) {
					// Feed BOTH the queued value and the new one to the combiner; passing
					// only the new value would silently drop the queued message.
					DoubleWritable combined = (DoubleWritable) combiner.combine(
							java.util.Arrays.<Writable>asList(previous.getVertexVertexValue(), value));
					vMessage.setVertexVertexValue(combined);
				}

				vertexMessages.put(vMessage.getVertexId(), vMessage);
			}
		}
	}

	/**
	 * Resets the mapped buffer's position/limit and closes the backing file.
	 * Safe to call when the file was never opened or never mapped.
	 */
	public void close() {
		try {
			if (mbb != null) {
				mbb.clear();
			}
			if (raf != null) {
				raf.close();
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Generates the mock messages, writes them to the shared file on a pool
	 * thread, and prints how long each phase took in milliseconds.
	 */
	public static void main(String[] args) {
		ShareMemory shm = new ShareMemory();
		long startTime = System.currentTimeMillis();
		shm.generateMessages();
		System.out.println("Generate Messages last :" + (System.currentTimeMillis() - startTime));

		startTime = System.currentTimeMillis();
		ExecutorService pool = Executors.newCachedThreadPool();
		CompletionService<Boolean> exchangeResult = new ExecutorCompletionService<Boolean>(pool);
		try {
			int destSize = 0;
			destSize++;
			exchangeResult.submit(shm.new MesssageShareSender());
			for (int count = 0; count < destSize; count++) {
				// take() blocks until a task finishes, instead of spinning on poll().
				exchangeResult.take();
			}
			System.out.println("Send Messages last :" + (System.currentTimeMillis() - startTime));
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
			e.printStackTrace();
		} finally {
			pool.shutdown();
			shm.close();
		}
	}

	/**
	 * Wraps a single value as an {@link Iterable} that yields exactly that value
	 * once per iteration.
	 *
	 * @param vertexValue the value to expose
	 * @return an iterable whose iterators each return {@code vertexValue} once
	 */
	public static Iterable<Writable> getIterableMessages(final Writable vertexValue) {
		return new Iterable<Writable>() {
			@Override
			public Iterator<Writable> iterator() {
				return new Iterator<Writable>() {
					private boolean consumed = false;

					@Override
					public boolean hasNext() {
						return !consumed;
					}

					@Override
					public Writable next() {
						if (consumed) {
							throw new java.util.NoSuchElementException();
						}
						consumed = true;
						return vertexValue;
					}

					@Override
					public void remove() {
						throw new UnsupportedOperationException();
					}
				};
			}
		};
	}

	/**
	 * Task that drains {@link ShareMemory#vertexMessages} into the mapped file
	 * while holding an exclusive lock on the file.
	 *
	 * <p>Open questions noted by the original author: why GraphJobMessage is
	 * needed as the envelope, and whether writing directly to the buffer is
	 * faster than going through a stream.
	 */
	class MesssageShareSender implements Callable<Boolean> {

		/**
		 * Writes every queued message as a length-prefixed record and then stores
		 * the end offset at position 0.
		 *
		 * @return {@code true} if all queued messages were written; {@code false} if
		 *         an error occurred or the mapped region filled up (messages that
		 *         were not written stay queued)
		 */
		@Override
		public Boolean call() throws Exception {
			FileLock flock = null;
			try {
				FileChannel fc = raf.getChannel();
				// Block until the lock is available rather than giving up after one attempt.
				flock = fc.lock();

				mbb = fc.map(MapMode.READ_WRITE, 0, ShareMemory.bufferSize);
				mbb.position(4); // first 4 bytes are reserved for the end offset

				boolean allWritten = true;
				Iterator<Entry<IntWritable, VertexMessage>> it = vertexMessages.entrySet().iterator();
				while (it.hasNext()) {
					Entry<IntWritable, VertexMessage> e = it.next();
					byte[] serialized = WritableUtils.serialize(e.getValue().getVertexVertexValue());
					GraphJobMessage gjm = new GraphJobMessage(e.getValue().getVertexId(), serialized);
					gjm.setVertexId(e.getKey());
					gjm.setFlag(GraphJobMessage.VERTEX_FLAG);
					byte[] message = WritableUtils.serialize(gjm);

					if (mbb.remaining() < 4 + message.length) {
						// Mapped region is full: stop on a record boundary so the header stays valid.
						allWritten = false;
						break;
					}
					mbb.putInt(message.length);
					mbb.put(message);
					// Dequeue only after the record is fully written.
					it.remove();
				}
				mbb.putInt(0, mbb.position()); // back-fill the end offset
				return allWritten;
			} catch (Exception e) {
				e.printStackTrace();
				return false;
			} finally {
				if (flock != null) {
					try {
						flock.release();
					} catch (IOException e) {
						e.printStackTrace();
					}
				}
			}
		}
	}
}


读取

package shareMemory;

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.nio.channels.FileChannel.MapMode;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hama.graph.GraphJobMessage;
import org.apache.hama.graph.GraphJobRunner;
import org.apache.hama.util.WritableUtils;


/**
 * Reads the length-prefixed GraphJobMessage records from the file named by
 * {@code ShareMemory.shareFile} through a read-only memory mapping and collects
 * them into {@link #vertexMessages}.
 *
 * <p>Expected layout: a 4-byte int end offset at position 0, then records of
 * the form {@code [int length][length bytes]}.
 */
public class ReadingProcess {
	byte[] buffer ; // scratch buffer; not used by the current code
	/** Read-only mapped view of the shared file; {@code null} if mapping failed. */
	public MappedByteBuffer mbb;
	/** File backing the shared memory; {@code null} if it could not be opened. */
	RandomAccessFile raf;
	/** Messages read so far, keyed by vertex id. */
	ConcurrentHashMap<IntWritable, VertexMessage> vertexMessages = new ConcurrentHashMap<IntWritable, VertexMessage>();

	/**
	 * Opens the shared file, maps its current full length read-only, pre-loads
	 * it, and sets the vertex id/value classes on GraphJobRunner. An I/O failure
	 * is printed and leaves the fields unset.
	 */
	public ReadingProcess() {
		try {
			raf = new RandomAccessFile(ShareMemory.shareFile, "rw");
			FileChannel fc = raf.getChannel();
			mbb = fc.map(MapMode.READ_ONLY, 0, fc.size());
			mbb.load();
			GraphJobRunner.VERTEX_ID_CLASS = IntWritable.class;
			GraphJobRunner.VERTEX_VALUE_CLASS = DoubleWritable.class;
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Resets the mapped buffer's position/limit and closes the backing file.
	 * Safe to call when the constructor failed part-way.
	 */
	public void close() {
		try {
			if (mbb != null) {
				mbb.clear();
			}
			if (raf != null) {
				raf.close();
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Waits for the file lock, then reads every record up to the end offset
	 * stored in the header and stores one VertexMessage per vertex id. A second
	 * record for an id that is already present is reported and skipped. The lock
	 * is released and the file closed when reading ends, whether or not it
	 * succeeded.
	 */
	public void readData() {
		FileLock flock = null;
		try {
			FileChannel fc = raf.getChannel();
			// Re-attempt the lock on every pass; testing a stale null would never terminate.
			while ((flock = fc.tryLock()) == null) {
				Thread.sleep(10);
			}
			System.out.println("IsLoaded: " + mbb.isLoaded() +" position:"+ mbb.position());
			int fileLength = mbb.getInt();

			while (mbb.position() < fileLength) {
				int msgLength = mbb.getInt();
				if (msgLength <= 0) {
					continue;
				}
				if (msgLength > mbb.remaining()) {
					// Corrupt length prefix: fail before allocating a buffer of that size.
					throw new IllegalStateException("Record length " + msgLength
							+ " exceeds remaining " + mbb.remaining() + " bytes");
				}
				byte[] message = new byte[msgLength];
				mbb.get(message);

				GraphJobMessage gjm = new GraphJobMessage();
				WritableUtils.deserialize(message, gjm);
				if (vertexMessages.containsKey(gjm.getVertexId())) {
					System.out.println("Combine Error!");
					continue;
				}
				DoubleWritable vertexValue = new DoubleWritable();
				WritableUtils.deserialize(gjm.getValuesBytes(), vertexValue);
				IntWritable vertexId = (IntWritable) gjm.getVertexId();
				vertexMessages.put(vertexId, new VertexMessage(vertexId, vertexValue));
			}
		} catch (InterruptedException e) {
			// Preserve the interrupt for whoever owns this thread.
			Thread.currentThread().interrupt();
			e.printStackTrace();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			if (flock != null) {
				try {
					flock.release();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			close();
		}
	}

	/** Reads the shared file once and exits. */
	public static void main(String[] args) {
		ReadingProcess rp = new ReadingProcess();
		rp.readData();
	}
}


内存共享是指多个进程可以同时访问同一块内存区域,从而实现数据共享。Java和Python都支持内存共享,下面分别介绍Java和Python的内存共享方法:

Java内存共享:

Java中可以通过内存映射文件(MappedByteBuffer)在多个进程之间共享数据。具体实现方法如下:

1.创建共享内存文件

```java
File file = new File("test.txt");
RandomAccessFile raf = new RandomAccessFile(file, "rw");
raf.setLength(1024*1024); // 设置文件大小为1MB
```

2.将文件映射到内存中

```java
FileChannel channel = raf.getChannel();
MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_WRITE, 0, 1024*1024);
```

3.使用共享内存

```java
buffer.putInt(0, 123); // 在共享内存中写入数据
int data = buffer.getInt(0); // 从共享内存中读取数据
```

Python内存共享:

Python中可以使用multiprocessing模块来实现多个进程之间的数据共享。multiprocessing模块提供了Value和Array两种方式来实现共享内存。具体实现方法如下:

1.使用Value实现共享内存

```python
from multiprocessing import Value, Process

def func(val):
    val.value += 1

if __name__ == '__main__':
    val = Value('i', 0)  # 创建共享内存变量
    p1 = Process(target=func, args=(val,))
    p2 = Process(target=func, args=(val,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    print(val.value)  # 输出:2
```

注意:`val.value += 1` 包含一次读和一次写,不是原子操作。要保证结果一定为2,应在 `with val.get_lock():` 中执行自增。

2.使用Array实现共享内存

```python
from multiprocessing import Array, Process

def func(arr):
    for i in range(len(arr)):
        arr[i] += 1

if __name__ == '__main__':
    arr = Array('i', [0, 0, 0])  # 创建共享内存数组
    p1 = Process(target=func, args=(arr,))
    p2 = Process(target=func, args=(arr,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    print(arr[:])  # 输出:[2, 2, 2]
```

同样,`arr[i] += 1` 也不是原子操作,并发修改时应使用 `with arr.get_lock():` 加锁,否则输出不一定是 [2, 2, 2]。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值