1.多线程读取大文件,并通过jedis插入数据
package readfile;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.log4j.Logger;
import com.wandoulabs.jodis.JedisResourcePool;
import com.wandoulabs.jodis.RoundRobinJedisPool;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPoolConfig;
/**
 * Splits one text file into byte ranges, reads every range on its own worker
 * thread, and stores each line in Redis through a shared Jedis pool.
 *
 * <p>Each line (including its trailing line feed) is handed to the registered
 * {@link DataProcessHandler}; element 0 of the returned array is used as the
 * Redis key and element 1 as the value.
 *
 * <p>Typical use: construct, call {@link #registerHanlder}, then call
 * {@link #startRead()}. {@code startRead()} only submits the work; use
 * {@link #getThreadPool()} to wait for completion if needed.
 */
public class FileReader {
    /** Byte that terminates a line. */
    private static final byte LINE_FEED = '\n';

    private int threadNum = 3; // number of worker threads, 3 by default
    private String filePath; // path of the file to read
    private int bufSize = 1024; // read buffer size in bytes, 1024 by default
    private DataProcessHandler dataProcessHandler; // converts a raw line into {key, value}
    private ExecutorService threadPool;
    protected static Logger logger = Logger.getLogger(FileReader.class);
    private static JedisResourcePool jedisPool;

    /** Builds the shared connection pool; callers must hold the class lock. */
    private static void initialPool() {
        JedisPoolConfig config = new JedisPoolConfig();
        config.setMaxTotal(-1);
        config.setMinIdle(2);
        config.setMaxIdle(-1);
        config.setMaxWaitMillis(10000);
        config.setTestOnBorrow(true);
        config.setTestOnReturn(true);
        jedisPool = new RoundRobinJedisPool("zk ip:2181", 10000, "/zk/codis/db_test/proxy", config, 10000);
    }

    /** Creates the shared pool if it does not exist yet. */
    private static synchronized void poolInit() {
        if (jedisPool == null) {
            initialPool();
        }
    }

    /**
     * Borrows a connection from the shared pool, creating the pool on first use.
     *
     * @return a pooled connection, or {@code null} if the pool is unavailable;
     *         release it with {@link #returnResource(Jedis)}
     */
    public synchronized static Jedis getJedis() {
        if (jedisPool == null) {
            poolInit();
        }
        return jedisPool != null ? jedisPool.getResource() : null;
    }

    /**
     * Releases a connection obtained from {@link #getJedis()}.
     *
     * @param jedis the connection to release; {@code null} is ignored
     */
    public static void returnResource(final Jedis jedis) {
        if (jedis != null && jedisPool != null) {
            jedis.close();
        }
    }

    /**
     * @param filePath  path of the file to read
     * @param bufSize   size in bytes of each worker's read buffer; must be positive
     * @param threadNum number of worker threads (and file segments); must be positive
     * @throws IllegalArgumentException if {@code bufSize} or {@code threadNum} is not positive
     */
    public FileReader(String filePath, int bufSize, int threadNum) {
        if (bufSize <= 0) {
            // A zero-length buffer would make every read return 0 and spin forever.
            throw new IllegalArgumentException("bufSize must be positive: " + bufSize);
        }
        this.threadNum = threadNum;
        this.bufSize = bufSize;
        this.filePath = filePath;
        this.threadPool = Executors.newFixedThreadPool(threadNum);
    }

    /**
     * Splits the file into {@code threadNum} byte ranges and submits one
     * {@link MultiThreadReader} per non-empty range. The thread pool is shut
     * down afterwards (even on failure), so this method can be called once per
     * instance. I/O errors are logged, not thrown.
     *
     * @throws IllegalStateException if no handler has been registered
     */
    public void startRead() {
        if (dataProcessHandler == null) {
            throw new IllegalStateException("registerHanlder must be called before startRead");
        }
        try {
            long size = fileSize();
            long chunkSize = size / threadNum;
            for (int i = 0; i < threadNum; i++) {
                long startIndex = i * chunkSize;
                // The last segment also takes the remainder of the division.
                long length = (i == threadNum - 1) ? size - startIndex : chunkSize;
                if (length <= 0) {
                    // File smaller than the thread count: nothing to read in this segment.
                    continue;
                }
                submitChunk(startIndex, length);
            }
        } catch (IOException e) {
            logger.error("Failed to start reading " + filePath, e);
        } finally {
            // Always release the pool so idle worker threads cannot keep the JVM alive.
            threadPool.shutdown();
        }
    }

    /** Returns the current length of the file in bytes. */
    private long fileSize() throws IOException {
        RandomAccessFile raf = new RandomAccessFile(filePath, "r");
        try {
            return raf.length();
        } finally {
            raf.close();
        }
    }

    /** Opens a dedicated channel for one segment and hands it to a worker, which closes it. */
    private void submitChunk(long startIndex, long length) throws IOException {
        RandomAccessFile chunkFile = new RandomAccessFile(filePath, "r");
        boolean submitted = false;
        try {
            threadPool.execute(new MultiThreadReader(chunkFile.getChannel(), startIndex, length));
            submitted = true;
        } finally {
            if (!submitted) {
                chunkFile.close();
            }
        }
    }

    /**
     * Registers the handler that turns a raw line into a key/value pair.
     *
     * @param dataHandler handler invoked for every complete line
     */
    public void registerHanlder(DataProcessHandler dataHandler) {
        this.dataProcessHandler = dataHandler;
    }

    /**
     * Worker that reads one byte range of the file line by line.
     *
     * <p>Segment boundaries rarely coincide with line boundaries, so every
     * worker except the first starts one byte before its range and discards
     * everything up to and including the first line feed it sees; the previous
     * worker reads past its own end until it has finished that line.
     */
    public class MultiThreadReader implements Runnable {
        private FileChannel channel;
        private long startIndex; // offset where reading starts (one byte before the range unless it starts at 0)
        private long rSize; // nominal length of the range in bytes
        private long endIndex; // offset one past the last byte of the range
        private boolean firstSegment; // true when the range starts at offset 0

        /**
         * @param channel    channel dedicated to this worker; closed when the worker finishes
         * @param startIndex offset of the first byte of the range
         * @param rSize      length of the range in bytes
         */
        public MultiThreadReader(FileChannel channel, long startIndex, long rSize) {
            this.channel = channel;
            this.firstSegment = startIndex == 0;
            // Start one byte early so a line feed right before the range is seen
            // and the line that begins exactly at the range start is kept.
            this.startIndex = startIndex > 0 ? startIndex - 1 : startIndex;
            this.rSize = rSize;
            // Computed from the unshifted start; using the shifted start would end
            // the range one byte early and lose a line that begins on that byte.
            this.endIndex = startIndex + rSize;
        }

        public void run() {
            readByLine();
        }

        /**
         * Reads the range line by line and stores every complete line in Redis.
         * I/O errors are logged; the channel and the Redis connection are always
         * released. Prints the elapsed time to standard output when done.
         *
         * @throws IllegalStateException if no handler has been registered
         */
        public void readByLine() {
            long start = System.currentTimeMillis();
            Jedis jedis = null;
            try {
                DataProcessHandler handler = dataProcessHandler;
                if (handler == null) {
                    throw new IllegalStateException("no DataProcessHandler registered");
                }
                jedis = getJedis();
                ByteBuffer buffer = ByteBuffer.allocate(bufSize);
                channel.position(startIndex);
                byte[] carry = new byte[0]; // unfinished line left over from the previous read
                long lineCount = 0; // line feeds seen so far
                long consumed = startIndex; // offset one past the last byte examined
                boolean done = false;
                while (!done && channel.read(buffer) != -1) {
                    buffer.flip();
                    byte[] chunk = new byte[buffer.remaining()];
                    buffer.get(chunk);
                    buffer.clear();
                    int lineStart = 0; // index in chunk where the current line begins
                    for (int i = 0; i < chunk.length; i++) {
                        consumed++;
                        if (chunk[i] != LINE_FEED) {
                            continue;
                        }
                        byte[] line = concat(carry, chunk, lineStart, i + 1);
                        carry = new byte[0];
                        lineStart = i + 1;
                        lineCount++;
                        // In later segments the first "line" is the tail of a line
                        // owned by the previous worker, so it is dropped.
                        if (firstSegment || lineCount > 1) {
                            store(jedis, handler, line);
                        }
                        // Stop at the first line end at or beyond the range end.
                        if (consumed >= endIndex) {
                            done = true;
                            break;
                        }
                    }
                    if (!done && lineStart < chunk.length) {
                        carry = concat(carry, chunk, lineStart, chunk.length);
                    }
                }
                // End of file reached without a final line feed: store the last line,
                // unless it is only the tail of a line owned by the previous worker.
                if (carry.length > 0 && (firstSegment || lineCount > 0)) {
                    store(jedis, handler, carry);
                }
            } catch (IOException e) {
                logger.error("Failed to read " + filePath + " from offset " + startIndex, e);
            } finally {
                returnResource(jedis);
                try {
                    channel.close();
                } catch (IOException e) {
                    logger.error("Failed to close channel for " + filePath, e);
                }
            }
            long end = System.currentTimeMillis();
            System.out.println("查询单个文件" + "共耗时:::" + (end - start) * 1.0 / 1000 + "s");
        }

        /** Converts one line with the handler and writes it to Redis; malformed results are skipped. */
        private void store(Jedis jedis, DataProcessHandler handler, byte[] line) {
            String[] kv = handler.process(line);
            if (kv == null || kv.length < 2 || kv[0] == null || kv[1] == null) {
                logger.warn("Skipping line without a key/value pair in " + filePath);
                return;
            }
            jedis.set(kv[0], kv[1]);
        }

        /** Returns {@code head} followed by {@code src[from..to)}. */
        private byte[] concat(byte[] head, byte[] src, int from, int to) {
            byte[] joined = new byte[head.length + (to - from)];
            System.arraycopy(head, 0, joined, 0, head.length);
            System.arraycopy(src, from, joined, head.length, to - from);
            return joined;
        }
    }

    public int getThreadNum() {
        return threadNum;
    }

    public String getFilePath() {
        return filePath;
    }

    public ExecutorService getThreadPool() {
        return threadPool;
    }

    public int getBufSize() {
        return bufSize;
    }
}
2.辅助类:文件内容由bytes[]转为String
package readfile;
import java.io.UnsupportedEncodingException;
/**
 * {@link DataProcessHandler} that decodes a line as UTF-8 and splits it on the
 * letter {@code H} into a key and a value.
 */
public class FileLineDataHandler implements DataProcessHandler {
    /** Charset used to decode raw line bytes. */
    private static final java.nio.charset.Charset ENCODING = java.nio.charset.Charset.forName("utf-8");
    /** Regular expression that separates the key from the value. */
    private static final String SEPARATOR = "H";

    /**
     * Decodes {@code data} and splits it on the separator.
     *
     * @param data raw bytes of one line
     * @return a two-element array: the text before the first separator and the
     *         text between the first and second separator (or the end of the line)
     * @throws ArrayIndexOutOfBoundsException if the line contains no separator
     */
    @Override
    public String[] process(byte[] data) {
        // Decode and split once; the charset object cannot fail the way a charset name can.
        String[] parts = new String(data, ENCODING).split(SEPARATOR);
        return new String[] { parts[0], parts[1] };
    }
}
package readfile;
/**
 * Converts the raw bytes of one line into a string array.
 */
public interface DataProcessHandler {
/**
 * Processes one line of input.
 *
 * @param data raw bytes of the line
 * @return the values extracted from the line; FileReader uses element 0 as
 *         the Redis key and element 1 as the value
 */
String[] process(byte[] data);
// Earlier signature, kept for reference: void process(byte[] data);
}
3.主类:遍历读取目录下的所有txt文件
package readfile;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
 * Entry point: loads every {@code .txt} file of a directory through
 * {@link FileReader}.
 */
public class MultiThreadReadByLine {
    /**
     * @param args {@code args[0]} directory to scan, {@code args[1]} read buffer
     *             size in bytes, {@code args[2]} number of threads per file
     * @throws IllegalArgumentException if arguments are missing or the directory cannot be listed
     * @throws NumberFormatException    if the buffer size or thread count is not an integer
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: MultiThreadReadByLine <directory> <bufferSize> <threadCount>");
        }
        File directory = new File(args[0]);
        int bufSize = Integer.parseInt(args[1]);
        int threadNum = Integer.parseInt(args[2]);

        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] entries = directory.listFiles();
        if (entries == null) {
            throw new IllegalArgumentException("Not a readable directory: " + directory);
        }

        List<File> txtFiles = new ArrayList<File>();
        for (File entry : entries) {
            // Match the extension only at the end of the name, and skip sub-directories.
            if (entry.isFile() && entry.getName().endsWith(".txt")) {
                txtFiles.add(entry);
                System.out.println(entry.getName());
            }
        }

        for (File txtFile : txtFiles) {
            // Use the File's own path so the directory argument needs no trailing separator.
            FileReader fileReader = new FileReader(txtFile.getPath(), bufSize, threadNum);
            fileReader.registerHanlder(new FileLineDataHandler());
            fileReader.startRead();
        }
    }
}
4.随机查询类:利用key值的规律进行随机查询
package chaxun;
import java.util.Random;
import com.wandoulabs.jodis.JedisResourcePool;
import com.wandoulabs.jodis.RoundRobinJedisPool;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPoolConfig;
/**
 * Issues random GET requests from several threads, each with its own pooled
 * connection, and lets every thread report its elapsed time.
 */
public class RandomGet {
    private static JedisResourcePool jedisPool;

    /** Builds the shared connection pool. */
    private static void initialPool() {
        JedisPoolConfig poolConfig = new JedisPoolConfig();
        poolConfig.setMaxTotal(-1);
        poolConfig.setMinIdle(2);
        poolConfig.setMaxIdle(-1);
        poolConfig.setMaxWaitMillis(10000);
        poolConfig.setTestOnBorrow(true);
        poolConfig.setTestOnReturn(true);
        jedisPool = new RoundRobinJedisPool("10.191.20.224:2181", 10000, "/zk/codis/db_test/proxy", poolConfig,
                10000);
    }

    /** Creates the shared pool if it does not exist yet. */
    private static synchronized void poolInit() {
        if (jedisPool != null) {
            return;
        }
        initialPool();
    }

    /**
     * Borrows a connection from the shared pool, creating the pool on first use.
     *
     * @return a pooled connection, or {@code null} if the pool is unavailable
     */
    public synchronized static Jedis getJedis() {
        if (jedisPool == null) {
            poolInit();
        }
        if (jedisPool == null) {
            return null;
        }
        return jedisPool.getResource();
    }

    /**
     * Releases a connection obtained from {@link #getJedis()}.
     *
     * @param jedis the connection to release; {@code null} is ignored
     */
    public static void returnResource(final Jedis jedis) {
        if (jedis == null || jedisPool == null) {
            return;
        }
        jedis.close();
    }

    /**
     * @param args {@code args[0]} number of threads, {@code args[1]} number of
     *             queries issued by each thread
     */
    public static void main(String[] args) {
        int threadTotal = Integer.parseInt(args[0]);
        int queriesPerThread = Integer.parseInt(args[1]);
        new RandomGet().thread(threadTotal, queriesPerThread);
    }

    /** Starts {@code threadNumber} query threads, borrowing one connection for each up front. */
    private void thread(int threadNumber, int OneThreadCount) {
        int started = 0;
        while (started < threadNumber) {
            Jedis connection = getJedis();
            new MyThread(started, OneThreadCount, connection).start();
            started++;
        }
    }
}
/**
 * Thread that issues a fixed number of GET requests for randomly generated
 * keys over one connection and prints the elapsed time.
 */
class MyThread extends Thread {
    int OneThreadCount; // number of queries to issue
    Jedis _jedis; // connection used by this thread; closed when the run ends
    int num; // thread index, used only in the report line

    /**
     * @param i              index of this thread, shown in the report
     * @param OneThreadCount number of queries to issue
     * @param i4             connection to query with; closed by {@link #run()}
     */
    public MyThread(int i, int OneThreadCount, Jedis i4) {
        super();
        this.num = i;
        this.OneThreadCount = OneThreadCount;
        this._jedis = i4;
    }

    /**
     * Runs the queries, releases the connection, then prints the elapsed time.
     * The connection is released even when a query fails.
     */
    @Override
    public void run() {
        long start = System.currentTimeMillis();
        Random k = new Random();
        Random v = new Random();
        try {
            for (int i = 1; i <= OneThreadCount; i++) {
                // Key layout: <0..250> + "K" + <0..13229911> + tab.
                _jedis.get(k.nextInt(251) + "K" + v.nextInt(13229912) + "\t");
            }
        } finally {
            // Release the connection on failure too, otherwise it leaks.
            if (_jedis != null) {
                _jedis.close();
            }
        }
        long end = System.currentTimeMillis();
        System.out.println("线程---:::Thread-" + num + ":::查询" + OneThreadCount + "条数据," + "共耗时:::"
                + (end - start) * 1.0 / 1000 + "s");
    }
}