问题:一个 txt 文件中存有 10 万行以上的数据,每行数据包括 id 和描述,且各行按 id 有序排列。写一个 Java 方法,传入 id,从文件中快速查询出对应的描述,查询时间控制在 0.1s 以内。
解决方案:使用二分查找 + 随机文件读取(RandomAccessFile)。
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;
import java.util.Calendar;
/**
 * Looks up the description for an id in a large text file (100,000+ lines)
 * without loading the file into memory.
 *
 * <p>Every line is expected to look like {@code "<id> <description>"} and the
 * lines must be sorted by ascending id. The lookup is a binary search over
 * byte offsets of a {@link RandomAccessFile}: jump to the middle offset, walk
 * back to the start of the line that contains it, compare that line's id with
 * the wanted id, and halve the range.
 *
 * @author song
 */
public class Main {

    /**
     * Charset used to decode a matched line before printing it.
     * NOTE(review): assumes the data file is UTF-8 encoded; adjust if it is not.
     */
    private static final java.nio.charset.Charset FILE_CHARSET = StandardCharsets.UTF_8;

    /**
     * Runs 200 lookups of random ids against {@code 1.txt} and prints the time
     * taken by each lookup and the average.
     *
     * @param args unused
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        long totalMillis = 0;
        int testCount = 200;
        // try-with-resources so the file handle is released even if a lookup throws.
        try (RandomAccessFile reader = new RandomAccessFile("1.txt", "r")) {
            for (int i = 0; i < testCount; i++) {
                // nanoTime is monotonic and avoids allocating a Calendar per sample.
                long startNanos = System.nanoTime();
                int searchId = (int) (Math.random() * 105000);
                System.out.print("No." + i + ", ");
                binarySearch(reader, searchId);
                long elapsedMillis = (System.nanoTime() - startNanos) / 1000000L;
                System.out.println(" |use time: " + elapsedMillis + " MilliSecond");
                totalMillis += elapsedMillis;
            }
        }
        System.out.println("averange time: " + totalMillis / testCount);
    }

    /**
     * Reads the integer that starts at the current file pointer and then
     * restores the file pointer to where it was on entry.
     *
     * <p>The integer ends at the first space, line terminator, or end of file.
     *
     * @param reader file positioned at the first character of an id
     * @return the parsed id
     * @throws IOException if reading or seeking fails
     * @throws NumberFormatException if the characters read are not a valid
     *         integer (including the case where nothing could be read)
     */
    static int readInt(RandomAccessFile reader) throws IOException {
        long startPos = reader.getFilePointer();
        StringBuilder digits = new StringBuilder();
        int ch;
        // Stop on EOF (-1) and line terminators as well as the space delimiter;
        // otherwise a line without a space, or EOF, would never end the loop.
        while ((ch = reader.read()) != -1 && ch != ' ' && ch != '\n' && ch != '\r') {
            digits.append((char) ch);
        }
        reader.seek(startPos);
        return Integer.parseInt(digits.toString());
    }

    /**
     * Moves the file pointer back to the start of a line and returns that
     * line's id.
     *
     * <p>Scanning starts at the byte just before the current file pointer and
     * goes backwards until a {@code '\n'} is found; the pointer is then left on
     * the byte following it. If no {@code '\n'} is found the pointer is moved
     * to offset 0 (the first line).
     *
     * @param reader file positioned inside, or at the end of, the wanted line
     * @return the id of the line the pointer was moved to
     * @throws IOException if reading or seeking fails
     * @throws NumberFormatException if that line does not start with an integer
     */
    static int readEndline(RandomAccessFile reader) throws IOException {
        long cur = reader.getFilePointer() - 1;
        while (cur >= 0) {
            reader.seek(cur);
            if (reader.read() == '\n') {
                // read() advanced the pointer past the '\n', i.e. onto the line start.
                return readInt(reader);
            }
            cur--;
        }
        // No newline before the pointer: we are inside the first line.
        reader.seek(0);
        return readInt(reader);
    }

    /**
     * Binary-searches the file for the line whose id equals {@code searchId}
     * and prints the outcome to standard output ({@code "find it: <line>"} or
     * {@code "connot find: <id>"}, without a trailing newline).
     *
     * @param reader   open file whose lines are sorted by ascending id
     * @param searchId id to look for
     * @return {@code true} if a line with that id was found, {@code false}
     *         otherwise (including when the file is empty)
     * @throws IOException if reading or seeking fails
     * @throws NumberFormatException if a visited line does not start with an integer
     */
    static boolean binarySearch(RandomAccessFile reader, int searchId) throws IOException {
        long length = reader.length();
        if (length == 0) {
            // Nothing to search; also avoids seeking to offset -1 below.
            System.out.print("connot find: " + searchId);
            return false;
        }

        long start = 0;
        long end = length - 1;

        // Id of the first line.
        reader.seek(start);
        int startId = readInt(reader);

        // Id of the last line.
        reader.seek(end);
        int endId = readEndline(reader);

        if (searchId < startId || searchId > endId) {
            System.out.print("connot find: " + searchId);
            return false;
        }

        while (startId <= endId) {
            // Jump to the middle byte and snap back to the start of its line.
            reader.seek((start + end) / 2);
            int midId = readEndline(reader);
            long midPos = reader.getFilePointer();

            if (searchId == midId) {
                System.out.print("find it: " + readLineDecoded(reader));
                return true;
            } else if (searchId > midId) {
                // Continue in (mid, end]: skip the middle line entirely.
                reader.readLine();
                start = reader.getFilePointer();
                if (start >= length) {
                    // Ran off the end of the file; there is no next line to read.
                    break;
                }
                startId = readInt(reader);
            } else {
                // Continue in [start, mid): end on the byte before the middle line.
                end = midPos - 1;
                reader.seek(end);
                endId = readEndline(reader);
            }
        }

        System.out.print("connot find: " + searchId);
        return false;
    }

    /**
     * Reads the line at the current file pointer and decodes it with
     * {@link #FILE_CHARSET}.
     *
     * <p>{@link RandomAccessFile#readLine()} turns every byte into one char, so
     * multi-byte characters come back garbled; re-encoding that string as
     * ISO-8859-1 recovers the original bytes, which are then decoded properly.
     */
    private static String readLineDecoded(RandomAccessFile reader) throws IOException {
        String raw = reader.readLine();
        if (raw == null) {
            return null;
        }
        return new String(raw.getBytes(StandardCharsets.ISO_8859_1), FILE_CHARSET);
    }
}