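// For big data, such as CSV files of several gigabytes, randomly sampling a given
// number of rows (while keeping them in their original order) is a hard problem.
// This code does NOT do random sampling; it merely keeps the first row out of every ten data rows.
// If you know a more efficient and truly random method, feel free to leave a comment~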
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
//import java.util.ArrayList;
/**
 * Down-samples a CSV file by keeping one data row out of every {@link #SAMPLE_STEP}.
 *
 * <p>The first line of the input is treated as a header and discarded; a fixed
 * header ({@link #HEADER}) is written to the output instead. Of the remaining
 * lines, the 1st, 11th, 21st, ... are copied in their original order. This is
 * systematic sampling, not random sampling. The input is streamed line by line,
 * so it is never held in memory as a whole.
 */
public class Test {
    /** Path of the CSV file to sample from. */
    private static final String IN_FILE = "train.csv";

    /** Path of the CSV file the sampled rows are written to. */
    private static final String OUT_FILE = "sub.train.csv";

    /** Header line written to the output (kept byte-for-byte, including the trailing space). */
    private static final String HEADER = "T,X,Y,Z,Device \n";

    /** One row is kept out of every this many data rows. */
    private static final int SAMPLE_STEP = 10;

    /**
     * Reads {@code train.csv} from the working directory and writes the sampled
     * rows to {@code sub.train.csv}.
     *
     * <p>I/O failures are reported on standard error and do not propagate.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // The reader is opened first so that a missing input file does not leave a
        // header-only output file behind. try-with-resources closes (and flushes)
        // both streams even when an exception is thrown mid-copy.
        try (BufferedReader reader = new BufferedReader(new FileReader(IN_FILE));
                BufferedWriter writer = new BufferedWriter(new FileWriter(OUT_FILE))) {
            writer.write(HEADER);
            // Discard the input's own header line; it is replaced by HEADER.
            reader.readLine();
            copyEveryNthLine(reader, writer, SAMPLE_STEP);
        } catch (FileNotFoundException e) {
            // Thrown when a file cannot be opened (input missing, or output not creatable).
            System.err.println("文件未找到: " + e.getMessage());
        } catch (IOException e) {
            // Any other read/write failure is not a "file not found" condition.
            System.err.println("文件读写失败: " + e.getMessage());
        }
    }

    /**
     * Copies every {@code step}-th remaining line from {@code reader} to
     * {@code writer}, starting with the first one, each terminated by {@code "\n"}.
     *
     * @param reader source positioned at the first data line
     * @param writer destination for the kept lines
     * @param step   sampling interval; must be positive
     * @throws IOException if reading or writing fails
     */
    private static void copyEveryNthLine(BufferedReader reader, BufferedWriter writer, int step)
            throws IOException {
        // A wrapping position counter (0 .. step-1) cannot overflow on very large files.
        int position = 0;
        String line;
        while ((line = reader.readLine()) != null) {
            if (position == 0) {
                writer.write(line);
                writer.write("\n");
            }
            position = (position + 1) % step;
        }
    }
}
// Tags: big data, sampling
// (Blog page footer) Latest recommended article published on 2021-12-28 11:01:05