import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
public class Findduplicate {
public static void main(String[] args) {
try{
//一次性读取所有html的名称,保留CIK和year
ArrayList<int[]> allComYears = new ArrayList<int[]>();
File f = new File("F:\\fraud & nonfraud公司处理\\fraud\\已复核");
File[] files = f.listFiles();
for(int i=0;i<files.length;i++)
{
if (files[i].getName().endsWith(".html"))
{
String name_temp = files[i].getName();
String[] name_items = name_temp.split("-");
String CIK,fraudyear;
if(name_items[0].equals("10KSB"))
{
CIK = name_items[1];
fraudyear = name_items[2];
}
else {
CIK = name_items[2];
fraudyear = name_items[3];
}
int[] temparray = new int[2];
temparray[0] = Integer.parseInt(CIK);
temparray[1] = Integer.parseInt(fraudyear);
allComYears.add(temparray);
}
}
// 读取CSV文件信息
BufferedReader reader = new BufferedReader(new FileReader("F:\\fraud & nonfraud公司处理\\company and year.csv"));
//写入另一个CSV文件中便于统计
File outfile = new File("F:\\fraud & nonfraud公司处理\\count.csv");
BufferedWriter bw = new BufferedWriter(new FileWriter(outfile));
String line = null;
while((line = reader.readLine())!=null)
{
int numofreport = 0;
String item[] = line.split(",");
Iterator it1 = allComYears.iterator();
while(it1.hasNext()){
int[] temp = (int[]) it1.next();
if (Integer.parseInt(item[0])==temp[0] && Integer.parseInt(item[1])==temp[1]){
numofreport++;
}
}
//开始写入
bw.newLine();
bw.write(item[0] +"," + item[1] + "," + numofreport);//CIK,fraudyear,numofreport
}
bw.close();
}
catch(Exception e)
{e.printStackTrace();
}
}
}
// Reading and writing CSV files in Java (including ArrayList traversal)
// Latest recommended article published on 2024-12-20 15:48:42