package org.liwei.dm;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import org.ictclas4j.bean.SegResult;
import org.ictclas4j.segment.SegTag;
/**
 * Batch driver that runs the ICTCLAS4J segmenter over every regular file in a
 * fixed source directory and writes each result to a numbered text file in a
 * fixed result directory.
 *
 * <p>For the source entry at index {@code i} (in the order returned by
 * {@link File#list()}), the segmented text is written to
 * {@code <result dir>/i.txt}.
 *
 * <p>Files are read and written with the platform default charset, exactly as
 * the original implementation did.
 */
public class Divid {

    /** Directory whose entries are read and segmented. */
    private static final String SOURCE_DIR =
            "c:" + File.separator + "dm" + File.separator + "dividsource";

    /** Directory that receives one {@code <index>.txt} file per source entry. */
    private static final String RESULT_DIR =
            "c:" + File.separator + "dm" + File.separator + "dividresult";

    /** Number of characters requested from the reader per {@code read} call. */
    private static final int CHUNK_SIZE = 100000;

    /**
     * Segments every regular file found in the source directory.
     *
     * <p>Prints the number of directory entries, then the name of each entry,
     * to standard output before processing it.
     *
     * @param args ignored
     * @throws Exception if the source directory cannot be listed, the result
     *         directory cannot be created, or reading, segmenting or writing
     *         any file fails
     */
    public static void main(String[] args) throws Exception {
        File sourceDir = new File(SOURCE_DIR);
        // File.list() returns null (not an empty array) when the path is not a
        // directory or cannot be read; fail with a clear message instead of an NPE.
        String[] names = sourceDir.list();
        if (names == null) {
            throw new java.io.FileNotFoundException(
                    "Cannot list source directory: " + sourceDir.getPath());
        }

        File resultDir = new File(RESULT_DIR);
        if (!resultDir.isDirectory() && !resultDir.mkdirs()) {
            throw new java.io.IOException(
                    "Cannot create result directory: " + resultDir.getPath());
        }

        System.out.println(names.length);
        for (int i = 0; i < names.length; i++) {
            System.out.println(names[i]);
            File source = new File(sourceDir, names[i]);
            if (!source.isFile()) {
                // Sub-directories and special entries cannot be opened as a
                // stream; skip them rather than aborting the whole batch.
                System.err.println("Skipping non-file entry: " + source.getPath());
                continue;
            }

            String text = readAll(source);

            // NOTE(review): a new SegTag is created per file, as in the original;
            // whether one instance can be reused across files is not visible here.
            SegTag st = new SegTag(1);
            SegResult sr = st.split(text);

            // The result file is named after the entry's index, not its name.
            File target = new File(resultDir, i + ".txt");
            PrintStream ps = new PrintStream(new FileOutputStream(target));
            try {
                ps.println(sr.getFinalResult());
                // PrintStream swallows IOExceptions; surface them explicitly.
                if (ps.checkError()) {
                    throw new java.io.IOException(
                            "Failed writing result file: " + target.getPath());
                }
            } finally {
                ps.close();
            }
        }
    }

    /**
     * Reads the entire content of a file as text using the platform default
     * charset.
     *
     * <p>Each chunk is appended to the result, so files that need more than
     * one {@code read} call are returned in full.
     *
     * @param source file to read
     * @return the complete decoded content; empty if the file is empty
     * @throws Exception if the file cannot be opened or read
     */
    private static String readAll(File source) throws Exception {
        InputStream input = new FileInputStream(source);
        BufferedReader reader = new BufferedReader(new InputStreamReader(input));
        try {
            StringBuilder text = new StringBuilder();
            char[] chunk = new char[CHUNK_SIZE];
            int count;
            while ((count = reader.read(chunk, 0, chunk.length)) != -1) {
                text.append(chunk, 0, count);
            }
            return text.toString();
        } finally {
            // Closing the reader also closes the underlying file stream.
            reader.close();
        }
    }
}
该程序使用 ICTCLAS4J 库对指定目录下的多个文本文件进行分词处理，并将处理结果输出到另一个目录中按序号命名的对应文件。程序读取源文件夹中的所有文件，逐个执行分词操作，然后将分词结果写入目标文件。
1335

被折叠的 条评论
为什么被折叠?



