// 文本文件中查询中文字符串 (Search text files for Chinese character strings)
package hda.search_chinese;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Command-line utility that walks a directory tree, reads every {@code .jsp}
 * file it finds (decoded as GBK) and prints each run of non-Latin text found
 * on each line, followed by overall totals.
 *
 * <p>"Chinese" is approximated by the code-point range U+0391..U+FFE5, which
 * starts at the Greek capital alpha and ends at the full-width yen sign, so
 * the matcher also accepts other scripts and full-width punctuation that fall
 * inside that range.
 *
 * <p>The counters are plain static fields; the class is not thread-safe.
 *
 * @author Daan Han
 */
public class App {

    /** Charset used to decode every scanned JSP file. */
    private static final String ENCODING = "GBK";

    /** Directory scanned when no command-line argument is supplied. */
    private static final String DEFAULT_DIR = "/your/dir/here";

    /**
     * Matches one run of target text: an optional opening parenthesis
     * (followed by any mix of spaces, '|', '(' and ')'), one or more
     * characters in U+0391..U+FFE5, then further such groups joined by
     * spaces, '|' or parentheses, and finally any trailing ')'.
     *
     * <p>The leading group is optional ({@code ?}) rather than repeated: its
     * inner character class already contains '(', so repeating the group
     * would accept exactly the same text while allowing exponential
     * backtracking on long runs of '('.
     */
    private static final Pattern CHINESE_RUN = Pattern.compile(
            "(\\([ |\\( \\)]*)?[\u0391-\uFFE5]+([ |\\( \\)]+[\u0391-\uFFE5]+)*\\)*");

    /** Number of matches found so far (one per matched run, not per text line). */
    static int totalLine = 0;

    /** Sum of the lengths (in UTF-16 chars) of all matches found so far. */
    static int totalWords = 0;

    /**
     * Entry point. Scans the directory given as the first argument, or
     * {@link #DEFAULT_DIR} when no argument is supplied, then prints the totals.
     *
     * @param args optional; {@code args[0]} is the root directory to scan
     * @throws IOException if a JSP file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String dir = (args != null && args.length > 0) ? args[0] : DEFAULT_DIR;
        listFile(dir);
        System.out.println("===========");
        System.out.println("totalLine: " + totalLine);
        System.out.println("totalWords: " + totalWords);
    }

    /**
     * Prints every match found in {@code s}, one per line, on standard output.
     * Updates the static counters as a side effect of calling
     * {@link #getChinese(String)}.
     *
     * @param s the text line to inspect
     */
    public static void dodo(String s) {
        for (String match : getChinese(s)) {
            System.out.println(match);
        }
    }

    /**
     * Extracts every run of target text from {@code s}.
     *
     * <p>For each match, {@link #totalLine} is incremented by one and
     * {@link #totalWords} by the match length.
     *
     * @param s the text to search; {@code null} is treated as empty
     * @return the matches in order of appearance; never {@code null}
     */
    public static List<String> getChinese(String s) {
        List<String> ret = new ArrayList<String>(2);
        if (s == null) {
            return ret;
        }
        Matcher m = CHINESE_RUN.matcher(s);
        while (m.find()) {
            String match = m.group(0);
            ret.add(match);
            totalLine++;
            totalWords += match.length();
        }
        return ret;
    }

    /**
     * Recursively scans {@code dir}. Each entry's absolute path is printed;
     * sub-directories other than {@code WEB-INF} are descended into, and each
     * {@code .jsp} file is read line by line and passed to
     * {@link #dodo(String)}.
     *
     * <p>If {@code dir} cannot be listed (it does not exist, is not a
     * directory, or an I/O error occurs) a warning is written to standard
     * error and the method returns without scanning.
     *
     * @param dir path of the directory to scan
     * @throws IOException if a JSP file cannot be opened or read
     */
    public static void listFile(String dir) throws IOException {
        // File.listFiles() returns null rather than throwing when the path
        // is not a listable directory.
        File[] entries = new File(dir).listFiles();
        if (entries == null) {
            System.err.println("Cannot list directory: " + dir);
            return;
        }
        for (File file : entries) {
            System.out.printf("\n%s\n", file.getAbsoluteFile());
            if (file.isDirectory()) {
                // Skip WEB-INF directories.
                if (!file.getName().equals("WEB-INF")) {
                    listFile(file.getAbsolutePath());
                }
            } else if (file.isFile() && file.getName().endsWith(".jsp")) {
                // Only JSP files are processed.
                scanFile(file);
            } else {
                // NOTE(review): this "file not found" message is emitted for
                // every non-JSP entry; the text is kept unchanged so existing
                // output stays the same.
                System.out.println("找不到指定的文件");
            }
        }
    }

    /** Reads {@code file} line by line as GBK text and reports the matches in each line. */
    private static void scanFile(File file) throws IOException {
        FileInputStream in = new FileInputStream(file);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, ENCODING));
            String line;
            while ((line = reader.readLine()) != null) {
                dodo(line);
            }
        } finally {
            // Closing the underlying stream releases the file handle even if
            // decoding or reading fails part-way through.
            in.close();
        }
    }
}