- package com.wlh.lucene.test2;
- import java.io.BufferedReader;
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileReader;
- import java.io.FileWriter;
- import java.io.IOException;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import net.sourceforge.pinyin4j.PinyinHelper;
- import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
- import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
- import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
- import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
- public class Test {
- //源文件与目标文件的全路径名
- private static final String READ_FILE="E:/workspace65/paoding-analysis_test/src/com/wlh/lucene/test2/txt.txt";
- private static final String WRITE_FILE="E:/workspace65/paoding-analysis_test/src/com/wlh/lucene/test2/txt1.txt";
- private static HanyuPinyinOutputFormat spellFormat = new HanyuPinyinOutputFormat();
- private static BufferedWriter writer = null;
- private static BufferedReader reader = null;
- //初始化信息
- public static void init() throws IOException{
- writer = new BufferedWriter(new FileWriter(new File(WRITE_FILE),false));
- reader = new BufferedReader(new FileReader(new File(READ_FILE)));
- spellFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
- spellFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
- spellFormat.setVCharType(HanyuPinyinVCharType.WITH_V);
- }
- // 判断字符串是否包含有中文
- public static boolean isChinese(String str) {
- String regex = "[\\u4e00-\\u9fa5]";
- Pattern pattern = Pattern.compile(regex);
- Matcher matcher = pattern.matcher(str);
- return matcher.find();
- }
- //使用PinYin4j.jar将汉字转换为拼音
- public static String chineneToSpell(String chineseStr){
- return PinyinHelper.toHanyuPinyinString(chineseStr , spellFormat ,"");
- }
- //将转换后的字符串写入目标文件
- public static void writeToFile(String spellStr) throws IOException{
- writer.write(spellStr);
- }
- //从源文件读取按行数据
- public static void readFromFile() throws IOException{
- String line = null;
- while ((line = reader.readLine()) != null) {
- line = line.trim();
- //是中文
- if(isChinese(line)){
- line = chineneToSpell(line);
- }
- writeToFile(line + "\n");
- }
- }
- //关闭文件流
- public static void destory() throws IOException{
- reader.close();
- writer.close();
- }
- public static void main(String[] args) throws IOException {
- init();
- readFromFile();
- destory();
- }
- }
使用PinYin4j.jar将汉字转换为拼音的实例
最新推荐文章于 2019-05-10 15:51:54 发布