将指定路径下文件的内容转换成 UTF-8 编码的文件并输出.

最新推荐文章于 2022-07-15 12:17:28 发布

原创最新推荐文章于 2022-07-15 12:17:28 发布 · 394 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#工作 #.net

常用函数备忘专栏收录该内容

2 篇文章

订阅专栏

本文介绍了一个实用工具，该工具能够检测文件的原始编码，并将其转换为UTF-8编码。通过使用特定的Java类和方法，文章详细说明了如何读取文件、检测其编码类型、进行编码转换并将结果保存到新文件的过程。

测试功能:读取指定路径下的文件,检测其原始编码,将内容转换成 UTF-8 编码后输出到新文件.

注意点:分隔符、获得编码的方式、编码转换的方式、文件的输入输出.

源码展示:

/**
 * Reads the file at a fixed path, detects its original encoding, converts the
 * content and writes it out as a UTF-8 encoded file.
 *
 * Encoding detection and conversion are delegated to the SinoDetect and Zhcode
 * classes of the zhcode project.
 */
public class CharSetTest {

	private static final String lineSep = System.getProperty("line.separator"); // Line separator of the current platform.
	private static final String userDir = System.getProperty("user.dir"); // Current working directory.
	private static final String fileSep = System.getProperty("file.separator");// Separator between path components.
	private static final String UTF8 = "UTF8"; // Charset name used for the output file.

	public static void main(String[] args) {

		String fileInPath = userDir + fileSep + "testDir" + fileSep + "testIn.txt";// testDir/testIn.txt under the working directory.
		String fileOutPath = userDir + fileSep + "testDir" + fileSep + "testOut.txt";// testDir/testOut.txt under the working directory.

		File file = new File(fileInPath);
		// Bail out before detection: there is nothing to detect or convert.
		if (!file.isFile()) {
			System.out.println("Input file not found: " + fileInPath);
			return;
		}

		SinoDetect sinoDetect = new SinoDetect();
		int codePage = sinoDetect.detectEncoding(file);
		String charset = getCodePage(codePage);

		String fileContent = readFromFile(fileInPath, charset);
		if (fileContent == null) {
			System.out.println("Could not read " + fileInPath + " as " + charset);
			return;
		}

		fileContent = convertToUTF(fileContent, charset);

		// The output must be encoded as UTF-8, not with the detected source
		// charset, and must replace (not extend) the result of a previous run.
		write2Utf8File(fileContent, fileOutPath, UTF8, false);
	}

	/**
	 * Maps an encoding code to a charset name, following the constants defined
	 * in the Encoding class of the Zhcode open-source project.
	 * 
	 * @param cp
	 *            encoding code
	 * @return the charset name for the code; "UTF8" for any code that is not
	 *         listed here
	 */
	public static String getCodePage(int cp) {
		switch (cp) {
		case 0:
			return "GB2312";
		case 1:
			return "GBK";
		case 2:
			return "GB18030";
		case 3:
			return "HZ";
		case 4:
			return "BIG5";
		default:
			// Covers code 6 (UTF8) as well as every unlisted code.
			return UTF8;
		}
	}

	/**
	 * Converts a string that was read with the given charset into the UTF8
	 * target encoding by means of Zhcode.convertString.
	 * 
	 * Leading and trailing whitespace of the source is trimmed before the
	 * conversion. A null or empty source is returned unchanged, so an empty
	 * input file results in an empty output file.
	 * 
	 * @param source
	 *            text of the source file
	 * @param charSet
	 *            charset name of the source text, as returned by getCodePage
	 * @return the converted text; the trimmed source itself when the charset
	 *         is not one of the convertible ones
	 */
	private static String convertToUTF(String source, String charSet) {

		if (source == null || source.equals("")) {
			return source;
		}
		source = source.trim();
		String target = source;
		Zhcode zhcoder = new Zhcode();
		// The numeric arguments are the encoding codes from getCodePage:
		// the source code first, then 6 for the UTF8 target.
		if ("GB2312".equals(charSet)) {
			target = zhcoder.convertString(source, 0, 6);
		} else if ("GBK".equals(charSet)) {
			target = zhcoder.convertString(source, 1, 6);
		} else if ("GB18030".equals(charSet)) {
			target = zhcoder.convertString(source, 2, 6);
		} else if ("HZ".equals(charSet)) {
			target = zhcoder.convertString(source, 3, 6);
		} else if ("BIG5".equals(charSet)) {
			target = zhcoder.convertString(source, 4, 6);
		}
		return target;
	}

	/**
	 * Reads the whole file at filePath, decoding it with the given charset.
	 * Every line is terminated with the platform line separator in the result.
	 * 
	 * @param filePath
	 *            path of the file to read
	 * @param charset
	 *            name of the charset used to decode the file
	 * @return the file content, or null when the file does not exist, the
	 *         charset is not supported or reading fails
	 */
	private static String readFromFile(String filePath, String charset) {

		if (!(new File(filePath)).exists()) {
			return null;
		}
		String line = null;
		StringBuilder result = new StringBuilder();
		FileInputStream rawIn = null;
		BufferedReader in = null;
		try {
			// Keep a handle on the raw stream: if the charset name is rejected
			// the reader is never created and the stream must still be closed.
			rawIn = new FileInputStream(filePath);
			in = new BufferedReader(new InputStreamReader(rawIn, charset));
			while ((line = in.readLine()) != null) {
				result.append(line).append(lineSep);
			}
		} catch (IOException e) {
			System.out.println(e.toString());
			// Do not hand back partial content that would then be written out.
			return null;
		} finally {
			if (in != null) {
				closeAndReport(in);
			} else {
				closeAndReport(rawIn);
			}
		}
		return result.toString();
	}

	/**
	 * Writes a string to the file at filePath, encoded with the given charset.
	 * Pass a UTF-8 charset name to produce a UTF-8 file. Missing parent
	 * directories are created. Failures are reported on standard output.
	 * 
	 * @param source
	 *            text to write; when null nothing is written
	 * @param filePath
	 *            path of the output file
	 * @param charset
	 *            name of the charset used to encode the output file
	 * @param isAppand
	 *            if true the text is appended to the end of the file,
	 *            otherwise the file is rewritten from the beginning
	 */
	public static void write2Utf8File(String source, String filePath,
			String charset, boolean isAppand) {

		if (source == null) {
			System.out.println("Nothing to write to " + filePath);
			return;
		}
		// A bare file name has no parent directory to create.
		File parentDir = new File(filePath).getParentFile();
		if (parentDir != null) {
			parentDir.mkdirs();
		}
		FileOutputStream rawOut = null;
		Writer out = null;
		try {
			rawOut = new FileOutputStream(filePath, isAppand);
			out = new BufferedWriter(new OutputStreamWriter(rawOut, charset));
			out.write(source);
			out.flush();
		} catch (UnsupportedEncodingException e) {
			System.out.println(e.getMessage());
		} catch (FileNotFoundException e) {
			System.out.println(e.getMessage());
		} catch (IOException e) {
			System.out.println(e.toString());
		} finally {
			// Close the outermost resource that was actually created.
			if (out != null) {
				closeAndReport(out);
			} else {
				closeAndReport(rawOut);
			}
		}
	}

	/**
	 * Closes a resource if it is not null and reports a failure to close on
	 * standard output instead of propagating it.
	 * 
	 * @param resource
	 *            resource to close, may be null
	 */
	private static void closeAndReport(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			System.out.println(e.toString());
		}
	}
}

附件中为所需jar包与源码.

参考资料: http://sourceforge.net/projects/zhcode/