转换文件编码格式，以指定编码格式读取和写入文件，GBK向UTF-8或UTF-8向GBK相互转换文件编码格式...-CSDN博客

本文介绍了一个用Java实现的文件编码检测实用工具类，主要功能包括：检查文件是否为UTF-8编码、将文件读取为字节数组，以及以指定编码格式读写文件内容。通过这一系列方法，可以完成对文本文件编码的检测与转换处理。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

2019独角兽企业重金招聘Python工程师标准>>>

import lombok.extern.slf4j.Slf4j;

import java.io.*;

/**
 * 目前版本只支持文本文件.
 *
 * @Auther: zyx.
 * @Date: 2018/12/27 19:08
 */
@Slf4j
public class EncodeUtils {
    /**
     * 检查文件编码是否是utf-8
     *
     * @param: [file]
     * @return: boolean
     * @date: 2018/12/26 15:43
     */
    public static boolean utf8EncodeCheck(File file) {
        return utf8EncodeCheck(file.getAbsolutePath());
    }

    /**
     * 检查文件编码是否是utf-8
     *
     * @param: [filePath]
     * @return: boolean
     * @date: 2018/12/26 15:43
     */
    public static boolean utf8EncodeCheck(String filePath) {
        File file = new File(filePath);
        if(file == null || !file.exists()){
            log.error("file no exist, path is ：{}...", filePath);
            return false;
        }
        byte[] bytes = fileTransformByte(file);
        if (isUTF8(bytes)) return true;

        return false;
    }

    /**
     * 将文件转换为字节数组
     *
     * @param: [file]
     * @return: byte[]
     * @date: 2018/12/27 19:15
     */
    public static byte[] fileTransformByte(File file) {
        byte[] buffer = null;
        try {
            FileInputStream fis = new FileInputStream(file);
            ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
            byte[] b = new byte[1024];
            int n;
            while ((n = fis.read(b)) != -1) {
                bos.write(b, 0, n);
            }
            fis.close();
            bos.close();
            buffer = bos.toByteArray();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return buffer;
    }

    /**
     * 以指定编码格式读取文件内容.
     *
     * @param: [filePath, charsetName]
     * @return: java.lang.String
     * @date: 2018/12/27 19:39
     */
    public static String readFile(String filePath, String charsetName) throws Exception {
        StringBuilder fileContent = new StringBuilder();
        File f = new File(filePath);
        if (f.isFile() && f.exists()) {
            InputStreamReader read = new InputStreamReader(new FileInputStream(f), charsetName);
            BufferedReader reader = new BufferedReader(read);
            String line;
            while ((line = reader.readLine()) != null) {
                fileContent.append(line);
                fileContent.append("\r\n");
            }
            read.close();
        }
        return fileContent.toString();
    }

    /**
     * 以指定编码格式写入文件.
     *
     * @param: [filePath, fileContent, charsetName]
     * @return: void
     * @date: 2018/12/27 19:46
     */
    public static void writeFile(String filePath, String fileContent, String charsetName) throws Exception {
        File f = new File(filePath);
        if (!f.exists()) {
            f.createNewFile();
        }

        //利用getBytes将unicode字符串转成UTF-8格式的字节数组
//            byte[] utf8Bytes = fileContent.getBytes("UTF-8");

        //然后用utf-8 对这个字节数组解码成新的字符串
//            String utf8Str = new String(utf8Bytes, "UTF-8");

        OutputStreamWriter write = new OutputStreamWriter(new FileOutputStream(f), charsetName);
        BufferedWriter writer = new BufferedWriter(write);
        writer.write(fileContent);
        writer.close();
    }

    /**
     * 检查字节编码是否是utf-8
     *
     * @param: [rawtext]
     * @return: boolean
     * @date: 2018/12/27 19:18
     */
    public static boolean isUTF8(byte[] rawtext) {
        int score = 0;
        int i, rawtextlen = 0;
        int goodbytes = 0, asciibytes = 0;
        // Maybe also use UTF8 Byte Order Mark: EF BB BF
        // Check to see if characters fit into acceptable ranges
        rawtextlen = rawtext.length;
        for (i = 0; i < rawtextlen; i++) {
            if ((rawtext[i] & (byte) 0x7F) == rawtext[i]) {
                // 最高位是0的ASCII字符
                asciibytes++;
                // Ignore ASCII, can throw off count
            } else if (-64 <= rawtext[i] && rawtext[i] <= -33
                    //-0x40~-0x21
                    && // Two bytes
                    i + 1 < rawtextlen && -128 <= rawtext[i + 1]
                    && rawtext[i + 1] <= -65) {
                goodbytes += 2;
                i++;
            } else if (-32 <= rawtext[i]
                    && rawtext[i] <= -17
                    && // Three bytes
                    i + 2 < rawtextlen && -128 <= rawtext[i + 1]
                    && rawtext[i + 1] <= -65 && -128 <= rawtext[i + 2]
                    && rawtext[i + 2] <= -65) {
                goodbytes += 3;
                i += 2;
            }
        }
        if (asciibytes == rawtextlen) {
            return false;
        }
        score = 100 * goodbytes / (rawtextlen - asciibytes);
        // If not above 98, reduce to zero to prevent coincidental matches
        // Allows for some (few) bad formed sequences
        if (score > 98) {
            return true;
        } else if (score > 95 && goodbytes > 30) {
            return true;
        } else {
            return false;
        }
    }
}

转载于:https://my.oschina.net/zhangyaxin/blog/2994987