package com.imooc.io;
import java.io.UnsupportedEncodingException;
/**
 * Demonstrates how the same string becomes different byte sequences depending
 * on the charset used to encode it.
 *
 * <p>Notes:
 * <ol>
 *   <li>On a Chinese-language OS a newly created text file defaults to "ANSI",
 *       i.e. GBK; text such as U+8054 U+901A (or U+8054 alone) appearing to be
 *       stored as UTF-8 is only a coincidence.</li>
 *   <li>Java strings are UTF-16; encoded as UTF-16BE, the Chinese and the
 *       English characters of the sample each take two bytes
 *       (61 55, 00 41).</li>
 *   <li>In UTF-8 a Chinese character of the sample takes 3 bytes and an
 *       English letter takes 1 byte.</li>
 *   <li>string -> byte[] (encode with a charset):
 *       {@code byte[] bytes = str.getBytes(charset);}</li>
 *   <li>byte[] -> string (decode with a charset):
 *       {@code String string = new String(bytes, charset);}</li>
 * </ol>
 */
public class EncodeDemo {

    /**
     * Encodes a sample string with the platform default charset, GBK, UTF-8
     * and UTF-16BE, printing each resulting byte sequence in hexadecimal.
     *
     * @param args command-line arguments (not used)
     * @throws UnsupportedEncodingException if one of the named charsets is not
     *         supported by the running JVM
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        // Two Chinese characters (U+6155 U+8BFE) followed by "ABC". Written with
        // Unicode escapes so the value does not depend on the source encoding
        // that javac assumes for this file.
        String s = "\u6155\u8bfeABC";
        System.out.printf("Original string: %n[%s]%n", s);

        System.out.println("Charset default:");
        // Platform dependent; with GBK as the default: [c4 bd bf ce 41 42 43]
        printBytes(s.getBytes());

        System.out.println("Charset gbk:");
        printBytes(s.getBytes("gbk")); // [c4 bd bf ce 41 42 43]

        System.out.println("Charset utf-8:");
        printBytes(s.getBytes("utf-8")); // [e6 85 95 e8 af be 41 42 43]

        System.out.println("Charset utf-16be:");
        printBytes(s.getBytes("utf-16be")); // [61 55 8b fe 00 41 00 42 00 43]
    }

    /**
     * Prints the given bytes as space-separated, two-digit lowercase hex values
     * enclosed in square brackets, followed by a line separator.
     *
     * @param bytes the bytes to print; an empty array prints {@code []}
     */
    private static void printBytes(byte[] bytes) {
        StringBuilder result = new StringBuilder(bytes.length * 3);
        for (byte b : bytes) {
            if (result.length() > 0) {
                result.append(' ');
            }
            // Mask to 0..255 so negative bytes are not sign-extended, then emit
            // both nibbles so values below 0x10 keep their leading zero.
            int unsigned = b & 0xff;
            result.append(Character.forDigit(unsigned >>> 4, 16))
                  .append(Character.forDigit(unsigned & 0x0f, 16));
        }
        System.out.printf("[%s]%n", result);
    }
}
// Class notes: file encoding
// (Latest recommended article published on 2021-12-19 16:03:56)