一: 编码的方法
getBytes()方法
public byte[] getBytes();
此方法根据java命令运行时参数file.encoding设置的编码表进行编码。(file.encoding在IDEA中一般默认设置为UTF-8,但是在电脑系统中用的是GBK)
// Encode "黑马" with the default charset (file.encoding); under IDEA's default settings that is UTF-8.
String str = "黑马";
byte[] bytes = str.getBytes();
// A byte whose high bit is 1 is printed as a negative number.
System.out.println(Arrays.toString(bytes)); // [-23, -69, -111, -23, -87, -84] when the default charset is UTF-8
查看getBytes()源码
IDEA中验证: (idea中的编码格式也是可以改变的)
// Read the charset that the no-argument getBytes() relies on.
String property = System.getProperty("file.encoding");
System.out.println(property); // UTF-8: the default environment inside IDEA is UTF-8
String str = "黑马";
byte[] bytes = str.getBytes();
System.out.println(Arrays.toString(bytes)); // [-23, -69, -111, -23, -87, -84]
电脑系统验证
这个编码格式是可以改变的
二: 解码的方法
String(byte[] code)
public String(byte[] code); //此方法根据file.encoding进行解码 (idea中默认是UTF-8编码)
1: 不乱码的情况
/**
 * Encodes "黑马" with UTF-8 and with GBK and prints both byte arrays.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if a named charset is not supported
 */
public static void main(String[] args) throws Exception {
    String str = "黑马";
    byte[] bytes = str.getBytes("UTF-8");
    System.out.println(Arrays.toString(bytes)); // [-23, -69, -111, -23, -87, -84]
    byte[] gbks = str.getBytes("GBK");
    System.out.println(Arrays.toString(gbks)); // [-70, -38, -62, -19]
}
public static void main(String[ ] args) throws Exception{
byte[] bytes =f-23,-69,-111,-23,-87,-84};
string str = new String(bytes); // file.encoding(idea默认是UTF-8)
system.out.println(str);//黑马
string str1 = new String(bytes, "UTF-8" );
system.out.println(str1); //黑马
byte[] bys = {-70,-38,-62y -19};
String str2 = new string(bys, charsetName: "GBK");
system.out.println(str2); //黑马
}
2: 乱码的情况(可逆的)
public static void main(string[] args) throws Exception{
string str =“黑马";
byte[] gbks = str.getBytes( charsetName: "GBK"); //对"用GBK对黑马进行编码"
system.out.println(Arrays.toString(gbks)); //[-70, -38,-62,-19]I
string s = new string(gbks, charsetName: "UTF-8"); //用UTF-8对gbks数组进行解码
system.out.println(s); //???? 乱码
string s1 = new string(gbks, charsetName: "GBK");
system.out.println(s1); //黑马
}
3: 乱码的情况(不可逆的)
public static void main(string[] args) throws Exception{
string str ="黑马";
byte[] bytes = str.getBytes( charsetName: "IS0-8859-1"); // ?? 63 63(在ASCII码中代表?)
system.out.println(Arrays.tostring(bytes)); //[63,63]
string s = new String(bytes, charsetName: "IS0-8859-1");
system.out.println(s); // ? ?
//原因(因为黑马在iso-8859-1中是不存在对应编码的,所以被当做?处理了,从本质上改变了它的字节,
所以后面无论以什么来解码它都是无效的)
string s1 = new String(bytes, charsetName: "GBK");
system. out.print1n(s1); // ? ?
string s2 = new String(bytes, charsetName: "UTF-8");
System.out.println(s2); // ? ?
}
// Surrogate pair for U+20BB7, a variant of the Chinese character "吉" (short stroke on top,
// long stroke below; it cannot be typed directly). It does not exist in GBK either, so it
// is encoded as '?'.
String str = "\uD842\uDFB7";
byte[] gbks = str.getBytes("GBK");
System.out.println(Arrays.toString(gbks)); // [63]
String s = new String(gbks, "gbk");
System.out.println(s); // ?
// Same reasoning as the previous example: the character is encoded as '?' by GBK.
String str = "♥";
byte[] gbks = str.getBytes("GBK");
System.out.println(Arrays.toString(gbks)); // [63]
String s = new String(gbks, "gbk");
System.out.println(s); // ?
大家再思考一下这个问题。
public static vo1d main( str1ng[] args) throws Except1on{
string s =“黑马";
byte[] bytes = s.getBytes( charsetName: "GBK");
system.out.println(Arrays.toString(bytes)); //[-70, -38, -62, -19]
string str = new String(bytes, charsetName: "UTF-8");
system.out.println(str); //实心????(当没有办法解析或者解析不了的时候) //(注意:实心?在UTF-8中有对应的编码)
byte[] bytes1 = str.getBytes( charsetName: "UTF-8");
system.out.println(Arrays.toString(bytes1)); //[-17, -65, -67,-17,-65, -67,-17,
-65,-67,-17,-65,-67]
string str1 = new String(bytes1, charsetName: "GBK");
system.out.println(str1); //银斤拷银斤拷
用ISO-8859-1解决上述场景所遇到的问题
// Use ISO-8859-1 as the intermediate charset so that the GBK bytes survive the round trip.
String s = "黑马";
byte[] bytes = s.getBytes("GBK");
System.out.println(Arrays.toString(bytes)); // [-70, -38, -62, -19]
String str = new String(bytes, "ISO-8859-1"); // key point: one byte maps to one character
System.out.println(str); // ºÚÂí
byte[] bytes1 = str.getBytes("ISO-8859-1");
System.out.println(Arrays.toString(bytes1)); // [-70, -38, -62, -19]
String str1 = new String(bytes1, "GBK");
System.out.println(str1); // 黑马