txt文件编码判断

txt文件,首先分有无BOM(Byte Order Mark)。有的话,相对简单;没有的话,还要区分UTF-8和ANSI。代码如下:

/**
 * Checks whether the whole byte array is well-formed UTF-8.
 *
 * <p>Accepts 1- to 4-byte sequences and rejects truncated sequences, stray
 * continuation bytes, overlong encodings, encoded surrogates (U+D800..U+DFFF)
 * and values above U+10FFFF. An empty array is reported as UTF-8.
 *
 * @param str the raw bytes to inspect; must not be {@code null}
 * @return {@code true} if every byte belongs to a well-formed UTF-8 sequence
 */
private static boolean isutf8(byte[] str)
	{
	    final int size = str.length;
	    int i = 0;

	    while (i < size)
	    {
	        final int lead = str[i] & 0xff;

	        // Plain ASCII needs no further checks.
	        if (lead < 0x80)
	        {
	            i++;
	            continue;
	        }

	        final int trailing;      // number of continuation bytes that must follow
	        final int minCodePoint;  // smallest value this length may legally encode
	        int codePoint;           // payload bits accumulated so far

	        if ((lead & 0xe0) == 0xc0)
	        {
	            trailing = 1;
	            minCodePoint = 0x80;
	            codePoint = lead & 0x1f;
	        }
	        else if ((lead & 0xf0) == 0xe0)
	        {
	            trailing = 2;
	            minCodePoint = 0x800;
	            codePoint = lead & 0x0f;
	        }
	        else if ((lead & 0xf8) == 0xf0)
	        {
	            trailing = 3;
	            minCodePoint = 0x10000;
	            codePoint = lead & 0x07;
	        }
	        else
	        {
	            // Stray continuation byte (10xxxxxx) or an invalid lead (0xF8..0xFF).
	            return false;
	        }

	        // The sequence must not run past the end of the array.
	        if (i + trailing >= size)
	        {
	            return false;
	        }

	        for (int k = 1; k <= trailing; k++)
	        {
	            final int next = str[i + k] & 0xff;
	            if ((next & 0xc0) != 0x80)
	            {
	                return false;
	            }
	            codePoint = (codePoint << 6) | (next & 0x3f);
	        }

	        // Overlong form: the value would have fit in a shorter sequence.
	        if (codePoint < minCodePoint)
	        {
	            return false;
	        }
	        // Surrogate halves are never valid in UTF-8.
	        if (codePoint >= Character.MIN_SURROGATE && codePoint <= Character.MAX_SURROGATE)
	        {
	            return false;
	        }
	        // Beyond the Unicode range.
	        if (codePoint > Character.MAX_CODE_POINT)
	        {
	            return false;
	        }

	        i += trailing + 1;
	    }

	    return true;
	}

	/**
	 * Guesses the charset name of raw text-file bytes.
	 *
	 * <p>A leading byte order mark decides the result when present:
	 * {@code EF BB BF} gives "UTF-8", {@code FF FE} gives "Unicode" and
	 * {@code FE FF} gives "UTF-16BE". Without a BOM the content is passed to
	 * {@code isutf8}; the result is "UTF-8" if that check succeeds and "GBK"
	 * otherwise.
	 *
	 * @param bytes the file content; must not be {@code null}
	 * @return one of "UTF-8", "Unicode", "UTF-16BE" or "GBK"; never {@code null}
	 */
	private static String getCode(byte[] bytes){
		final int length = bytes.length;

		// The UTF-8 BOM is three bytes long; matching only EF BB would also
		// accept ordinary text that merely starts with those two bytes.
		if (length >= 3
				&& (bytes[0] & 0xff) == 0xef
				&& (bytes[1] & 0xff) == 0xbb
				&& (bytes[2] & 0xff) == 0xbf) {
			return "UTF-8";
		}

		if (length >= 2) {
			// Combine the first two bytes into one value, high byte first.
			final int p = ((bytes[0] & 0xff) << 8) | (bytes[1] & 0xff);
			if (p == 0xfffe) {
				return "Unicode";
			}
			if (p == 0xfeff) {
				return "UTF-16BE";
			}
		}

		// No BOM: decide by inspecting the content itself.
		if (isutf8(bytes)) {
			return "UTF-8";
		}
		return "GBK";
	}
/**
 * Guesses the charset name of raw text-file bytes by comparing a lowercase
 * hex rendering of the leading bytes against known byte order marks.
 *
 * <p>{@code efbbbf} gives "UTF-8", a prefix of {@code fffe} gives "Unicode"
 * and a prefix of {@code feff} gives "UTF-16BE". Without a BOM the content is
 * passed to {@code isutf8}; the result is "UTF-8" if that check succeeds and
 * "GBK" otherwise.
 *
 * @param bytes the file content; must not be {@code null}
 * @return one of "UTF-8", "Unicode", "UTF-16BE" or "GBK"
 */
private static String getCode(byte[] bytes) {
		// Three bytes are enough to cover the longest BOM checked here (UTF-8).
		final int prefixLength = Math.min(bytes.length, 3);
		final StringBuilder hexCode = new StringBuilder(prefixLength * 2);
		for (int i = 0; i < prefixLength; i++) {
			final int value = bytes[i] & 0xFF;
			// Character.forDigit yields lowercase digits, two per byte.
			hexCode.append(Character.forDigit(value >> 4, 16));
			hexCode.append(Character.forDigit(value & 0x0F, 16));
		}
		final String prefix = hexCode.toString();

		// Determine the character encoding.
		if (prefix.equals("efbbbf")) {
			// The whole three-byte UTF-8 BOM must match, not just EF BB.
			return "UTF-8";
		}
		if (prefix.startsWith("fffe")) {
			return "Unicode";
		}
		if (prefix.startsWith("feff")) {
			return "UTF-16BE";
		}

		// No BOM: decide by inspecting the content itself.
		if (isutf8(bytes)) {
			return "UTF-8";
		}
		return "GBK";
	}



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值