有时需要使用 InputStreamReader(InputStream in, Charset cs) 这个构造方法来处理字符流,然而输入流的 Charset 并不总是已知的,这个时候就需要先检测其编码方式。
jchardet 是 Firefox 所使用的字节流编码检测算法的 Java 开源实现,协议为 MPL(Mozilla Public License),对商业使用友好。下载源代码后发现自带的示例并不怎么好用,于是对其封装了一下。下面是封装类和使用 Demo。
package cn.xddai.chardet;importjava.io.BufferedInputStream;importjava.io.IOException;importjava.io.InputStream;importorg.mozilla.intl.chardet.nsDetector;importorg.mozilla.intl.chardet.nsICharsetDetectionObserver;importorg.mozilla.intl.chardet.nsPSMDetector;/** * * @author xddai */publicclassCharsetDetector{private boolean found =false;private String result;private int lang;public String[] detectChineseCharset(InputStream in)throws IOException
{
lang = nsPSMDetector.CHINESE;
String[] prob;// Initalize the nsDetector() ;
nsDetector det =new nsDetector(lang);// Set an observer...// The Notify() will be called when a matching charset is found.
det.Init(new nsICharsetDetectionObserver(){public void Notify(String charset){
found =true;
result = charset;}});
BufferedInputStream imp =new BufferedInputStream(in);
byte[] buf =new byte[1024];
int len;
boolean isAscii =true;while((len = imp.read(buf,0, buf.length))!=-1){// Check if the stream is only ascii.if(isAscii)
isAscii = det.isAscii(buf, len);// DoIt if non-ascii and not done yet.if(!isAscii){if(det.DoIt(buf, len,false))break;}}
imp.close();
in.close();
det.DataEnd();if(isAscii){
found =true;
prob =new String[]{"ASCII"};}elseif(found){
prob =new String[]{
result
};}else{
prob = det.getProbableCharsets();}return prob;}public String[] detectAllCharset(InputStream in)throws IOException
{try{
lang = nsPSMDetector.ALL;return detectChineseCharset(in);}catch(IOException e){throw e;}}}