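// Stray ad snippet left over from the web page this code was copied from; commented out because it is not Java:
// <script type="text/javascript" src="http://pagead2.googlesyndication.com/pagead/show_ads.js"></script>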
//编码转换类
//本类提供两种转换功能:GBK繁体->GBK简体,Big5->GBK简体
import java.io.*;
/**
 * Encoding converter that rewrites Big5-encoded double-byte characters into the
 * two-byte values stored in a lookup table loaded from disk (per the original
 * author's notes, the GBK simplified-Chinese equivalents).
 *
 * <p>The tables are read once, in the constructor, from
 * {@code converttable/big52gb.tab} relative to the working directory. If loading
 * fails, the stack trace is printed and Big5 conversion is disabled: the in-place
 * overload then leaves its input untouched and the {@code String} overload
 * returns {@code ""}.
 *
 * <p>NOTE(review): the GBK-3/GBK-4 tables are loaded here but no method in this
 * class reads them, and they are read from the same file as the Big5 table.
 * Presumably a separate GBK-traditional-to-simplified table file and conversion
 * method existed in the full original; confirm before relying on those tables.
 */
public class EncodingConvertor
{
    // Directory and file name of the conversion table.
    private static final String TABLE_FILE_DIR = "converttable";
    private static final String BIG5_TO_GBKSIMP_TABLE_FILE_NAME = "big52gb.tab";

    // Code ranges of GBK Chinese characters that are not in GB2312 (i.e. the ranges
    // that may need conversion: GBK-3 and GBK-4).
    // GBK-3: high byte in [0x81, 0xA0], low byte in [0x40, 0xFE].
    private static final int GBK3_HIGH_DOWN = 0x81;
    private static final int GBK3_HIGH_UP = 0xA0;
    private static final int GBK3_LOW_DOWN = 0x40;
    private static final int GBK3_LOW_UP = 0xFE;

    // GBK-4: high byte in [0xAA, 0xFE], low byte in [0x40, 0xA0].
    private static final int GBK4_HIGH_DOWN = 0xAA;
    private static final int GBK4_HIGH_UP = 0xFE;
    private static final int GBK4_LOW_DOWN = 0x40;
    private static final int GBK4_LOW_UP = 0xA0;

    // Big5: high byte in [0x81, 0xFE], low byte in [0x40, 0xFE].
    private static final int BIG5_HIGH_DOWN = 0x81;
    private static final int BIG5_HIGH_UP = 0xFE;
    private static final int BIG5_LOW_DOWN = 0x40;
    private static final int BIG5_LOW_UP = 0xFE;

    // Whether the Big5 table is loaded and usable. Starts as the constructor
    // argument and is reset to false if any table fails to load.
    private boolean ifloadbig5;

    // Conversion tables, indexed [high - HIGH_DOWN][low - LOW_DOWN]; each cell holds
    // the two replacement bytes.
    // GBK-3 table.
    private byte gbk3table[][][] = new byte[GBK3_HIGH_UP - GBK3_HIGH_DOWN + 1][GBK3_LOW_UP - GBK3_LOW_DOWN + 1][2];
    // GBK-4 table.
    private byte gbk4table[][][] = new byte[GBK4_HIGH_UP - GBK4_HIGH_DOWN + 1][GBK4_LOW_UP - GBK4_LOW_DOWN + 1][2];
    // Big5 table (set to null when the caller did not ask for it).
    private byte big5table[][][] = new byte[BIG5_HIGH_UP - BIG5_HIGH_DOWN + 1][BIG5_LOW_UP - BIG5_LOW_DOWN + 1][2];

    /**
     * Loads the conversion tables from disk.
     *
     * <p>The GBK-3 and GBK-4 tables are read back-to-back from one stream; the Big5
     * table is read from a freshly opened stream when {@code ifloadbig5} is set,
     * otherwise {@code big5table} is released. Any I/O failure (missing file,
     * truncated table) prints the stack trace and clears {@code ifloadbig5}.
     */
    private void loadConvertTables()
    {
        String tablePath = TABLE_FILE_DIR + File.separator + BIG5_TO_GBKSIMP_TABLE_FILE_NAME;
        try
        {
            // NOTE(review): SOURCE reads the GBK tables from the Big5 table file;
            // kept as-is, see the class comment.
            DataInputStream dis = openTable(tablePath);
            try
            {
                readTable(dis, gbk3table);
                readTable(dis, gbk4table);
            }
            finally
            {
                dis.close();
            }

            if (ifloadbig5)
            {
                dis = openTable(tablePath);
                try
                {
                    readTable(dis, big5table);
                }
                finally
                {
                    dis.close();
                }
            }
            else
            {
                big5table = null;
            }
        }
        catch (IOException ioe)
        {
            // Covers FileNotFoundException and the EOFException raised by a short table.
            ioe.printStackTrace();
            ifloadbig5 = false;
        }
    }

    /** Opens a buffered data stream over the table file at {@code path}. */
    private static DataInputStream openTable(String path) throws IOException
    {
        return new DataInputStream(new BufferedInputStream(new FileInputStream(path)));
    }

    /**
     * Fills every two-byte cell of {@code table} from {@code in}, high-byte index
     * first and low-byte index second (the order the table file is laid out in).
     * Uses {@code readFully} so a short read or premature end of file raises an
     * exception instead of leaving cells silently zeroed.
     */
    private static void readTable(DataInputStream in, byte[][][] table) throws IOException
    {
        for (int hi = 0; hi < table.length; hi++)
        {
            for (int lo = 0; lo < table[hi].length; lo++)
            {
                in.readFully(table[hi][lo], 0, 2);
            }
        }
    }

    /**
     * Creates a converter and loads the conversion tables.
     *
     * @param ifbig5 whether to load the Big5 table; when {@code false} the
     *               {@code big52gbk} methods perform no conversion
     */
    public EncodingConvertor(boolean ifbig5)
    {
        ifloadbig5 = ifbig5;
        loadConvertTables();
    }

    /**
     * Converts Big5 bytes in place using the Big5 table.
     *
     * <p>Bytes with the high bit clear are treated as single-byte characters and
     * skipped. Any other byte is treated as the lead byte of a two-byte character:
     * if the pair lies in the Big5 range it is overwritten with the table entry,
     * otherwise it is left unchanged; either way both bytes are consumed. A lead
     * byte with no following byte inside the processed range is left unchanged.
     *
     * <p>Does nothing if the Big5 table is not loaded or {@code text} is
     * {@code null}.
     *
     * @param text    Big5-encoded bytes; overwritten with the converted bytes
     * @param textlen number of leading bytes of {@code text} to process; values
     *                larger than {@code text.length} are clamped to it
     */
    public void big52gbk(byte text[], int textlen)
    {
        if (!ifloadbig5 || text == null)
        {
            return;
        }
        int limit = Math.min(textlen, text.length);
        int i = 0;
        while (i < limit)
        {
            if (text[i] >= 0)
            {
                // Single-byte (ASCII) character.
                i++;
                continue;
            }
            if (i + 1 >= limit)
            {
                // Dangling lead byte at the end of the range.
                break;
            }
            // Bytes are signed in Java; mask to get the unsigned code values.
            int high = text[i] & 0xFF;
            int low = text[i + 1] & 0xFF;
            if ((high >= BIG5_HIGH_DOWN) && (high <= BIG5_HIGH_UP) && (low >= BIG5_LOW_DOWN) && (low <= BIG5_LOW_UP))
            {
                byte[] mapped = big5table[high - BIG5_HIGH_DOWN][low - BIG5_LOW_DOWN];
                text[i] = mapped[0];
                text[i + 1] = mapped[1];
            }
            i += 2;
        }
    }

    /**
     * Converts Big5 bytes using the Big5 table and returns the result as a string.
     *
     * <p>The conversion is performed in place, so {@code text} itself is modified
     * (as in the original implementation). The converted bytes are decoded as GBK;
     * if the runtime has no GBK charset, the platform default charset is used
     * instead.
     *
     * @param text Big5-encoded bytes; overwritten with the converted bytes
     * @return the decoded converted text, or {@code ""} if the Big5 table is not
     *         loaded or {@code text} is {@code null}
     */
    public String big52gbk(byte text[])
    {
        if (!ifloadbig5 || text == null)
        {
            return "";
        }
        big52gbk(text, text.length);
        try
        {
            return new String(text, "GBK");
        }
        catch (UnsupportedEncodingException uee)
        {
            // GBK is an optional charset; fall back to the platform default.
            return new String(text, 0, text.length);
        }
    }
}
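// BIG5转为GBK的类 (page title: "Class for converting Big5 to GBK")
// 最新推荐文章于 2021-03-20 22:44:43 发布 (web-page footer: "latest recommended article published 2021-03-20 22:44:43")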