BIG5转为GBK的类

本文介绍了一个Java类,用于实现GBK繁体到GBK简体及Big5到GBK简体的编码转换。该类提供了详细的内部机制说明,包括转换表的加载与使用等。

摘要生成于 C知道,由 DeepSeek-R1 满血版支持,前往体验 >

// Encoding conversion class.
// According to the original header, this class is meant to offer two conversions:
// GBK Traditional -> GBK Simplified, and Big5 -> GBK Simplified.
// Only the Big5 -> GBK entry points are present in this file.
import java.io.*;
import java.nio.charset.Charset;

public class EncodingConvertor {
    // Directory and file name of the conversion table.
    private static final String TABLE_FILE_DIR = "converttable";
    private static final String BIG5_TO_GBKSIMP_TABLE_FILE_NAME = "big52gb.tab";

    // GBK code ranges outside GB2312 (the ranges that may need conversion: GBK-3 and GBK-4).
    // GBK-3: high byte in [0x81, 0xA0], low byte in [0x40, 0xFE].
    private static final int GBK3_HIGH_DOWN = 0x81;
    private static final int GBK3_HIGH_UP = 0xA0;
    private static final int GBK3_LOW_DOWN = 0x40;
    private static final int GBK3_LOW_UP = 0xFE;

    // GBK-4: high byte in [0xAA, 0xFE], low byte in [0x40, 0xA0].
    private static final int GBK4_HIGH_DOWN = 0xAA;
    private static final int GBK4_HIGH_UP = 0xFE;
    private static final int GBK4_LOW_DOWN = 0x40;
    private static final int GBK4_LOW_UP = 0xA0;

    // Big5: high byte in [0x81, 0xFE], low byte in [0x40, 0xFE].
    private static final int BIG5_HIGH_DOWN = 0x81;
    private static final int BIG5_HIGH_UP = 0xFE;
    private static final int BIG5_LOW_DOWN = 0x40;
    private static final int BIG5_LOW_UP = 0xFE;

    // Charset used to decode the converted bytes. The table output is GBK, so decode as GBK;
    // fall back to the platform default only when the runtime does not ship GBK.
    private static final Charset OUTPUT_CHARSET =
            Charset.isSupported("GBK") ? Charset.forName("GBK") : Charset.defaultCharset();

    // Whether the Big5 -> GBK table is loaded. Reset to false when loading fails,
    // which turns both big52gbk overloads into no-ops.
    private boolean ifloadbig5;

    // Conversion tables, indexed [high - HIGH_DOWN][low - LOW_DOWN] -> 2 replacement bytes.
    // NOTE(review): the GBK-3/GBK-4 tables are loaded but not used by any method in this file,
    // and they are read from the same file as the Big5 table - confirm whether a separate
    // GBK table file was intended.
    private final byte[][][] gbk3table =
            new byte[GBK3_HIGH_UP - GBK3_HIGH_DOWN + 1][GBK3_LOW_UP - GBK3_LOW_DOWN + 1][2];
    private final byte[][][] gbk4table =
            new byte[GBK4_HIGH_UP - GBK4_HIGH_DOWN + 1][GBK4_LOW_UP - GBK4_LOW_DOWN + 1][2];
    // Allocated only when the Big5 table is requested and loads successfully; otherwise null.
    private byte[][][] big5table;

    /**
     * Creates a converter and loads the conversion tables.
     *
     * @param ifbig5 whether to load the Big5 -> GBK table; when false (or when loading fails)
     *               the big52gbk methods do nothing
     */
    public EncodingConvertor(boolean ifbig5) {
        ifloadbig5 = ifbig5;
        loadConvertTables();
    }

    // Fills every 2-byte cell of the table in row-major order (high byte outer, low byte inner).
    // readFully is used so a truncated file raises EOFException instead of leaving zero cells.
    private static void readTable(DataInputStream in, byte[][][] table) throws IOException {
        for (byte[][] row : table) {
            for (byte[] cell : row) {
                in.readFully(cell, 0, 2);
            }
        }
    }

    // Opens the table file with buffering.
    private static DataInputStream openTable(String path) throws IOException {
        return new DataInputStream(new BufferedInputStream(new FileInputStream(path)));
    }

    // Loads the conversion tables. On any I/O failure the error is printed and Big5 conversion
    // is disabled rather than propagating the exception to the constructor's caller.
    private void loadConvertTables() {
        String tablePath = TABLE_FILE_DIR + File.separator + BIG5_TO_GBKSIMP_TABLE_FILE_NAME;
        try {
            // GBK-3 table first, then GBK-4, read back to back from the start of the file.
            try (DataInputStream dis = openTable(tablePath)) {
                readTable(dis, gbk3table);
                readTable(dis, gbk4table);
            }

            if (ifloadbig5) {
                // The Big5 table is read from the start of the file again.
                byte[][][] loaded =
                        new byte[BIG5_HIGH_UP - BIG5_HIGH_DOWN + 1][BIG5_LOW_UP - BIG5_LOW_DOWN + 1][2];
                try (DataInputStream dis = openTable(tablePath)) {
                    readTable(dis, loaded);
                }
                big5table = loaded;
            } else {
                big5table = null;
            }
        } catch (IOException ioe) {
            // Covers FileNotFoundException and EOFException (truncated table) as well.
            ioe.printStackTrace();
            ifloadbig5 = false;
            big5table = null;
        }
    }

    // Shared conversion loop: rewrites Big5 double-byte sequences in text[0, limit) in place.
    // Callers must have checked that the Big5 table is loaded.
    private void convertInPlace(byte[] text, int limit) {
        int i = 0;
        while (i < limit) {
            // Non-negative byte: single-byte (ASCII) character, left untouched.
            if (text[i] >= 0) {
                i++;
                continue;
            }
            // Lead byte with no trail byte inside the range: stop, leaving it as is.
            if (i + 1 >= limit) {
                break;
            }
            int high = text[i] & 0xFF;
            int low = text[i + 1] & 0xFF;
            if (high >= BIG5_HIGH_DOWN && high <= BIG5_HIGH_UP
                    && low >= BIG5_LOW_DOWN && low <= BIG5_LOW_UP) {
                byte[] mapped = big5table[high - BIG5_HIGH_DOWN][low - BIG5_LOW_DOWN];
                text[i] = mapped[0];
                text[i + 1] = mapped[1];
            }
            // Out-of-range pairs are skipped unchanged, still consuming two bytes.
            i += 2;
        }
    }

    /**
     * Converts Big5-encoded bytes to the bytes given by the loaded table, in place.
     * Does nothing when the Big5 table is not loaded. Byte pairs outside the Big5 range
     * are left unchanged.
     *
     * @param text    Big5-encoded bytes; overwritten with the converted bytes
     * @param textlen number of leading bytes of {@code text} to process; values larger than
     *                {@code text.length} are clamped
     */
    public void big52gbk(byte[] text, int textlen) {
        if (!ifloadbig5) {
            return;
        }
        convertInPlace(text, Math.min(textlen, text.length));
    }

    /**
     * Converts Big5-encoded bytes and returns the result decoded as a GBK string.
     * The input array is also overwritten with the converted bytes.
     *
     * @param text Big5-encoded bytes; overwritten with the converted bytes
     * @return the converted text, or an empty string when the Big5 table is not loaded
     */
    public String big52gbk(byte[] text) {
        if (!ifloadbig5) {
            return "";
        }
        convertInPlace(text, text.length);
        return new String(text, 0, text.length, OUTPUT_CHARSET);
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值