和荣笔记 -- 从 GB2312 到 Unicode 转换表制作程式

本文介绍了一个用于将GB2312编码转换为Unicode编码的Java程序。该程序通过使用GBK编码作为中间步骤,实现了从GB2312到UTF-16BE和UTF-8的字符编码转换,并生成了包含GB2312编码汉字及其Unicode编码对照的手册。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >


这本手册里的字符与汉字编码列表由下面的程式所生成。

/**
 * GB2312Unicde.java
 * Copyright (c) 1997-2003 by Dr. Herong Yang
 */

import java.io.*;
import java.nio.*;
import java.nio.charset.*;
/**
 * Generates a table that lists, for every row/column position of the 94x94
 * GB code grid, the two-byte GB code together with its UTF-16BE and UTF-8
 * encodings, and writes the table to the file "gb2312.gb".
 *
 * The output is produced as raw bytes: ASCII text, hex digits, and the raw
 * two-byte GB code of each character, wrapped in simple HTML tags.
 */
class GB2312Unicde {
    /** Destination of all write* helpers; opened and closed by {@link #main}. */
    static OutputStream out = null;

    /** Upper-case hexadecimal digits, indexed by nibble value (0-15). */
    static char hexDigit[] = {'0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    /**
     * First position of each excluded range, encoded as row*100+column.
     * Paired index-by-index with {@link #e_out}; see {@link #validGB}.
     */
    static int b_out[] = {201, 267, 279, 293, 484, 587, 625, 657, 734, 782, 827,
        874, 901, 980, 5590};

    /** Last position (inclusive) of each excluded range, encoded as row*100+column. */
    static int e_out[] = {216, 268, 280, 294, 494, 594, 632, 694, 748, 794, 836,
        894, 903, 994, 5594};

    /**
     * Creates "gb2312.gb" in the working directory and writes the table to it.
     * Any I/O error is reported on standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            // Buffered because every helper writes one byte at a time.
            out = new BufferedOutputStream(new FileOutputStream("gb2312.gb"));
            try {
                writeCode();
            } finally {
                // Close (and flush) even when writeCode() fails, so the
                // file handle is never leaked.
                out.close();
            }
        } catch (IOException e) {
            System.out.println(e.toString());
        }
    }

    /**
     * Writes the complete table: one titled section per row 1..94, and for
     * every non-reserved row a two-column listing of all 94 positions.
     * Finally prints the number of characters written to standard output.
     *
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeCode() throws IOException {
        boolean reserved = false;
        String name = null;
        // The original author used GBK because GB2312 was not available in
        // the JDK at the time. NOTE(review): current JDKs list a "GB2312"
        // charset; GBK is kept here so the generated output does not change.
        CharsetDecoder gbdc = Charset.forName("GBK").newDecoder();
        CharsetEncoder uxec = Charset.forName("UTF-16BE").newEncoder();
        CharsetEncoder u8ec = Charset.forName("UTF-8").newEncoder();
        ByteBuffer gbbb = null;
        ByteBuffer uxbb = null;
        ByteBuffer u8bb = null;
        CharBuffer cb = null;
        int count = 0;
        for (int i = 1; i <= 94; i++) {
            // Defining row settings
            if (i >= 1 && i <= 9) {
                reserved = false;
                name = "Graphic symbols";
            } else if (i >= 10 && i <= 15) {
                reserved = true;
                name = "Reserved";
            } else if (i >= 16 && i <= 55) {
                reserved = false;
                name = "Level 1 characters";
            } else if (i >= 56 && i <= 87) {
                reserved = false;
                name = "Level 2 characters";
            } else if (i >= 88 && i <= 94) {
                reserved = true;
                name = "Reserved";
            }

            // Writing row title
            writeln();
            writeString("<p>");
            writeNumber(i);
            writeString(" Row: " + name);
            writeln();
            writeString("</p>");
            writeln();
            if (!reserved) {
                writeln();
                writeHeader();
                // Looping through all characters in one row
                for (int j = 1; j <= 94; j++) {
                    // Two-byte code: 0xA0 + row, 0xA0 + column.
                    byte hi = (byte) (0xA0 + i);
                    byte lo = (byte) (0xA0 + j);
                    if (validGB(i, j)) {
                        // Getting GB, UTF-16BE, UTF-8 codes
                        gbbb = ByteBuffer.wrap(new byte[] {hi, lo});
                        try {
                            cb = gbdc.decode(gbbb);
                            uxbb = uxec.encode(cb);
                            // encode() consumed the buffer; rewind so the
                            // same characters can be encoded a second time.
                            cb.rewind();
                            u8bb = u8ec.encode(cb);
                        } catch (CharacterCodingException e) {
                            // Not convertible: list the position as "null".
                            cb = null;
                            uxbb = null;
                            u8bb = null;
                        }
                    } else {
                        cb = null;
                        uxbb = null;
                        u8bb = null;
                    }

                    // Row/column number, then the character and its codes.
                    writeNumber(i);
                    writeNumber(j);
                    writeString(" ");
                    if (cb != null) {
                        writeByte(hi);
                        writeByte(lo);
                        writeString(" ");
                        writeHex(hi);
                        writeHex(lo);
                        count++;
                    } else {
                        writeGBSpace();
                        writeString(" null");
                    }

                    writeString(" ");
                    writeByteBuffer(uxbb, 2);
                    writeString(" ");
                    writeByteBuffer(u8bb, 3);
                    // Two entries per output line.
                    if (j % 2 == 0) {
                        writeln();
                    } else {
                        writeString("   ");
                    }
                }

                writeFooter();
            }
        }

        System.out.println("Number of GB characters wrote: " + count);
    }

    /**
     * Writes a CR LF line terminator.
     *
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeln() throws IOException {
        out.write(0x0D);
        out.write(0x0A);
    }

    /**
     * Writes a single raw byte.
     *
     * @param b the byte to write
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeByte(byte b) throws IOException {
        out.write(b & 0xFF);
    }

    /**
     * Writes the content of a buffer (positions 0 to limit-1) as hex digits,
     * or the text "null" when the buffer is absent, then pads with two
     * spaces per missing byte so the field is {@code l} bytes wide.
     *
     * @param b the buffer to dump, or {@code null}
     * @param l the field width in bytes (two output characters per byte)
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeByteBuffer(ByteBuffer b, int l)
            throws IOException {
        int i = 0;
        if (b == null) {
            writeString("null");
            // "null" is four characters, i.e. as wide as two hex bytes.
            i = 2;
        } else {
            for (i = 0; i < b.limit(); i++) {
                writeHex(b.get(i));
            }
        }

        for (int j = i; j < l; j++) {
            writeString("  ");
        }
    }

    /**
     * Writes the two bytes 0xA1 0xA1, i.e. the code of row 1, column 1 in the
     * 0xA0+row / 0xA0+column scheme used by this class. It serves as a
     * double-width blank placeholder where no character is printed.
     *
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeGBSpace() throws IOException {
        out.write(0xA1);
        out.write(0xA1);
    }

    /**
     * Writes the low-order byte of every character of a string; a
     * {@code null} string writes nothing.
     *
     * @param s the text to write, or {@code null}
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeString(String s) throws IOException {
        if (s != null) {
            for (int i = 0; i < s.length(); i++) {
                out.write((int) (s.charAt(i) & 0xFF));
            }
        }
    }

    /**
     * Writes the last two decimal digits of a number, zero-padded on the left.
     *
     * @param i the number to write
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeNumber(int i) throws IOException {
        String s = "00" + String.valueOf(i);
        writeString(s.substring(s.length() - 2, s.length()));
    }

    /**
     * Writes a byte as two upper-case hexadecimal digits.
     *
     * @param b the byte to write
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeHex(byte b) throws IOException {
        // Mask after shifting: b is sign-extended, so b >> 4 may be negative.
        out.write((int) hexDigit[(b >> 4) & 0x0F]);
        out.write((int) hexDigit[b & 0x0F]);
    }

    /**
     * Writes the opening {@code <pre>} tag and the column headings for the
     * two side-by-side entries of each table line.
     *
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeHeader() throws IOException {
        writeString("<pre>");
        writeln();
        writeString("Q.W. ");
        writeGBSpace();
        writeString(" GB   Uni. UTF-8 ");
        writeString("   ");
        writeString("Q.W. ");
        writeGBSpace();
        writeString(" GB   Uni. UTF-8 ");
        writeln();
        writeln();
    }

    /**
     * Writes the closing {@code </pre>} tag followed by a line terminator.
     *
     * @throws IOException if writing to {@link #out} fails
     */
    public static void writeFooter() throws IOException {
        writeString("</pre>");
        writeln();
    }

    /**
     * Tells whether a row/column position is outside every excluded range
     * listed in {@link #b_out} / {@link #e_out}.
     *
     * @param i the row number
     * @param j the column number
     * @return {@code false} if i*100+j lies within any excluded range
     *         (bounds inclusive), {@code true} otherwise
     */
    public static boolean validGB(int i, int j) {
        for (int l = 0; l < b_out.length; l++) {
            if (i * 100 + j >= b_out[l] && i * 100 + j <= e_out[l]) {
                return false;
            }
        }

        return true;
    }
}

转载于:https://www.cnblogs.com/meil/archive/2007/01/31/635873.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值