给定汉字获得拼音首字母

汉字拼音首字母提取算法

最新推荐文章于 2022-02-25 18:19:43 发布 · 184 阅读

文章标签：

#C #C++ #C# #F# #J#


import java.io.UnsupportedEncodingException;

/**
 * Maps the first character of a string to an index letter.
 *
 * <p>ASCII letters map to their upper-case form. Chinese characters are encoded
 * with GBK and their zone/position code is looked up in a table of section
 * start codes, one section per pinyin initial. Everything else (digits,
 * punctuation, blank input, characters outside the table) maps to the
 * fallback letter {@code "0"}.
 */
public class CharacterUtil {

	/** Charset used to obtain the two-byte code of a Chinese character. */
	private static final String GBK_CHARSET = "GBK";

	/** Value subtracted from each GBK byte to get the zone / position number. */
	private static final int ZONE_OFFSET = 160;

	/** Highest valid position number inside a zone. */
	private static final int MAX_POSITION = 94;

	/**
	 * Start zone/position codes (zone * 100 + position) of each pinyin-initial
	 * section among the national-standard level-1 Chinese characters. The final
	 * entry is only an exclusive upper bound for the last section.
	 */
	private static final int[] SECTION_START_CODES = { 1601, 1637, 1833, 2078, 2274, 2302, 2433, 2594, 2787,
			3106, 3212, 3472, 3635, 3722, 3730, 3858, 4027, 4086, 4390, 4558, 4684, 4925, 5249, 5600 };

	/**
	 * Letter for each section in {@link #SECTION_START_CODES}; the last entry
	 * ("0") is the fallback for anything that cannot be classified.
	 */
	private static final String[] FIRST_LETTERS = { "A", "B", "C", "D", "E", "F", "G", "H", "J",
			"K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "W", "X", "Y", "Z", "0" };

	/** Letter returned when the input cannot be mapped to A-Z. */
	private static final String FALLBACK_LETTER = FIRST_LETTERS[FIRST_LETTERS.length - 1];

	/**
	 * Returns the index letter for the first non-whitespace character of {@code str}.
	 *
	 * @param str the text to classify; leading and trailing whitespace is ignored
	 * @return an upper-case letter "A".."Z" for ASCII letters and for Chinese
	 *         characters found in the section table, otherwise "0"
	 * @throws NullPointerException if {@code str} is null
	 * @throws UnsupportedEncodingException if the GBK charset is not available
	 */
	public static String getFirstLetter(String str) throws UnsupportedEncodingException {
		if (str == null) {
			throw new NullPointerException("str must not be null");
		}
		String trimmed = str.trim();
		if (trimmed.isEmpty()) {
			return FALLBACK_LETTER;
		}

		// Locale.ROOT keeps the result independent of the default locale
		// (e.g. "i" must become "I" even under a Turkish locale).
		String head = trimmed.substring(0, 1).toUpperCase(java.util.Locale.ROOT);
		byte[] gbCode = head.getBytes(GBK_CHARSET);
		if (gbCode.length == 0) {
			return FALLBACK_LETTER;
		}

		// A non-negative first byte means a single-byte (ASCII) encoding.
		if (gbCode[0] >= 0) {
			char ascii = (char) gbCode[0];
			// Return only one character: upper-casing may have expanded the
			// original character into several.
			return (ascii >= 'A' && ascii <= 'Z') ? String.valueOf(ascii) : FALLBACK_LETTER;
		}
		if (gbCode.length < 2) {
			return FALLBACK_LETTER;
		}

		int zone = (gbCode[0] & 0xff) - ZONE_OFFSET;
		int position = (gbCode[1] & 0xff) - ZONE_OFFSET;
		// Trail bytes outside 1..94 belong to GBK extension areas; without this
		// check their codes would spill into the previous zone's range.
		if (position < 1 || position > MAX_POSITION) {
			return FALLBACK_LETTER;
		}

		int code = zone * 100 + position;
		for (int i = 0; i < SECTION_START_CODES.length - 1; i++) {
			// Upper bound is exclusive: it is the first code of the next section.
			if (code >= SECTION_START_CODES[i] && code < SECTION_START_CODES[i + 1]) {
				return FIRST_LETTERS[i];
			}
		}
		return FALLBACK_LETTER;
	}

	/**
	 * Returns all index letters in section order, ending with the fallback "0".
	 *
	 * @return a fresh copy of the letter table; modifying it does not affect
	 *         later lookups
	 */
	public static String[] getLetterArray() {
		return FIRST_LETTERS.clone();
	}

	/**
	 * Empty entry point; performs no work.
	 *
	 * @param args ignored
	 * @throws UnsupportedEncodingException never thrown by the current body
	 */
	public static void main(String[] args) throws UnsupportedEncodingException {

	}
}