原文地址:http://www.oschina.net/code/snippet_228898_9674
[文件] DigitUtil.java ~ 4KB 下载(14)
001 | package com.baijob.vsearch.util; |
002 |
003 | import java.util.*; |
004 |
/**
 * Utility class for recognising and parsing integers written with Chinese
 * numerals, ASCII digits, or a mixture of both.
 *
 * <p>Both the everyday (simplified) numerals and the financial/traditional
 * numerals are understood. All Han characters in this source are written as
 * Unicode escapes so that compilation does not depend on the source-file
 * encoding passed to the compiler.
 *
 * <p>The parser is lenient: it checks that every character is a known numeral
 * but does not verify that the characters form a grammatically valid number.
 *
 * @author liushiquan
 */
public class DigitUtil {

    /** The numeral "zero" (U+96F6). */
    private static final char ZERO = '\u96f6';

    /**
     * Everyday numerals. Indices 0-9 are the digits zero to nine, so the index
     * equals the digit's value; indices 10-14 are the units ten, hundred,
     * thousand, ten thousand and hundred million.
     */
    private static final char[] SC_DIGITS = {
        '\u96f6', '\u4e00', '\u4e8c', '\u4e09', '\u56db',
        '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d',
        '\u5341', '\u767e', '\u5343', '\u4e07', '\u4ebf'
    };

    /**
     * Financial/traditional numerals, position-for-position equivalents of
     * {@link #SC_DIGITS}.
     */
    private static final char[] TC_DIGITS = {
        '\u96f6', '\u58f9', '\u8d30', '\u53c1', '\u8086',
        '\u4f0d', '\u9646', '\u67d2', '\u634c', '\u7396',
        '\u62fe', '\u4f70', '\u4edf', '\u4e07', '\u4ebf'
    };

    /**
     * Unit characters ordered from largest to smallest: hundred million,
     * ten thousand, thousand, hundred, ten. Parallel to {@link #UNIT_VALUES}.
     */
    private static final char[] UNIT_CHARS = {
        '\u4ebf', '\u4e07', '\u5343', '\u767e', '\u5341'
    };

    /** Numeric value of each entry of {@link #UNIT_CHARS}. */
    private static final long[] UNIT_VALUES = {
        100000000L, 10000L, 1000L, 100L, 10L
    };

    /**
     * Saturation value used while parsing: any intermediate result that does
     * not fit in an {@code int} is clamped to this value so that the final
     * range check can detect it without risking {@code long} overflow.
     */
    private static final long OVERFLOW = Integer.MAX_VALUE + 1L;

    /** Maps each financial/traditional numeral to its everyday equivalent. */
    private static final Map<Character, Character> TC_TO_SC =
            new HashMap<Character, Character>();

    /** Value (0-9) of every single-digit character, Han or ASCII. */
    private static final Map<Character, Integer> DIGIT_VALUES =
            new HashMap<Character, Integer>();

    /** Every character accepted by {@link #isDigits(String)}. */
    private static final Set<Character> NUMERAL_CHARS = new HashSet<Character>();

    static {
        for (int i = 0; i < TC_DIGITS.length; i++) {
            TC_TO_SC.put(TC_DIGITS[i], SC_DIGITS[i]);
            NUMERAL_CHARS.add(TC_DIGITS[i]);
            NUMERAL_CHARS.add(SC_DIGITS[i]);
        }
        for (int value = 0; value <= 9; value++) {
            char ascii = (char) ('0' + value);
            NUMERAL_CHARS.add(ascii);
            DIGIT_VALUES.put(ascii, value);
            DIGIT_VALUES.put(SC_DIGITS[value], value);
        }
    }

    /** Not instantiable; all members are static. */
    private DigitUtil() {
    }

    /**
     * Demo entry point: prints the result of parsing the two-character
     * numeral "zero three".
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println(parseDigits("\u96f6\u4e09"));
    }

    /**
     * Parses a number written with ASCII digits, Chinese numerals (everyday or
     * financial/traditional), or a mixture of both.
     *
     * <p>A single leading {@code '+'} is ignored. Plain decimal strings are
     * handled by {@link Integer#parseInt(String)}; everything else goes through
     * the Chinese-numeral parser.
     *
     * @param hanzi the text to parse; may be {@code null}
     * @return the parsed value, or {@code null} if the input is {@code null},
     *         empty, contains a character that is not a numeral, or denotes a
     *         value that does not fit in an {@code int}
     */
    public static Integer parseDigits(String hanzi) {
        if (!isDigits(hanzi)) {
            return null;
        }
        // isDigits guarantees a non-null, non-empty string at this point.
        String body = hanzi.charAt(0) == '+' ? hanzi.substring(1) : hanzi;
        try {
            return Integer.valueOf(Integer.parseInt(body));
        } catch (NumberFormatException notPlainDecimal) {
            // Not a plain int literal: fall through to the numeral parser.
        }

        char[] chars = body.toCharArray();
        changeTCtoSC(chars);
        long value = parse(chars, 0, chars.length, 1);
        if (value > Integer.MAX_VALUE) {
            return null;
        }
        return Integer.valueOf((int) value);
    }

    /**
     * Tells whether the string looks like a number this class can parse.
     *
     * <p>After dropping one optional leading {@code '+'}, the string is
     * accepted if {@link Integer#parseInt(String)} accepts it, or if every
     * character is an ASCII digit, an everyday Chinese numeral, or a
     * financial/traditional Chinese numeral. The arrangement of the characters
     * is not validated.
     *
     * @param s the text to test; may be {@code null}
     * @return {@code true} if the text consists only of numeral characters;
     *         {@code false} for {@code null}, the empty string, or a lone
     *         {@code "+"}
     */
    public static boolean isDigits(String s) {
        if (s == null || s.length() == 0) {
            return false;
        }
        if (s.charAt(0) == '+') {
            s = s.substring(1);
        }
        if (s.length() == 0) {
            return false;
        }
        try {
            Integer.parseInt(s);
            return true;
        } catch (NumberFormatException notPlainDecimal) {
            // Fall through to the per-character check.
        }
        for (int i = 0; i < s.length(); i++) {
            if (!NUMERAL_CHARS.contains(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Recursively evaluates {@code chars[start, end)}, which must already be
     * converted to everyday numerals.
     *
     * @param chars     the numeral characters
     * @param start     first index of the segment (inclusive)
     * @param end       last index of the segment (exclusive)
     * @param preNumber multiplier applied when the segment is a single digit;
     *                  used for the colloquial shorthand where a lone digit
     *                  after a unit counts in the next lower unit (for example
     *                  "one ten-thousand two" meaning 12000)
     * @return the segment's value, clamped to {@link #OVERFLOW} when it does
     *         not fit in an {@code int}
     */
    private static long parse(char[] chars, int start, int end, long preNumber) {
        if (start >= end) {
            return 0;
        }

        char first = chars[start];
        if (end - start == 1) {
            Integer digit = DIGIT_VALUES.get(first);
            if (digit != null) {
                return digit * preNumber;
            }
            // A lone unit character is handled by the unit loop below.
        } else if (first == ZERO || first == '0') {
            // A zero at the start of the segment is only a placeholder.
            return parse(chars, start + 1, end, 1);
        }

        // Split on the largest unit present: head * unit + tail.
        for (int u = 0; u < UNIT_CHARS.length; u++) {
            int index = indexOf(chars, start, end, UNIT_CHARS[u]);
            if (index == -1) {
                continue;
            }
            long unit = UNIT_VALUES[u];
            // A unit with nothing in front of it means "one" of that unit,
            // so a bare "ten five" is 15 rather than 5.
            long head = index == start ? 1 : parse(chars, start, index, 1);
            long tail = parse(chars, index + 1, end, unit / 10);
            return saturate(head * unit + tail);
        }

        // No unit in the segment: read it as a positional decimal digit run.
        long value = 0;
        for (int i = start; i < end; i++) {
            Integer digit = DIGIT_VALUES.get(chars[i]);
            value = saturate(value * 10 + (digit == null ? 0 : digit));
        }
        return value;
    }

    /**
     * Clamps a non-negative intermediate result to {@link #OVERFLOW}.
     *
     * @param value the value to clamp
     * @return {@code value}, or {@link #OVERFLOW} if it is larger
     */
    private static long saturate(long value) {
        return value > OVERFLOW ? OVERFLOW : value;
    }

    /**
     * Finds the first occurrence of {@code c} in {@code chars[start, end)}.
     *
     * @param chars the array to search
     * @param start first index to examine (inclusive)
     * @param end   last index to examine (exclusive)
     * @param c     the character to look for
     * @return the absolute index of the first match, or {@code -1} if absent
     */
    private static int indexOf(char[] chars, int start, int end, char c) {
        for (int i = start; i < end; i++) {
            if (chars[i] == c) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Replaces, in place, every financial/traditional numeral with its
     * everyday equivalent; other characters are left untouched.
     *
     * @param chars the characters to convert
     */
    private static void changeTCtoSC(char[] chars) {
        for (int i = 0; i < chars.length; i++) {
            Character simplified = TC_TO_SC.get(chars[i]);
            if (simplified != null) {
                chars[i] = simplified;
            }
        }
    }
}
数字转换工具
2034

被折叠的 条评论
为什么被折叠?



