前言
今天和大家分享一个从 Android 7.0 系统源代码中提取出来的汉字转拼音实现方案。该方案依赖 android.icu 包,因此只能在 Android 7.0(API 24)及以上的系统中使用。逻辑并不复杂,直接上代码,代码中的注释有详细说明。
代码如下(示例):
import android.icu.text.Transliterator;
import android.text.TextUtils;
import android.util.Log;
import java.util.ArrayList;
/**
 * Converts Han (Chinese) characters to Pinyin with the ICU {@link Transliterator} from
 * {@code android.icu}.
 *
 * <p>Use {@link #getInstance()} to obtain the shared instance. When the Pinyin transliterator
 * could not be created, {@link #hasChineseTransliterator()} returns {@code false},
 * {@link #transliterate(String)} returns {@code null} and {@link #getTokens(String)} returns an
 * empty list.
 */
public class HanZiToPinYin {
    private static final String TAG = "HanZiToPinYin";

    /** Code points below this value are handled as (extended) Latin. */
    private static final int EXTENDED_LATIN_END_EXCLUSIVE = 0x250;
    /** First code point of the Latin Extended Additional block. */
    private static final int LATIN_EXTENDED_ADDITIONAL_FIRST = 0x1e00;
    /** Last code point (inclusive) of the Latin Extended Additional block. */
    private static final int LATIN_EXTENDED_ADDITIONAL_LAST = 0x1eff;

    /** Lazily created shared instance; only accessed while holding the class lock. */
    private static HanZiToPinYin instance;

    /** Han to upper-case ASCII Pinyin; {@code null} when the transliterator is unavailable. */
    private final Transliterator pinYinTrans;
    /** Latin to ASCII; {@code null} when the transliterator is unavailable. */
    private final Transliterator asciiTrans;

    /** One unit of tokenized input: a run of Latin/unknown text or a single Pinyin syllable. */
    public static class Token {
        /**
         * Separator between the target strings of each source character.
         */
        public static final String SEPARATOR = " ";
        /**
         * Token type: Latin text.
         */
        public static final int LATIN = 1;
        /**
         * Token type: Pinyin produced from a character.
         */
        public static final int PINYIN = 2;
        /**
         * Token type: text that could not be transliterated.
         */
        public static final int UNKNOWN = 3;

        /** Creates an empty token; {@code type} is 0 and both strings are {@code null}. */
        public Token() {
        }

        /**
         * Creates a fully populated token.
         *
         * @param type   one of {@link #LATIN}, {@link #PINYIN} or {@link #UNKNOWN}
         * @param source the source text
         * @param target the transliterated text
         */
        public Token(int type, String source, String target) {
            this.type = type;
            this.source = source;
            this.target = target;
        }

        /**
         * Type of this token: {@link #LATIN}, {@link #PINYIN} or {@link #UNKNOWN}.
         */
        public int type;
        /**
         * Original string before translation.
         */
        public String source;
        /**
         * Translated string of source. For Han, target is corresponding Pinyin. Otherwise target is
         * original string in source.
         */
        public String target;
    }

    /**
     * Creates the transliterators. A failure is logged and leaves the affected transliterator(s)
     * {@code null} instead of propagating the exception.
     */
    private HanZiToPinYin() {
        Transliterator pinYin = null;
        Transliterator ascii = null;
        try {
            pinYin = Transliterator.getInstance("Han-Latin/Names; Latin-Ascii; Any-Upper");
            ascii = Transliterator.getInstance("Latin-Ascii");
        } catch (IllegalArgumentException e) {
            // Keep the cause in the log so the missing transliterator ID can be diagnosed.
            Log.w(TAG, "Han-Latin/Names transliterator data is missing," + " HanZiToPinYin is disabled", e);
        }
        pinYinTrans = pinYin;
        asciiTrans = ascii;
    }

    /**
     * Reports whether the Pinyin transliterator was created successfully.
     *
     * @return {@code true} if Han to Pinyin conversion is available
     */
    public boolean hasChineseTransliterator() {
        return pinYinTrans != null;
    }

    /**
     * Returns the shared instance, creating it on first use.
     *
     * @return the singleton {@code HanZiToPinYin}
     */
    public static HanZiToPinYin getInstance() {
        synchronized (HanZiToPinYin.class) {
            if (instance == null) {
                instance = new HanZiToPinYin();
            }
            return instance;
        }
    }

    /**
     * Classifies a single code point and fills {@code token} with its source, target and type.
     *
     * <p>Must only be called when {@link #hasChineseTransliterator()} is {@code true};
     * {@link #getTokens(String)} guarantees this.
     *
     * @param codePoint the Unicode code point to classify (not a space)
     * @param token     the token to populate
     */
    private void tokenize(int codePoint, Token token) {
        token.source = new String(Character.toChars(codePoint));

        // ASCII is passed through unchanged.
        if (codePoint < 128) {
            token.type = Token.LATIN;
            token.target = token.source;
            return;
        }

        // Extended Latin: transcode to the ASCII equivalent. The upper bound is inclusive so
        // that U+1EFF, the last code point of Latin Extended Additional, is covered as well.
        if (codePoint < EXTENDED_LATIN_END_EXCLUSIVE
                || (LATIN_EXTENDED_ADDITIONAL_FIRST <= codePoint
                        && codePoint <= LATIN_EXTENDED_ADDITIONAL_LAST)) {
            token.type = Token.LATIN;
            token.target = asciiTrans == null ? token.source : asciiTrans.transliterate(token.source);
            return;
        }

        // Everything else is attempted as Pinyin; if the transliterator leaves the text unchanged
        // (or produces nothing) the character is reported as UNKNOWN.
        // NOTE(review): the Any-Upper step may also alter non-Han cased letters, which would then
        // be reported as PINYIN - confirm whether callers need a stricter Han check.
        token.type = Token.PINYIN;
        token.target = pinYinTrans.transliterate(token.source);
        if (TextUtils.isEmpty(token.target) || TextUtils.equals(token.source, token.target)) {
            token.type = Token.UNKNOWN;
            token.target = token.source;
        }
    }

    /**
     * Converts Han characters in the input to Pinyin.
     *
     * @param input the text to convert
     * @return the transliterated text, or {@code null} when the input is empty or the Pinyin
     *         transliterator is unavailable
     **/
    public String transliterate(final String input) {
        if (!hasChineseTransliterator() || TextUtils.isEmpty(input)) {
            return null;
        }
        return pinYinTrans.transliterate(input);
    }

    /**
     * Splits the input into tokens. A run of Latin or unknown characters without spaces becomes
     * one token; each character that yields Pinyin becomes a token of its own. Spaces separate
     * tokens and are not included in any token.
     *
     * <p>The input is processed by Unicode code point, so characters outside the Basic
     * Multilingual Plane are handed to the transliterator intact rather than as two surrogates.
     *
     * @param input the text to tokenize
     * @return the tokens in input order; empty when the input is empty or the Pinyin
     *         transliterator is unavailable
     */
    public ArrayList<Token> getTokens(final String input) {
        ArrayList<Token> tokens = new ArrayList<>();
        if (!hasChineseTransliterator() || TextUtils.isEmpty(input)) {
            return tokens;
        }
        final int inputLength = input.length();
        final StringBuilder sb = new StringBuilder();
        int tokenType = Token.LATIN;
        Token token = new Token();

        // Walk the input; the pending run is flushed as a token when (a) the token type changes,
        // (b) the current character yields Pinyin, or (c) the current character is a space.
        int i = 0;
        while (i < inputLength) {
            final int codePoint = input.codePointAt(i);
            i += Character.charCount(codePoint);

            if (Character.isSpaceChar(codePoint)) {
                if (sb.length() > 0) {
                    addToken(sb, tokens, tokenType);
                }
                continue;
            }

            tokenize(codePoint, token);
            if (token.type == Token.PINYIN) {
                if (sb.length() > 0) {
                    addToken(sb, tokens, tokenType);
                }
                tokens.add(token);
                // Record the type before swapping in a fresh token, whose type would be 0.
                tokenType = Token.PINYIN;
                token = new Token();
            } else {
                if (tokenType != token.type && sb.length() > 0) {
                    addToken(sb, tokens, tokenType);
                }
                sb.append(token.target);
                tokenType = token.type;
            }
        }
        if (sb.length() > 0) {
            addToken(sb, tokens, tokenType);
        }
        return tokens;
    }

    /**
     * Appends the buffered run as a new token and clears the buffer. Both {@code source} and
     * {@code target} of the new token are set to the buffered text, which holds the already
     * converted targets of the run.
     *
     * @param sb        buffer holding the pending run; emptied by this call
     * @param tokens    list receiving the new token
     * @param tokenType type assigned to the new token
     */
    private void addToken(final StringBuilder sb, final ArrayList<Token> tokens, final int tokenType) {
        String str = sb.toString();
        tokens.add(new Token(tokenType, str, str));
        sb.setLength(0);
    }
}
总结
这个类并不支持多音字,所以如果必须处理多音字的问题,这个类就不适合了。