类
Character
1.概述
public final class Character extends Object implements Serializable, Comparable<Character>
Character 类在对象中包装一个基本类型 char 的值。Character 类型的对象包含类型为 char 的单个字段。
此外,该类提供了几种方法,以确定字符的类别(小写字母,数字,等等),并将字符从大写转换成小写,反之亦然。
2.此类的UML类图
因为 UnicodeScript 是enum 类,所以继承抽象的Enum类。
3.源码解析
源码太多了,就捡几个不太常用的来讲。
/**
 * Tells whether the given code point lies in the Basic Multilingual Plane (BMP),
 * i.e. the range U+0000 to U+FFFF.
 *
 * @param codePoint the code point to test
 * @return {@code true} if the upper 16 bits of {@code codePoint} are all zero
 */
public static boolean isBmpCodePoint(int codePoint) {
    // Optimized form of: codePoint >= MIN_VALUE && codePoint <= MAX_VALUE.
    // The unsigned shift (>>>) keeps the sign bit as data, so negative
    // values yield a non-zero result and are rejected.
    final int highBits = codePoint >>> 16;
    return highBits == 0;
}
/**
* 确定使用指定基数的特定数字的字符表示形式。
*
*/
public static char forDigit(int digit, int radix) {
// 只要一个为ture就为true
if ((digit >= radix) || (digit < 0)) {
return '\0';
// radix<2 或者 radix>36
if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
return '\0';
}
if (digit < 10) {
return (char)('0' + digit);
}
return (char)('a' - 10 + digit);
}
/**
 * Returns a value indicating the general category of a character.
 *
 * <p>The result is one of the general-category constants of
 * {@code Character}, for example {@code DECIMAL_DIGIT_NUMBER} (9),
 * {@code LOWERCASE_LETTER} (2), {@code CONTROL} (15) or
 * {@code OTHER_PUNCTUATION} (24).
 *
 * @param ch the character to classify
 * @return the general category of {@code ch}
 */
public static int getType(char ch) {
    // Widen to int so the code-point overload does the actual work.
    final int codePoint = ch;
    return getType(codePoint);
}
/**
 * Returns a value indicating the general category of a code point.
 *
 * @param codePoint the code point to classify
 * @return the general category reported by the {@code CharacterData}
 *         instance selected for {@code codePoint}
 */
public static int getType(int codePoint) {
    // CharacterData.of(int) picks the CharacterData implementation that
    // handles this code point; the lookup is delegated to it.
    final CharacterData data = CharacterData.of(codePoint);
    return data.getType(codePoint);
}
/**
 * Tells whether the given value is a valid Unicode code point, i.e. lies in
 * the range 0x0000 to 0x10FFFF inclusive.
 *
 * @param codePoint the value to test
 * @return {@code true} if {@code codePoint} is a valid Unicode code point
 */
public static boolean isValidCodePoint(int codePoint) {
    // Optimized form of:
    //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
    // Compare plane numbers (the bits above the low 16) instead of the
    // values; the unsigned shift turns negative input into a huge plane.
    return (codePoint >>> 16) < ((MAX_CODE_POINT + 1) >>> 16);
}
/**
 * Tells whether the given {@code char} is a low-surrogate code unit, i.e.
 * lies between {@code MIN_LOW_SURROGATE} and {@code MAX_LOW_SURROGATE}
 * inclusive. This is not a lowercase-letter check.
 *
 * @param ch the {@code char} value to test
 * @return {@code true} if {@code ch} is within the low-surrogate range
 */
public static boolean isLowSurrogate(char ch) {
    // MIN_LOW_SURROGATE is U+DC00 and MAX_LOW_SURROGATE is U+DFFF.
    return MIN_LOW_SURROGATE <= ch && ch <= MAX_LOW_SURROGATE;
}
还有很多方法,这里就不一一列举了……
4.Character内部类Subset 和UnicodeBlock
Subset源码:
/**
 * A named subset of the Unicode character set.
 *
 * <p>Instances compare by identity only: {@code equals} and {@code hashCode}
 * are final and fall back to reference equality and the inherited hash code.
 */
public static class Subset {

    /** Name of this subset; never {@code null} once constructed. */
    private String name;

    /**
     * Creates a subset with the given name.
     *
     * @param name the subset's name
     * @throws NullPointerException if {@code name} is {@code null}
     */
    protected Subset(String name) {
        if (null == name) {
            throw new NullPointerException("name");
        }
        this.name = name;
    }

    /** Returns {@code true} only when {@code obj} is this very instance. */
    public final boolean equals(Object obj) {
        return obj == this;
    }

    /** Returns the inherited hash code, consistent with identity equality. */
    public final int hashCode() {
        return super.hashCode();
    }

    /** Returns the name of this subset. */
    public final String toString() {
        return this.name;
    }
}
UnicodeBlock
public static final class UnicodeBlock extends Subset {
private static Map<String, UnicodeBlock> map = new HashMap<>(256);
private UnicodeBlock(String idName) {
super(idName);
map.put(idName, this);
}
private UnicodeBlock(String idName, String alias) {
this(idName);
map.put(alias, this);
}
//可变形参 构造方法
private UnicodeBlock(String idName, String... aliases) {
this(idName);
for (String alias : aliases)
map.put(alias, this);
}
public static final UnicodeBlock BASIC_LATIN =
new UnicodeBlock("BASIC_LATIN",
"BASIC LATIN",
"BASICLATIN");
// 还有很多UnicodeBlock类型的常量省略。。。。。。。
private static final UnicodeBlock[] blocks = {
BASIC_LATIN,
//省略很多UnicodeBlock实例
}
// 获取 c 所属的UnicodeBlock实例
public static UnicodeBlock of(char c) {
return of((int)c);
}
public static UnicodeBlock of(int codePoint) {
//判断codePoint是否有效
if (!isValidCodePoint(codePoint)) {
throw new IllegalArgumentException();
}
// 二分查找UnicodeBlock
int top, bottom, current;
bottom = 0;
top = blockStarts.length;
current = top/2;
// invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
while (top - bottom > 1) {
if (codePoint >= blockStarts[current]) {
bottom = current;
} else {
top = current;
}
current = (top + bottom) / 2;
}
return blocks[current];
}
// 返回带有给定名称的 UnicodeBlock实例。
public static final UnicodeBlock forName(String blockName) {
UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
if (block == null) {
throw new IllegalArgumentException();
}
return block;
}
}
5.枚举UnicodeScript
源码不贴上了
有2个方法 public static UnicodeScript of(int codePoint)和 public static final UnicodeScript forName(String scriptName)。
方法作用和UnicodeBlock一样。
6.Character 简单运用
/**
 * Decides whether a character is Chinese by looking at its Unicode block.
 *
 * @param c the character to test
 * @return {@code true} if the block of {@code c} is one of the CJK unified
 *         ideograph blocks (base, extensions A-D) or one of the CJK
 *         compatibility ideograph blocks
 */
public boolean isChineseUBynicodeBlock(char c) {
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
    return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
            || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
            || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
            || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT;
}
/**
 * Decides whether a character is Chinese by looking at its Unicode script.
 *
 * @param c the character to test
 * @return {@code true} if the script of {@code c} is
 *         {@code Character.UnicodeScript.HAN}
 */
public boolean isChineseByUnicodeScript(char c) {
    return Character.UnicodeScript.of(c) == Character.UnicodeScript.HAN;
}
/**
 * Tests a character through {@code Character.getType()}.
 *
 * <p>The check compares the general category against
 * {@code Character.OTHER_LETTER} (value 5). NOTE(review): that category is
 * not limited to Chinese ideographs. Letters of other caseless scripts,
 * such as Japanese kana, fall into it as well, so this is only a rough
 * approximation of "is Chinese". Prefer a block- or script-based check when
 * an exact answer is needed.
 *
 * @param c the character to test
 * @return {@code true} if the general category of {@code c} is
 *         {@code Character.OTHER_LETTER}
 */
public boolean isGetType(char c) {
    return Character.getType(c) == Character.OTHER_LETTER;
}