Character类提供了一些方法,用于判断字符的类别(小写字母、数字等),以及进行大小写转换。Character基于Unicode标准。
Java平台使用UTF-16来表示char数组以及String、StringBuffer类中的字符。
目前Unicode字符分为17组(平面)编排,从0x0000到0x10FFFF,每组有65536个码位,共1114112个码位。
基本多文种平面(第零平面)BMP范围:U+0000到U+FFFF,共65536个码位。
BMP中有一个专用区,范围:0xE000到0xF8FF,共6400个码位。此外,BMP中0xD800到0xDFFF这2048个码位叫代理区(代理区不属于专用区)。代理区的目的是用2个UTF-16码元(即2个char)表示BMP以外的字符,所以大于0x00FFFF的码位都需要使用代理区的码点。
代理区分为高位代理和低位代理,0xD800到0xDBFF为高位代理区,0xDC00到0xDFFF为低位代理区。
由于Unicode的最大码位为0x10FFFF,0xFFFF及之前的(65536个码位)使用2个字节表示,0xFFFF之后的码位(共1048576个,减去0x10000后的偏移量范围为0x00000到0xFFFFF)仅需要20位二进制就可以表示;一个代理对共32位,因此规定多余的12位用作标记:高位代理的前6位为110110,低位代理的前6位为110111,两者各自的后10位分别存放偏移量的高10位和低10位。
Character类将基本类型char包装在一个对象中
1、内部类、枚举
public static class Subset | Unicode字符集子集 |
public static final class UnicodeBlock extends Subset | 根据Unicode硬编码范围划分 |
public static enum UnicodeScript | 根据语言书写规则对Unicode字符划分 |
2、常量
public static final int MIN_RADIX = 2; | 最小基数 |
public static final int MAX_RADIX = 36; | 最大基数 |
public static final char MIN_VALUE = '\u0000'; | 最小值 |
public static final char MAX_VALUE = '\uFFFF'; | 最大值 |
public static final byte UNASSIGNED = 0; | 通用 Unicode规范 |
...... | |
public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; | |
public static final char MIN_HIGH_SURROGATE = '\uD800'; | 高位代理区最小值 |
public static final char MAX_HIGH_SURROGATE = '\uDBFF'; | 高位代理区最大值 |
public static final char MIN_LOW_SURROGATE = '\uDC00'; | 低位代理区最小值 |
public static final char MAX_LOW_SURROGATE = '\uDFFF'; | 低位代理区最大值 |
public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; | 代理区最小值 |
public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; | 代理区最大值 |
public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; | 补充码点最小值 |
public static final int MIN_CODE_POINT = 0x000000; | 代码点最小值 |
public static final int MAX_CODE_POINT = 0X10FFFF; | 代码点最大值 |
private final char value; |
|
3.1、Subset类
public final boolean equals(Object obj) |
|
public final int hashCode() |
|
public final String toString() |
|
3.2、UnicodeBlock类
private static Map<String, UnicodeBlock> map | 缓存池 |
public static final UnicodeBlock BASIC_LATIN | Unicode字符块 |
...... | |
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D | |
private static final int blockStarts[] | Unicode字符集开始地址 |
private static final UnicodeBlock[] blocks | Unicode字符块数组 |
public static UnicodeBlock of(char c) | 根据代码点查找Unicode字符块(折半查找) |
public static UnicodeBlock of(int codePoint) | |
public static final UnicodeBlock forName(String blockName) | 从map缓存池中查找Unicode字符块 |
3.3、UnicodeScript枚举
COMMON......UNKNOWN | 枚举 |
private static final int[] scriptStarts | 各书写系统(script)区间的起始码点 |
private static final UnicodeScript[] scripts | 与scriptStarts对应的Unicode书写系统数组 |
private static HashMap<String,Character.UnicodeScript> aliases | 缓存池 |
public static UnicodeScript of(int codePoint) | 根据代码点查找Unicode书写系统(折半查找) |
public static final UnicodeScript forName(String scriptName) | 从aliases缓存池中查找Unicode书写系统 |
4、方法
private static class CharacterCache | 将字符0-127放到缓存池中 |
public static Character valueOf(char c) | 字符小于128则从缓存池中取,否则new一个对象 |
public char charValue() | 返回value |
public int hashCode() |
|
public boolean equals(Object obj) |
|
public String toString() |
|
public static String toString(char c) |
|
public static boolean isValidCodePoint(int codePoint) | 是否为有效的代码点 |
public static boolean isBmpCodePoint(int codePoint) | 是否为Bmp代码点 |
public static boolean isSupplementaryCodePoint(int codePoint) | 是否为补充代码点 |
public static boolean isHighSurrogate(char ch) | 是否为高位代理区 |
public static boolean isLowSurrogate(char ch) | 是否为低位代理区 |
public static boolean isSurrogate(char ch) | 是否为代理区 |
public static boolean isSurrogatePair(char high, char low) | 是否为一个有效Unicode的代理对 |
public static int charCount(int codePoint) | 所需字符的数目(Bmp范围为1,否则为2) |
public static int toCodePoint(char high, char low) | 代理对转换代码点 |
public static int codePointAt(CharSequence seq, int index) | 索引字符代码点 |
public static int codePointAt(char[] a, int index) | 索引字符代码点 |
public static int codePointAt(char[] a, int index, int limit) | |
static int codePointAtImpl(char[] a, int index, int limit) | |
public static int codePointBefore(CharSequence seq, int index) | 索引前一个字符的代码点 |
public static int codePointBefore(char[] a, int index) | 索引前一个字符的代码点 |
public static int codePointBefore(char[] a, int index, int start) | |
static int codePointBeforeImpl(char[] a, int index, int start) | |
public static char highSurrogate(int codePoint) | 代码点返回高位代理 |
public static char lowSurrogate(int codePoint) | 代码点返回低位代理 |
public static int toChars(int codePoint, char[] dst, int dstIndex) | 根据代码点返回字符 |
public static char[] toChars(int codePoint) | |
static void toSurrogates(int codePoint, char[] dst, int index) | 码点换成代理存到dst数组中 |
public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) | 获取代码点总数 |
public static int codePointCount(char[] a, int offset, int count) | 获取代码点总数 |
static int codePointCountImpl(char[] a, int offset, int count) | |
public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) | 返回给定字符序列中,从给定index处偏移codePointOffset个代码点后的索引 |
public static int offsetByCodePoints(char[] a, int start, int count,int index, int codePointOffset) | 返回给定char子数组中,从给定index处偏移codePointOffset个代码点后的索引 |
static int offsetByCodePointsImpl(char[]a, int start, int count, int index, int codePointOffset) | |
public static boolean isLowerCase(char ch) | 是否为小写字符 |
public static boolean isLowerCase(int codePoint) | |
public static boolean isUpperCase(char ch) | 是否为大写字符 |
public static boolean isUpperCase(int codePoint) | |
public static boolean isTitleCase(char ch) | 是否TitleCase字符 |
public static boolean isTitleCase(int codePoint) | |
public static boolean isDigit(char ch) | 是否为数字 |
public static boolean isDigit(int codePoint) | |
public static boolean isDefined(char ch) | 是否已定义 |
public static boolean isDefined(int codePoint) | |
public static boolean isLetter(char ch) | 是否为字母 |
public static boolean isLetter(int codePoint) | |
public static boolean isLetterOrDigit(char ch) | 是否为字母或数字 |
public static boolean isLetterOrDigit(int codePoint) | |
public static boolean isAlphabetic(int codePoint) |
|
public static boolean isIdeographic(int codePoint) |
|
public static boolean isJavaIdentifierStart(char ch) | 确定是否允许将指定字符作为 Java 标识符中的首字符 |
public static boolean isJavaIdentifierStart(int codePoint) | |
public static boolean isJavaIdentifierPart(char ch) | 确定字符(Unicode代码点)是否可以是 Java 标识符中首字符以外的部分。 |
public static boolean isJavaIdentifierPart(int codePoint) | |
public static boolean isUnicodeIdentifierStart(char ch) | 确定是否允许将指定字符作为 Unicode 标识符中的首字符 |
public static boolean isUnicodeIdentifierStart(int codePoint) | |
public static boolean isUnicodeIdentifierPart(char ch) | 确定指定字符是否可以是Unicode 标识符中首字符以外的部分 |
public static boolean isUnicodeIdentifierPart(int codePoint) | |
public static boolean isIdentifierIgnorable(char ch) | 确定是否应该认为指定字符是 Java 标识符或 Unicode 标识符中可忽略的一个字符 |
public static boolean isIdentifierIgnorable(int codePoint) | |
public static char toLowerCase(char ch) | 使用取自 UnicodeData 文件的大小写映射信息将字符参数转换为小写 |
public static int toLowerCase(int codePoint) | |
public static char toUpperCase(char ch) | 使用取自 UnicodeData 文件的大小写映射信息将字符参数转换为大写。 |
public static int toUpperCase(int codePoint) | |
public static char toTitleCase(char ch) | 使用取自 UnicodeData 文件的大小写映射信息将字符参数转换为首字母大写 |
public static int toTitleCase(int codePoint) | |
public static int digit(char ch, int radix) | 返回使用指定基数的字符ch的数值 |
public static int digit(int codePoint, int radix) | |
public static int getNumericValue(char ch) | 返回指定的 Unicode 字符表示的int值 |
public static int getNumericValue(int codePoint) | |
public static boolean isSpaceChar(char ch) | 确定指定字符是否为Unicode空白字符 |
public static boolean isSpaceChar(int codePoint) | |
public static boolean isWhitespace(char ch) | 确定指定字符依据 Java 标准是否为空白字符 |
public static boolean isWhitespace(int codePoint) | |
public static boolean isISOControl(char ch) | 确定指定字符是否为 ISO 控制字符 |
public static boolean isISOControl(int codePoint) | |
public static int getType(char ch) | 返回一个指示字符的常规类别的值 |
public static int getType(int codePoint) | |
public static char forDigit(int digit, int radix) | 确定使用指定基数的特定数字的字符表示形式 |
public static byte getDirectionality(char ch) | 返回给定字符的 Unicode 方向属性。利用字符方向性来计算文本的视觉顺序 |
public static byte getDirectionality(int codePoint) | |
public static boolean isMirrored(char ch) | 确定指定字符依据 Unicode 规范是否对称 |
public static boolean isMirrored(int codePoint) | |
public int compareTo(Character anotherCharacter) | 根据数字比较两个Character对象 |
public static int compare(char x, char y) | 比较2个char的值 |
static int toUpperCaseEx(int codePoint) | 转换成大写后返回 |
static char[] toUpperCaseCharArray(int codePoint) | 返回大写字符数组 |
public static char reverseBytes(char ch) | 返回通过反转指定char值中的字节顺序而获得的值 |
public static String getName(int codePoint) | 返回指定代码点的Unicode名称 |
/**
 * Holder for the pre-built {@code Character} instances covering the
 * char values 0 through 127 (128 entries in total).
 */
private static class CharacterCache {
    /** One instance per value: slot {@code i} holds the Character for {@code (char) i}. */
    static final Character[] cache = new Character[128];

    static {
        // Fill every slot once, when this holder class is initialized.
        int code = 0;
        while (code < cache.length) {
            cache[code] = new Character((char) code);
            code++;
        }
    }

    // Not instantiable: the class exists only for its static array.
    private CharacterCache() {
    }
}
创建对象时可以通过valueOf来创建,如果是0-127内的字符,则直接从缓存池中返回。
/**
 * Returns a {@code Character} instance for the given char.
 * Values 0-127 are served from {@code CharacterCache.cache}; any larger
 * value gets a freshly allocated object.
 *
 * @param c the char value to box
 * @return the cached instance when {@code c <= 127}, otherwise a new one
 */
public static Character valueOf(char c) {
    final boolean outsideCache = c > 127;
    if (outsideCache) {
        return new Character(c);
    }
    // must cache: the char widens to int implicitly when used as the index
    return CharacterCache.cache[c];
}