JDK源码阅读——java.lang.Character

最新推荐文章于 2024-07-10 09:18:29 发布

原创最新推荐文章于 2024-07-10 09:18:29 发布 · 1.9k 阅读

1 ·

CC 4.0 BY-SA版权

java.lang 同时被 2 个专栏收录

12 篇文章

订阅专栏

类

3 篇文章

订阅专栏

本文详细探讨了`java.lang.Character`类，包括它的概述、UML类图、源码解析、内部类Subset和UnicodeBlock，以及枚举UnicodeScript的使用。通过实例展示了Character类在处理字符分类和转换上的功能。

类

类
- Character

Character

1.概述

public final class Character extends Object implements Serializable, Comparable<Character>
Character 类在对象中包装一个基本类型 char 的值。Character 类型的对象包含类型为 char 的单个字段。
此外，该类提供了几种方法，以确定字符的类别（小写字母，数字，等等），并将字符从大写转换成小写，反之亦然。

2.此类的UML类图

这里写图片描述

因为 UnicodeScript 是enum 类，所以继承抽象的Enum类。

3.源码解析

源码太多了，就捡几个不太常用的来讲。

/**
 * Determines whether the given code point lies in the Basic Multilingual
 * Plane (BMP), i.e. the range U+0000 to U+FFFF.
 *
 * @param codePoint the code point to test
 * @return {@code true} if no bit above the low 16 bits is set
 */
 public static boolean isBmpCodePoint(int codePoint) {
        // Equivalent to: codePoint >= MIN_VALUE && codePoint <= MAX_VALUE.
        // The logical (unsigned) shift is used consistently so that negative
        // inputs yield a non-zero plane and to help runtime optimizations.
        final int plane = codePoint >>> 16;
        return plane == 0;
    }
    /**
    * 确定使用指定基数的特定数字的字符表示形式。
    *
    */
    public static char forDigit(int digit, int radix) {
        // 只要一个为ture就为true
        if ((digit >= radix) || (digit < 0)) {
            return '\0';
        //    radix<2 或者 radix>36
        if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
            return '\0';
        }
        if (digit < 10) {
            return (char)('0' + digit);
        }
        return (char)('a' - 10 + digit);
    }
    /**
     * Returns a value indicating the general category of a character.
     * For example, 9 is DECIMAL_DIGIT_NUMBER, 2 is LOWERCASE_LETTER,
     * 24 is OTHER_PUNCTUATION and 15 is CONTROL.
     *
     * @param ch the character to classify
     * @return the general-category value reported by the {@code int} overload
     */
     public static int getType(char ch) {
        // Widen the char to a code point and delegate to the int overload.
        final int codePoint = ch;
        return getType(codePoint);
    }

       /**
        * Returns a value indicating the general category of a code point.
        *
        * @param codePoint the code point to classify
        * @return the category value supplied by the matching CharacterData object
        */
       public static int getType(int codePoint) {
        // CharacterData.of(int) selects the CharacterData object responsible
        // for this code point; the actual lookup is delegated to it.
        final CharacterData data = CharacterData.of(codePoint);
        return data.getType(codePoint);
    }
    /**
     * Determines whether the given value is a valid Unicode code point,
     * i.e. lies in the range 0x0000 to 0x10FFFF inclusive.
     *
     * @param codePoint the value to test
     * @return {@code true} if the value's plane is below the first invalid plane
     */
      public static boolean isValidCodePoint(int codePoint) {
        // Equivalent to:
        //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
        // Comparing planes with an unsigned shift also rejects negative values.
        final int firstInvalidPlane = (MAX_CODE_POINT + 1) >>> 16;
        return (codePoint >>> 16) < firstInvalidPlane;
    }
    /**
     * Determines whether the given char value is a low-surrogate code unit,
     * i.e. lies between MIN_LOW_SURROGATE and MAX_LOW_SURROGATE inclusive.
     * (This is a surrogate range test, not a lowercase-letter test.)
     *
     * @param ch the char value to test
     * @return {@code true} if {@code ch} is within the low-surrogate range
     */
  public static boolean isLowSurrogate(char ch) {
      // Per the original note: MIN_LOW_SURROGATE is U+DC00, MAX_LOW_SURROGATE is U+DFFF.
        return MIN_LOW_SURROGATE <= ch && ch <= MAX_LOW_SURROGATE;
    }

还有很多方法，这里就不一一列举了……

4.Character内部类Subset 和UnicodeBlock

Subset源码：

/**
 * A named subset of characters. Instances are compared by identity only:
 * {@code equals} and {@code hashCode} are final and fall back to the
 * {@code Object} behaviour, and {@code toString} yields the subset name.
 */
public static class Subset  {

        /** Name of this subset; never null once constructed. */
        private String name;

        /**
         * Creates a subset with the given name.
         *
         * @param name the subset name
         * @throws NullPointerException if {@code name} is null
         */
        protected Subset(String name) {
            this.name = java.util.Objects.requireNonNull(name, "name");
        }

        /** Two subsets are equal only when they are the very same object. */
        @Override
        public final boolean equals(Object obj) {
            return obj == this;
        }

        /** Identity-based hash code, consistent with {@link #equals(Object)}. */
        @Override
        public final int hashCode() {
            return super.hashCode();
        }

        /** Returns the name given at construction time. */
        @Override
        public final String toString() {
            return this.name;
        }
    }

UnicodeBlock

 public static final class UnicodeBlock extends Subset {

        private static Map<String, UnicodeBlock> map = new HashMap<>(256);
        private UnicodeBlock(String idName) {
            super(idName);
            map.put(idName, this);
        }

        private UnicodeBlock(String idName, String alias) {
            this(idName);
            map.put(alias, this);
        }
        //可变形参  构造方法
        private UnicodeBlock(String idName, String... aliases) {
            this(idName);
            for (String alias : aliases)
                map.put(alias, this);
        }
        public static final UnicodeBlock  BASIC_LATIN =
            new UnicodeBlock("BASIC_LATIN",
                             "BASIC LATIN",
                             "BASICLATIN");

      // 还有很多UnicodeBlock类型的常量省略。。。。。。。
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            //省略很多UnicodeBlock实例
          }
    // 获取 c 所属的UnicodeBlock实例
       public static UnicodeBlock of(char c) {
            return of((int)c);
        }
        public static UnicodeBlock of(int codePoint) {
            //判断codePoint是否有效
            if (!isValidCodePoint(codePoint)) {
                throw new IllegalArgumentException();
            }
           // 二分查找UnicodeBlock
            int top, bottom, current;
            bottom = 0;
            top = blockStarts.length;
            current = top/2;

            // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
            while (top - bottom > 1) {
                if (codePoint >= blockStarts[current]) {
                    bottom = current;
                } else {
                    top = current;
                }
                current = (top + bottom) / 2;
            }
            return blocks[current];
        }
        //  返回带有给定名称的 UnicodeBlock实例。
        public static final UnicodeBlock forName(String blockName) {
            UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
            if (block == null) {
                throw new IllegalArgumentException();
            }
            return block;
        }
    }

5.枚举UnicodeScript

源码不贴上了
有2个方法 public static UnicodeScript of(int codePoint)和 public static final UnicodeScript forName(String scriptName)。
方法作用和UnicodeBlock一样。

6.Character 简单运用

  /**
   * Decides whether a character is Chinese by checking which Unicode block
   * it belongs to.
   *
   * NOTE(review): the Extension B/C/D and Compatibility Ideographs Supplement
   * blocks appear to lie outside the BMP, so a single {@code char} presumably
   * never maps to them - confirm against the Unicode block charts.
   *
   * @param c the character to test
   * @return {@code true} if the character's block is one of the listed CJK
   *         ideograph blocks
   */
    public boolean isChineseUBynicodeBlock(char c) {
        final Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
        return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT;
    }

    /**
     * Decides whether a character is Chinese by checking its Unicode script.
     *
     * @param c the character to test
     * @return {@code true} if the character's script is
     *         {@code Character.UnicodeScript.HAN}
     */
    public boolean isChineseByUnicodeScript(char c) {
        final Character.UnicodeScript script = Character.UnicodeScript.of(c);
        return script == Character.UnicodeScript.HAN;
    }
    /**
     * Attempts to detect Chinese characters via {@code Character.getType()}.
     *
     * The check passes for every character whose general category is
     * {@code Character.OTHER_LETTER} (value 5). That category is not limited
     * to Chinese ideographs - for example Japanese kana fall into it as
     * well - so this is only a rough heuristic, not a reliable test.
     *
     * @param c the character to test
     * @return {@code true} if the character's general category is
     *         {@code OTHER_LETTER}
     */
    public boolean isGetType(char c){
        // Named constant instead of the magic number 5.
        return Character.getType(c) == Character.OTHER_LETTER;
    }