String源码分析

最新推荐文章于 2024-03-01 20:15:58 发布

原创最新推荐文章于 2024-03-01 20:15:58 发布 · 414 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#string #源码

Java源码分析专栏收录该内容

1 篇文章

订阅专栏

类定义

public final class String
    implements java.io.Serializable, Comparable<String>, CharSequence

String 类为final类
- 不可被继承，所有方法隐式的指定为final的
- 不可变：内部字符数组 value 不对外暴露，且类中没有提供修改它的方法（不可变性来自这种设计，而不是来自类上的 final 关键字）
实现的接口：java.io.Serializable, Comparable, CharSequence
- java.io.Serializable : 类通过实现 java.io.Serializable 接口以启用其序列化功能。未实现此接口的类将无法使其任何状态序列化或反序列化。可序列化类的所有子类型本身都是可序列化的。序列化接口没有方法或字段，仅用于标识可序列化的语义。
- Comparable :此接口强行对实现它的每个类的对象进行整体排序。这种排序被称为类的自然排序，类的 compareTo 方法被称为它的自然比较方法。
- CharSequence :是 char 值的一个可读序列。此接口对许多不同种类的 char 序列提供统一的只读访问。

构造方法

/**
 * Creates an empty string: the backing character array has length zero.
 */
public String() {
    this.value = new char[] {};
}

public String(byte bytes[], int offset, int length, Charset charset) {
        if (charset == null)
            throw new NullPointerException("charset");
        checkBounds(bytes, offset, length);
        //设置字符串的编码格式等属性
        this.value =  StringCoding.decode(charset, bytes, offset, length);
    }

/**
 * Creates a string holding the same characters as the given array.
 *
 * <p>Assigning one array variable to another can mean two things: both
 * variables end up referring to the same array, or every element is copied
 * into a new array. This constructor does the latter, so later writes to the
 * caller's array do not reach this string.
 *
 * @param value the characters to copy
 */
public String(char[] value) {
    this.value = value.clone();
}

String对“+”的重载

// Example: concatenating a String variable and a literal with the "+" operator.
public static void main(String[] args) {
    String string="hello";
    // One operand is a variable, so this is not a compile-time constant expression.
    String string2 = string + "world";
}

反编译：

// Decompiled listing of a "+" concatenation of a String variable and a literal.
public static void main(String args[]){
   String string = "hello";
   // The "+" is expressed as: String.valueOf(string) seeds a StringBuilder,
   // append adds the literal, and toString produces the resulting String.
   String string2 = (new StringBuilder(String.valueOf(string))).append("world").toString();
}

重载的过程利用了 String.valueOf 方法以及 StringBuilder 的构造方法、append 方法和 toString 方法

方法

/**
 * Compares this string with another one, char by char.
 *
 * @param anotherString the string to compare against
 * @return the difference between the first pair of chars that differ; if no
 *         such pair exists within the shorter length, the difference between
 *         the two lengths
 */
public int compareTo(String anotherString) {
    final char[] mine = value;
    final char[] theirs = anotherString.value;
    final int myLength = mine.length;
    final int theirLength = theirs.length;
    final int shared = Math.min(myLength, theirLength);

    for (int pos = 0; pos < shared; pos++) {
        final int diff = mine[pos] - theirs[pos];
        if (diff != 0) {
            return diff;
        }
    }
    // All shared positions are equal, so the lengths decide.
    return myLength - theirLength;
}

/**
 * Concatenates the given string to the end of this one.
 *
 * @param str the string to append
 * @return this same object when {@code str} is empty, otherwise a new string
 *         holding this string's chars followed by the chars of {@code str}
 */
public String concat(String str) {
    final int appended = str.length();
    if (appended != 0) {
        final int ownLength = value.length;
        // Copy our chars into an array that already has room for the tail.
        final char[] joined = Arrays.copyOf(value, ownLength + appended);
        // Let str write its chars directly after ours.
        str.getChars(joined, ownLength);
        // Wrap the array in a new String object (the meaning of the boolean
        // flag is defined by that constructor, which is not shown here).
        return new String(joined, true);
    }
    return this;
}

/**
 * Copies every char of this string into {@code dst}, starting at
 * {@code dstBegin}.
 *
 * @param dst      destination array
 * @param dstBegin index in {@code dst} that receives the first char
 */
void getChars(char[] dst, int dstBegin) {
    final char[] chars = value;
    System.arraycopy(chars, 0, dst, dstBegin, chars.length);
}

/**
 * Compares this string to the given {@code CharSequence}. The result is
 * {@code true} if and only if both represent the same sequence of char values.
 *
 * @param cs the sequence to compare with
 * @return {@code true} when the contents are identical
 */
public boolean contentEquals(CharSequence cs) {
    // StringBuffer / StringBuilder: compare against their backing array.
    if (cs instanceof AbstractStringBuilder) {
        final AbstractStringBuilder builder = (AbstractStringBuilder) cs;
        if (!(cs instanceof StringBuffer)) {
            return nonSyncContentEquals(builder);
        }
        // A StringBuffer is compared while holding its monitor.
        synchronized (cs) {
            return nonSyncContentEquals(builder);
        }
    }
    // Another String: plain equals does the job.
    if (cs instanceof String) {
        return equals(cs);
    }
    // Any other CharSequence: compare through its public accessors.
    final char[] chars = value;
    final int count = chars.length;
    if (count != cs.length()) {
        return false;
    }
    int idx = 0;
    while (idx < count) {
        if (chars[idx] != cs.charAt(idx)) {
            return false;
        }
        idx++;
    }
    return true;
}

 /**
  * Checks whether the builder holds exactly the chars of this string.
  * Takes no lock itself.
  *
  * @param sb the builder whose content is compared
  * @return {@code true} when the lengths and all chars match
  */
 private boolean nonSyncContentEquals(AbstractStringBuilder sb) {
    final char[] own = value;
    final char[] other = sb.getValue();
    final int count = own.length;
    if (count != sb.length()) {
        return false;
    }
    int idx = 0;
    while (idx < count) {
        if (own[idx] != other[idx]) {
            return false;
        }
        idx++;
    }
    return true;
}

/**
 * Overrides {@code Object.equals}: two strings are equal when they have the
 * same length and the same char at every position.
 *
 * @param anObject the object to compare with
 * @return {@code true} only if {@code anObject} is a String with the same chars
 */
public boolean equals(Object anObject) {
    if (this == anObject) {
        return true;
    }
    // Anything that is not a String (including null) can never be equal.
    if (!(anObject instanceof String)) {
        return false;
    }
    final char[] own = value;
    final char[] other = ((String) anObject).value;
    final int count = own.length;
    if (count != other.length) {
        return false;
    }
    for (int idx = 0; idx < count; idx++) {
        if (own[idx] != other[idx]) {
            return false;
        }
    }
    return true;
}

/**
 * Tests whether the substring of this string beginning at {@code toffset}
 * starts with the given prefix.
 *
 * @param prefix  the prefix to look for
 * @param toffset index in this string where the comparison begins
 * @return {@code false} when {@code toffset} is negative or the prefix does
 *         not fit; otherwise whether every prefix char matches
 */
public boolean startsWith(String prefix, int toffset) {
    final char[] text = value;
    final char[] wanted = prefix.value;
    final int wantedLength = wanted.length;
    // Kept as a subtraction on the right-hand side: toffset may be close to
    // Integer.MAX_VALUE (-1>>>1), and adding to it could overflow.
    if (toffset < 0 || toffset > text.length - wantedLength) {
        return false;
    }
    for (int idx = 0; idx < wantedLength; idx++) {
        if (text[toffset + idx] != wanted[idx]) {
            return false;
        }
    }
    return true;
}

/**
 * Compares this string to another one, ignoring case differences.
 *
 * @param anotherString the string to compare with; may be {@code null}
 * @return {@code true} for the same object, or for a non-null string of the
 *         same length whose whole region matches case-insensitively
 */
public boolean equalsIgnoreCase(String anotherString) {
    if (this == anotherString) {
        return true;
    }
    if (anotherString == null) {
        return false;
    }
    if (anotherString.value.length != value.length) {
        return false;
    }
    // Lengths agree: compare the two full regions with case ignored.
    return regionMatches(true, 0, anotherString, 0, value.length);
}

/**
 * Compares a region of this string with a region of {@code other}.
 * The region of this string starts at {@code toffset}, the region of
 * {@code other} starts at {@code ooffset}, and both have length {@code len}.
 * The result is {@code true} when both regions hold the same char sequence;
 * case is ignored if and only if {@code ignoreCase} is {@code true}.
 *
 * @param ignoreCase whether case differences are tolerated
 * @param toffset    start of the region in this string
 * @param other      the string providing the second region
 * @param ooffset    start of the region in {@code other}
 * @param len        number of chars to compare
 * @return {@code false} if an offset is negative, a region does not fit, or
 *         some pair of chars differs; {@code true} otherwise
 */
public boolean regionMatches(boolean ignoreCase, int toffset,
            String other, int ooffset, int len) {
    final char[] own = value;
    final char[] theirs = other.value;
    final boolean negativeOffset = (ooffset < 0) || (toffset < 0);
    // "offset > length - len" means offset + len would run past the end;
    // the subtraction is done in long so it cannot overflow.
    final boolean ownTooShort = toffset > (long) own.length - len;
    final boolean otherTooShort = ooffset > (long) theirs.length - len;
    if (negativeOffset || ownTooShort || otherTooShort) {
        return false;
    }
    for (int step = 0; step < len; step++) {
        final char c1 = own[toffset + step];
        final char c2 = theirs[ooffset + step];
        if (c1 == c2) {
            continue;
        }
        if (!ignoreCase) {
            return false;
        }
        // Case-insensitive mode: first compare the upper-case forms.
        final char u1 = Character.toUpperCase(c1);
        final char u2 = Character.toUpperCase(c2);
        if (u1 == u2) {
            continue;
        }
        // Extra lower-case comparison, kept for alphabets with special case
        // rules (the original comment names the Georgian alphabet).
        if (Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
            return false;
        }
    }
    return true;
}

/**
 * Finds the first occurrence of a character at or after {@code fromIndex}.
 *
 * @param ch        a character (Unicode code point)
 * @param fromIndex index to start searching from; negative values are
 *                  treated as 0
 * @return index of the first occurrence, or -1 if there is none or
 *         {@code fromIndex} is not smaller than the length
 */
public int indexOf(int ch, int fromIndex) {
    final int length = value.length;
    if (fromIndex >= length) {
        return -1;
    }
    final int start = Math.max(fromIndex, 0);
    // Values at or above MIN_SUPPLEMENTARY_CODE_POINT are delegated to
    // indexOfSupplementary (defined elsewhere).
    if (ch >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
        return indexOfSupplementary(ch, start);
    }
    // Everything below that threshold is compared against single chars.
    final char[] chars = this.value;
    for (int idx = start; idx < length; idx++) {
        if (chars[idx] == ch) {
            return idx;
        }
    }
    return -1;
}

/**
 * Brute-force (BF) substring search.
 *
 * <p>The idea: align the pattern with a position of the source and compare
 * char by char; on a mismatch, move the pattern one position to the right and
 * start again. Worst-case time is O(M*N).
 *
 * @param source       chars being searched
 * @param sourceOffset offset of the source string inside {@code source}
 * @param sourceCount  length of the source string
 * @param target       chars being searched for
 * @param targetOffset offset of the target string inside {@code target}
 * @param targetCount  length of the target string
 * @param fromIndex    index (relative to the source string) to start from
 * @return index of the first match relative to the source string, or -1
 */
static int indexOf(char[] source, int sourceOffset, int sourceCount,
            char[] target, int targetOffset, int targetCount,
            int fromIndex) {
    // Start is at or past the end: only an empty target can still "match".
    if (fromIndex >= sourceCount) {
        return (targetCount == 0) ? sourceCount : -1;
    }
    final int begin = Math.max(fromIndex, 0);
    // An empty target matches right at the starting position.
    if (targetCount == 0) {
        return begin;
    }

    final char head = target[targetOffset];
    // Last array index where a match can still begin; beyond it the
    // remaining source is shorter than the target.
    final int lastStart = sourceOffset + (sourceCount - targetCount);

    int pos = sourceOffset + begin;
    while (pos <= lastStart) {
        // Advance to the next occurrence of the target's first char.
        while (pos <= lastStart && source[pos] != head) {
            pos++;
        }
        if (pos > lastStart) {
            break;
        }
        // First char matched; count how many further chars agree.
        int matched = 1;
        while (matched < targetCount
                && source[pos + matched] == target[targetOffset + matched]) {
            matched++;
        }
        if (matched == targetCount) {
            // Whole target found; report its position relative to the source string.
            return pos - sourceOffset;
        }
        pos++;
    }
    return -1;
}

碰到的问题：为什么不用KMP、BM而用BF算法
回答：
The more advanced string search algorithms have a non-trivial setup time. If you are doing a once-off string search involving a not-too-large target string, you will find that you spend more time on the setup than you save during the string search.
And even just testing the lengths of the target and search string is not going to give a good answer as to whether it is “worth it” to use an advanced algorithm. The actual speedup you get from (say) Boyer-Moore depends on the values of the strings; i.e. the character patterns.
The Java implementors have taken the pragmatic approach. They cannot guarantee that an advanced algorithm will give better performance, either on average, or for specific inputs. Therefore they have left it to the programmer to deal with … where necessary.
FWIW, I’m not aware of any other mainstream programming language that uses BM, etc in their runtime library’s standard “string find” functionality.

/**
 * Substring search using the Knuth-Morris-Pratt (KMP) algorithm, offered as
 * an alternative to the brute-force version. It takes the same arguments and
 * is meant to return the same results.
 *
 * <p>Fixes over the previous draft of this method: {@code fromIndex} is now
 * honoured by the scan, {@code targetOffset} is applied while building the
 * failure table, and the first target char is actually compared (the old
 * code skipped it, so e.g. "xb" was reported at index 0 of "abc").
 *
 * @param source       chars being searched
 * @param sourceOffset offset of the source string inside {@code source}
 * @param sourceCount  length of the source string
 * @param target       chars being searched for
 * @param targetOffset offset of the target string inside {@code target}
 * @param targetCount  length of the target string
 * @param fromIndex    index (relative to the source string) to start from;
 *                     negative values are treated as 0
 * @return index of the first match relative to the source string, or -1
 */
static int indexOf(char[] source, int sourceOffset, int sourceCount,
            char[] target, int targetOffset, int targetCount, int fromIndex) {
    if (fromIndex >= sourceCount) {
        return (targetCount == 0 ? sourceCount : -1);
    }
    if (fromIndex < 0) {
        fromIndex = 0;
    }
    if (targetCount == 0) {
        return fromIndex;
    }
    // The target cannot fit into what is left of the source.
    if (targetCount > sourceCount - fromIndex) {
        return -1;
    }

    // fail[k] = length of the longest proper prefix of target[0..k] that is
    // also a suffix of it (indices relative to targetOffset).
    final int[] fail = new int[targetCount];
    int prefixLen = 0;
    for (int k = 1; k < targetCount; k++) {
        final char ch = target[targetOffset + k];
        while (prefixLen > 0 && ch != target[targetOffset + prefixLen]) {
            prefixLen = fail[prefixLen - 1];
        }
        if (ch == target[targetOffset + prefixLen]) {
            prefixLen++;
        }
        fail[k] = prefixLen;
    }

    // Scan the source once; 'matched' is the number of target chars that
    // currently match the source text ending just before position i.
    final int end = sourceOffset + sourceCount;
    int matched = 0;
    for (int i = sourceOffset + fromIndex; i < end; i++) {
        final char ch = source[i];
        while (matched > 0 && ch != target[targetOffset + matched]) {
            matched = fail[matched - 1];
        }
        if (ch == target[targetOffset + matched]) {
            matched++;
        }
        if (matched == targetCount) {
            // i is the last char of the match; convert to a start index
            // relative to the source string.
            return i - sourceOffset - targetCount + 1;
        }
    }
    return -1;
}

/**
 * Searches backwards for the last occurrence of the target string that
 * starts at or before {@code fromIndex}.
 *
 * @param source       chars being searched
 * @param sourceOffset offset of the source string inside {@code source}
 * @param sourceCount  length of the source string
 * @param target       chars being searched for
 * @param targetOffset offset of the target string inside {@code target}
 * @param targetCount  length of the target string
 * @param fromIndex    highest start index (relative to the source string)
 *                     to consider
 * @return start index of the match relative to the source string, or -1
 */
static int lastIndexOf(char[] source, int sourceOffset, int sourceCount,
            char[] target, int targetOffset, int targetCount,
            int fromIndex) {
    // Argument checks; return immediately where possible. For consistency,
    // null arrays are not checked.
    if (fromIndex < 0) {
        return -1;
    }
    // A match cannot start further right than this.
    final int rightmostStart = sourceCount - targetCount;
    final int startLimit = Math.min(fromIndex, rightmostStart);
    // The empty string always matches.
    if (targetCount == 0) {
        return startLimit;
    }

    final int targetLast = targetOffset + targetCount - 1;
    final char lastChar = target[targetLast];
    // Smallest array index at which the last char of a match can sit.
    final int lowest = sourceOffset + targetCount - 1;

    for (int tail = lowest + startLimit; tail >= lowest; tail--) {
        if (source[tail] != lastChar) {
            continue;
        }
        // Last char matches; walk both arrays backwards over the rest.
        final int before = tail - targetCount;
        int s = tail - 1;
        int t = targetLast - 1;
        while (s > before && source[s] == target[t]) {
            s--;
            t--;
        }
        if (s == before) {
            return before - sourceOffset + 1;
        }
    }
    return -1;
}

/**
 * Removes leading and trailing chars whose value is {@code <= ' '}.
 *
 * @return this same object when nothing has to be removed, otherwise the
 *         substring between the first and last remaining chars
 */
public String trim() {
    final char[] chars = value;    // local copy avoids repeated field reads
    int start = 0;
    int end = chars.length;

    // Skip leading chars up to and including the space character.
    while (start < end && chars[start] <= ' ') {
        start++;
    }
    // Then drop trailing ones, never crossing the start index.
    while (start < end && chars[end - 1] <= ' ') {
        end--;
    }
    if (start <= 0 && end >= value.length) {
        return this;
    }
    return substring(start, end);
}