HashMap源码-优快云博客

本文链接：https://blog.youkuaiyun.com/qq_44417662/article/details/116140452

0.底层数据结构
在 JDK1.7 中，由“数组+链表”组成，数组是 HashMap 的主体，链表则是主要为了解决哈希冲突而存在的。

在 JDK1.8 中，由“数组+链表+红黑树”组成。当链表过长，则会严重影响 HashMap 的性能，红黑树搜索时间复杂度是 O(logn)，而链表是糟糕的 O(n)。因此，JDK1.8 对数据结构做了进一步的优化，引入了红黑树，链表和红黑树在达到一定条件会进行转换：

当链表长度超过 8 且数组长度（table 容量）大于等于 64 时，才会转为红黑树。
将链表转换成红黑树前会先判断：如果当前数组的长度小于 64，那么会选择先进行数组扩容，而不是转换为红黑树，以减少搜索时间。
1.默认容量

    /**
     * The default initial capacity - MUST be a power of two.
     * Written as a shift ({@code 1 << 4}) so the power-of-two form is explicit.
     */
    static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16

2.加载因子

    /**
  * The load factor used when none specified in constructor.
  * The default value is 0.75.
  */
 static final float DEFAULT_LOAD_FACTOR = 0.75f;

设置初始容量时，应考虑 map 中预计的 entry 数量及其负载因子，以尽量减少 rehash 操作的次数。默认值 0.75 是在空间开销与时间效率之间的一个折中。

3.node


 // Excerpt of the bin node type: only the fields and the constructor are shown here;
 // the Map.Entry accessor methods and the closing brace of the class are not part of this excerpt.
 static class Node<K,V> implements Map.Entry<K,V> {
      // hash stored with the node; final, so it never changes after construction
      final int hash;
      // key of the mapping; final
      final K key;
      // value of the mapping; not final, so it can be reassigned
      V value;
      // reference to another Node, used to chain nodes together (may be null)
      Node<K,V> next;

      // Stores the four arguments into the corresponding fields, nothing else.
      Node(int hash, K key, V value, Node<K,V> next) {
          this.hash = hash;
          this.key = key;
          this.value = value;
          this.next = next;
      }

4.容量计算（tableSizeFor），位运算高效

 static final int tableSizeFor(int cap) {
        int n = cap - 1;
        n |= n >>> 1;
        n |= n >>> 2;
        n |= n >>> 4;
        n |= n >>> 8;
        n |= n >>> 16;
        return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;

5.散列算法
1.7

/**
 * Computes the spread hash used by the 1.7 implementation for key {@code k}.
 *
 * <p>When {@code hashSeed} is non-zero and the key is a {@code String}, the result of
 * {@code sun.misc.Hashing.stringHash32} is returned directly. Otherwise the key's
 * {@code hashCode()} is XOR-ed with the seed and then mixed with four unsigned
 * right shifts (20, 12, 7 and 4 bits).
 *
 * @param k the key; must not be null, since {@code k.hashCode()} is called on it
 * @return the mixed hash value
 */
final int hash(Object k) {
   final int seed = hashSeed;
   if (seed != 0 && k instanceof String) {
       return sun.misc.Hashing.stringHash32((String) k);
   }

   int mixed = seed ^ k.hashCode();
   // first round: fold bits 20 and 12 positions down
   mixed ^= (mixed >>> 20) ^ (mixed >>> 12);
   // second round: fold bits 7 and 4 positions down
   return mixed ^ (mixed >>> 7) ^ (mixed >>> 4);
}

/**
 * Maps a hash value to a bucket index by masking it with {@code length - 1}.
 *
 * @param h      the hash value
 * @param length the table length; the mask only selects a valid index range when
 *               this is a power of two
 * @return {@code h & (length - 1)}
 */
static int indexFor(int h, int length) {
   final int mask = length - 1;
   return mask & h;
}

1.8

    /**
     * Computes the spread hash used by the 1.8 implementation.
     *
     * <p>A null key hashes to 0. For any other key, the upper 16 bits of
     * {@code key.hashCode()} are XOR-ed into the lower 16 bits.
     *
     * @param key the key, may be null
     * @return 0 for a null key, otherwise {@code h ^ (h >>> 16)} where {@code h = key.hashCode()}
     */
    static final int hash(Object key) {
        if (key == null) {
            return 0;
        }
        final int code = key.hashCode();
        return code ^ (code >>> 16);
    }

6.链表改为红黑树

// Walks the chain that starts at p; binCount counts how many iterations have run.
// The loop has no condition and is left only through one of the two breaks below.
for (int binCount = 0; ; ++binCount) {
    // p has no successor: no matching key was found, so append a new node at the tail.
    if ((e = p.next) == null) {
        p.next = newNode(hash, key, value, null);
                        // After appending, hand the bin to treeifyBin once the counter
                        // has reached TREEIFY_THRESHOLD - 1.
                        if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                            treeifyBin(tab, hash);
        break;
    }
    // e has the same hash and an identical or equal key: stop with e referring to
    // the existing node.
    if (e.hash == hash &&
        ((k = e.key) == key || (key != null && key.equals(k))))
        break;
    // advance to the next node
    p = e;
}

7. 1.7 扩容（头插法在多线程下可能产生环形链表，线程不安全；另一方面，put 在多线程下同样不安全）

    /**
     * Moves the contents of this map into a new array of length {@code newCapacity}.
     *
     * <p>If the current table length already equals MAXIMUM_CAPACITY, nothing is moved;
     * the threshold is set to Integer.MAX_VALUE and the method returns.
     *
     * @param newCapacity the length of the new table
     */
    void resize(int newCapacity) {
        Entry[] oldTable = table;
        int oldCapacity = oldTable.length;
        // Cannot grow any further: raise the threshold to Integer.MAX_VALUE instead.
        if (oldCapacity == MAXIMUM_CAPACITY) {
            threshold = Integer.MAX_VALUE;
            return;
        }
 
        Entry[] newTable = new Entry[newCapacity];
        // transfer iterates over the current table field, so it must run before
        // table is reassigned below.
        transfer(newTable, initHashSeedAsNeeded(newCapacity));
        table = newTable;
        // New threshold: capacity * loadFactor, capped at MAXIMUM_CAPACITY + 1.
        threshold = (int)Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1);
    }

    /**
    * Transfers all entries from current table to newTable.
    *
    * Each entry is inserted at the head of its destination bucket, so entries that
    * land in the same new bucket end up in the reverse of their visiting order.
    *
    * @param newTable the destination array
    * @param rehash   when true, each entry's hash is recomputed before it is placed
    */
    void transfer(Entry[] newTable, boolean rehash) {
       int newCapacity = newTable.length;
       for (Entry<K,V> e : table) {
           while(null != e) {
               Entry<K,V> next = e.next;              // (1) save the successor before e.next is overwritten
               if (rehash) {
                   // a null key gets hash 0
                   e.hash = null == e.key ? 0 : hash(e.key);
               }
               int i = indexFor(e.hash, newCapacity); // (2) bucket index in the new table
               e.next = newTable[i];                  // (3) head insertion: e points at the current head of bucket i
               newTable[i] = e;                       // (4) e becomes the new head of bucket i
               e = next;                              // (5) continue with the saved successor
           }
       }
   }

8. 1.8 扩容（尾插法）

 final Node<K,V>[] resize() {
       Node<K,V>[] oldTab = table;
       int oldCap = (oldTab == null) ? 0 : oldTab.length;
       int oldThr = threshold;
       int newCap, newThr = 0;
       if (oldCap > 0) {
           if (oldCap >= MAXIMUM_CAPACITY) {
               threshold = Integer.MAX_VALUE;
               return oldTab;
           }
           else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                    oldCap >= DEFAULT_INITIAL_CAPACITY)                      //注释1
               newThr = oldThr << 1; // double threshold
       }
       else if (oldThr > 0) // initial capacity was placed in threshold
           newCap = oldThr;
       else {               // zero initial threshold signifies using defaults
           newCap = DEFAULT_INITIAL_CAPACITY;
           newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
       }
       if (newThr == 0) {
           float ft = (float)newCap * loadFactor;
           newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                     (int)ft : Integer.MAX_VALUE);
       }
       threshold = newThr;
       @SuppressWarnings({"rawtypes","unchecked"})
           Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
       table = newTab;
       if (oldTab != null) {
           for (int j = 0; j < oldCap; ++j) {                                 //注释2
               Node<K,V> e;
               if ((e = oldTab[j]) != null) {
                   oldTab[j] = null;
                   if (e.next == null)                                        //注释3
                       newTab[e.hash & (newCap - 1)] = e;
                   else if (e instanceof TreeNode)
                       ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                   else { // preserve order
                       Node<K,V> loHead = null, loTail = null;
                       Node<K,V> hiHead = null, hiTail = null;
                       Node<K,V> next;
                       do {
                           next = e.next;
                           if ((e.hash & oldCap) == 0) {                      //注释4
                               if (loTail == null)                            //注释5
                                   loHead = e;
                               else
                                   loTail.next = e;                           //注释6
                               loTail = e;                                    //注释7
                           }
                           else {
                               if (hiTail == null)
                                   hiHead = e;
                               else
                                   hiTail.next = e;
                               hiTail = e;
                           }
                       } while ((e = next) != null);
                       if (loTail != null) {                                  /注释8
                           loTail.next = null;
                           newTab[j] = loHead;
                       }
                       if (hiTail != null) {
                           hiTail.next = null;
                           newTab[j + oldCap] = hiHead;
                       }
                   }
               }
           }
       }
       return newTab;
   }

9. 1.7 put、get

/**
 * Returns the value mapped to {@code key}, or null when no entry is found.
 *
 * <p>A null key is looked up through {@code getForNullKey()}; any other key goes
 * through {@code getEntry(key)}.
 *
 * @param key the key to look up, may be null
 * @return the entry's value, or null when there is no matching entry
 */
public V get(Object key) {
    // null keys take a dedicated lookup path
    if (key == null) {
        return getForNullKey();
    }
    Entry<K,V> found = getEntry(key);
    if (found == null) {
        return null;
    }
    return found.getValue();
}

 /**
  * Looks up the value stored under the null key.
  *
  * <p>Only the chain at {@code table[0]} is searched.
  *
  * @return the value of the first entry in that chain whose key is null, or null
  *         when the map is empty or no such entry exists
  */
 private V getForNullKey() {
     // empty map: nothing to find
     if (size == 0) {
         return null;
     }
     Entry<K,V> cursor = table[0];
     while (cursor != null) {
         if (cursor.key == null) {
             return cursor.value;
         }
         cursor = cursor.next;
     }
     // chain exhausted without a null-key entry
     return null;
 }

 /**
  * Finds the entry for {@code key}.
  *
  * <p>The key's hash (0 for a null key) selects a bucket through {@code indexFor};
  * that bucket's chain is then scanned for an entry with the same hash and an
  * identical or equal key.
  *
  * @param key the key to look up
  * @return the matching entry, or null when the map is empty or nothing matches
  */
 final Entry<K,V> getEntry(Object key) {
     if (size == 0) {
         return null;
     }

     final int keyHash;
     if (key == null) {
         keyHash = 0;
     } else {
         keyHash = hash(key);
     }

     Entry<K,V> node = table[indexFor(keyHash, table.length)];
     while (node != null) {
         // compare the stored hash first; the key is only inspected on a hash match
         if (node.hash == keyHash) {
             Object candidate = node.key;
             if (candidate == key || (key != null && key.equals(candidate))) {
                 return node;
             }
         }
         node = node.next;
     }
     // no entry in this bucket satisfied the condition
     return null;
 }

 /**
  * Computes the spread hash for key {@code k}.
  *
  * <p>With a non-zero {@code hashSeed} and a {@code String} key, the value of
  * {@code sun.misc.Hashing.stringHash32} is returned. In every other case the seed
  * is XOR-ed with {@code k.hashCode()} and the result is mixed with unsigned right
  * shifts of 20, 12, 7 and 4 bits.
  *
  * @param k the key; must not be null, since {@code k.hashCode()} is called on it
  * @return the mixed hash value
  */
 final int hash(Object k) {
     final int seed = hashSeed;
     if (seed != 0 && k instanceof String) {
         return sun.misc.Hashing.stringHash32((String) k);
     }
     int mixed = seed ^ k.hashCode();
     mixed ^= (mixed >>> 20) ^ (mixed >>> 12);
     return mixed ^ (mixed >>> 7) ^ (mixed >>> 4);
 }

 /**
  * Derives the bucket index from a hash value and the table length with a bit mask.
  *
  * @param h      the hash value
  * @param length the table length; expected to be a non-zero power of 2
  * @return {@code h & (length - 1)}
  */
 static int indexFor(int h, int length) {
     final int mask = length - 1;
     return mask & h;
 }

/**
 * Associates {@code value} with {@code key}.
 *
 * <p>The table is inflated first if it is still {@code EMPTY_TABLE}. A null key is
 * delegated to {@code putForNullKey}. Otherwise the key's bucket is scanned: when an
 * entry with the same hash and an identical or equal key exists, its value is
 * replaced; if none exists, {@code modCount} is incremented and a new entry is added.
 *
 * @param key   the key, may be null
 * @param value the value to store
 * @return the value that was replaced, or null when a new entry was added
 */
public V put(K key, V value) {
    // lazily create the table on first use
    if (table == EMPTY_TABLE) {
        inflateTable(threshold);
    }
    if (key == null) {
        return putForNullKey(value);
    }

    final int keyHash = hash(key);
    final int bucket = indexFor(keyHash, table.length);

    Entry<K,V> node = table[bucket];
    while (node != null) {
        Object existing;
        if (node.hash == keyHash && ((existing = node.key) == key || key.equals(existing))) {
            // key already present: overwrite and hand back the previous value
            V previous = node.value;
            node.value = value;
            node.recordAccess(this);
            return previous;
        }
        node = node.next;
    }

    // key not present in the chain: structural modification
    modCount++;
    addEntry(keyHash, key, value, bucket);
    return null;
}


	// Allocates the table; put calls this while the table is still EMPTY_TABLE.
    private void inflateTable(int toSize) {
        // Round the requested size to a power of two (see roundUpToPowerOf2); this is the capacity.
        int capacity = roundUpToPowerOf2(toSize);
        // Threshold = capacity * loadFactor, capped at MAXIMUM_CAPACITY + 1.
        threshold = (int) Math.min(capacity * loadFactor, MAXIMUM_CAPACITY + 1);
        // Create the array.
        table = new Entry[capacity];
        initHashSeedAsNeeded(capacity);
    }

    /**
     * Rounds {@code number} up to a power of two.
     *
     * @param number the requested size
     * @return MAXIMUM_CAPACITY when {@code number >= MAXIMUM_CAPACITY}; 1 when
     *         {@code number <= 1}; otherwise the highest one bit of
     *         {@code (number - 1) << 1}, i.e. the smallest power of two that is
     *         not less than {@code number}
     */
    private static int roundUpToPowerOf2(int number) {
        if (number >= MAXIMUM_CAPACITY) {
            return MAXIMUM_CAPACITY;
        }
        if (number > 1) {
            // (number - 1) << 1 has its top bit exactly at the wanted power of two
            return Integer.highestOneBit((number - 1) << 1);
        }
        return 1;
    }

    /**
     * Stores {@code value} under the null key, which always lives in {@code table[0]}.
     *
     * @param value the value to store
     * @return the previous value when a null-key entry already existed, otherwise null
     */
    private V putForNullKey(V value) {
        Entry<K,V> node = table[0];
        while (node != null) {
            if (node.key == null) {
                // existing null-key entry: overwrite and return what was there before
                V previous = node.value;
                node.value = value;
                node.recordAccess(this);
                return previous;
            }
            node = node.next;
        }
        // no null-key entry yet: count the modification and add one at index 0 with hash 0
        modCount++;
        addEntry(0, null, value, 0);
        return null;
    }
    /**
     * Adds a new entry, growing the table first when required.
     *
     * <p>The table is resized to twice its length when {@code size >= threshold} and
     * the target bucket is already occupied; after a resize the hash and the bucket
     * index are recomputed against the new table.
     *
     * @param hash        the key's hash
     * @param key         the key, may be null
     * @param value       the value
     * @param bucketIndex the index of the bucket the entry belongs to
     */
    void addEntry(int hash, K key, V value, int bucketIndex) {
        final boolean mustGrow = (size >= threshold) && (null != table[bucketIndex]);
        if (mustGrow) {
            resize(2 * table.length);
            // a null key keeps hash 0
            hash = (key == null) ? 0 : hash(key);
            bucketIndex = indexFor(hash, table.length);
        }
        createEntry(hash, key, value, bucketIndex);
    }

    /**
     * Creates a new entry and makes it the head of bucket {@code bucketIndex}; the
     * previous head becomes the new entry's successor. Increments {@code size}.
     *
     * @param hash        the key's hash
     * @param key         the key
     * @param value       the value
     * @param bucketIndex the bucket to insert into
     */
    void createEntry(int hash, K key, V value, int bucketIndex) {
        // head insertion: the old head is passed in as the new entry's next
        table[bucketIndex] = new Entry<>(hash, key, value, table[bucketIndex]);
        size++;
    }

    // Resizing: moves the contents of this map into a new array of length newCapacity.
    void resize(int newCapacity) {
        // Keep a reference to the current array (this is not a copy).
        Entry[] oldTable = table;
        // Remember the current array length.
        int oldCapacity = oldTable.length;
        // Already at MAXIMUM_CAPACITY: set the threshold to Integer.MAX_VALUE and stop.
        if (oldCapacity == MAXIMUM_CAPACITY) {
            threshold = Integer.MAX_VALUE;
            return;
        }
        // Otherwise allocate an array of the new length.
        Entry[] newTable = new Entry[newCapacity];
        // Move the entries of the current array into the new one.
        transfer(newTable, initHashSeedAsNeeded(newCapacity));
        // Point table at the new array.
        table = newTable;
        // Recompute the resize threshold, capped at MAXIMUM_CAPACITY + 1.
        threshold = (int)Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1);
    }

    // Moves every entry of the current table into newTable; when rehash is true each
    // entry's hash is recomputed first.
    void transfer(Entry[] newTable, boolean rehash) {
        // Length of the new array.
        int newCapacity = newTable.length;
        // Iterate over the buckets of the current array.
        for (Entry<K,V> e : table) {
            // Walk the chain until it is exhausted.
            while(null != e) {
                // Save the successor before e.next is overwritten below.
                Entry<K,V> next = e.next;

                if (rehash) {
                    // Recompute the hash; a null key gets 0.
                    e.hash = null == e.key ? 0 : hash(e.key);
                }
                // Bucket index of this entry in the new array.
                int i = indexFor(e.hash, newCapacity);
                // Head insertion: link e in front of the current head of bucket i.
                e.next = newTable[i];
                // e becomes the new head of bucket i.
                newTable[i] = e;
                // Continue with the saved successor.
                e = next;
            }
        }
    }

1.8 的 put 流程：首先根据 key 的值计算 hash 值，找到该元素在数组中存储的下标；

如果数组是空的，则调用 resize 进行初始化；

如果没有哈希冲突直接放在对应的数组下标里；

如果冲突了，且 key 已经存在，就覆盖掉 value；

如果冲突后，发现该节点是红黑树，就将这个节点挂在树上；

如果冲突后是链表，判断该链表的长度是否大于 8，如果大于 8 并且数组容量小于 64，就进行扩容；

如果链表长度大于 8 并且数组的容量大于等于 64，则将这个结构转换为红黑树；

否则，链表插入键值对，若 key 存在，就覆盖掉 value。

Hash函数是指把一个大范围映射到一个小范围，目的往往是为了节省空间，使得数据容易保存。比较出名的有MurmurHash、MD4、MD5等等

一般用 Integer、String 这种不可变类作为 HashMap 的 key，而且 String 最为常用。

因为字符串是不可变的，所以它的 hashCode 在第一次计算之后就会被缓存，之后不需要重新计算。这就是 HashMap 中的键往往都使用字符串的原因。
因为获取对象的时候要用到 equals() 和 hashCode() 方法，那么键对象正确的重写这两个方法是非常重要的,这些类已经很规范的重写了 hashCode() 以及 equals() 方法。