深入解析HashMap的内部实现及使用注意事项-CSDN博客

本文链接：https://blog.csdn.net/wodewutai17quiet/article/details/46044311
package java.util;
import java.io.*;

/**
 * JDK7
 */
public class HashMap<K,V> extends AbstractMap<K,V> implements Map<K,V>, Cloneable, Serializable {

    /**
     * The default initial capacity (16). The capacity MUST be a power of two.
     */
    static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16

    /**
     * The maximum capacity, used if a higher value is implicitly specified by either of the constructors with arguments.
     * MUST be a power of two <= 1<<30.
     */
    static final int MAXIMUM_CAPACITY = 1 << 30;

    /**
     * The load factor (0.75) used when none is specified in the constructor.
     */
    static final float DEFAULT_LOAD_FACTOR = 0.75f;

    /**
     * An empty table instance to share when the table is not inflated.
     */
    static final Entry<?,?>[] EMPTY_TABLE = {};

    /**
     * The bucket array that stores the entries, resized as necessary.
     * Length MUST always be a power of two.
     * Starts out as the shared {@link #EMPTY_TABLE} and is replaced by a real
     * array on the first {@code put} (see {@code inflateTable}).
     */
    transient Entry<K,V>[] table = (Entry<K,V>[]) EMPTY_TABLE;

    /**
     * The number of key-value mappings contained in this map.
     */
    transient int size;

    /**
     * The next size value at which to resize (capacity * load factor).
     * Once {@code size} is greater than or equal to this value, adding an
     * entry may trigger a resize (see {@code addEntry}).
     * <p>
     * Until the table is inflated this field temporarily holds the requested
     * initial capacity instead: the constructor stores {@code initialCapacity}
     * here and {@code put} passes it to {@code inflateTable}. A map created
     * with {@code new HashMap<K, V>()} therefore starts with a threshold of 16.
     * @serial
     */
    int threshold;

    /**
     * The load factor for the hash table.
     * Notes:
     *   1) A larger load factor gives better space utilisation but raises the
     *      chance of collisions, which lengthens the bucket chains and slows
     *      lookups down.
     *   2) A load factor that is too small makes the table grow while many
     *      slots are still unused, wasting space.
     *   3) The default of 0.75 is usually a good choice.
     */
    final float loadFactor;

    /**
     * The number of times this HashMap has been structurally modified
     * Structural modifications are those that change the number of mappings in
     * the HashMap or otherwise modify its internal structure (e.g.,
     * rehash).  This field is used to make iterators on Collection-views of
     * the HashMap fail-fast.  (See ConcurrentModificationException).
     */
    transient int modCount;

	
	// **************************** 1.重要的数据结构 ****************************
	
    /**
     * A node of a bucket chain. Each node only links forward through
     * {@code next}, so every bucket of {@code table} is a singly linked list
     * of entries.
     *
     * @param <K> key type
     * @param <V> value type
     */
    static class Entry<K,V> implements Map.Entry<K,V> {
        final K key;
        V value;
        /** The following node in the same bucket, or {@code null} at the tail. */
        Entry<K,V> next;
        /** Cached hash of {@code key}; not final because {@code transfer} may recompute it. */
        int hash;

        /**
         * Creates a new entry.
         *
         * @param h the hash stored for the key
         * @param k the key (may be {@code null})
         * @param v the value (may be {@code null})
         * @param n the node this entry links to, or {@code null}
         */
        Entry(int h, K k, V v, Entry<K,V> n) {
            value = v;
            next = n;
            key = k;
            hash = h;
        }

        @Override
        public final K getKey() {
            return key;
        }

        @Override
        public final V getValue() {
            return value;
        }

        /**
         * Replaces the value of this entry.
         *
         * @param newValue the value to store
         * @return the value that was stored before the call
         */
        @Override
        public final V setValue(V newValue) {
            V oldValue = value;
            value = newValue;
            return oldValue;
        }

        /**
         * Compares this entry with another object. Two entries are equal when
         * both their keys and their values are equal; {@code null} keys and
         * values compare equal to each other.
         *
         * @param o the object to compare with
         * @return {@code true} if {@code o} is a {@code Map.Entry} with an
         *         equal key and an equal value
         */
        @Override
        public final boolean equals(Object o) {
            if (!(o instanceof Map.Entry)) {
                return false;
            }
            Map.Entry<?,?> other = (Map.Entry<?,?>) o;
            // Values are only compared when the keys already match.
            return Objects.equals(getKey(), other.getKey())
                    && Objects.equals(getValue(), other.getValue());
        }

        /**
         * Returns the XOR of the key's and the value's hash codes, with
         * {@code null} contributing 0.
         */
        @Override
        public final int hashCode() {
            return Objects.hashCode(getKey()) ^ Objects.hashCode(getValue());
        }

        /** Returns the entry formatted as {@code key=value}. */
        @Override
        public final String toString() {
            return getKey() + "=" + getValue();
        }

        /**
         * This method is invoked whenever the value in an entry is
         * overwritten by an invocation of put(k,v) for a key k that's already
         * in the HashMap. It does nothing here.
         */
        void recordAccess(HashMap<K,V> m) {
        }

        /**
         * This method is invoked whenever the entry is
         * removed from the table. It does nothing here.
         */
        void recordRemoval(HashMap<K,V> m) {
        }
    }
	
	// **************************** 重要的数据结构 ****************************
	
	
	// **************************** 2.put、get方法 ****************************
    /**
     * Associates the specified value with the specified key in this map.
     * If the map previously contained a mapping for the key, the old value is
     * replaced and returned.
     *
     * Notes:
     *   1) A collision happens when the new key differs from an existing key
     *      but both map to the same bucket index.
     *   2) Each bucket is a singly linked list. When no matching key is found
     *      the key/value pair is handed to {@code addEntry}, which creates the
     *      new Entry for that bucket.
     *   3) String and the wrapper classes (Integer, ...) are the usual key
     *      types because:
     *      - they are final and override equals() and hashCode();
     *      - a key's hash must not change after insertion, otherwise a later
     *        get(key) looks in the wrong place and cannot find the entry;
     *      - a hashCode() that spreads unequal keys well reduces collisions
     *        and so improves performance;
     *      - immutable keys allow their hash code to be cached, which speeds
     *        up lookups.
     *
     * @param key   key with which the value is to be associated (may be null)
     * @param value value to associate with the key
     * @return the previous value for the key, or {@code null} if there was none
     */
    public V put(K key, V value) {
        // Allocate the real bucket array lazily on first use.
        if (table == EMPTY_TABLE) {
            inflateTable(threshold);
        }

        // Case 1: null key. It is kept in the chain of table[0] (which also
        // holds other keys). An existing null mapping gets its value replaced
        // and the old value is returned; otherwise a new entry is added and
        // null is returned.
        if (key == null) {
            return putForNullKey(value);
        }

        // Case 2: non-null key. The bucket is the element of table at the
        // index derived from the key's hash.
        final int keyHash = hash(key);
        final int bucket = indexFor(keyHash, table.length);

        Entry<K,V> node = table[bucket];
        while (node != null) {
            // A key already exists when the hashes match and the keys are
            // identical or equal.
            if (node.hash == keyHash) {
                Object existing = node.key;
                if (existing == key || key.equals(existing)) {
                    V previous = node.value;
                    node.value = value;
                    node.recordAccess(this);
                    return previous;
                }
            }
            node = node.next;
        }

        // Key not present: this is a structural modification.
        modCount++;
        addEntry(keyHash, key, value, bucket);
        return null;
    }
	
    /**
     * Inflates the table: allocates the bucket array and derives the resize
     * threshold from its capacity.
     *
     * @param toSize requested size; the capacity actually used is the value
     *               returned by {@code roundUpToPowerOf2(toSize)}
     */
    private void inflateTable(int toSize) {
        // Find a power of 2 >= toSize to use as the array capacity.
        final int capacity = roundUpToPowerOf2(toSize);
        final float scaled = capacity * loadFactor;

        // Never let the threshold exceed MAXIMUM_CAPACITY + 1.
        threshold = (int) Math.min(scaled, MAXIMUM_CAPACITY + 1);
        table = new Entry[capacity];
        initHashSeedAsNeeded(capacity);
    }

    /**
     * Stores a mapping for the {@code null} key. The null key is always
     * looked up and stored in bucket 0 with hash 0.
     *
     * @param value the value to associate with the null key
     * @return the value previously mapped to null, or {@code null} if there
     *         was no such mapping
     */
    private V putForNullKey(V value) {
        Entry<K,V> node = table[0];
        while (node != null) {
            if (node.key == null) {
                // Existing null mapping: overwrite in place.
                V previous = node.value;
                node.value = value;
                node.recordAccess(this);
                return previous;
            }
            node = node.next;
        }
        // No null mapping yet: structural modification.
        modCount++;
        addEntry(0, null, value, 0);
        return null;
    }
	
    /**
     * Returns the bucket index for a hash value.
     *
     * Notes:
     *   1) The index is the hash (as produced by hash(key)) masked with
     *      length - 1, i.e. h & (length - 1).
     *   2) length must be a non-zero power of two, because:
     *      - then h & (length - 1) equals h % length for non-negative h, so
     *        the distribution stays uniform while the bit operation is cheaper;
     *      - if length were odd, length - 1 would be even and its lowest bit
     *        0, so the result would always be even and every odd-numbered
     *        slot of the array would be wasted.
     *
     * @param h      the hash value
     * @param length the table length; expected to be a non-zero power of two
     *               (not checked here)
     * @return the index into the table
     */
    static int indexFor(int h, int length) {
        final int mask = length - 1;
        return h & mask;
    }
	
	
    /**
     * Adds a new entry with the specified key, value and hash code to the
     * specified bucket. It is the responsibility of this method to resize the
     * table if appropriate.
     *
     * Notes:
     *   1) The table is doubled first when size has reached the threshold AND
     *      the target bucket is already occupied; the hash and bucket index
     *      are then recomputed for the larger table.
     *   2) Resizing is expensive because every existing entry has to be
     *      re-indexed and moved into the new array.
     *   3) Estimating the number of elements up front and sizing the map
     *      accordingly helps performance.
     *
     * @param hash        hash of the key (0 for a null key)
     * @param key         the key, possibly {@code null}
     * @param value       the value
     * @param bucketIndex index of the bucket in the current table
     */
    void addEntry(int hash, K key, V value, int bucketIndex) {
        final boolean mustGrow = size >= threshold && table[bucketIndex] != null;
        if (mustGrow) {
            resize(2 * table.length);
            // The table changed, so recompute where the key belongs.
            hash = (key == null) ? 0 : hash(key);
            bucketIndex = indexFor(hash, table.length);
        }

        createEntry(hash, key, value, bucketIndex);
    }
	

    /**
     * Returns the value to which the specified key is mapped, or {@code null}
     * if this map contains no mapping for the key.
     *
     * Notes:
     *   1) Different keys can share a bucket (a collision), so a bucket chain
     *      may contain several entries.
     *   2) The lookup walks that chain and uses the stored hash together with
     *      key.equals(...) to pick the matching entry (see {@code getEntry}).
     *
     * @param key the key whose value is wanted (may be null)
     * @return the mapped value, or {@code null} if there is no mapping
     */
    public V get(Object key) {
        if (key == null) {
            return getForNullKey();
        }

        Entry<K,V> found = getEntry(key);
        if (found == null) {
            return null;
        }
        return found.getValue();
    }
	
    /**
     * Offloaded version of get() to look up null keys. Only bucket 0 is
     * searched, which is where {@code putForNullKey} stores the null key.
     *
     * @return the value mapped to the null key, or {@code null} if absent
     */
    private V getForNullKey() {
        // An empty map may still use the zero-length EMPTY_TABLE, so table[0]
        // must not be touched in that case.
        if (size == 0) {
            return null;
        }
        Entry<K,V> node = table[0];
        while (node != null) {
            if (node.key == null) {
                return node.value;
            }
            node = node.next;
        }
        return null;
    }

    /**
     * Returns the entry associated with the specified key in the HashMap.
     *
     * @param key the key to look up; {@code null} is treated as hash 0
     * @return the matching entry, or {@code null} if the map has no mapping
     *         for the key
     */
    final Entry<K,V> getEntry(Object key) {
        // An empty map may still use the zero-length EMPTY_TABLE; bail out
        // before indexing into it.
        if (size == 0) {
            return null;
        }

        int keyHash = 0;
        if (key != null) {
            keyHash = hash(key);
        }

        Entry<K,V> node = table[indexFor(keyHash, table.length)];
        while (node != null) {
            // Compare the cached hash first, then identity, then equals().
            if (node.hash == keyHash) {
                Object candidate = node.key;
                if (candidate == key || (key != null && key.equals(candidate))) {
                    return node;
                }
            }
            node = node.next;
        }
        return null;
    }
	
	
	// **************************** put、get方法 ****************************
	
    /**
     * The default threshold of map capacity above which alternative hashing is
     * used for String keys. Alternative hashing reduces the incidence of
     * collisions due to weak hash code calculation for String keys.
     * NOTE(review): the code that reads this constant is not part of this
     * excerpt; with Integer.MAX_VALUE the threshold is presumably never
     * exceeded by default - confirm against initHashSeedAsNeeded.
     */
    static final int ALTERNATIVE_HASHING_THRESHOLD_DEFAULT = Integer.MAX_VALUE;

    /**
     * Constructs an empty <tt>HashMap</tt> with the specified initial
     * capacity and load factor. No bucket array is allocated here; the
     * requested capacity (capped at MAXIMUM_CAPACITY) is parked in
     * {@code threshold} until the table is inflated.
     *
     * @param  initialCapacity the initial capacity
     * @param  loadFactor      the load factor
     * @throws IllegalArgumentException if the initial capacity is negative
     *         or the load factor is nonpositive (or NaN)
     */
    public HashMap(int initialCapacity, float loadFactor) {
        if (initialCapacity < 0) {
            throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity);
        }
        if (Float.isNaN(loadFactor) || loadFactor <= 0) {
            throw new IllegalArgumentException("Illegal load factor: " + loadFactor);
        }

        this.loadFactor = loadFactor;
        threshold = Math.min(initialCapacity, MAXIMUM_CAPACITY);
        init();
    }

    /**
     * Constructs an empty <tt>HashMap</tt> with the specified initial capacity and the default load factor (0.75).
     *
     * @param  initialCapacity the initial capacity
     * @throws IllegalArgumentException if the initial capacity is negative
     */
    public HashMap(int initialCapacity) {
        this(initialCapacity, DEFAULT_LOAD_FACTOR);
    }

    /**
     * Constructs an empty <tt>HashMap</tt> with the default initial capacity (16) and the default load factor (0.75).
     */
    public HashMap() {
        // The one-argument constructor supplies DEFAULT_LOAD_FACTOR.
        this(DEFAULT_INITIAL_CAPACITY);
    }


    /**
     * Rehashes the contents of this map into a new array with a larger capacity.
     * This method is called automatically when the number of keys in this map reaches its threshold.
     *
     * If current capacity is MAXIMUM_CAPACITY, this method does not resize the map, but sets threshold to Integer.MAX_VALUE.
     * This has the effect of preventing future calls.
     *
     * @param newCapacity the new capacity, MUST be a power of two;
     *        must be greater than current capacity unless current capacity is MAXIMUM_CAPACITY (in which case value is irrelevant).
     */
    void resize(int newCapacity) {
        final int currentCapacity = table.length;
        if (currentCapacity == MAXIMUM_CAPACITY) {
            // Cannot grow any further; make sure the threshold is never hit again.
            threshold = Integer.MAX_VALUE;
            return;
        }

        Entry[] grown = new Entry[newCapacity];
        boolean rehash = initHashSeedAsNeeded(newCapacity);
        transfer(grown, rehash);
        table = grown;

        // Same threshold formula as inflateTable, capped at MAXIMUM_CAPACITY + 1.
        float scaled = newCapacity * loadFactor;
        threshold = (int) Math.min(scaled, MAXIMUM_CAPACITY + 1);
    }

    /**
     * Transfers all entries from the current table to newTable.
     * Each entry is pushed onto the head of its new bucket, so entries that
     * end up in the same bucket appear in reverse order.
     *
     * Note on multi-threaded use:
     *   This method takes no locks. If two threads both decide the map needs
     *   resizing and run transfer at the same time, two entries can end up
     *   pointing at each other (a cyclic chain). Walking that chain never
     *   terminates: this is an infinite loop rather than a true deadlock,
     *   and it typically shows up as a thread pinned at close to 100% CPU.
     *   One way to track such a thread down:
     *     1) find the JVM process id:          ps aux | grep java | grep tomcat-advertise
     *     2) dump the current thread stacks:   jstack -l PID > /data/test/threadDumpFile
     *     3) find the spinning thread:         top -Hp PID   (the thread with very high CPU; id is decimal)
     *     4) convert the thread id to hex:     printf "%x\n" decimalId
     *     5) look that id up in the dump file and read the stack to find the
     *        looping method.
     *
     * @param newTable the destination array; its length is used as the new capacity
     * @param rehash   when {@code true}, each entry's hash is recomputed
     *                 (0 for a null key) before it is placed
     */
    void transfer(Entry[] newTable, boolean rehash) {
        final int targetLength = newTable.length;
        final Entry<K,V>[] source = table;
        for (int b = 0; b < source.length; b++) {
            Entry<K,V> node = source[b];
            while (node != null) {
                // Remember the successor before relinking the node.
                Entry<K,V> successor = node.next;
                if (rehash) {
                    node.hash = (node.key == null) ? 0 : hash(node.key);
                }
                int slot = indexFor(node.hash, targetLength);
                // Head insertion into the destination bucket.
                node.next = newTable[slot];
                newTable[slot] = node;
                node = successor;
            }
        }
    }

	// ...

}
HashMap源码分析