A note on versions: this source walkthrough is based on JDK 1.8; be aware of differences across versions.
I had originally planned to keep working on the Tomcat source-reading series, but producing anything worthwhile there takes a great deal of time. So, in the gaps between reading Tomcat source, I am putting together this source-reading series on the Java collections framework, which also serves as a review of the fundamentals.
1. Key Fields and Constructors
/**
* The table, initialized on first use, and resized as
* necessary. When allocated, length is always a power of two.
* (We also tolerate length zero in some operations to allow
* bootstrapping mechanics that are currently not needed.)
*/
transient Node<K,V>[] table;// the bucket array that holds the entries
/**
* Holds cached entrySet(). Note that AbstractMap fields are used
* for keySet() and values().
*/
transient Set<Map.Entry<K,V>> entrySet;
/**
* The number of key-value mappings contained in this map.
*/
transient int size;// the number of key-value mappings (not the number of used buckets)
/**
* The number of times this HashMap has been structurally modified
* Structural modifications are those that change the number of mappings in
* the HashMap or otherwise modify its internal structure (e.g.,
* rehash). This field is used to make iterators on Collection-views of
* the HashMap fail-fast. (See ConcurrentModificationException).
*/
transient int modCount;// how many times the HashMap has been structurally modified
/**
* The next size value at which to resize (capacity * load factor).
*
* @serial
*/
// (The javadoc description is true upon serialization.
// Additionally, if the table array has not been allocated, this
// field holds the initial array capacity, or zero signifying
// DEFAULT_INITIAL_CAPACITY.)
// Before table is initialized, threshold holds the initial capacity of table
// (rounded up to a power of two). After table is initialized, threshold holds
// the size at which the next resize is triggered.
int threshold;
/**
* The load factor for the hash table.
*
* @serial
*/
final float loadFactor;// the load factor; the default is 0.75f
Constructors
// With the no-arg constructor, the initial capacity is set to the default (16) lazily, on the first put
public HashMap(int initialCapacity) {
this(initialCapacity, DEFAULT_LOAD_FACTOR);
}
public HashMap(int initialCapacity, float loadFactor) {
if (initialCapacity < 0)
throw new IllegalArgumentException("Illegal initial capacity: " +
initialCapacity);
if (initialCapacity > MAXIMUM_CAPACITY)
initialCapacity = MAXIMUM_CAPACITY;
if (loadFactor <= 0 || Float.isNaN(loadFactor))
throw new IllegalArgumentException("Illegal load factor: " +
loadFactor);
this.loadFactor = loadFactor;
// Before table is initialized, threshold holds the initial capacity
// (rounded up to a power of two); afterwards it holds the resize trigger.
// tableSizeFor() rounds the requested capacity up to a power of two here;
// on the first put, threshold is used as the initial table size, and a new
// threshold is then computed from it.
this.threshold = tableSizeFor(initialCapacity);
}
static final int tableSizeFor(int cap) {
int n = cap - 1;
n |= n >>> 1;
n |= n >>> 2;
n |= n >>> 4;
n |= n >>> 8;
n |= n >>> 16;
return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
}
The element type of the table array – Node
static class Node<K,V> implements Map.Entry<K,V> {
final int hash;// the key's hash, after perturbation by hash()
final K key;
V value;
Node<K,V> next;// the next node in the same bucket
Node(int hash, K key, V value, Node<K,V> next) {
this.hash = hash;
this.key = key;
this.value = value;
this.next = next;
}
public final K getKey() { return key; }
public final V getValue() { return value; }
public final String toString() { return key + "=" + value; }
public final int hashCode() {
return Objects.hashCode(key) ^ Objects.hashCode(value);
}
public final V setValue(V newValue) {
V oldValue = value;
value = newValue;
return oldValue;
}
public final boolean equals(Object o) {
if (o == this)
return true;
if (o instanceof Map.Entry) {
Map.Entry<?,?> e = (Map.Entry<?,?>)o;
if (Objects.equals(key, e.getKey()) &&
Objects.equals(value, e.getValue()))
return true;
}
return false;
}
}
2. put()
public V put(K key, V value) {
return putVal(hash(key), key, value, false, true);
}
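putVal() receives the key's perturbed hash. For reference, hash() in JDK 1.8 XORs the high 16 bits of hashCode() into the low 16 bits, so the high bits still influence the bucket index even while the table is small (a null key hashes to 0 and therefore always lands in bucket 0):
static final int hash(Object key) {
    int h;
    return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}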
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
boolean evict) {
Node<K,V>[] tab; Node<K,V> p; int n, i;
// if table has not been allocated yet, initialize it first via resize()
if ((tab = table) == null || (n = tab.length) == 0)
n = (tab = resize()).length;
// the bucket index is (n - 1) & hash, which equals hash mod the table length,
// because the length is always a power of two;
// if that bucket is empty, place a brand-new node there
if ((p = tab[i = (n - 1) & hash]) == null)
tab[i] = newNode(hash, key, value, null);
else {
Node<K,V> e; K k;
// the head node's key equals the given key; remember it and overwrite below if needed
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
e = p;
// red-black tree bin; tree handling is not covered in detail in this article
else if (p instanceof TreeNode)
e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
// otherwise walk the chain and append the new node at the tail
else {
for (int binCount = 0; ; ++binCount) {
if ((e = p.next) == null) {
p.next = newNode(hash, key, value, null);
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
treeifyBin(tab, hash);
break;
}
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
break;
p = e;
}
}
// e != null means a node with an equal key was found above,
// so decide whether its value should be overwritten.
// Overwriting a value does not change the structure, so modCount is not incremented.
if (e != null) { // existing mapping for key
V oldValue = e.value;
if (!onlyIfAbsent || oldValue == null)
e.value = value;
// empty hook method, overridden by LinkedHashMap
afterNodeAccess(e);
return oldValue;
}
}
// the chain or tree structure was modified, so increment the modification count
++modCount;
// check whether a resize needs to be triggered
if (++size > threshold)
resize();
// empty hook method, overridden by LinkedHashMap
afterNodeInsertion(evict);
return null;
}
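As the code shows, put() returns the value previously associated with the key, or null if the key was absent. A quick usage example:
import java.util.HashMap;
public class PutReturnDemo {
    public static void main(String[] args) {
        HashMap<String, Integer> map = new HashMap<>();
        System.out.println(map.put("a", 1)); // null: no previous mapping for "a"
        System.out.println(map.put("a", 2)); // 1: the old value, now overwritten
        System.out.println(map.get("a"));    // 2
    }
}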
final Node<K,V>[] resize() {
Node<K,V>[] oldTab = table;
int oldCap = (oldTab == null) ? 0 : oldTab.length;
int oldThr = threshold;
int newCap, newThr = 0;
if (oldCap > 0) {
if (oldCap >= MAXIMUM_CAPACITY) {
threshold = Integer.MAX_VALUE;
return oldTab;
}
else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
oldCap >= DEFAULT_INITIAL_CAPACITY)
// the capacity doubles each time, so the threshold doubles too
newThr = oldThr << 1; // double threshold
}
else if (oldThr > 0) // initial capacity was placed in threshold
newCap = oldThr;
// table is still in its initial state and the map was built with the no-arg constructor
else { // zero initial threshold signifies using defaults
newCap = DEFAULT_INITIAL_CAPACITY;
// threshold = capacity * load factor
newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
}
if (newThr == 0) {
float ft = (float)newCap * loadFactor;
newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
(int)ft : Integer.MAX_VALUE);
}
threshold = newThr;
@SuppressWarnings({"rawtypes","unchecked"})
Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
table = newTab;
// if the old table is non-empty, its entries must be migrated
if (oldTab != null) {
for (int j = 0; j < oldCap; ++j) {
Node<K,V> e;
if ((e = oldTab[j]) != null) {
oldTab[j] = null;
if (e.next == null)
newTab[e.hash & (newCap - 1)] = e;
else if (e instanceof TreeNode)
((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
else { // preserve order
// head/tail of the "low" chain, whose nodes keep their old index
Node<K,V> loHead = null, loTail = null;
// head/tail of the "high" chain, whose nodes move to a new index
Node<K,V> hiHead = null, hiTail = null;
Node<K,V> next;
do {
next = e.next;
// Because the table length is always a power of two,
// e.hash & oldCap is either 0 or oldCap,
// and since the capacity exactly doubles it follows that:
// if e.hash & oldCap == 0,
//   then e.hash & (oldCap - 1) == e.hash & (newCap - 1),
//   i.e. the node's index is unchanged by the migration;
// if e.hash & oldCap == oldCap,
//   then e.hash & (newCap - 1) == (e.hash & (oldCap - 1)) + oldCap,
//   i.e. newIndex == oldIndex + oldCap.
if ((e.hash & oldCap) == 0) {
if (loTail == null)
loHead = e;
else
loTail.next = e;
loTail = e;
}
else {
if (hiTail == null)
hiHead = e;
else
hiTail.next = e;
hiTail = e;
}
} while ((e = next) != null);
if (loTail != null) {
loTail.next = null;
newTab[j] = loHead;
}
if (hiTail != null) {
hiTail.next = null;
newTab[j + oldCap] = hiHead;
}
}
}
}
}
return newTab;
}
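The index relationship described in those comments is easy to verify with plain arithmetic; a minimal sketch with arbitrarily chosen hash values:
public class SplitDemo {
    public static void main(String[] args) {
        int oldCap = 16, newCap = 32;
        for (int hash : new int[] {5, 21, 37, 53}) {
            int oldIdx = hash & (oldCap - 1);
            int newIdx = hash & (newCap - 1);
            // hash & oldCap == 0  -> index unchanged      (e.g. 5  -> 5)
            // hash & oldCap == 16 -> newIdx = oldIdx + 16 (e.g. 5  -> 21)
            System.out.printf("hash=%d oldIdx=%d newIdx=%d hiBit=%d%n",
                    hash, oldIdx, newIdx, hash & oldCap);
        }
    }
}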
// Callbacks to allow LinkedHashMap post-actions
void afterNodeAccess(Node<K,V> p) { }
void afterNodeInsertion(boolean evict) { }
void afterNodeRemoval(Node<K,V> p) { }
As a digression, here is the key resize (transfer) logic from JDK 1.7:
void transfer(Entry[] newTable, boolean rehash) {
int newCapacity = newTable.length;
for (Entry<K,V> e : table) {
while(null != e) {
Entry<K,V> next = e.next;
if (rehash) {
e.hash = null == e.key ? 0 : hash(e.key);
}
int i = indexFor(e.hash, newCapacity);
e.next = newTable[i];
newTable[i] = e;
e = next;
}
}
}
Note that transfer() re-inserts each node at the head of its new bucket, which reverses the order of every chain (JDK 1.8, as shown above, appends at the tail and preserves order). Under concurrent resizing this reversal can leave two nodes pointing at each other, after which a get() on that bucket loops forever. A single-threaded sketch of the reversal follows.
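This sketch uses a hypothetical minimal Entry class purely to show how head insertion reverses a chain; it is not the JDK code:
public class TransferOrderDemo {
    static class Entry {
        int v; Entry next;
        Entry(int v, Entry next) { this.v = v; this.next = next; }
    }
    public static void main(String[] args) {
        // old chain: 1 -> 2 -> 3
        Entry e = new Entry(1, new Entry(2, new Entry(3, null)));
        Entry newHead = null;
        while (e != null) {            // same loop shape as transfer()
            Entry next = e.next;
            e.next = newHead;          // head insertion into the new bucket
            newHead = e;
            e = next;
        }
        for (Entry n = newHead; n != null; n = n.next)
            System.out.print(n.v + " "); // prints: 3 2 1 (order reversed)
    }
}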
3. remove()
public V remove(Object key) {
Node<K,V> e;
return (e = removeNode(hash(key), key, null, false, true)) == null ?
null : e.value;
}
final Node<K,V> removeNode(int hash, Object key, Object value,
boolean matchValue, boolean movable) {
Node<K,V>[] tab; Node<K,V> p; int n, index;
if ((tab = table) != null && (n = tab.length) > 0 &&
(p = tab[index = (n - 1) & hash]) != null) {
Node<K,V> node = null, e; K k; V v;
// the head node of the bucket matches directly
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
node = p;
// otherwise search the tree or walk the rest of the chain
else if ((e = p.next) != null) {
if (p instanceof TreeNode)
node = ((TreeNode<K,V>)p).getTreeNode(hash, key);
else {
do {
if (e.hash == hash &&
((k = e.key) == key ||
(key != null && key.equals(k)))) {
node = e;
break;
}
p = e;
} while ((e = e.next) != null);
}
}
if (node != null && (!matchValue || (v = node.value) == value ||
(value != null && value.equals(v)))) {
if (node instanceof TreeNode)
((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
else if (node == p)// the node to delete is the head of the chain
tab[index] = node.next;
else// unlink node from the chain
p.next = node.next;
++modCount;// structural modification, increment the count
--size;// one fewer mapping
// empty hook method, overridden by LinkedHashMap
afterNodeRemoval(node);
return node;
}
}
// the map is empty or no matching node was found; return null
return null;
}
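removeNode() also backs HashMap's override of the JDK 8 default method remove(key, value), which passes matchValue = true so the node is removed only when the value matches as well. A quick usage example:
import java.util.HashMap;
public class RemoveDemo {
    public static void main(String[] args) {
        HashMap<String, Integer> map = new HashMap<>();
        map.put("a", 1);
        System.out.println(map.remove("a"));     // 1: removed, the old value is returned
        System.out.println(map.remove("a"));     // null: no such key
        map.put("b", 2);
        System.out.println(map.remove("b", 99)); // false: the value does not match
        System.out.println(map.remove("b", 2));  // true: key and value both match
    }
}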
4. Serialization and Deserialization
// deserialization
private void readObject(java.io.ObjectInputStream s)
throws IOException, ClassNotFoundException {
// Read in the threshold (ignored), loadfactor, and any hidden stuff
// read in the non-transient fields
s.defaultReadObject();
// reset the transient fields to their zero values
reinitialize();
if (loadFactor <= 0 || Float.isNaN(loadFactor))
throw new InvalidObjectException("Illegal load factor: " +
loadFactor);
// the bucket count written during serialization; read it and ignore it
s.readInt(); // Read and ignore number of buckets
// read size, which was written during serialization
int mappings = s.readInt(); // Read number of mappings (size)
// nothing more needs to be read if size is 0; a negative count means a corrupted stream
if (mappings < 0)
throw new InvalidObjectException("Illegal mappings count: " +
mappings);
// compute the table capacity from the load factor and allocate the array
else if (mappings > 0) { // (if zero, use defaults)
// Size the table using given load factor only if within
// range of 0.25...4.0
float lf = Math.min(Math.max(0.25f, loadFactor), 4.0f);
float fc = (float)mappings / lf + 1.0f;
int cap = ((fc < DEFAULT_INITIAL_CAPACITY) ?
DEFAULT_INITIAL_CAPACITY :
(fc >= MAXIMUM_CAPACITY) ?
MAXIMUM_CAPACITY :
tableSizeFor((int)fc));
float ft = (float)cap * lf;
threshold = ((cap < MAXIMUM_CAPACITY && ft < MAXIMUM_CAPACITY) ?
(int)ft : Integer.MAX_VALUE);
@SuppressWarnings({"rawtypes","unchecked"})
Node<K,V>[] tab = (Node<K,V>[])new Node[cap];
table = tab;
// Read the keys and values, and put the mappings in the HashMap
for (int i = 0; i < mappings; i++) {
@SuppressWarnings("unchecked")
K key = (K) s.readObject();
@SuppressWarnings("unchecked")
V value = (V) s.readObject();
putVal(hash(key), key, value, false, false);
}
}
}
// serialization
private void writeObject(java.io.ObjectOutputStream s)
throws IOException {
int buckets = capacity();
// Write out the threshold, loadfactor, and any hidden stuff
// write out the non-transient fields
s.defaultWriteObject();
s.writeInt(buckets);
s.writeInt(size);
// write out the key and value of every node, chain by chain;
// serializing only keys/values rather than whole Node objects saves space
internalWriteEntries(s);
}
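A round trip through the two methods above, as a minimal sketch (an in-memory stream is used so the example needs no files):
import java.io.*;
import java.util.HashMap;
public class SerializationDemo {
    public static void main(String[] args) throws Exception {
        HashMap<String, Integer> map = new HashMap<>();
        map.put("a", 1);
        map.put("b", 2);
        // Serialize: writeObject() writes loadFactor, the bucket count, size,
        // and then each key/value pair.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (ObjectOutputStream oos = new ObjectOutputStream(bos)) {
            oos.writeObject(map);
        }
        // Deserialize: readObject() recomputes a suitable capacity and re-puts
        // every mapping.
        try (ObjectInputStream ois = new ObjectInputStream(
                new ByteArrayInputStream(bos.toByteArray()))) {
            @SuppressWarnings("unchecked")
            HashMap<String, Integer> copy = (HashMap<String, Integer>) ois.readObject();
            System.out.println(copy); // {a=1, b=2}
        }
    }
}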
5. Iterators
abstract class HashIterator {
Node<K,V> next; // next entry to return
Node<K,V> current; // current entry
int expectedModCount; // for fast-fail
int index; // current slot
HashIterator() {
expectedModCount = modCount;
Node<K,V>[] t = table;
current = next = null;
index = 0;
if (t != null && size > 0) { // advance to first entry
do {} while (index < t.length && (next = t[index++]) == null);
}
}
public final boolean hasNext() {
return next != null;
}
final Node<K,V> nextNode() {
Node<K,V>[] t;
Node<K,V> e = next;
if (modCount != expectedModCount)
throw new ConcurrentModificationException();
if (e == null)
throw new NoSuchElementException();
if ((next = (current = e).next) == null && (t = table) != null) {
do {} while (index < t.length && (next = t[index++]) == null);
}
return e;
}
public final void remove() {
Node<K,V> p = current;
if (p == null)
throw new IllegalStateException();
if (modCount != expectedModCount)
throw new ConcurrentModificationException();
current = null;
K key = p.key;
removeNode(hash(key), key, null, false, false);
// Resync expectedModCount so no ConcurrentModificationException is thrown.
// This is why, when removing elements while iterating,
// you must use the iterator's remove() method rather than the map's.
expectedModCount = modCount;
}
}
final class KeyIterator extends HashIterator
implements Iterator<K> {
public final K next() { return nextNode().key; }
}
final class ValueIterator extends HashIterator
implements Iterator<V> {
public final V next() { return nextNode().value; }
}
final class EntryIterator extends HashIterator
implements Iterator<Map.Entry<K,V>> {
public final Map.Entry<K,V> next() { return nextNode(); }
}
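The expectedModCount resync in remove() above is exactly why deleting through the iterator is safe, while deleting through the map mid-iteration fails. A quick demonstration:
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
public class IteratorRemoveDemo {
    public static void main(String[] args) {
        HashMap<String, Integer> map = new HashMap<>();
        map.put("a", 1);
        map.put("b", 2);
        map.put("c", 3);
        Iterator<Map.Entry<String, Integer>> it = map.entrySet().iterator();
        while (it.hasNext()) {
            if (it.next().getValue() == 2) {
                it.remove();        // safe: resyncs expectedModCount with modCount
                // map.remove("b"); // would make the next it.next() throw
                                    // ConcurrentModificationException
            }
        }
        System.out.println(map); // {a=1, c=3}
    }
}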
6. Summary
In the earlier JUC source-reading series we looked at how ConcurrentHashMap (CHM) is implemented; comparing the two makes it clear exactly where HashMap fails to be thread-safe. A brief summary (a small demo follows the list):
- the element count (size) is not updated atomically
- the modification count (modCount) is not updated atomically
- modifications to entries (overwrites and removals) are not synchronized
- resizing is not synchronized, so concurrent puts during a resize can lose entries
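A minimal sketch that usually exhibits the lost updates described above; the outcome varies from run to run, since it is a race by construction:
import java.util.HashMap;
public class UnsafeDemo {
    public static void main(String[] args) throws InterruptedException {
        HashMap<Integer, Integer> map = new HashMap<>();
        // Two writers with disjoint key ranges: a correct map would end at size 20000.
        Thread t1 = new Thread(() -> { for (int i = 0;      i < 10_000; i++) map.put(i, i); });
        Thread t2 = new Thread(() -> { for (int i = 10_000; i < 20_000; i++) map.put(i, i); });
        t1.start(); t2.start();
        t1.join(); t2.join();
        System.out.println(map.size()); // frequently less than 20000: lost updates
    }
}
For concurrent access, use ConcurrentHashMap or Collections.synchronizedMap instead.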