什么是ConcurrentHashMap
首先要了解java的 java.util.concurrent包,这是java在1.5之后提供的一个功能,官方api文档描述 Utility classes commonly useful in concurrent programming.用于并发编程的实用类。
在Class Summary列表中我们可以找到今天的主角 ConcurrentHashMap
通过java API的描述知道类这是一个用于高并发检索和修改的哈希表,API里也有对ConcurrentHashMap非常详细的描述,只不过是全英文的。
Package java.util.concurrent
Class ConcurrentHashMap<K,V>
java.lang.Object
java.util.AbstractMap<K,V>
java.util.concurrent.ConcurrentHashMap<K,V>
Type Parameters:
K - the type of keys maintained by this map
V - the type of mapped values
All Implemented Interfaces:
Serializable, ConcurrentMap<K,V>, Map<K,V>
ConcurrentHashMap做了什么
1.关键属性
/**
* The largest possible table capacity. This value must be
* exactly 1<<30 to stay within Java array allocation and indexing
* bounds for power of two table sizes, and is further required
* because the top two bits of 32bit hash fields are used for
* control purposes.
* 最大容量2^30
*/
private static final int MAXIMUM_CAPACITY = 1 << 30;
/**
* The default initial table capacity. Must be a power of 2
* (i.e., at least 1) and at most MAXIMUM_CAPACITY.
* 默认初始值 必须是2的幂并且不大于 MAXIMUM_CAPACITY
*/
private static final int DEFAULT_CAPACITY = 16;
/**
* The load factor for this table. Overrides of this value in
* constructors affect only the initial table capacity. The
* actual floating point value isn't normally used -- it is
* simpler to use expressions such as {@code n - (n >>> 2)} for
* the associated resizing threshold.
* 负载因子,通过构造方法初始化table容量
*/
private static final float LOAD_FACTOR = 0.75f;
/**
* The bin count threshold for using a tree rather than list for a
* bin. Bins are converted to trees when adding an element to a
* bin with at least this many nodes. The value must be greater
* than 2, and should be at least 8 to mesh with assumptions in
* tree removal about conversion back to plain bins upon
* shrinkage.
* list 转为红黑树的阀值
*/
static final int TREEIFY_THRESHOLD = 8;
/**
* The array of bins. Lazily initialized upon first insertion.
* Size is always a power of two. Accessed directly by iterators.
* 用来存放Node节点数据的,大小为2的幂,由迭代器直接访问
*/
transient volatile Node<K,V>[] table;
/**
* The next table to use; non-null only while resizing.
* 扩容时新生成的数据
*/
private transient volatile Node<K,V>[] nextTable;
2.关键方法
- Node<K,V>
static class Node<K,V> implements Map.Entry<K,V>,Node是ConcurrentHashMap最重要的内部类,它实现了Map.Entry<K,V>,储存着key-value键值对。
/**
* 大有volatile 保证并发操作的可见性
**/
volatile V val; //value值
volatile Node<K,V> next; //下一个节点
//不允许通过setValue方法修改
public final V setValue(V value) {
throw new UnsupportedOperationException();
}
/**
* Virtualized support for map.get(); overridden in subclasses.
* 提供find方法供map.get 调用
*/
Node<K,V> find(int h, Object k)
- TreeNode<K,V>
/**
* Nodes for use in TreeBins
* 用来储存在TreeBins内,在链表转为红黑树时储存key-value
*/
static final class TreeNode<K,V> extends Node<K,V>
//树形结构的属性定义
TreeNode<K,V> parent; // red-black tree links
TreeNode<K,V> left;
TreeNode<K,V> right;
TreeNode<K,V> prev; // needed to unlink next upon deletion
boolean red; //标志红黑树的红节点
/**
*
* h为key的hashCode k为 key
**/
Node<K,V> find(int h, Object k) {
return findTreeNode(h, k, null);
}
- TreeBin<K,V>
存储树形结构的容器,用来储存TreeNode
// 读写锁状态
static final int WRITER = 1; // 获取写锁的状态
static final int WAITER = 2; // 等待写锁的状态
static final int READER = 4; // 增加数据时读锁的状态
/**
* 初始化红黑树
*/
TreeBin(TreeNode<K,V> b)
- initTable()
初始化对象的方法,ConcurrentHashTable并没有在new 的时候初始化而是在 put操作的时候调用initTable方法完成初始化
private final Node<K,V>[] initTable() {
Node<K,V>[] tab; int sc;
while ((tab = table) == null || tab.length == 0) {//空的table才能进入初始化操作
if ((sc = sizeCtl) < 0) //sizeCtl<0表示其他线程已经在初始化了或者扩容了,挂起当前线程
Thread.yield(); // lost initialization race; just spin
else if (U.compareAndSwapInt(this, SIZECTL, sc, -1)) {//CAS操作SIZECTL为-1,表示初始化状态
try {
if ((tab = table) == null || tab.length == 0) {
int n = (sc > 0) ? sc : DEFAULT_CAPACITY;
@SuppressWarnings("unchecked")
Node<K,V>[] nt = (Node<K,V>[])new Node<?,?>[n];//初始化
table = tab = nt;
sc = n - (n >>> 2);//记录下次扩容的大小
}
} finally {
sizeCtl = sc;
}
break;
}
}
return tab;
- put() putVal()
/**
* Maps the specified key to the specified value in this table.
* Neither the key nor the value can be null.
*
* <p>The value can be retrieved by calling the {@code get} method
* with a key that is equal to the original key.
*
* @param key key with which the specified value is to be associated
* @param value value to be associated with the specified key
* @return the previous value associated with {@code key}, or
* {@code null} if there was no mapping for {@code key}
* @throws NullPointerException if the specified key or value is null
*/
public V put(K key, V value) {
return putVal(key, value, false);
}
/** Implementation for put and putIfAbsent */
final V putVal(K key, V value, boolean onlyIfAbsent) {
if (key == null || value == null) throw new NullPointerException();
int hash = spread(key.hashCode());//两次hash,减少hash冲突,可以均匀分布
int binCount = 0;
for (Node<K,V>[] tab = table;;) { //对这个table进行迭代
Node<K,V> f; int n, i, fh;
//当table未初始化时,调用initTable(),懒加载模式
if (tab == null || (n = tab.length) == 0)
tab = initTable();
else if ((f = tabAt(tab, i = (n - 1) & hash)) == null) {
//如果当前位置为空,直接插入 no lock
if (casTabAt(tab, i, null,
new Node<K,V>(hash, key, value, null)))
break; // no lock when adding to empty bin
}
else if ((fh = f.hash) == MOVED) //如果在进行扩容,则先进行扩容操作
tab = helpTransfer(tab, f);
else {
V oldVal = null;
//锁住链表或者红黑树的头结点,分段锁
synchronized (f) {
if (tabAt(tab, i) == f) {
//大于零说明是链表,否则f是ForwardingNode节点 hash=-1
if (fh >= 0) {
binCount = 1;
for (Node<K,V> e = f;; ++binCount) {
K ek;
//如果存在当前key
if (e.hash == hash &&
((ek = e.key) == key ||
(ek != null && key.equals(ek)))) {
oldVal = e.val;
//存在则替换val
if (!onlyIfAbsent)
e.val = value;
break;
}
Node<K,V> pred = e;
//插入到队尾
if ((e = e.next) == null) {
pred.next = new Node<K,V>(hash, key,
value, null);
break;
}
}
}
//树节点操作
else if (f instanceof TreeBin) {
Node<K,V> p;
binCount = 2;
if ((p = ((TreeBin<K,V>)f).putTreeVal(hash, key,
value)) != null) {
oldVal = p.val;
if (!onlyIfAbsent)
p.val = value;
}
}
}
}
//当binCount >= 转红黑树的临界值时 转为红黑树
if (binCount != 0) {
if (binCount >= TREEIFY_THRESHOLD)
treeifyBin(tab, i);
if (oldVal != null)
return oldVal;
break;
}
}
}
//size + 1
addCount(1L, binCount);
return null;
}
- get()
get方法比较简单,只是判断下key的hashCode
/**
* Returns the value to which the specified key is mapped,
* or {@code null} if this map contains no mapping for the key.
*
* <p>More formally, if this map contains a mapping from a key
* {@code k} to a value {@code v} such that {@code key.equals(k)},
* then this method returns {@code v}; otherwise it returns
* {@code null}. (There can be at most one such mapping.)
*
* @throws NullPointerException if the specified key is null
*/
public V get(Object key) {
Node<K,V>[] tab; Node<K,V> e, p; int n, eh; K ek;
int h = spread(key.hashCode());
if ((tab = table) != null && (n = tab.length) > 0 &&
(e = tabAt(tab, (n - 1) & h)) != null) {//获取table中的Node节点(tabAt(tab, (n – 1) & h))
//当key的hash与node节点hash相同时直接返回该节点
if ((eh = e.hash) == h) {
if ((ek = e.key) == key || (ek != null && key.equals(ek)))
return e.val;
}
//为树节点
else if (eh < 0)
return (p = e.find(h, key)) != null ? p.val : null;
//遍历链表
while ((e = e.next) != null) {
if (e.hash == h &&
((ek = e.key) == key || (ek != null && key.equals(ek))))
return e.val;
}
}
return null;
}
- addCount()
当ConcurrentHashMap中table元素个数达到了容量阈值(sizeCtl)时,则需要进行扩容操作。在put操作时最后一个会调用addCount(long x, int check),该方法主要做两个工作:1.更新baseCount;2.检测是否需要扩容操作。如下:
private final void addCount(long x, int check) {
CounterCell[] as; long b, s;
// 更新baseCount
//check >= 0 :则需要进行扩容操作
if (check >= 0) {
Node<K,V>[] tab, nt; int n, sc;
while (s >= (long)(sc = sizeCtl) && (tab = table) != null &&
(n = tab.length) < MAXIMUM_CAPACITY) {
int rs = resizeStamp(n);
if (sc < 0) {
if ((sc >>> RESIZE_STAMP_SHIFT) != rs || sc == rs + 1 ||
sc == rs + MAX_RESIZERS || (nt = nextTable) == null ||
transferIndex <= 0)
break;
if (U.compareAndSwapInt(this, SIZECTL, sc, sc + 1))
transfer(tab, nt);
}
//当前线程是唯一的或是第一个发起扩容的线程 此时nextTable=null
else if (U.compareAndSwapInt(this, SIZECTL, sc,
(rs << RESIZE_STAMP_SHIFT) + 2))
transfer(tab, null);
s = sumCount();
}
}
}
transfer()方法为ConcurrentHashMap扩容操作的核心方法。由于ConcurrentHashMap支持多线程扩容,而且也没有进行加锁,所以实现会变得有点儿复杂。整个扩容操作分为两步:
- 构建一个nextTable,其大小为原来大小的两倍,这个步骤是在单线程环境下完成的
- 将原来table里面的内容复制到nextTable中,这个步骤是允许多线程操作的,所以性能得到提升,减少了扩容的时间消耗
private final void transfer(Node<K,V>[] tab, Node<K,V>[] nextTab) {
int n = tab.length, stride;
// 每核处理的量小于16,则强制赋值16
if ((stride = (NCPU > 1) ? (n >>> 3) / NCPU : n) < MIN_TRANSFER_STRIDE)
.
.
.
try {
@SuppressWarnings("unchecked")
Node<K,V>[] nt = (Node<K,V>[])new Node<?,?>[n << 1]; //构建一个nextTable对象,其容量为原来容量的两倍
.
.
.
int nextn = nextTab.length;
// 连接点指针,用于标志位(fwd的hash值为-1,fwd.nextTable=nextTab)
ForwardingNode<K,V> fwd = new ForwardingNode<K,V>(nextTab);
// 当advance == true时,表明该节点已经处理过了
boolean advance = true;
boolean finishing = false; // to ensure sweep before committing nextTab
for (int i = 0, bound = 0;;) {
Node<K,V> f; int fh;
// 控制 --i ,遍历原hash表中的节点
while (advance) {
int nextIndex, nextBound;
if (--i >= bound || finishing)
advance = false;
else if ((nextIndex = transferIndex) <= 0) {
i = -1;
advance = false;
}
// 用CAS计算得到的transferIndex
else if (U.compareAndSwapInt
(this, TRANSFERINDEX, nextIndex,
nextBound = (nextIndex > stride ?
nextIndex - stride : 0))) {
bound = nextBound;
i = nextIndex - 1;
advance = false;
}
}
.
.
.
if (i < 0 || i >= n || i + n >= nextn) {
int sc;
// 已经完成所有节点复制了
if (finishing) {
nextTable = null;
table = nextTab; // table 指向nextTable
sizeCtl = (n << 1) - (n >>> 1); // sizeCtl阈值为原来的1.5倍
return; // 跳出死循环,
}
// CAS 更扩容阈值,在这里面sizectl值减一,说明新加入一个线程参与到扩容操作
if (U.compareAndSwapInt(this, SIZECTL, sc = sizeCtl, sc - 1)) {
if ((sc - 2) != resizeStamp(n) << RESIZE_STAMP_SHIFT)
return;
finishing = advance = true;
i = n; // recheck before commit
}
}
// 遍历的节点为null,则放入到ForwardingNode 指针节点
else if ((f = tabAt(tab, i)) == null)
advance = casTabAt(tab, i, null, fwd);
// f.hash == -1 表示遍历到了ForwardingNode节点,意味着该节点已经处理过了
// 这里是控制并发扩容的核心
else if ((fh = f.hash) == MOVED)
.
.
.
// 节点加锁
synchronized (f) {
// 节点复制工作
if (tabAt(tab, i) == f) {
Node<K,V> ln, hn;
// fh >= 0 ,表示为链表节点
if (fh >= 0) {
// 构造两个链表 一个是原链表 另一个是原链表的反序排列
int runBit = fh & n;
Node<K,V> lastRun = f;
for (Node<K,V> p = f.next; p != null; p = p.next) {
int b = p.hash & n;
if (b != runBit) {
runBit = b;
lastRun = p;
}
}
if (runBit == 0) {
ln = lastRun;
hn = null;
}
else {
hn = lastRun;
ln = null;
}
for (Node<K,V> p = f; p != lastRun; p = p.next) {
int ph = p.hash; K pk = p.key; V pv = p.val;
if ((ph & n) == 0)
ln = new Node<K,V>(ph, pk, pv, ln);
else
hn = new Node<K,V>(ph, pk, pv, hn);
}
// 在nextTable i 位置处插上链表
setTabAt(nextTab, i, ln);
// 在nextTable i + n 位置处插上链表
setTabAt(nextTab, i + n, hn);
// 在table i 位置处插上ForwardingNode 表示该节点已经处理过了
setTabAt(tab, i, fwd);
// advance = true 可以执行--i动作,遍历节点
advance = true;
}
// 如果是TreeBin,则按照红黑树进行处理,处理逻辑与上面一致
else if (f instanceof TreeBin) {
.
.
.
.
.
// 扩容后树节点个数若<=6,将树转链表
ln = (lc <= UNTREEIFY_THRESHOLD) ? untreeify(lo) :
(hc != 0) ? new TreeBin<K,V>(lo) : t;
hn = (hc <= UNTREEIFY_THRESHOLD) ? untreeify(hi) :
(lc != 0) ? new TreeBin<K,V>(hi) : t;
setTabAt(nextTab, i, ln);
setTabAt(nextTab, i + n, hn);
setTabAt(tab, i, fwd);
advance = true;
- 为每个内核分任务,并保证其不小于16
- 检查nextTable是否为null,如果是,则初始化nextTable,使其容量为table的两倍
- 死循环遍历节点,知道finished:节点从table复制到nextTable中,支持并发,请思路如下:
- 如果节点 f 为null,则插入ForwardingNode(采用Unsafe.compareAndSwapObjectf方法实现),这个是触发并发扩容的关键
- 如果f为链表的头节点(fh >= 0),则先构造一个反序链表,然后把他们分别放在nextTable的i和i + n位置,并将ForwardingNode 插入原节点位置,代表已经处理过了
- 如果f为TreeBin节点,同样也是构造一个反序 ,同时需要判断是否需要进行unTreeify()操作,并把处理的结果分别插入到nextTable的i 和i+nw位置,并插入ForwardingNode 节点
- 所有节点复制完成后,则将table指向nextTable,同时更新sizeCtl = nextTable的0.75倍,完成扩容过程
在多线程环境下,ConcurrentHashMap用两点来保证正确性:ForwardingNode和synchronized。当一个线程遍历到的节点如果是ForwardingNode,则继续往后遍历,如果不是,则将该节点加锁,防止其他线程进入,完成后设置ForwardingNode节点,以便要其他线程可以看到该节点已经处理过了,如此交叉进行,高效而又安全。
- 转红黑树
在put操作是,如果发现链表结构中的元素超过了TREEIFY_THRESHOLD(默认为8),则会把链表转换为红黑树,已便于提高查询效率。如下:
if (binCount >= TREEIFY_THRESHOLD)
treeifyBin(tab, i);
调用treeifyBin方法用与将链表转换为红黑树。
private final void treeifyBin(Node<K,V>[] tab, int index) {
Node<K,V> b; int n, sc;
if (tab != null) {
if ((n = tab.length) < MIN_TREEIFY_CAPACITY)//如果table.length<64 就扩大一倍 返回
tryPresize(n << 1);
else if ((b = tabAt(tab, index)) != null && b.hash >= 0) {
synchronized (b) {
if (tabAt(tab, index) == b) {
TreeNode<K,V> hd = null, tl = null;
//构造了一个TreeBin对象 把所有Node节点包装成TreeNode放进去
for (Node<K,V> e = b; e != null; e = e.next) {
TreeNode<K,V> p =
new TreeNode<K,V>(e.hash, e.key, e.val,
null, null);//这里只是利用了TreeNode封装 而没有利用TreeNode的next域和parent域
if ((p.prev = tl) == null)
hd = p;
else
tl.next = p;
tl = p;
}
//在原来index的位置 用TreeBin替换掉原来的Node对象
setTabAt(tab, index, new TreeBin<K,V>(hd));
}
}
}
}
}
从上面源码可以看出,构建红黑树的过程是同步的,进入同步后过程如下:
- 根据table中index位置Node链表,重新生成一个hd为头结点的TreeNode
- 根据hd头结点,生成TreeBin树结构,并用TreeBin替换掉原来的Node对象。