架构之高性能
引言
在互联网高并发时代,系统性能直接关系到用户体验和业务成功。传统的锁机制虽然能够保证线程安全,但在高并发场景下却成为性能瓶颈。无锁化编程(Lock-Free Programming)作为现代高性能架构的核心技术,通过精巧的算法设计和硬件原语,实现了在不使用锁的情况下保证线程安全,大幅提升了系统的并发处理能力。
高性能法则强调:通过无锁化编程、异步处理、内存优化等技术手段,构建能够支撑海量并发请求的高性能系统架构。这不仅是对技术深度的挑战,更是对架构智慧的考验。
高性能架构的核心理念
什么是高性能?
高性能指的是系统在处理大量请求时仍能保持低延迟、高吞吐量的能力。具体表现为:
- 低延迟:请求响应时间短,用户体验好
- 高吞吐量:单位时间内处理请求数量多
- 高并发:能够同时处理大量并发请求
- 资源利用率高:充分利用CPU、内存、网络等资源
- 可扩展性强:能够通过横向扩展提升性能
性能瓶颈的根源
无锁化编程的优势
无锁化编程核心技术
1. CAS原子操作
CAS(Compare-And-Swap)是无锁编程的基础,它提供了一种原子方式更新共享变量的机制。
CAS实现原理
// CAS操作的基本原理
/**
 * Basic compare-and-swap (CAS) building blocks: a CAS helper, a CAS-based counter
 * and a CAS-based stack.
 */
public class CASExample {

    /**
     * Atomically sets {@code value} to {@code newValue} if it currently equals {@code expected}.
     *
     * <p>Delegates to {@link AtomicInteger#compareAndSet(int, int)} so that the comparison and
     * the write happen as one atomic step. A separate {@code get()} followed by {@code set()}
     * would let another thread modify the value between the two calls.
     *
     * @param value    the shared variable
     * @param expected the value the caller believes is current
     * @param newValue the value to install
     * @return {@code true} if the swap happened, {@code false} if the current value differed
     */
    public static boolean compareAndSwap(AtomicInteger value, int expected, int newValue) {
        return value.compareAndSet(expected, newValue);
    }

    /** Thread-safe counter built on a CAS retry loop. */
    public static class LockFreeCounter {
        private final AtomicLong counter = new AtomicLong(0);

        /**
         * Increments the counter by one.
         *
         * @return the value after the increment
         */
        public long increment() {
            long oldValue;
            long newValue;
            do {
                // Re-read on every attempt; a failed CAS means another thread won the race.
                oldValue = counter.get();
                newValue = oldValue + 1;
            } while (!counter.compareAndSet(oldValue, newValue));
            return newValue;
        }

        /** @return the current counter value */
        public long get() {
            return counter.get();
        }
    }

    /**
     * Stack whose head pointer is updated with CAS.
     *
     * @param <T> element type
     */
    public static class LockFreeStack<T> {
        private final AtomicReference<Node<T>> head = new AtomicReference<>();

        /** Immutable singly-linked node. */
        private static class Node<T> {
            final T value;
            final Node<T> next;

            Node(T value, Node<T> next) {
                this.value = value;
                this.next = next;
            }
        }

        /**
         * Pushes {@code value} onto the stack.
         *
         * @param value the element to push
         */
        public void push(T value) {
            Node<T> oldHead;
            Node<T> newHead;
            do {
                oldHead = head.get();
                // The node is immutable, so it is rebuilt against the freshly read head.
                newHead = new Node<>(value, oldHead);
            } while (!head.compareAndSet(oldHead, newHead));
        }

        /**
         * Pops the most recently pushed element.
         *
         * @return the popped element, or {@code null} if the stack is empty
         */
        public T pop() {
            Node<T> oldHead;
            Node<T> newHead;
            do {
                oldHead = head.get();
                if (oldHead == null) {
                    return null;
                }
                newHead = oldHead.next;
            } while (!head.compareAndSet(oldHead, newHead));
            return oldHead.value;
        }
    }
}
2. 内存屏障与可见性
内存屏障(Memory Barrier)确保指令的执行顺序和内存可见性。
// 内存屏障的使用
/**
 * Shows how a volatile flag (and an AtomicInteger) is used to publish a plain field
 * written before it. The statement order inside each method is the point of the example.
 */
public class MemoryBarrierExample {
// volatile flag used as a lightweight memory barrier
private volatile boolean flag = false;
private int data = 0;
/** Writes the payload first, then raises the volatile flag. */
public void writer() {
data = 42; // plain write
flag = true; // volatile write; per the original note: acts as StoreStore + StoreLoad barrier
}
/** Prints data only after observing the volatile flag as true. */
public void reader() {
if (flag) { // volatile read; per the original note: acts as LoadLoad + LoadStore barrier
// the write data = 42 precedes the volatile write that this read observed
System.out.println(data);
}
}
// Same publication idea using AtomicInteger memory semantics
public class VisibilityExample {
private final AtomicInteger counter = new AtomicInteger(0);
private int value = 0;
/** Writes value, then publishes via lazySet on the counter. */
public void update() {
value = 100; // plain write
counter.lazySet(1); // release semantics; per the original note: StoreStore barrier
}
/**
 * Spins until the counter becomes non-zero, then returns value.
 * NOTE(review): this loop never exits if update() is never called.
 */
public int read() {
while (counter.get() == 0) { // acquire semantics; per the original note: LoadLoad barrier
// busy-wait for the update
}
return value; // read after observing the counter written by update()
}
}
}
3. ABA问题及解决方案
ABA问题是无锁编程中的经典问题,需要通过版本号等机制解决。
// ABA问题演示
/**
 * Illustrates the ABA problem: a reference-only CAS cannot tell whether the value
 * was changed and changed back between the read and the swap.
 */
public class ABAProblem {
    private final AtomicReference<String> value = new AtomicReference<>("A");

    /** Reads the current value, then swaps it to "C" and prints whether the CAS succeeded. */
    public void demonstrateABAProblem() {
        final String snapshot = value.get();
        // Another thread could change A -> B -> A here; the CAS below would still succeed
        // because it only compares the reference against the snapshot.
        final boolean swapped = value.compareAndSet(snapshot, "C");
        System.out.println("CAS成功: " + swapped);
    }
}
// 使用AtomicStampedReference解决ABA问题
/**
 * Avoids the ABA problem by pairing the reference with an integer stamp
 * ({@link AtomicStampedReference}); the CAS checks both.
 */
public class ABASolution {
    private final AtomicStampedReference<String> value = new AtomicStampedReference<>("A", 0);

    /** Reads value and stamp together, then swaps to "C" with stamp + 1 and prints the outcome. */
    public void safeUpdate() {
        final int[] stampOut = new int[1];
        final String seenValue = value.get(stampOut);
        final int seenStamp = stampOut[0];
        // ... other work would happen here ...
        // The swap only succeeds if both the reference and the stamp are unchanged.
        final boolean swapped = value.compareAndSet(seenValue, "C", seenStamp, seenStamp + 1);
        System.out.println("安全CAS成功: " + swapped);
    }
}
// 自定义版本号解决方案
/**
 * Hand-rolled versioned reference: every successful update installs a fresh
 * (value, version + 1) holder, so the holder identity changes even if the value repeats.
 *
 * @param <T> type of the referenced value
 */
public class VersionedReference<T> {

    /** Immutable (value, version) pair. */
    private static class VersionedValue<T> {
        final T value;
        final long version;

        VersionedValue(T value, long version) {
            this.value = value;
            this.version = version;
        }
    }

    private final AtomicReference<VersionedValue<T>> ref;

    /** @param initialValue the starting value, stored with version 0 */
    public VersionedReference(T initialValue) {
        this.ref = new AtomicReference<>(new VersionedValue<>(initialValue, 0));
    }

    /**
     * Replaces the value if the current value is the same reference as {@code expectedValue}.
     *
     * @param expectedValue value expected to be current (compared by identity)
     * @param newValue      value to install
     * @return {@code true} on success, {@code false} if the current value differs
     */
    public boolean compareAndSet(T expectedValue, T newValue) {
        for (;;) {
            final VersionedValue<T> seen = ref.get();
            if (seen.value != expectedValue) {
                return false;
            }
            final VersionedValue<T> replacement = new VersionedValue<>(newValue, seen.version + 1);
            if (ref.compareAndSet(seen, replacement)) {
                return true;
            }
            // Holder changed under us; re-read and try again.
        }
    }
}
Netty无锁化架构深度解析
Netty作为高性能网络通信框架,大量使用了无锁化技术来实现卓越的性能。
Netty无锁化设计概览
1. EventLoop无锁化设计
Netty的EventLoop采用单线程模型,避免了多线程竞争,实现了无锁化处理。
// Netty EventLoop无锁化实现
/**
 * Simplified event loop: tasks are submitted through a queue and executed one at a time
 * by the loop in {@link #run()}.
 */
public abstract class SingleThreadEventLoop extends SingleThreadEventExecutor implements EventLoop {

    // Task hand-off queue; created by newTaskQueue().
    private final Queue<Runnable> taskQueue;

    protected SingleThreadEventLoop(EventLoopGroup parent, Executor executor, boolean addTaskWakesUp) {
        super(parent, executor, addTaskWakesUp);
        this.taskQueue = newTaskQueue();
    }

    /** @return the queue obtained from {@code PlatformDependent.newMpscQueue()} */
    @Override
    protected Queue<Runnable> newTaskQueue() {
        return PlatformDependent.newMpscQueue();
    }

    /** Takes tasks and runs them serially until {@code confirmShutdown()} returns true. */
    @Override
    protected void run() {
        do {
            final Runnable next = takeTask();
            if (next != null) {
                // Tasks run one after another on this loop, so no synchronisation is used here.
                next.run();
                updateLastExecutionTime();
            }
        } while (!confirmShutdown());
    }
}
// MPSC队列实现(多生产者单消费者)
/**
 * Linked queue intended for multiple producers and a single consumer (MPSC).
 * Producers claim the tail with an atomic swap; the consumer walks next links.
 */
public class MpscLinkedQueue<T> extends BaseLinkedQueue<T> {
// Atomic references to the producer-side and consumer-side nodes
private final AtomicReference<LinkedQueueNode<T>> producerNode;
private final AtomicReference<LinkedQueueNode<T>> consumerNode;
/** Creates an empty queue whose producer and consumer both point at one sentinel node. */
public MpscLinkedQueue() {
producerNode = new AtomicReference<>();
consumerNode = new AtomicReference<>();
// initialise the shared sentinel node
LinkedQueueNode<T> node = newNode();
consumerNode.lazySet(node);
producerNode.lazySet(node);
}
/**
 * Enqueues an element (callable from multiple producers).
 *
 * @param e element to add; must not be null
 * @return always true
 * @throws NullPointerException if e is null
 */
@Override
public boolean offer(T e) {
if (null == e) {
throw new NullPointerException();
}
final LinkedQueueNode<T> nextNode = newNode(e);
// Atomically become the new producer node and obtain the previous one
final LinkedQueueNode<T> prevProducerNode = producerNode.getAndSet(nextNode);
// Link the previous node to the new one. Until this store lands, the consumer
// sees next == null on the previous node and poll() returns null.
prevProducerNode.soNext(nextNode); // per the original note: StoreStore barrier
return true;
}
/**
 * Dequeues an element; written for a single consumer, so it uses no CAS.
 *
 * @return the next element, or null if the consumer node has no linked successor
 */
@Override
public T poll() {
final LinkedQueueNode<T> currConsumerNode = consumerNode.get();
final LinkedQueueNode<T> nextNode = currConsumerNode.lvNext(); // per the original note: LoadLoad barrier
if (nextNode != null) {
final T nextValue = nextNode.getAndNullValue();
consumerNode.lazySet(nextNode); // advance the consumer node
currConsumerNode.soNext(null); // unlink the old node to help GC
return nextValue;
}
return null;
}
}
2. Channel状态无锁化管理
Netty使用原子字段更新器实现Channel状态的无锁化管理。
// Channel状态的无锁化管理
/**
 * Simplified channel whose lifecycle state is a volatile int updated through an
 * {@link AtomicIntegerFieldUpdater}.
 */
public abstract class AbstractChannel extends DefaultAttributeMap implements Channel {

    private static final int ST_NOT_YET_CONNECTED = 1;
    private static final int ST_CONNECTED = 2;
    private static final int ST_CLOSING = 3;
    private static final int ST_CLOSED = 4;

    // CAS access to the "state" field without allocating an AtomicInteger per channel.
    private static final AtomicIntegerFieldUpdater<AbstractChannel> STATE_UPDATER =
            AtomicIntegerFieldUpdater.newUpdater(AbstractChannel.class, "state");

    private volatile int state = ST_NOT_YET_CONNECTED;

    /** @return {@code true} unless the state is ST_CLOSED */
    @Override
    public boolean isOpen() {
        return ST_CLOSED != state;
    }

    /** @return {@code true} only while the state is ST_CONNECTED */
    @Override
    public boolean isActive() {
        return ST_CONNECTED == state;
    }

    /**
     * CAS transition of the channel state.
     *
     * @return {@code true} if the state was {@code oldState} and is now {@code newState}
     */
    protected boolean setState(int oldState, int newState) {
        return STATE_UPDATER.compareAndSet(this, oldState, newState);
    }

    @Override
    public ChannelFuture close() {
        return close(newPromise());
    }

    /**
     * Moves the channel to ST_CLOSING and runs {@code closeUnsafe} on the event loop.
     * If neither CONNECTED nor NOT_YET_CONNECTED could be swapped to CLOSING, the promise
     * is completed successfully right away.
     */
    @Override
    public ChannelFuture close(final ChannelPromise promise) {
        final boolean movedToClosing =
                setState(ST_CONNECTED, ST_CLOSING) || setState(ST_NOT_YET_CONNECTED, ST_CLOSING);
        if (!movedToClosing) {
            // Already closing or closed.
            return promise.setSuccess();
        }

        final EventLoop loop = eventLoop();
        if (!loop.inEventLoop()) {
            loop.execute(() -> closeUnsafe(promise));
            return promise;
        }
        closeUnsafe(promise);
        return promise;
    }
}
// 无锁化读写操作管理
/** Channel variant that tracks a "read suspended" flag in a volatile int field. */
public abstract class AbstractNioChannel extends AbstractChannel {

    private static final AtomicIntegerFieldUpdater<AbstractNioChannel> READ_SUSPENDED_UPDATER =
            AtomicIntegerFieldUpdater.newUpdater(AbstractNioChannel.class, "readSuspended");

    // 0 = reading enabled, non-zero = reading suspended.
    private volatile int readSuspended;

    /** Marks reading as suspended. */
    void suspendRead() {
        READ_SUSPENDED_UPDATER.set(this, 1);
    }

    /** Clears the suspended mark. */
    void resumeRead() {
        READ_SUSPENDED_UPDATER.set(this, 0);
    }

    /** @return {@code true} if the suspended flag is non-zero */
    boolean isReadSuspended() {
        return READ_SUSPENDED_UPDATER.get(this) != 0;
    }
}
3. 内存分配无锁化
Netty的内存分配器使用无锁化设计,大幅提升内存分配性能。
// PoolArena无锁化内存分配
abstract class PoolArena<T> implements PoolArenaMetric {
// 使用原子整数管理内存块状态
private final AtomicInteger numAllocations = new AtomicInteger();
private final AtomicInteger numDeallocations = new AtomicInteger();
// 无锁化内存分配
PooledByteBuf<T> allocate(PoolThreadCache cache, int reqCapacity, int maxCapacity) {
// 增加分配计数
numAllocations.incrementAndGet();
// 选择合适的内存块
PoolChunk<T> chunk = findChunk(cache, reqCapacity);
if (chunk != null {
// 分配内存
return allocateFromChunk(chunk, reqCapacity, maxCapacity);
}
// 分配新chunk
return allocateNewChunk(reqCapacity, maxCapacity);
}
// 无锁化内存释放
boolean free(PoolChunk<T> chunk, long handle, int normCapacity, PoolThreadCache cache) {
// 增加释放计数
numDeallocations.incrementAndGet();
// 释放内存块
chunk.decrementPinned();
return chunk.parent.free(chunk, handle, normCapacity, cache);
}
}
// PoolThreadCache线程本地缓存,避免锁竞争
/**
 * Simplified per-thread memory cache: allocations and frees are first served from
 * region caches held by this object.
 */
public final class PoolThreadCache {
    private final MemoryRegionCache<byte[]>[] tinySubPageHeapCaches;
    private final MemoryRegionCache<byte[]>[] smallSubPageHeapCaches;
    private final MemoryRegionCache<ByteBuffer>[] tinySubPageDirectCaches;
    private final MemoryRegionCache<ByteBuffer>[] smallSubPageDirectCaches;

    /** Tries to satisfy a "tiny" allocation from the cache selected by {@code cacheForTiny}. */
    boolean allocateTiny(PoolArena<?> area, PooledByteBuf<?> buf, int reqCapacity, int normCapacity) {
        final MemoryRegionCache<?> tinyCache = cacheForTiny(area, normCapacity);
        return allocate(tinyCache, buf, reqCapacity);
    }

    /**
     * Returns a freed region to the matching cache.
     *
     * @return {@code false} if no cache matches, otherwise the result of {@code cache.add}
     */
    boolean add(PoolArena<?> area, PoolChunk chunk, long handle, int normCapacity, SizeClass sizeClass) {
        final MemoryRegionCache<?> target = cache(area, normCapacity, sizeClass);
        return target != null && target.add(chunk, handle);
    }

    /**
     * Allocates from {@code cache}; once the allocation count reaches
     * {@code freeSweepAllocationThreshold} the count is reset and {@code trim()} is called.
     */
    private boolean allocate(MemoryRegionCache<?> cache, PooledByteBuf buf, int reqCapacity) {
        if (cache == null) {
            return false;
        }
        final boolean served = cache.allocate(buf, reqCapacity);
        allocations++;
        if (allocations >= freeSweepAllocationThreshold) {
            allocations = 0;
            trim();
        }
        return served;
    }
}
4. 计数器无锁化实现
Netty使用各种原子操作实现高性能计数器。
// LongCounter无锁化计数器
/** Counter metric backed by a single {@link AtomicLong}. */
public abstract class LongCounter implements LongCounterMetric {

    private final AtomicLong counter = new AtomicLong();

    /** Adds {@code delta} (may be negative) to the counter. */
    @Override
    public void add(long delta) {
        counter.getAndAdd(delta);
    }

    /** Adds one. */
    @Override
    public void increment() {
        counter.getAndIncrement();
    }

    /** Subtracts one. */
    @Override
    public void decrement() {
        counter.getAndDecrement();
    }

    /** @return the current count */
    @Override
    public long value() {
        return counter.get();
    }
}
// 使用原子字段更新器优化内存使用
/**
 * Holds an "unwritable" flag in a volatile int that is written through an
 * {@link AtomicIntegerFieldUpdater} instead of a per-instance atomic object.
 */
public class ChannelOutboundBuffer {

    private static final AtomicIntegerFieldUpdater<ChannelOutboundBuffer> UNWRITABLE_UPDATER =
            AtomicIntegerFieldUpdater.newUpdater(ChannelOutboundBuffer.class, "unwritable");

    // 0 = writable, 1 = unwritable.
    private volatile int unwritable;

    /** Stores 1 when {@code unwritable} is true, 0 otherwise. */
    void setUnwritable(boolean unwritable) {
        final int encoded = unwritable ? 1 : 0;
        UNWRITABLE_UPDATER.set(this, encoded);
    }

    /** @return {@code true} if the flag is non-zero */
    boolean isUnwritable() {
        return UNWRITABLE_UPDATER.get(this) != 0;
    }
}
Disruptor无锁化架构深度解析
Disruptor是高性能的并发框架,其核心是完全无锁化的环形缓冲区设计。
Disruptor无锁化设计概览
1. RingBuffer无锁化实现
RingBuffer是Disruptor的核心,通过精巧的设计实现完全无锁化。
// RingBuffer无锁化实现
/**
 * Simplified ring buffer: events live in a pre-sized array and are addressed by
 * sequence number masked with indexMask.
 * NOTE(review): the "sequencer" member used below and the three-argument constructor
 * are not declared in this excerpt — presumably inherited or omitted; confirm.
 */
public final class RingBuffer<E> extends RingBufferFields<E> implements Cursored, EventSequencer<E>, EventSink<E> {
// Padding fields; per the original note they exist to avoid false sharing
protected long p1, p2, p3, p4, p5, p6, p7;
// Core sequence tracking the published position
private final Sequence cursor = new Sequence(Sequencer.INITIAL_CURSOR_VALUE);
// Events are stored in an array
private final Object[] entries;
// Mask applied to a sequence to obtain the array index
// (assumes the buffer size is a power of two — TODO confirm)
protected final int indexMask;
/** Delegates to a three-argument constructor with a SingleProducerSequencer and BlockingWaitStrategy. */
public RingBuffer(EventFactory<E> eventFactory, int bufferSize) {
this(eventFactory, new SingleProducerSequencer(bufferSize, new BlockingWaitStrategy()), bufferSize);
}
/** Claims the next sequence by delegating to the sequencer. */
@Override
public long next() {
return sequencer.next();
}
/** Claims the next n sequences by delegating to the sequencer. */
@Override
public long next(int n) {
return sequencer.next(n);
}
/** Returns the event stored at the slot for the given sequence. */
@Override
public E get(long sequence) {
return elementAt(sequence);
}
// Locates the slot with a bitwise AND instead of a modulo.
// The cast applies to sequence first; the low bits kept by the mask are unaffected by it.
protected final E elementAt(long sequence) {
return (E) entries[(int) sequence & indexMask];
}
/** Publishes a sequence: writes the cursor, then delegates to the sequencer. */
@Override
public void publish(long sequence) {
cursor.set(sequence); // update the cursor
sequencer.publish(sequence); // per the original note: notifies waiters
}
}
// 单生产者序列器(完全无锁化)
/**
 * Simplified single-producer sequencer. Because only one thread claims sequences,
 * the claim position is a plain {@code long} and no CAS is used.
 *
 * <p>The producer-local claim position is held in {@code nextValue}. The original snippet
 * stored it in a {@code long} field named {@code cursor}, which made
 * {@code cursor.set(sequence)} in {@link #publish(long)} uncompilable and would have hidden
 * any {@code Sequence cursor} provided by the superclass.
 * NOTE(review): {@code cursor}, {@code bufferSize}, {@code waitStrategy} and
 * {@code getMinimumSequence} are assumed to come from the superclass chain, as in the
 * upstream Disruptor sources — confirm.
 */
public final class SingleProducerSequencer extends SingleProducerSequencerFields {

    // Padding fields; per the original note they exist to avoid false sharing.
    protected long p1, p2, p3, p4, p5, p6, p7;

    // Highest sequence claimed so far by the (single) producer thread.
    protected long nextValue = Sequence.INITIAL_VALUE;

    // Cached minimum of the gating (consumer) sequences.
    protected volatile long cachedValue = Sequence.INITIAL_VALUE;

    // Consumer sequences that limit how far the producer may advance.
    private volatile Sequence[] gatingSequences = new Sequence[0];

    /**
     * Claims the next {@code n} sequences, waiting while the claim would wrap past the
     * slowest gating sequence.
     *
     * @param n number of sequences to claim; must be at least 1
     * @return the highest claimed sequence
     * @throws IllegalArgumentException if {@code n < 1}
     */
    @Override
    public long next(int n) {
        if (n < 1) {
            throw new IllegalArgumentException("n must be > 0");
        }

        long current = this.nextValue;
        long nextSequence = current + n;
        long wrapPoint = nextSequence - bufferSize;
        long cachedGatingSequence = this.cachedValue;

        // Only consult the gating sequences when the cached minimum cannot prove there is room.
        if (wrapPoint > cachedGatingSequence || cachedGatingSequence > current) {
            long minSequence;
            while (wrapPoint > (minSequence = getMinimumSequence(gatingSequences, current))) {
                waitStrategy.signalAllWhenBlocking();
                LockSupport.parkNanos(1L);
            }
            this.cachedValue = minSequence;
        }

        this.nextValue = nextSequence;
        return nextSequence;
    }

    /**
     * Publishes {@code sequence} by writing the cursor, then signals the wait strategy.
     *
     * @param sequence the sequence to publish
     */
    @Override
    public void publish(long sequence) {
        cursor.set(sequence);
        waitStrategy.signalAllWhenBlocking();
    }
}
2. Sequence无锁化管理
Sequence是Disruptor中管理序列号的核心组件,使用无锁化设计。
// Sequence无锁化实现
public class Sequence extends RhsPadding {
static final long INITIAL_VALUE = -1L;
// 使用volatile保证可见性
private volatile long value;
// 使用填充避免伪共享
private final RhsPadding padding = new RhsPadding();
public Sequence() {
this(INITIAL_VALUE);
}
public Sequence(long initialValue) {
setOrdered(initialValue); // 有序写入
}
// 获取当前值
public long get() {
return value;
}
// 有序设置值(使用StoreStore屏障)
public void setOrdered(final long value) {
this.value = value; // volatile写提供StoreStore屏障
}
// CAS更新值
public boolean compareAndSet(final long expectedValue, final long newValue) {
return UNSAFE.compareAndSwapLong(this, VALUE_OFFSET, expectedValue, newValue);
}
// 原子递增
public long incrementAndGet() {
long current;
long next;
do {
current = get();
next = current + 1;
} while (!compareAndSet(current, next));
return next;
}
// 原子添加
public long addAndGet(final long increment) {
long current;
long next;
do {
current = get();
next = current + increment;
} while (!compareAndSet(current, next));
return next;
}
}
// 使用填充避免伪共享
// Inheritance chain that places seven long fields on each side of "value".
// Intent per the original note: keep "value" away from unrelated fields to avoid false sharing.
// NOTE(review): this relies on the JVM laying out superclass fields before subclass fields
// and on the cache line size — confirm for the target JVM.
class RhsPadding {
// seven longs declared in the base class
protected long p1, p2, p3, p4, p5, p6, p7;
}
class Value extends RhsPadding {
// the actual sequence value
protected volatile long value;
}
class LhsPadding extends Value {
// seven more longs declared in the subclass of Value
// (these hide the identically named fields of RhsPadding)
protected long p1, p2, p3, p4, p5, p6, p7;
}
public class Sequence extends LhsPadding {
// padded on both sides via the superclass chain
}
3. 等待策略无锁化
Disruptor提供多种等待策略以适应不同场景:BusySpin、Yielding等策略完全无锁,而BlockingWaitStrategy则通过锁与条件等待来换取更低的CPU占用。
// 自旋等待策略(完全无锁化)
/**
 * Wait strategy that polls the dependent sequence in a loop without taking any lock,
 * yielding the CPU between polls.
 */
public final class BusySpinWaitStrategy implements WaitStrategy {

    /**
     * Polls until {@code dependentSequence} reaches {@code sequence}.
     *
     * @return the observed sequence, which is at least {@code sequence}
     */
    @Override
    public long waitFor(long sequence, Sequence cursor, Sequence dependentSequence, SequenceBarrier barrier)
            throws AlertException, InterruptedException {
        long observed = dependentSequence.get();
        while (observed < sequence) {
            barrier.checkAlert();
            // Give other runnable threads a chance between polls.
            Thread.yield();
            observed = dependentSequence.get();
        }
        return observed;
    }

    /** No-op: nothing ever blocks in this strategy, so there is nobody to signal. */
    @Override
    public void signalAllWhenBlocking() {
    }
}
// Yield等待策略(无锁化)
/**
 * Two-phase wait strategy: spin for {@code SPIN_TRIES} polls, then call
 * {@link Thread#yield()} between polls. It never blocks or parks.
 *
 * <p>The original snippet had a third {@code parkNanos} branch that could never run,
 * because the counter stops at 0 and never becomes negative. It has been removed so
 * the code states what actually happens.
 */
public final class YieldingWaitStrategy implements WaitStrategy {
    private static final int SPIN_TRIES = 100;

    /**
     * Waits until {@code dependentSequence} reaches {@code sequence}.
     *
     * @return the observed sequence, which is at least {@code sequence}
     */
    @Override
    public long waitFor(long sequence, Sequence cursor, Sequence dependentSequence, SequenceBarrier barrier)
            throws AlertException, InterruptedException {
        long availableSequence;
        int counter = SPIN_TRIES;

        while ((availableSequence = dependentSequence.get()) < sequence) {
            barrier.checkAlert();
            if (counter > 0) {
                // Phase 1: pure spinning.
                counter--;
            } else {
                // Phase 2: spin budget exhausted, yield between polls.
                Thread.yield();
            }
        }
        return availableSequence;
    }

    /** No-op: waiters never block in this strategy. */
    @Override
    public void signalAllWhenBlocking() {
    }
}
// 阻塞等待策略(最小化锁使用)
/**
 * Wait strategy that blocks on a monitor while the cursor is behind the requested
 * sequence, then polls the dependent sequence.
 */
public final class BlockingWaitStrategy implements WaitStrategy {
// Monitor used for wait/notifyAll
private final Object mutex = new Object();
/**
 * Waits until both the cursor and the dependent sequence have reached the given sequence.
 *
 * @return the observed dependent sequence, which is at least the requested sequence
 */
@Override
public long waitFor(long sequence, Sequence cursor, Sequence dependentSequence, SequenceBarrier barrier)
throws AlertException, InterruptedException {
long availableSequence;
// Check without the lock first so the monitor is only entered when waiting is needed
if ((availableSequence = cursor.get()) < sequence) {
synchronized (mutex) {
// Re-check under the lock and wait until notified
while ((availableSequence = cursor.get()) < sequence) {
barrier.checkAlert();
mutex.wait(); // wait for signalAllWhenBlocking()
}
}
}
// Then poll the dependent sequence, yielding between polls
while ((availableSequence = dependentSequence.get()) < sequence) {
barrier.checkAlert();
Thread.yield(); // give up the CPU
}
return availableSequence;
}
/** Wakes every thread waiting on the monitor. */
@Override
public void signalAllWhenBlocking() {
synchronized (mutex) {
mutex.notifyAll(); // wake waiting threads
}
}
}
4. EventProcessor无锁化处理
EventProcessor是Disruptor的事件处理器,采用单线程无锁化设计。
// 单线程事件处理器(完全无锁化)
/**
 * Event processor that consumes events from the ring buffer in batches and passes
 * them one by one to a single handler.
 * NOTE(review): exceptionHandler, notifyStart, notifyTimeout and notifyShutdown are not
 * declared in this excerpt — presumably omitted members; confirm.
 */
public final class BatchEventProcessor<T> implements EventProcessor {
// Guards against run() being entered twice
private final AtomicBoolean running = new AtomicBoolean(false);
// Last sequence this processor has finished handling
private final Sequence sequence = new Sequence(Sequencer.INITIAL_CURSOR_VALUE);
private final RingBuffer<T> ringBuffer;
private final EventHandler<? super T> eventHandler;
private final SequenceBarrier sequenceBarrier;
/**
 * Processing loop; runs until an alert arrives while running is false.
 *
 * @throws IllegalStateException if the processor is already running
 */
@Override
public void run() {
if (!running.compareAndSet(false, true)) {
throw new IllegalStateException("Thread is already running");
}
sequenceBarrier.clearAlert();
notifyStart();
T event = null;
long nextSequence = sequence.get() + 1L;
try {
while (true) {
try {
// Wait until nextSequence is available; the barrier may report a later sequence
final long availableSequence = sequenceBarrier.waitFor(nextSequence);
if (nextSequence <= availableSequence) {
// Handle every event up to availableSequence as one batch
while (nextSequence <= availableSequence) {
event = ringBuffer.get(nextSequence);
// the last argument is true for the final event of the batch
eventHandler.onEvent(event, nextSequence, nextSequence == availableSequence);
nextSequence++;
}
// Record progress once per batch
sequence.set(availableSequence);
}
} catch (final TimeoutException e) {
notifyTimeout(sequence.get());
} catch (final AlertException ex) {
// An alert only ends the loop after halt() has cleared the running flag
if (!running.get()) {
break;
}
} catch (final Throwable ex) {
// Report the failure, mark the failing sequence as processed and move on
exceptionHandler.handleEventException(ex, nextSequence, event);
sequence.set(nextSequence);
nextSequence++;
}
}
} finally {
notifyShutdown();
running.set(false);
}
}
/** Returns the last processed sequence. */
@Override
public long getSequence() {
return sequence.get();
}
/** Requests a stop: clears the running flag, then alerts the barrier. */
@Override
public void halt() {
running.set(false);
sequenceBarrier.alert();
}
}
// WorkProcessor工作处理器(无锁化)
/**
 * Work processor: claims sequences from a shared work sequence with CAS, then
 * handles the event at each claimed sequence.
 * NOTE(review): exceptionHandler, notifyStart, notifyTimeout and notifyShutdown are not
 * declared in this excerpt — presumably omitted members; confirm.
 */
public final class WorkProcessor<T> implements EventProcessor {
// Guards against run() being entered twice
private final AtomicBoolean running = new AtomicBoolean(false);
// This processor's own progress sequence
private final Sequence sequence = new Sequence(Sequencer.INITIAL_CURSOR_VALUE);
private final RingBuffer<T> ringBuffer;
private final WorkHandler<? super T> workHandler;
private final SequenceBarrier sequenceBarrier;
// Sequence that is advanced with CAS to claim work
private final WorkSequence workSequence;
/**
 * Processing loop; runs until an alert arrives while running is false.
 *
 * @throws IllegalStateException if the processor is already running
 */
@Override
public void run() {
if (!running.compareAndSet(false, true)) {
throw new IllegalStateException("Thread is already running");
}
sequenceBarrier.clearAlert();
notifyStart();
// true when the previously claimed sequence has been handled and a new one must be claimed
boolean processedSequence = true;
long cachedAvailableSequence = Long.MIN_VALUE;
long nextSequence = sequence.get();
T event = null;
try {
while (true) {
try {
// Claim the next sequence from workSequence
if (processedSequence) {
processedSequence = false;
do {
nextSequence = workSequence.get() + 1L;
// Publish own progress as "everything before the claim"
sequence.set(nextSequence - 1L);
} while (!workSequence.compareAndSet(nextSequence - 1L, nextSequence));
}
// Handle the event if it is known to be available, otherwise wait for it
if (cachedAvailableSequence >= nextSequence) {
event = ringBuffer.get(nextSequence);
workHandler.onEvent(event);
processedSequence = true;
} else {
cachedAvailableSequence = sequenceBarrier.waitFor(nextSequence);
}
} catch (final TimeoutException e) {
notifyTimeout(sequence.get());
} catch (final AlertException ex) {
// An alert only ends the loop once the running flag has been cleared
if (!running.get()) {
break;
}
} catch (final Throwable ex) {
// Report the failure and claim a new sequence on the next iteration
exceptionHandler.handleEventException(ex, nextSequence, event);
processedSequence = true;
}
}
} finally {
notifyShutdown();
running.set(false);
}
}
}
无锁化编程实践案例
1. 高性能计数器实现
// 无锁化高性能计数器
/**
 * High-throughput counter that delegates to {@link LongAdder}, which spreads
 * contended updates over several internal cells.
 */
public class LockFreeCounter {

    private final LongAdder adder = new LongAdder();

    /** Adds one. */
    public void increment() {
        adder.increment();
    }

    /** Adds {@code x} (may be negative). */
    public void add(long x) {
        adder.add(x);
    }

    /** @return the current total */
    public long sum() {
        return adder.sum();
    }

    /** Resets the total to zero. */
    public void reset() {
        adder.reset();
    }
}
// 自定义无锁化计数器
/**
 * Striped counter: an uncontended update is a single CAS on {@code base}; when that
 * CAS loses a race, the update goes to one of several cells selected per thread, so that
 * contending threads mostly hit different memory locations.
 *
 * <p>The original snippet did not compile: it called {@code compareAndSet}/{@code get} on
 * a {@code LongAdder}, referenced undefined {@code getProbe}/{@code longAccumulate}
 * helpers, and used {@code sun.misc.Unsafe.getUnsafe()}, which rejects application
 * callers. This version uses only {@code java.util.concurrent.atomic} and a fixed cell table.
 */
public class StripedCounter {

    // Power-of-two table size so a cell index can be taken with a bit mask.
    private static final int CELL_COUNT = nextPowerOfTwo(Runtime.getRuntime().availableProcessors());

    private final java.util.concurrent.atomic.AtomicLong base = new java.util.concurrent.atomic.AtomicLong();
    private final java.util.concurrent.atomic.AtomicLong[] cells = newCells(CELL_COUNT);

    /**
     * Adds {@code x} to the counter.
     *
     * @param x the amount to add (may be negative)
     */
    public void add(long x) {
        long b = base.get();
        if (base.compareAndSet(b, b + x)) {
            return; // fast path: no contention on base
        }
        // Contended: fall back to this thread's cell.
        cells[probe() & (cells.length - 1)].addAndGet(x);
    }

    /**
     * Returns the current total. Like {@code LongAdder.sum()}, the result is not an atomic
     * snapshot if updates run concurrently.
     *
     * @return base plus the sum of all cells
     */
    public long sum() {
        long total = base.get();
        for (java.util.concurrent.atomic.AtomicLong cell : cells) {
            total += cell.get();
        }
        return total;
    }

    /** Creates a fully populated cell table of the given size. */
    private static java.util.concurrent.atomic.AtomicLong[] newCells(int size) {
        java.util.concurrent.atomic.AtomicLong[] table = new java.util.concurrent.atomic.AtomicLong[size];
        for (int i = 0; i < size; i++) {
            table[i] = new java.util.concurrent.atomic.AtomicLong();
        }
        return table;
    }

    /** Per-thread hash used to pick a cell; stable for the lifetime of the thread. */
    private static int probe() {
        int h = System.identityHashCode(Thread.currentThread());
        return h ^ (h >>> 16);
    }

    /** Smallest power of two that is at least {@code n} (minimum 1). */
    private static int nextPowerOfTwo(int n) {
        return n <= 1 ? 1 : Integer.highestOneBit(n - 1) << 1;
    }
}
2. 无锁化队列实现
// Michael-Scott无锁队列
/**
 * Michael-Scott lock-free FIFO queue.
 *
 * <p>The {@code next} link is an {@link AtomicReference} so it can be updated with CAS;
 * the original declared it as a plain {@code volatile} field and then called
 * {@code compareAndSet} on it, which does not compile.
 *
 * @param <T> element type; {@code null} elements are indistinguishable from "empty" on dequeue
 */
public class LockFreeQueue<T> {

    private static class Node<T> {
        final T item;
        final AtomicReference<Node<T>> next = new AtomicReference<>();

        Node(T item) {
            this.item = item;
        }
    }

    // head always points at a dummy node; the first real element is head.next.
    private final Node<T> dummy = new Node<>(null);
    private final AtomicReference<Node<T>> head = new AtomicReference<>(dummy);
    private final AtomicReference<Node<T>> tail = new AtomicReference<>(dummy);

    /**
     * Appends {@code item} to the tail of the queue.
     *
     * @param item the element to add
     */
    public void enqueue(T item) {
        Node<T> newNode = new Node<>(item);
        while (true) {
            Node<T> oldTail = tail.get();
            Node<T> oldTailNext = oldTail.next.get();
            if (oldTail != tail.get()) {
                continue; // tail moved between the two reads; start over
            }
            if (oldTailNext != null) {
                // Another enqueue linked its node but has not swung tail yet: help it.
                tail.compareAndSet(oldTail, oldTailNext);
                continue;
            }
            if (oldTail.next.compareAndSet(null, newNode)) {
                // Linked. Swinging tail is best-effort; a failure means someone helped.
                tail.compareAndSet(oldTail, newNode);
                return;
            }
        }
    }

    /**
     * Removes and returns the element at the head of the queue.
     *
     * @return the head element, or {@code null} if the queue is empty
     */
    public T dequeue() {
        while (true) {
            Node<T> oldHead = head.get();
            Node<T> oldTail = tail.get();
            Node<T> oldHeadNext = oldHead.next.get();
            if (oldHead != head.get()) {
                continue; // head moved between the reads; start over
            }
            if (oldHead == oldTail) {
                if (oldHeadNext == null) {
                    return null; // empty
                }
                // tail is lagging behind a linked node: help advance it.
                tail.compareAndSet(oldTail, oldHeadNext);
            } else {
                // Read the item before the CAS; afterwards the node is the new dummy.
                T result = oldHeadNext.item;
                if (head.compareAndSet(oldHead, oldHeadNext)) {
                    return result;
                }
            }
        }
    }
}
3. 无锁化缓存实现
// 无锁化LRU缓存
/**
 * Bounded cache with least-recently-used eviction that takes no explicit locks.
 *
 * <p>Recency is tracked with a per-entry access stamp drawn from a shared
 * {@code AtomicLong} clock, instead of a hand-maintained doubly linked list. The original
 * list-based version was incorrect even on one thread: calling {@code get} on the head
 * node linked the node to itself, moving the tail node left {@code tail} stale, removing
 * the last node left {@code head} stale, and the unsynchronised pointer updates could
 * corrupt the list under concurrency. A capacity of 0 made {@code put} loop forever.
 *
 * <p>Trade-off: eviction scans all entries for the smallest stamp, so it costs O(n) per
 * evicted entry. Under concurrent writers the size may briefly exceed the capacity.
 *
 * @param <K> key type
 * @param <V> value type
 */
public class LockFreeLRUCache<K, V> {

    /** Cached value plus the stamp of its most recent access. */
    private static final class Entry<V> {
        final V value;
        volatile long lastAccess;

        Entry(V value, long lastAccess) {
            this.value = value;
            this.lastAccess = lastAccess;
        }
    }

    private final int capacity;
    private final ConcurrentHashMap<K, Entry<V>> map;
    // Monotonic logical clock; a larger stamp means a more recent access.
    private final java.util.concurrent.atomic.AtomicLong clock = new java.util.concurrent.atomic.AtomicLong();

    /**
     * @param capacity maximum number of entries; must be positive
     * @throws IllegalArgumentException if {@code capacity <= 0}
     */
    public LockFreeLRUCache(int capacity) {
        if (capacity <= 0) {
            throw new IllegalArgumentException("capacity must be > 0: " + capacity);
        }
        this.capacity = capacity;
        this.map = new ConcurrentHashMap<>(capacity);
    }

    /**
     * Looks up {@code key} and marks the entry as most recently used.
     *
     * @return the cached value, or {@code null} if absent
     */
    public V get(K key) {
        Entry<V> entry = map.get(key);
        if (entry == null) {
            return null;
        }
        entry.lastAccess = clock.incrementAndGet();
        return entry.value;
    }

    /**
     * Inserts or replaces the mapping for {@code key}, marks it as most recently used and
     * evicts least-recently-used entries while the cache is over capacity.
     */
    public void put(K key, V value) {
        map.put(key, new Entry<>(value, clock.incrementAndGet()));
        while (map.size() > capacity) {
            evictOldest();
        }
    }

    /** Removes the entry with the smallest access stamp, if it is still mapped. */
    private void evictOldest() {
        K oldestKey = null;
        Entry<V> oldest = null;
        for (java.util.Map.Entry<K, Entry<V>> candidate : map.entrySet()) {
            Entry<V> e = candidate.getValue();
            if (oldest == null || e.lastAccess < oldest.lastAccess) {
                oldest = e;
                oldestKey = candidate.getKey();
            }
        }
        if (oldest != null) {
            // Conditional remove: do not evict a mapping that was replaced since the scan.
            map.remove(oldestKey, oldest);
        }
    }
}
性能对比与基准测试
1. 锁与无锁化性能对比
// 性能测试基准
/**
 * Micro-benchmark comparing a lock-based counter with an atomic counter.
 *
 * <p>Fixes over the original: both counters now implement {@link Counter} (they were
 * passed to a method taking {@code Counter} without implementing it), and the timer
 * starts after the worker threads have been created, so thread start-up cost is not
 * counted as counter throughput.
 */
public class LockVsLockFreeBenchmark {

    /** Common contract for the counters under test. */
    interface Counter {
        void increment();

        long getCount();
    }

    /** Counter guarded by a {@link ReentrantLock}. */
    public static class LockBasedCounter implements Counter {
        private long count = 0;
        private final ReentrantLock lock = new ReentrantLock();

        @Override
        public void increment() {
            lock.lock();
            try {
                count++;
            } finally {
                lock.unlock();
            }
        }

        @Override
        public long getCount() {
            lock.lock();
            try {
                return count;
            } finally {
                lock.unlock();
            }
        }
    }

    /** Counter backed by an {@link AtomicLong}. */
    public static class LockFreeCounter implements Counter {
        private final AtomicLong count = new AtomicLong(0);

        @Override
        public void increment() {
            count.incrementAndGet();
        }

        @Override
        public long getCount() {
            return count.get();
        }
    }

    /** Runs both benchmarks with 10 threads and one million increments per thread. */
    public static void main(String[] args) throws InterruptedException {
        final int threadCount = 10;
        final int operationsPerThread = 1000000;

        System.out.println("=== 基于锁的实现性能测试 ===");
        LockBasedCounter lockBased = new LockBasedCounter();
        benchmarkCounter(lockBased, threadCount, operationsPerThread, "LockBased");

        System.out.println("\n=== 无锁化实现性能测试 ===");
        LockFreeCounter lockFree = new LockFreeCounter();
        benchmarkCounter(lockFree, threadCount, operationsPerThread, "LockFree");
    }

    /**
     * Increments {@code counter} from {@code threadCount} threads and prints throughput.
     *
     * @param counter             counter under test
     * @param threadCount         number of worker threads
     * @param operationsPerThread increments performed by each thread
     * @param name                label used in the printed report
     * @throws InterruptedException if interrupted while waiting for the workers
     */
    private static void benchmarkCounter(Counter counter, int threadCount,
            int operationsPerThread, String name) throws InterruptedException {
        CountDownLatch startLatch = new CountDownLatch(1);
        CountDownLatch endLatch = new CountDownLatch(threadCount);

        for (int i = 0; i < threadCount; i++) {
            new Thread(() -> {
                try {
                    startLatch.await(); // all workers start together
                    for (int j = 0; j < operationsPerThread; j++) {
                        counter.increment();
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                } finally {
                    endLatch.countDown();
                }
            }).start();
        }

        // Start timing only once the threads exist.
        long startTime = System.nanoTime();
        startLatch.countDown();
        endLatch.await();
        long endTime = System.nanoTime();

        long totalOperations = (long) threadCount * operationsPerThread;
        // Guard the division below against a zero-length measurement.
        long durationNanos = Math.max(1L, endTime - startTime);
        System.out.println(name + " 性能结果:");
        System.out.println(" 总操作数: " + totalOperations);
        System.out.println(" 总耗时: " + durationNanos / 1_000_000 + " ms");
        System.out.println(" 每秒操作数: " + (totalOperations * 1_000_000_000L / durationNanos));
        System.out.println(" 最终计数值: " + counter.getCount());
    }
}
2. Netty性能测试
// Netty无锁化性能测试
/**
 * Benchmark that simulates event-loop style task submission over a lock-based queue
 * and over an MPSC queue.
 *
 * <p>Fixes over the original:
 * <ul>
 *   <li>The "locked queue" variant used {@code ConcurrentLinkedQueue}, which is itself a
 *       non-blocking queue; it now uses {@code LinkedBlockingQueue}, which is lock-based.</li>
 *   <li>The measured time included a fixed {@code Thread.sleep(1000)}; the benchmark now
 *       waits only until every task has been processed, with a safety deadline.</li>
 *   <li>The timer starts after the producer threads have been created.</li>
 *   <li>The drain loop is iterative instead of recursive.</li>
 * </ul>
 */
public class NettyLockFreeBenchmark {

    /** Minimal event-loop stand-in: whoever wins the {@code running} flag drains the queue. */
    public static class EventLoopSimulator {
        private final Queue<Runnable> taskQueue;
        private final AtomicBoolean running = new AtomicBoolean(false);
        private final AtomicLong taskCount = new AtomicLong(0);

        /**
         * @param lockFree {@code true} for the MPSC queue, {@code false} for a lock-based queue
         */
        public EventLoopSimulator(boolean lockFree) {
            this.taskQueue = lockFree
                    ? new MpscLinkedQueue<>()
                    : new java.util.concurrent.LinkedBlockingQueue<>();
        }

        /** Enqueues {@code task} and drains the queue if no other thread is draining. */
        public void submitTask(Runnable task) {
            taskQueue.offer(task);
            if (running.compareAndSet(false, true)) {
                processTasks();
            }
        }

        /**
         * Drains the queue. After releasing the flag it re-checks the queue, so a task
         * offered just before the release is not left behind.
         */
        private void processTasks() {
            do {
                Runnable task;
                while ((task = taskQueue.poll()) != null) {
                    task.run();
                    taskCount.incrementAndGet();
                }
                running.set(false);
            } while (!taskQueue.isEmpty() && running.compareAndSet(false, true));
        }

        /** @return number of tasks executed so far */
        public long getTaskCount() {
            return taskCount.get();
        }
    }

    /** Runs the benchmark for both queue variants. */
    public static void main(String[] args) throws InterruptedException {
        final int producerCount = 5;
        final int tasksPerProducer = 100000;

        System.out.println("=== 有锁队列性能测试 ===");
        EventLoopSimulator lockedSimulator = new EventLoopSimulator(false);
        benchmarkEventLoop(lockedSimulator, producerCount, tasksPerProducer, "LockedQueue");

        System.out.println("\n=== 无锁化队列性能测试 ===");
        EventLoopSimulator lockFreeSimulator = new EventLoopSimulator(true);
        benchmarkEventLoop(lockFreeSimulator, producerCount, tasksPerProducer, "LockFreeQueue");
    }

    /**
     * Submits tasks from {@code producerCount} threads and prints throughput.
     *
     * @throws InterruptedException if interrupted while waiting for the producers
     */
    private static void benchmarkEventLoop(EventLoopSimulator simulator, int producerCount,
            int tasksPerProducer, String name) throws InterruptedException {
        CountDownLatch startLatch = new CountDownLatch(1);
        CountDownLatch endLatch = new CountDownLatch(producerCount);

        for (int i = 0; i < producerCount; i++) {
            new Thread(() -> {
                try {
                    startLatch.await();
                    for (int j = 0; j < tasksPerProducer; j++) {
                        final int taskId = j;
                        simulator.submitTask(() -> {
                            // trivial stand-in for real work
                            int result = taskId * 2;
                        });
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                } finally {
                    endLatch.countDown();
                }
            }).start();
        }

        long totalTasks = (long) producerCount * tasksPerProducer;
        long startTime = System.nanoTime();
        startLatch.countDown();
        endLatch.await();

        // Wait for any remaining tasks, but never longer than the safety deadline.
        long deadline = System.nanoTime() + 10_000_000_000L;
        while (simulator.getTaskCount() < totalTasks && System.nanoTime() < deadline) {
            Thread.yield();
        }
        long endTime = System.nanoTime();

        // Guard the division below against a zero-length measurement.
        long durationNanos = Math.max(1L, endTime - startTime);
        System.out.println(name + " 性能结果:");
        System.out.println(" 总任务数: " + totalTasks);
        System.out.println(" 处理任务数: " + simulator.getTaskCount());
        System.out.println(" 总耗时: " + durationNanos / 1_000_000 + " ms");
        System.out.println(" 每秒任务数: " + (totalTasks * 1_000_000_000L / durationNanos));
    }
}
3. Disruptor性能测试
// Disruptor无锁化性能测试
/**
 * Throughput benchmark: one producer publishes events into a Disruptor ring buffer and
 * one handler counts them.
 */
public class DisruptorLockFreeBenchmark {

    /** Mutable event carried in the ring buffer. */
    public static class ValueEvent {
        private long value;

        public void setValue(long value) {
            this.value = value;
        }

        public long getValue() {
            return value;
        }

        /** Factory used to pre-populate the ring buffer. */
        public final static EventFactory<ValueEvent> EVENT_FACTORY = ValueEvent::new;
    }

    /** Handler that counts every event it receives. */
    public static class ValueEventHandler implements EventHandler<ValueEvent> {
        private final AtomicLong counter = new AtomicLong(0);

        @Override
        public void onEvent(ValueEvent event, long sequence, boolean endOfBatch) {
            counter.incrementAndGet();
        }

        public long getCount() {
            return counter.get();
        }
    }

    /** Builds a single-producer Disruptor with a yielding wait strategy and runs the benchmark. */
    public static void main(String[] args) throws InterruptedException {
        final int bufferSize = 1024 * 64;      // 64K slots
        final int eventCount = 10_000_000;     // ten million events

        final ValueEventHandler handler = new ValueEventHandler();
        final Disruptor<ValueEvent> disruptor = new Disruptor<>(
                ValueEvent.EVENT_FACTORY,
                bufferSize,
                DaemonThreadFactory.INSTANCE,
                ProducerType.SINGLE,
                new YieldingWaitStrategy());
        disruptor.handleEventsWith(handler);

        System.out.println("=== Disruptor无锁化性能测试 ===");
        benchmarkDisruptor(disruptor, eventCount, handler);
    }

    /**
     * Publishes {@code eventCount} events, waits until the handler has counted them all,
     * prints the timings and shuts the Disruptor down.
     */
    private static void benchmarkDisruptor(Disruptor<ValueEvent> disruptor,
            int eventCount, ValueEventHandler handler) throws InterruptedException {
        disruptor.start();
        final RingBuffer<ValueEvent> ringBuffer = disruptor.getRingBuffer();

        final long startTime = System.nanoTime();

        int published = 0;
        while (published < eventCount) {
            final long sequence = ringBuffer.next();
            try {
                ringBuffer.get(sequence).setValue(published);
            } finally {
                // Always publish a claimed sequence, even if filling the event failed.
                ringBuffer.publish(sequence);
            }
            published++;
        }

        // Poll until the handler has seen every event.
        while (handler.getCount() < eventCount) {
            Thread.sleep(10);
        }

        final long durationNanos = System.nanoTime() - startTime;
        System.out.println("Disruptor 性能结果:");
        System.out.println(" 总事件数: " + eventCount);
        System.out.println(" 处理事件数: " + handler.getCount());
        System.out.println(" 总耗时: " + durationNanos / 1_000_000 + " ms");
        System.out.println(" 每秒事件数: " + (eventCount * 1_000_000_000L / durationNanos));
        System.out.println(" 平均延迟: " + (durationNanos / eventCount) + " ns");

        disruptor.shutdown();
    }
}
最佳实践与常见陷阱
1. 无锁化编程最佳实践
// 最佳实践1:使用合适的原子类
/** Collection of small lock-free idioms: atomic counters, CAS loops, stamped references, volatile publication. */
public class BestPractices {

    // Practice 1: an AtomicLong instead of a synchronized counter.
    private final AtomicLong counter = new AtomicLong(0);

    // Practice 2: LongAdder for counters updated by many threads.
    private final LongAdder highConcurrencyCounter = new LongAdder();

    // Practice 5: volatile fields for publishing configuration.
    private volatile boolean initialized = false;
    private volatile Config config;

    /** Atomically increments the counter. */
    public void increment() {
        counter.incrementAndGet();
    }

    /** Adds {@code value} to the high-concurrency counter. */
    public void add(long value) {
        highConcurrencyCounter.add(value);
    }

    /**
     * Practice 3: CAS loop with an explicit exit.
     *
     * @return {@code true} if {@code value} was {@code expected} and is now {@code newValue};
     *         {@code false} as soon as a different value is observed
     */
    public boolean updateValue(AtomicInteger value, int expected, int newValue) {
        for (;;) {
            final int observed = value.get();
            if (observed != expected) {
                return false;
            }
            if (value.compareAndSet(observed, newValue)) {
                return true;
            }
            // CAS lost a race; loop and look at the value again.
        }
    }

    /**
     * Practice 4: a reference paired with a stamp, so an A -> B -> A change is detected.
     *
     * @param <T> referenced value type
     */
    public class VersionedValue<T> {
        private final AtomicStampedReference<T> ref;

        public VersionedValue(T initialValue) {
            this.ref = new AtomicStampedReference<>(initialValue, 0);
        }

        /**
         * Replaces the value if it equals {@code expectedValue} and nobody changed the
         * reference or stamp in between.
         *
         * @return {@code true} if the update was applied
         */
        public boolean update(T expectedValue, T newValue) {
            final int[] stampOut = new int[1];
            final T seenValue = ref.get(stampOut);
            final int seenStamp = stampOut[0];
            if (Objects.equals(seenValue, expectedValue)) {
                return ref.compareAndSet(seenValue, newValue, seenStamp, seenStamp + 1);
            }
            return false;
        }
    }

    /** Stores the configuration first, then raises the initialized flag. */
    public void initialize(Config newConfig) {
        this.config = newConfig;
        this.initialized = true;
    }

    /**
     * @return the configuration
     * @throws IllegalStateException if {@link #initialize} has not been called yet
     */
    public Config getConfig() {
        if (initialized) {
            return config;
        }
        throw new IllegalStateException("Not initialized");
    }
}
2. 常见陷阱与解决方案
/**
 * Side-by-side examples of common lock-free mistakes and their usual remedies.
 * Methods and classes labelled "wrong" are intentionally left defective for
 * illustration and must not be copied into production code.
 */
public class CommonPitfalls {

    // Pitfall 1: ignoring a failed CAS.

    /**
     * WRONG: performs a single CAS and discards the result, so the increment is
     * silently lost whenever another thread changed the value in between.
     *
     * @param value the counter to (attempt to) increment
     */
    public void wrongCASUsage(AtomicInteger value) {
        int expected = value.get();
        int newValue = expected + 1;
        // Wrong: the CAS may fail, and nothing retries or reports it.
        value.compareAndSet(expected, newValue);
    }

    /**
     * Correct: re-reads the value and retries until the CAS succeeds.
     *
     * @param value the counter to increment
     */
    public void correctCASUsage(AtomicInteger value) {
        int expected;
        int newValue;
        do {
            expected = value.get();
            newValue = expected + 1;
        } while (!value.compareAndSet(expected, newValue)); // retry until it succeeds
    }

    // Pitfall 2: overlooking the ABA problem.
    public class ABAPitfall {
        private final AtomicReference<String> ref = new AtomicReference<>("A");

        /** WRONG: a plain reference CAS cannot tell "unchanged" from "changed and changed back". */
        public void abaProblem() {
            String expected = ref.get();
            // Other threads may change A -> B -> A while this runs.
            doSomething();
            // This CAS succeeds although the value was modified in the meantime.
            ref.compareAndSet(expected, "C");
        }

        // Remedy: attach a version stamp to the reference.
        private final AtomicStampedReference<String> versionedRef =
                new AtomicStampedReference<>("A", 0);

        /** Uses the stamp so that an intermediate A -> B -> A sequence makes the CAS fail. */
        public void solveABA() {
            int[] stampHolder = new int[1];
            String expectedValue = versionedRef.get(stampHolder);
            int expectedStamp = stampHolder[0];
            doSomething();
            // The CAS only succeeds if both the reference and the stamp are unchanged.
            versionedRef.compareAndSet(expectedValue, "C", expectedStamp, expectedStamp + 1);
        }
    }

    // Pitfall 3: false sharing.
    // Declared static: before Java 16 an inner (non-static) class may not declare
    // static member classes, so the nested static classes below would not compile.
    public static class FalseSharing {

        /** WRONG: the value may end up on the same cache line as unrelated hot fields. */
        public static class VolatileLong {
            public volatile long value = 0L;
        }

        /**
         * Remedy: surround the value with unused fields so it does not share a cache line.
         */
        public static class PaddedAtomicLong extends AtomicLong {
            // Padding fields. Only p7 / q7 receive the initializer 7L; the others
            // default to 0. The values are irrelevant, only the occupied space matters.
            // NOTE(review): fields declared in a subclass are presumably laid out after
            // the inherited AtomicLong state, so both groups would pad the space
            // following the value rather than preceding it - confirm with a layout tool.
            public volatile long p1, p2, p3, p4, p5, p6, p7 = 7L;
            public volatile long q1, q2, q3, q4, q5, q6, q7 = 7L;

            public PaddedAtomicLong() {
                super();
            }
        }

        /**
         * Preferred remedy on JDK 8+: let the JVM add the padding via {@code @Contended}.
         */
        // NOTE(review): sun.misc.Contended is the JDK 8 location of this annotation; on
        // later JDKs it presumably lives in jdk.internal.vm.annotation, and for application
        // classes it is presumably only honoured with -XX:-RestrictContended - confirm
        // for the target JDK.
        @sun.misc.Contended
        public static class ContendedLong {
            public volatile long value = 0L;
        }
    }

    // Pitfall 4: memory leaks.
    public class MemoryLeak {
        // Lock-free structures can keep objects reachable longer than intended.
        private final ConcurrentLinkedQueue<Node> queue = new ConcurrentLinkedQueue<>();

        /** WRONG (as presented): enqueues without ever releasing older nodes. */
        public void wrongUpdate(Node newNode) {
            queue.offer(newNode);
            // Older nodes may still be referenced and therefore cannot be collected.
        }

        /**
         * Removes the head node, clears its links, then enqueues the new node.
         */
        // NOTE(review): this discards the previous head on every call, so the queue
        // never grows by more than the elements already present - confirm that this
        // replace-the-head behaviour is what callers want.
        public void correctUpdate(Node newNode) {
            Node oldNode = queue.poll(); // remove the old node
            if (oldNode != null) {
                oldNode.next = null; // drop links to help the GC
                oldNode.prev = null;
            }
            queue.offer(newNode);
        }
    }

    // Pitfall 5: using lock-free machinery where it is not needed.
    public class OveruseLockFree {
        private final AtomicBoolean flag = new AtomicBoolean(false);

        /** WRONG: a CAS retry loop for an unconditional boolean write is needless complexity. */
        public void setFlag(boolean value) {
            // For a plain boolean a volatile write is enough.
            while (!flag.compareAndSet(flag.get(), value)) {
                // retry... overly complicated
            }
        }

        // Correct: a volatile field.
        private volatile boolean simpleFlag = false;

        /** Simple and direct: a single volatile write. */
        public void setSimpleFlag(boolean value) {
            simpleFlag = value;
        }
    }
}
3. 性能调优建议
/**
 * Performance tuning suggestions for lock-free, Disruptor-style pipelines:
 * wait-strategy selection, batching, object pooling, CPU affinity and JVM flags.
 */
public class PerformanceTuning {

    // Suggestion 1: choose a wait strategy that matches the workload.
    public enum WaitStrategyType {
        SPIN() {
            @Override
            public WaitStrategy create() {
                return new BusySpinWaitStrategy(); // aimed at low-latency scenarios
            }
        },
        YIELD() {
            @Override
            public WaitStrategy create() {
                return new YieldingWaitStrategy(); // a middle ground
            }
        },
        SLEEP() {
            @Override
            public WaitStrategy create() {
                return new SleepingWaitStrategy(); // aimed at throughput-oriented scenarios
            }
        };

        /** @return a new wait strategy instance for this type */
        public abstract WaitStrategy create();
    }

    // Suggestion 2: process events in batches.
    public class BatchProcessor {
        private final int batchSize = 100; // batch size

        /**
         * Loops forever, waiting for a range of sequences and passing each event in the
         * range to {@code handler}; the last event of each range is flagged as end-of-batch.
         *
         * @param ringBuffer the buffer events are read from
         * @param handler    the handler invoked for every event
         */
        // NOTE(review): waitStrategy, cursor, sequenceBarrier and handleException are not
        // declared in this class - presumably supplied by surrounding code; confirm.
        // NOTE(review): the wait targets nextSequence + batchSize, so it looks like events
        // are held back until a whole batch is available - confirm this is intended.
        // NOTE(review): the loop has no exit; every exception, including one that signals
        // interruption, goes to handleException and the loop continues - confirm.
        public void processBatch(RingBuffer<ValueEvent> ringBuffer,
                EventHandler<ValueEvent> handler) {
            long nextSequence = 0;
            while (true) {
                try {
                    // Obtain the highest available sequence for this batch.
                    long availableSequence = waitStrategy.waitFor(nextSequence + batchSize,
                            cursor,
                            sequenceBarrier);
                    // Hand every event in the batch to the handler.
                    for (long sequence = nextSequence; sequence <= availableSequence; sequence++) {
                        ValueEvent event = ringBuffer.get(sequence);
                        handler.onEvent(event, sequence, sequence == availableSequence);
                    }
                    nextSequence = availableSequence + 1;
                } catch (Exception e) {
                    handleException(e);
                }
            }
        }
    }

    // Suggestion 3: pre-allocate memory and pool objects.
    public class ObjectPooling {
        private final ConcurrentLinkedQueue<MyObject> pool = new ConcurrentLinkedQueue<>();
        private final int maxPoolSize = 1000;
        // Number of slots reserved in the pool. Tracked separately because
        // ConcurrentLinkedQueue.size() walks the whole queue on every call.
        private final AtomicInteger pooledCount = new AtomicInteger();

        /**
         * Takes an object from the pool, or creates a new one when the pool is empty.
         *
         * @return a pooled or freshly created object
         */
        public MyObject acquire() {
            MyObject obj = pool.poll();
            if (obj == null) {
                return new MyObject(); // nothing pooled, create a new instance
            }
            pooledCount.decrementAndGet();
            return obj;
        }

        /**
         * Returns an object to the pool. When the pool already holds {@code maxPoolSize}
         * objects the instance is dropped and left to the garbage collector.
         *
         * @param obj the object to return; it is reset before being pooled
         */
        public void release(MyObject obj) {
            // Reserve a slot first, so concurrent releases cannot push the pool past its cap.
            if (pooledCount.incrementAndGet() > maxPoolSize) {
                pooledCount.decrementAndGet();
                return; // pool is full, let the GC reclaim the object
            }
            boolean pooled = false;
            try {
                obj.reset(); // clear the object's state before reuse
                pooled = pool.offer(obj);
            } finally {
                if (!pooled) {
                    // Give the slot back if reset() or offer() failed.
                    pooledCount.decrementAndGet();
                }
            }
        }
    }

    // Suggestion 4: CPU affinity.
    public class CPUAffinity {
        /**
         * Placeholder for binding the current thread to a CPU. This implementation only
         * renames the current thread; it does not change any scheduling affinity.
         *
         * @param cpuId the CPU index used in the thread name
         */
        public void bindToCPU(int cpuId) {
            Thread.currentThread().setName("Worker-CPU-" + cpuId);
            // A real implementation would call the operating system API through JNA or JNI,
            // or the process could be pinned externally, e.g.: taskset -c 0-3 java MyApplication
        }

        /** Starts one thread per available processor and calls {@link #bindToCPU(int)} in each. */
        public void setupDedicatedCPUs() {
            for (int i = 0; i < Runtime.getRuntime().availableProcessors(); i++) {
                final int cpuId = i;
                Thread eventLoopThread = new Thread(() -> {
                    bindToCPU(cpuId);
                    // run the event loop here
                }, "EventLoop-CPU-" + i);
                eventLoopThread.start();
            }
        }
    }

    // Suggestion 5: JVM tuning.
    public class JVMOptimization {
        /**
         * @return the suggested JVM flags joined by single spaces
         */
        // NOTE(review): several of these flags (the Print* GC logging flags, -Xloggc and
        // UseCGroupMemoryLimitForHeap) look specific to older JDK releases - confirm
        // against the target JDK before using the list.
        public static String getRecommendedJVMFlags() {
            return String.join(" ",
                    "-server",                           // server mode
                    "-XX:+UseG1GC",                      // G1 garbage collector
                    "-XX:MaxGCPauseMillis=200",          // GC pause-time goal
                    "-XX:+UseStringDeduplication",       // string deduplication
                    "-XX:+OptimizeStringConcat",         // optimize string concatenation
                    "-XX:+UseLargePages",                // large memory pages
                    "-XX:+UnlockExperimentalVMOptions",  // unlock experimental options
                    "-XX:+UseCGroupMemoryLimitForHeap",  // container memory limit
                    "-XX:+PrintGC",                      // print GC information
                    "-XX:+PrintGCDetails",               // print GC details
                    "-Xloggc:gc.log"                     // GC log file
            );
        }

        /** Checklist of memory-allocation ideas; intentionally has no implementation. */
        public void optimizeMemoryAllocation() {
            // Use object pools to avoid frequent GC.
            // Use off-heap memory to reduce GC pressure.
            // Size the young generation sensibly.
            // Avoid large objects going straight to the old generation.
        }
    }
}
总结
无锁化编程作为高性能架构的核心技术,通过精巧的算法设计和硬件原语支持,实现了在不使用传统锁机制的情况下保证线程安全,大幅提升了系统的并发处理能力和性能表现。
核心原则
- 原子操作:利用CAS等原子操作实现无锁化状态更新
- 可见性保证:通过volatile和内存屏障确保内存可见性
- ABA问题解决:使用版本号等机制避免ABA问题
- 伪共享避免:通过缓存行填充优化内存访问性能
关键技术
- CAS原子操作:无锁化编程的基础
- 内存屏障:保证指令执行顺序和内存可见性
- 原子引用:实现复杂数据结构的无锁化操作
- 版本控制:解决ABA问题的有效手段
实践要点
- 选择合适的场景:无锁化并非万能,需要根据具体场景选择
- 正确处理失败:CAS操作可能失败,需要有重试机制
- 避免过度优化:不要为了性能而牺牲代码可读性和维护性
- 充分测试验证:无锁化代码复杂,需要充分测试确保正确性
性能收益
通过Netty和Disruptor等实际案例可以看出,无锁化架构能够带来显著的性能提升:
- 延迟降低:减少锁竞争带来的等待时间
- 吞吐量提升:支持更高的并发处理能力
- 可扩展性增强:性能能够随CPU核心数更好地扩展(理想情况下接近线性,但仍受共享变量争用和缓存一致性开销的限制)
- CPU利用率提高:减少上下文切换和线程阻塞
无锁化编程是一门平衡的艺术,需要在性能、复杂度、可维护性之间找到最佳平衡点。通过遵循高性能法则,我们可以构建出既能够处理海量并发请求,又具备优秀性能表现的系统架构。
高性能架构的核心在于理解硬件特性、算法原理和系统行为的深层次关系。无锁化编程正是这种深度理解的体现,它让我们能够突破传统锁机制的性能瓶颈,构建出真正高性能的分布式系统。
5343

被折叠的 条评论
为什么被折叠?



