原文作者:@玄冬Wong
keywords:Non-blocking、Blocking、multi-producer、multi-consumer、benchmark、performance
/************************************************************************/
/* 测试多个生产者多个消费者线程环境下,boost::lockfree::queue和std::mutex的性能 */
/************************************************************************/
#define _ENABLE_ATOMIC_ALIGNMENT_FIX
#include <windows.h>
#include <time.h>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <iostream>
#include <list>
#include <mutex>
#include <thread>
#include <vector>
#include <boost/circular_buffer.hpp>
#include <boost/lockfree/queue.hpp>
#include <boost/lockfree/spsc_queue.hpp>
#include <boost/thread/condition_variable.hpp>
#include <boost/thread/mutex.hpp>
// Benchmark parameters. The loops below stop once a counter reaches
// LOOP_COUNT * THREAD_COUNT.
#define LOOP_COUNT 5000000
// Capacity used for both the circular buffer and the lock-free queue.
#define QUEUE_CAPACITY 65534
// Number of producer threads and, separately, of consumer threads.
#define THREAD_COUNT 4

// State shared by the blocking (mutex + condition variable) variant:
// `m` guards `cb`; `cv` is used by producers and consumers alike.
std::mutex m;
std::condition_variable cv;
boost::circular_buffer<int> cb(QUEUE_CAPACITY);

// Queue used by the non-blocking variant; allocated with `new` in main().
boost::lockfree::queue<int, boost::lockfree::capacity<QUEUE_CAPACITY>>* mul_queue;

// Counters shared by both variants. Brace-initialised because
// `std::atomic<T> x = 0;` is copy-initialisation, which needs the (deleted)
// copy constructor before C++17 and is rejected by conforming compilers.
std::atomic<unsigned int> push_count{0};
std::atomic<unsigned int> pop_count{0};
/**
 * Producer for the mutex/condition-variable benchmark.
 *
 * Repeatedly locks `m`, waits until `cb` has room, appends the current value
 * of `push_count` and increments it. All producers together push exactly
 * LOOP_COUNT * THREAD_COUNT items: the limit is tested under the lock
 * *before* pushing, so a thread that wakes up after the target was reached
 * leaves without adding a surplus item (the previous code tested only after
 * the push, so the counter ended at total + THREAD_COUNT - 1).
 */
void blocking_productor()
{
    const unsigned int total = LOOP_COUNT * THREAD_COUNT;
    while (true)
    {
        std::unique_lock<std::mutex> lk(m);
        // Also wake up when the work is finished, so a producer parked on a
        // full buffer does not wait forever.
        cv.wait(lk, [total] { return push_count >= total || cb.size() < QUEUE_CAPACITY; });
        if (push_count >= total)
        {
            break;
        }
        cb.push_back(static_cast<int>(push_count.load()));
        if (++push_count >= total)
        {
            // Last item pushed: release every thread still waiting on `cv`.
            cv.notify_all();
            break;
        }
        cv.notify_one();
    }
}

/**
 * Consumer for the mutex/condition-variable benchmark.
 *
 * Repeatedly locks `m`, waits until `cb` is non-empty, removes the front
 * element and increments `pop_count`. All consumers together pop exactly
 * LOOP_COUNT * THREAD_COUNT items, matching the producers; the limit is
 * tested under the lock before popping, so no consumer blocks waiting for an
 * item that will never be produced.
 */
void blocking_customer()
{
    const unsigned int total = LOOP_COUNT * THREAD_COUNT;
    while (true)
    {
        std::unique_lock<std::mutex> lk(m);
        // Wake up either for data or because all items were already consumed.
        cv.wait(lk, [total] { return pop_count >= total || !cb.empty(); });
        if (pop_count >= total)
        {
            break;
        }
        cb.pop_front();
        if (++pop_count >= total)
        {
            // Last item consumed: release the consumers still waiting on `cv`.
            cv.notify_all();
            break;
        }
        cv.notify_one();
    }
}
/**
 * Producer for the lock-free queue benchmark.
 *
 * Each iteration first claims one slot of the global budget
 * (LOOP_COUNT * THREAD_COUNT) by atomically advancing `push_count`, then
 * pushes the claimed number into `mul_queue`, sleeping 1 ms whenever the
 * queue refuses the push.
 *
 * Claiming before pushing removes the check-then-act race of the previous
 * version, where several threads could pass the limit test at once, push the
 * same counter value and leave `push_count` above the intended total.
 */
void nonblocking_productor()
{
    const unsigned int total = LOOP_COUNT * THREAD_COUNT;
    while (true)
    {
        // Claim the next item number; stop once the budget is exhausted.
        unsigned int ticket = push_count.load();
        do
        {
            if (ticket >= total)
            {
                return;
            }
        } while (!push_count.compare_exchange_weak(ticket, ticket + 1));

        // The claim is ours: retry until the queue accepts the item.
        while (!mul_queue->push(static_cast<int>(ticket)))
        {
            Sleep(1);
        }
    }
}
/**
 * Consumer for the lock-free queue benchmark.
 *
 * Each iteration first claims one of the LOOP_COUNT * THREAD_COUNT items by
 * atomically advancing `pop_count`, then pops from `mul_queue`, sleeping 1 ms
 * whenever the queue is empty.
 *
 * Claiming before popping keeps `pop_count` at exactly the intended total;
 * the previous version tested the limit and incremented in separate steps,
 * so concurrent consumers could pop surplus items. The retry loop relies on
 * the producers pushing at least that many items in total.
 */
void nonblocking_customer()
{
    const unsigned int total = LOOP_COUNT * THREAD_COUNT;
    int val;
    while (true)
    {
        // Claim one item; stop once every item has an owner.
        unsigned int ticket = pop_count.load();
        do
        {
            if (ticket >= total)
            {
                return;
            }
        } while (!pop_count.compare_exchange_weak(ticket, ticket + 1));

        // Retry until the claimed item can actually be taken from the queue.
        while (!mul_queue->pop(val))
        {
            Sleep(1);
        }
    }
}
/**
 * Runs the blocking benchmark: starts THREAD_COUNT consumer threads and
 * THREAD_COUNT producer threads, joins them all, and prints the elapsed
 * wall-clock time together with the final counter values.
 *
 * Fixes over the previous version:
 *  - threads are held in std::vector instead of manually new'ed arrays;
 *  - time is measured with std::chrono::steady_clock, so the "ms" figure no
 *    longer depends on CLOCKS_PER_SEC being 1000;
 *  - the atomics are loaded before being handed to printf (passing a
 *    std::atomic object through varargs is not portable), and the format
 *    specifiers match the argument types.
 */
void test_blocking()
{
    std::vector<std::thread> consumers;
    std::vector<std::thread> producers;
    consumers.reserve(THREAD_COUNT);
    producers.reserve(THREAD_COUNT);

    const auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < THREAD_COUNT; i++)
    {
        consumers.emplace_back(&blocking_customer);
        producers.emplace_back(&blocking_productor);
    }
    for (int i = 0; i < THREAD_COUNT; i++)
    {
        consumers[i].join();
        producers[i].join();
    }
    const auto end = std::chrono::steady_clock::now();

    const long long elapsed_ms =
        std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
    printf("[test_blocking]\nTHREAD_COUNT:%d\nQUEUE_CAPACITY:%d\ncost:%lldms\n", THREAD_COUNT, QUEUE_CAPACITY, elapsed_ms);
    printf("push_count:%u pop_count:%u\n", push_count.load(), pop_count.load());
}
/**
 * Runs the lock-free benchmark: starts THREAD_COUNT consumer threads and
 * THREAD_COUNT producer threads, joins them all, and prints the elapsed
 * wall-clock time together with the final counter values.
 *
 * Fixes over the previous version:
 *  - threads are held in std::vector instead of manually new'ed arrays;
 *  - time is measured with std::chrono::steady_clock, so the "ms" figure no
 *    longer depends on CLOCKS_PER_SEC being 1000;
 *  - the atomics are loaded before being handed to printf (passing a
 *    std::atomic object through varargs is not portable), and the format
 *    specifiers match the argument types.
 */
void test_nonblocking()
{
    std::vector<std::thread> consumers;
    std::vector<std::thread> producers;
    consumers.reserve(THREAD_COUNT);
    producers.reserve(THREAD_COUNT);

    const auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < THREAD_COUNT; i++)
    {
        consumers.emplace_back(&nonblocking_customer);
        producers.emplace_back(&nonblocking_productor);
    }
    for (int i = 0; i < THREAD_COUNT; i++)
    {
        consumers[i].join();
        producers[i].join();
    }
    const auto end = std::chrono::steady_clock::now();

    const long long elapsed_ms =
        std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
    printf("[test_nonblocking]\nTHREAD_COUNT:%d\nQUEUE_CAPACITY:%d\ncost:%lldms\n", THREAD_COUNT, QUEUE_CAPACITY, elapsed_ms);
    printf("push_count:%u pop_count:%u\n", push_count.load(), pop_count.load());
}
int main(char* args, int size)
{
mul_queue = new boost::lockfree::queue<int, boost::lockfree::capacity<QUEUE_CAPACITY>>;
std::cout << mul_queue->is_lock_free() << std::endl;
//为了排除测试程序的无关因素,测试时只开启一个:blocking或者nonblocking。
//test_blocking();
test_nonblocking();
}
输出结果:
生产者和消费者分别4个线程
[test_blocking]
THREAD_COUNT:4
QUEUE_CAPACITY:65534
cost:3598ms
push_count:20000003 pop_count:20000003
[test_nonblocking]
THREAD_COUNT:4
QUEUE_CAPACITY:65534
cost:6350ms
push_count:20000003 pop_count:20000003
生产者和消费者分别8个线程
[test_blocking]
THREAD_COUNT:8
QUEUE_CAPACITY:65534
cost:3620ms
push_count:20000007 pop_count:20000007
[test_nonblocking]
THREAD_COUNT:8
QUEUE_CAPACITY:65534
cost:6466ms
push_count:20000007 pop_count:20000004
生产者和消费者分别16个线程
[test_blocking]
THREAD_COUNT:16
QUEUE_CAPACITY:65534
cost:3590ms
push_count:20000015 pop_count:20000015
[test_nonblocking]
THREAD_COUNT:16
QUEUE_CAPACITY:65534
cost:6136ms
push_count:20000007 pop_count:20000007
结论:在多生产者、多消费者的多线程环境下,基于std::mutex的阻塞队列比boost::lockfree::queue快约一倍。boost::lockfree::queue在指定编译期容量时,容量不得超过65534(2^16-2):按照Boost文档的说明,固定容量的队列用数组下标来寻址内部节点,因此容量受下标类型可寻址范围的限制。
PS:上面的不同线程数量下,累加结果是一样的,是因为每个线程的循环次数做了均分,总循环次数为20000000
测试环境:
windows 10 pro x64
VS2015企业版 update2,release x64
CPU:i7二代移动版