Instruction_level_parallelism

本文探讨了指令级并行技术及其对程序性能的影响,并深入分析了伪共享的概念及如何通过结构调整避免伪共享,提高多线程环境下的程序效率。

Instruction_level_parallelism and False_sharing

// Demonstrates instruction-level parallelism (ILP).
// Loop 1 increments two independent counters (a[0], a[1]) per iteration, so
// the CPU can issue both additions in parallel; Loop 2 increments the same
// counter twice, forming a data-dependency chain, and is typically slower
// even though both loops execute the same number of additions.
// Prints the step count and the elapsed wall-clock seconds of each loop.
void Instruction_level_parallelism()
{
	// ~2^30 iterations — large enough to measure with time()'s 1 s resolution.
	unsigned int steps = 1 * 1024 * 1024 * 1024u - 1;
	cout << steps << endl;
	// Value-initialize to zero: the original `new int[2]` left the elements
	// uninitialized, so the increments below read indeterminate values (UB).
	int *a = new int[2]{};
	time_t begin = time(nullptr);
	// Loop 1: two independent increments per iteration (no dependency chain).
	for (unsigned int i = 0; i < steps; i++)
	{
		a[0]++;
		a[1]++;
	}
	time_t end = time(nullptr);
	cout << end - begin << endl;
	// Loop 2: two dependent increments of the same location per iteration.
	begin = time(nullptr);
	for (unsigned int i = 0; i < steps; i++)
	{
		a[0]++;
		a[0]++;
	}
	end = time(nullptr);
	cout << end - begin << endl;
	delete[] a; // fix: the original leaked this allocation
}
// Cache-line interference sizes: use the standard constants from <new> when
// the library provides them (C++17 feature-test macro), otherwise fall back
// to a guess of 2 * alignof-max granularity.
#ifdef __cpp_lib_hardware_interference_size
	using std::hardware_constructive_interference_size;
	using std::hardware_destructive_interference_size;
#else
	// Lucky guess — see __cacheline_aligned / L1_CACHE_BYTES / L1_CACHE_SHIFT / ...
	constexpr std::size_t hardware_constructive_interference_size = 2 * sizeof(std::max_align_t);
	constexpr std::size_t hardware_destructive_interference_size = 2 * sizeof(std::max_align_t);
#endif

	// A lock byte and the byte it guards. Kept adjacent so that (when the
	// enclosing object is suitably aligned) both land on the same cache
	// line — accessing one pulls the other into cache too (true sharing).
	struct Data
	{
		std::atomic<std::uint8_t> lock;
		std::uint8_t data;
	};

	// Demonstrates constructive interference: `pack` is aligned to the
	// constructive-interference size so its lock+data pair shares one cache
	// line. The alignas pushes `pack` past `port` and pads the struct out
	// (see the offsetof/sizeof printout in test_false_share()).
	struct DataPack
	{
		std::uint8_t port;
		alignas(hardware_constructive_interference_size) Data pack;
		std::uint8_t wait;
	};

	// Demonstrates avoiding destructive interference (false sharing): `ice`
	// and `fire` are each aligned to the destructive-interference size so
	// two threads hammering them concurrently touch different cache lines.
	// `soil` ends up on fire's line (offset 129 in the sample output).
	struct KeepApart
	{
		std::uint8_t air;
		alignas(hardware_destructive_interference_size) std::atomic<std::uint8_t> ice;
		alignas(hardware_destructive_interference_size) std::atomic<std::uint8_t> fire;
		std::uint8_t soil;
	};

	// Prints the max_align_t properties, the two interference-size
	// constants, and the member offsets / total sizes of DataPack and
	// KeepApart, so the padding introduced by alignas() can be inspected.
	// Output text is identical to the original, one statement per line.
	void test_false_share()
	{
		std::cout << "alignof(std::max_align_t) == " << alignof(std::max_align_t) << '\n';
		std::cout << "sizeof( std::max_align_t ) == " << sizeof(std::max_align_t) << "\n\n";

		std::cout << "hardware_destructive_interference_size == " << hardware_destructive_interference_size << '\n';
		std::cout << "hardware_constructive_interference_size == " << hardware_constructive_interference_size << "\n\n";

		std::cout << "offsetof( DataPack::port ) : " << offsetof(DataPack, port) << '\n';
		std::cout << "offsetof( DataPack::pack ) : " << offsetof(DataPack, pack) << '\n';
		std::cout << "offsetof( DataPack::wait ) : " << offsetof(DataPack, wait) << '\n';
		std::cout << "sizeof  ( DataPack )       : " << sizeof(DataPack) << '\n' << '\n';

		// '!' marks the members deliberately pushed onto their own cache lines.
		std::cout << "offsetof( KeepApart::air  ) : " << offsetof(KeepApart, air) << '\n';
		std::cout << "offsetof( KeepApart::ice  ) ! " << offsetof(KeepApart, ice) << '\n';
		std::cout << "offsetof( KeepApart::fire ) ! " << offsetof(KeepApart, fire) << '\n';
		std::cout << "offsetof( KeepApart::soil ) : " << offsetof(KeepApart, soil) << '\n';
		std::cout << "sizeof  ( KeepApart )       : " << sizeof(KeepApart) << '\n' << '\n';
	}


OutPut:
alignof(std::max_align_t) == 16
sizeof( std::max_align_t ) == 32

hardware_destructive_interference_size == 64
hardware_constructive_interference_size == 64

offsetof( DataPack::port ) : 0
offsetof( DataPack::pack ) : 64
offsetof( DataPack::wait ) : 66
sizeof  ( DataPack )       : 128

offsetof( KeepApart::air  ) : 0
offsetof( KeepApart::ice  ) ! 64
offsetof( KeepApart::fire ) ! 128
offsetof( KeepApart::soil ) : 129
sizeof  ( KeepApart )       : 192
  • false sharing,当两个成员变量在同一个cache line时,其中一个成员变量被修改,会导致cache line无效,另一个成员变量就没法享受cache带来的好处了。所以有时候要避免false sharing。

  • true sharing,如果一个结构体的 sizeof 小于或等于 cache line 大小,并且该结构体的对齐方式使其整体落在同一个 cache line 内,那么这个结构体的成员间就存在 true sharing:访问其中一个成员会把其余成员一并载入缓存。

  • Destructive interference size: a number that’s suitable as an offset between two objects to likely avoid false-sharing due to different runtime access patterns from different threads.

  • Constructive interference size: a number that’s suitable as a limit on two objects’ combined memory footprint size and base alignment to likely promote true-sharing between them.

import os import json import time import psutil import torch import gc import warnings from dataclasses import dataclass, field from typing import List, Optional from PIL import Image, ImageEnhance from contextlib import ExitStack from torch.utils.data import Dataset from transformers import ( AutoProcessor, TrainingArguments, Trainer, TrainerCallback, Qwen2_5_VLForConditionalGeneration, set_seed, logging as transformers_logging ) from peft import LoraConfig, get_peft_model import numpy as np # 忽略非關鍵警告 warnings.filterwarnings("ignore", category=UserWarning, module="torchvision") transformers_logging.set_verbosity_error() os.environ["TOKENIZERS_PARALLELISM"] = "false" # ========== 配置 ========== @dataclass class ScriptArguments: train_path: str = field(default="train.jsonl") valid_path: str = field(default="valid.jsonl") model_name_or_path: str = field(default="Qwen/Qwen2.5-VL-7B-Instruct") output_dir: str = field(default="./output_lora_qwen25vl_instruct") per_device_train_batch_size: int = field(default=1) gradient_accumulation_steps: int = field(default=4) num_train_epochs: int = field(default=3) logging_steps: int = field(default=5) save_steps: int = field(default=100) eval_steps: int = field(default=100) image_size: int = field(default=672) learning_rate: float = field(default=2e-5) warmup_steps: int = field(default=50) weight_decay: float = field(default=0.01) lora_rank: int = field(default=16) lora_alpha: int = field(default=32) lora_dropout: float = field(default=0.05) fp16: bool = field(default=False) bf16: bool = field(default=False) max_steps: int = field(default=-1) gradient_checkpointing: bool = field(default=True) seed: int = field(default=42) report_to: str = field(default="none") enable_mps_fallback: bool = field(default=True) # ========== 設備管理 ========== class DeviceManager: @staticmethod def get_device(): if torch.backends.mps.is_available() and torch.backends.mps.is_built(): print("✅ MPS設備可用") return torch.device("mps") elif 
torch.cuda.is_available(): print("✅ CUDA設備可用") return torch.device("cuda") else: print("⚠️ 僅CPU可用") return torch.device("cpu") @staticmethod def clear_memory(): gc.collect() if torch.backends.mps.is_available(): torch.mps.empty_cache() if torch.cuda.is_available(): torch.cuda.empty_cache() # ========== 資料集 ========== class SafetyImageDataset(Dataset): def __init__(self, path, processor, tokenizer, image_token="<|im_start|>", image_size=672): self.data = [] self.processor = processor self.tokenizer = tokenizer self.image_token = image_token self.image_size = image_size with open(path, "r", encoding="utf-8") as f: for line_no, line in enumerate(f, start=1): try: example = json.loads(line) if not isinstance(example, dict): print(f"⚠️ 第{line_no}行格式錯誤: {example}") continue required_keys = ["image", "instruction", "ground_truth"] if not all(k in example for k in required_keys): print(f"⚠️ 第{line_no}行缺少欄位: {example}") continue self.data.append(example) except json.JSONDecodeError as e: print(f"⚠️ 第{line_no}行JSON解析錯誤: {e}") print(f"✅ 成功載入 {len(self.data)} 筆資料。") def __len__(self): return len(self.data) def __getitem__(self, idx): try: item = self.data[idx] # 防禦: 確保每筆資料都是 dict 且有必要欄位 if not isinstance(item, dict): raise ValueError("非dict資料") for key in ["image", "instruction", "ground_truth"]: if key not in item: raise ValueError(f"缺少欄位: {key}") return item except Exception as e: print(f"❌ __getitem__ idx={idx} 發生錯誤: {e}") # 回傳一筆空資料避免崩潰 return { "image": "images/placeholder.jpg", # 你可以放一張不存在的圖 "instruction": "", "ground_truth": "" } class SafetyDataCollator: def __init__(self, processor, tokenizer, image_token="<|im_start|>", image_size=672): self.processor = processor self.tokenizer = tokenizer self.image_token = image_token self.image_size = image_size def _load_image(self, path): img = Image.open(path).convert("RGB") img = img.resize((self.image_size, self.image_size), Image.LANCZOS) return img def __call__(self, examples): print(f"\n🟢 collator收到 examples 
type={type(examples)}") print(f"🔍 每個example型別: {[type(e) for e in examples]}") print(f"🔍 每個example內容: {examples}") # 篩掉空dict examples = [e for e in examples if e and isinstance(e, dict) and "image" in e] if len(examples) == 0: raise ValueError("❌ 所有examples都是空dict,無法處理batch!") images_paths = [e["image"] for e in examples] images = [self._load_image(p) for p in images_paths] prompts = [ f"<|user|>\n作為施工安全專家,請分析以下場景中的安全隱患:{self.image_token}\n<|assistant|>" for _ in examples ] targets = [e["ground_truth"] for e in examples] texts = [p + t for p, t in zip(prompts, targets)] batch = self.processor( images=images, text=texts, return_tensors="pt", padding="max_length", truncation=True, max_length=1024 ) labels = batch["input_ids"].clone() for i, prompt in enumerate(prompts): prompt_ids = self.tokenizer(prompt, return_tensors="pt").input_ids[0] labels[i, :len(prompt_ids)] = -100 return { "input_ids": batch["input_ids"], "attention_mask": batch["attention_mask"], "pixel_values": batch["pixel_values"], "labels": labels } # ========== 監控回調 ========== class SafetyTrainingMonitor(TrainerCallback): def on_log(self, args, state, control, logs=None, **kwargs): mem = psutil.virtual_memory().used / (1024**3) print(f"📊 Step {state.global_step} | Loss: {logs.get('loss', 'N/A')} | Memory: {mem:.2f}GB") # ========== 主流程 ========== def main(): args = ScriptArguments() set_seed(args.seed) device = DeviceManager.get_device() print("🛠️ 設備:", device) print("🛠️ 參數:", args) # 加載Processor processor = AutoProcessor.from_pretrained( args.model_name_or_path, trust_remote_code=True, use_fast=True ) tokenizer = processor.tokenizer # 加載模型 model = Qwen2_5_VLForConditionalGeneration.from_pretrained( args.model_name_or_path, trust_remote_code=True, torch_dtype=torch.float16 if args.fp16 else torch.float32, low_cpu_mem_usage=True, device_map="auto" ) model.resize_token_embeddings(len(tokenizer)) if args.gradient_checkpointing: model.gradient_checkpointing_enable() # 加載LoRA peft_config = LoraConfig( 
r=args.lora_rank, lora_alpha=args.lora_alpha, target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "down_proj"], lora_dropout=args.lora_dropout, bias="none", task_type="CAUSAL_LM" ) model = get_peft_model(model, peft_config) model.print_trainable_parameters() # 資料集 train_dataset = SafetyImageDataset(args.train_path, processor, tokenizer, image_size=args.image_size) valid_dataset = SafetyImageDataset(args.valid_path, processor, tokenizer, image_size=args.image_size) collator = SafetyDataCollator(processor, tokenizer, image_size=args.image_size) # 訓練參數 training_args = TrainingArguments( output_dir=args.output_dir, per_device_train_batch_size=args.per_device_train_batch_size, gradient_accumulation_steps=args.gradient_accumulation_steps, num_train_epochs=args.num_train_epochs, learning_rate=args.learning_rate, warmup_steps=args.warmup_steps, weight_decay=args.weight_decay, logging_steps=args.logging_steps, save_steps=args.save_steps, eval_steps=args.eval_steps, save_total_limit=3, fp16=args.fp16, bf16=args.bf16, report_to=args.report_to, optim="adamw_torch", lr_scheduler_type="cosine", ) trainer = Trainer( model=model, args=training_args, train_dataset=train_dataset, eval_dataset=valid_dataset, data_collator=collator, tokenizer=tokenizer, callbacks=[SafetyTrainingMonitor()] ) # 開始訓練 print("🚀 開始訓練...") trainer.train() # 保存 trainer.save_model(os.path.join(args.output_dir, "final_model")) print("✅ 訓練完成!") if __name__ == "__main__": main() (vlm) face8@jamesdeMac-Studio vlm % python train_vlm\ copy.py ✅ MPS設備可用 🛠️ 設備: mps 🛠️ 參數: ScriptArguments(train_path='train.jsonl', valid_path='valid.jsonl', model_name_or_path='Qwen/Qwen2.5-VL-7B-Instruct', output_dir='./output_lora_qwen25vl_instruct', per_device_train_batch_size=1, gradient_accumulation_steps=4, num_train_epochs=3, logging_steps=5, save_steps=100, eval_steps=100, image_size=672, learning_rate=2e-05, warmup_steps=50, weight_decay=0.01, lora_rank=16, lora_alpha=32, lora_dropout=0.05, fp16=False, 
bf16=False, max_steps=-1, gradient_checkpointing=True, seed=42, report_to='none', enable_mps_fallback=True) Loading checkpoint shards: 100%|███| 5/5 [00:05<00:00, 1.05s/it] trainable params: 35,090,432 || all params: 8,324,397,056 || trainable%: 0.4215 ✅ 成功載入 934 筆資料。 ✅ 成功載入 104 筆資料。 /Users/face8/works/vlm/train_vlm copy.py:258: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead. trainer = Trainer( 🚀 開始訓練... /Users/face8/miniconda3/envs/vlm/lib/python3.9/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used. warnings.warn(warn_msg) 🟢 collator收到 examples type=<class 'list'> 🔍 每個example型別: [<class 'dict'>] 🔍 每個example內容: [{}] Traceback (most recent call last): File "/Users/face8/works/vlm/train_vlm copy.py", line 277, in <module> main() File "/Users/face8/works/vlm/train_vlm copy.py", line 270, in main trainer.train() File "/Users/face8/miniconda3/envs/vlm/lib/python3.9/site-packages/transformers/trainer.py", line 2207, in train return inner_training_loop( File "/Users/face8/miniconda3/envs/vlm/lib/python3.9/site-packages/transformers/trainer.py", line 2503, in _inner_training_loop batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches, args.device) File "/Users/face8/miniconda3/envs/vlm/lib/python3.9/site-packages/transformers/trainer.py", line 5301, in get_batch_samples batch_samples.append(next(epoch_iterator)) File "/Users/face8/miniconda3/envs/vlm/lib/python3.9/site-packages/accelerate/data_loader.py", line 567, in __iter__ current_batch = next(dataloader_iter) File "/Users/face8/miniconda3/envs/vlm/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 733, in __next__ data = self._next_data() File "/Users/face8/miniconda3/envs/vlm/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 789, in _next_data data = 
self._dataset_fetcher.fetch(index) # may raise StopIteration File "/Users/face8/miniconda3/envs/vlm/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch return self.collate_fn(data) File "/Users/face8/miniconda3/envs/vlm/lib/python3.9/site-packages/transformers/trainer_utils.py", line 872, in __call__ return self.data_collator(features) File "/Users/face8/works/vlm/train_vlm copy.py", line 152, in __call__ raise ValueError("❌ 所有examples都是空dict,無法處理batch!") ValueError: ❌ 所有examples都是空dict,無法處理batch! (vlm) face8@jamesdeMac-Studio vlm %
07-03
评论 1
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值