参考: discussion on modeling shared L3 cache hierarchy
------------------------------------------------------update 2019.3.12 ---------------------------------------------------------------
参考:gem5-stable添加l3 cache
三级缓存实现效果图:
缓存架构参数配置:
cpu: four core
private L1 dcache: 32KB/8-way; icache: 32KB/8-way
private L2 cache: 256KB/8-way
shared L3 cache: 4MB/16-way
实现方法:
1.实现三级缓存配置涉及:configs/common/Caches.py, CacheConfig.py,Options.py,configs/example/se.py, ./src/mem/XBar.py,./src/cpu/BaseCPU.py
2.通常实现自己的功能不直接在源代码上修改,而是生成一份新的副本,如:l3Caches.py, l3CacheConfig.py和l3se.py
3. 分别修改上述文件
l3Caches.py中增加:
class L3Cache(BaseCache):
    # Default parameters for the shared L3 cache. The cache size is not
    # set here; config_cache() supplies it from options.l3_size and also
    # overrides assoc from options.l3_assoc.
    #
    # NOTE(review): BaseCache, block_size and hit_latency are the names
    # used by the gem5-stable tree this tutorial started from; newer gem5
    # trees may have renamed them -- confirm against your Caches.py.

    # 16-way set associative
    assoc = 16
    # Cache line size (presumably in bytes -- confirm against BaseCache)
    block_size = 64
    # Lookup and response latencies (presumably in cycles -- confirm)
    hit_latency = 20
    response_latency = 20
    # Miss-status holding registers and targets allowed per MSHR
    mshrs = 512
    tgts_per_mshr = 20
    write_buffers = 256
l3se.py 中对Caches和CacheConfig模块的引用改为l3Caches和l3CacheConfig
l3CacheConfig.py 文件的修改如下:
def config_cache(options, system):
    """Attach private L1/L2 caches and an optional shared L3 to ``system``.

    Reads ``options.cpu_type``, ``options.cacheline_size``,
    ``options.num_cpus`` and the ``caches`` / ``l2cache`` / ``l3cache``
    flags together with their size and associativity options.

    Resulting topology, depending on the flags:

    * ``--l2cache --l3cache``: L1 -> per-CPU L2 bus -> private L2 ->
      shared L3 bus -> shared L3 -> membus.
    * ``--l2cache`` only: L1 -> per-CPU L2 bus -> private L2 -> membus.
    * ``--l3cache`` only: L1 -> shared L3 bus -> shared L3 -> membus.
    * neither: the CPU's cached ports go straight to membus.

    Returns the same ``system`` object that was passed in.
    """
    # Local import so the early-exit path below does not rely on the
    # enclosing module having imported sys.
    import sys

    if options.cpu_type == "arm_detailed":
        try:
            # A plain module import (instead of ``from ... import *``) is
            # accepted inside a function by both Python 2 and Python 3.
            import O3_ARM_v7a as core
        except ImportError:
            print("Did you compile the O3 model?")
            sys.exit(1)
        # NOTE(review): O3_ARM_v7aL3 must be defined in O3_ARM_v7a for
        # this branch to work -- confirm it has been added there.
        dcache_class, icache_class, l2_cache_class, l3_cache_class = (
            core.O3_ARM_v7a_DCache, core.O3_ARM_v7a_ICache,
            core.O3_ARM_v7aL2, core.O3_ARM_v7aL3)
    else:
        dcache_class, icache_class, l2_cache_class, l3_cache_class = (
            L1Cache, L1Cache, L2Cache, L3Cache)

    # Set the cache line size of the system
    system.cache_line_size = options.cacheline_size

    # Shared L3: configured the same way the stock script configures a
    # shared L2; only the bus it hangs off differs.
    if options.l3cache:
        system.l3 = l3_cache_class(clk_domain=system.cpu_clk_domain,
                                   size=options.l3_size,
                                   assoc=options.l3_assoc)
        # gem5 as of 2018-12 uses an XBar class here instead of the
        # former CoherentBus.
        system.tol3bus = L3XBar(clk_domain=system.cpu_clk_domain,
                                width=32)
        system.l3.cpu_side = system.tol3bus.master
        system.l3.mem_side = system.membus.slave
        # Whatever sits above the L3 connects to the L3 bus ...
        next_level_bus = system.tol3bus
    else:
        # ... and directly to the memory bus when there is no L3.
        next_level_bus = system.membus

    for i in range(options.num_cpus):
        cpu = system.cpu[i]

        if options.caches:
            icache = icache_class(size=options.l1i_size,
                                  assoc=options.l1i_assoc)
            dcache = dcache_class(size=options.l1d_size,
                                  assoc=options.l1d_assoc)
            if buildEnv['TARGET_ISA'] == 'x86':
                cpu.addPrivateSplitL1Caches(icache, dcache,
                                            PageTableWalkerCache(),
                                            PageTableWalkerCache())
            else:
                cpu.addPrivateSplitL1Caches(icache, dcache)
        cpu.createInterruptController()

        if options.l2cache:
            # Private L2 behind its own per-CPU crossbar.
            cpu.l2 = l2_cache_class(clk_domain=system.cpu_clk_domain,
                                    size=options.l2_size,
                                    assoc=options.l2_assoc)
            # L2XBar replaces CoherentBus, which newer gem5 trees no
            # longer provide.
            cpu.tol2bus = L2XBar(clk_domain=system.cpu_clk_domain)
            cpu.l2.cpu_side = cpu.tol2bus.master
            cpu.l2.mem_side = next_level_bus.slave
            # Hook the CPU's cached ports to the L2 bus and its uncached
            # ports to the memory bus; see connectAllPorts in
            # src/cpu/BaseCPU.py.
            cpu.connectAllPorts(cpu.tol2bus, system.membus)
        elif options.l3cache:
            # No private L2: the L1 caches feed the shared L3 bus directly.
            cpu.connectAllPorts(system.tol3bus, system.membus)
        else:
            cpu.connectAllPorts(system.membus)

    return system
./src/mem/XBar.py 文件中增加L3XBar:
在gem5(2018年12月)的版本中,继续按照后续方法配置l3缓存将无法找到CoherentBus。新版本中变为了L2XBar,那么l3也需要配置对应的L3XBar。即将CoherentBus变为L3XBar。
L3XBar在 ./src/mem/XBar.py 定义,模仿L2XBar定义即可。
class L3XBar(CoherentXBar):
    # Crossbar placed in front of the shared L3 cache; the parameter
    # values below mirror those of L2XBar.

    # 256-bit crossbar by default
    width = 32

    # Assume that most of this is covered by the cache latencies, with
    # no more than a single pipeline stage for any packet.
    frontend_latency = 1
    forward_latency = 0
    response_latency = 1
    snoop_response_latency = 1

    # Use a snoop-filter by default, and set the latency to zero as
    # the lookup is assumed to overlap with the frontend latency of
    # the crossbar
    snoop_filter = SnoopFilter(lookup_latency = 0)

    # This flag is carried over unchanged from L2XBar, where the crossbar
    # joins the dcache and the icache to the first level of unified cache.
    # NOTE(review): when a private L2 (with its own L2XBar) sits above
    # this crossbar, this bus is no longer the first unified level --
    # confirm whether the flag should stay set in that configuration.
    point_of_unification = True
在cpu里./src/cpu/BaseCPU.py 加入L3XBar信息,具体如下:
#导入L3XBar
from XBar import L3XBar
# 加入三级缓存架构配置
def addThreeLevelCacheHierarchy(self, ic, dc, l3c, iwc=None, dwc=None,
                                xbar=None):
    """Attach split L1 caches and a cache ``l3c`` behind an L3 crossbar.

    ``ic`` / ``dc`` are the L1 instruction and data caches, and
    ``iwc`` / ``dwc`` are optional caches forwarded unchanged to
    ``addPrivateSplitL1Caches``. ``xbar`` is the crossbar to use; a
    default ``L3XBar()`` is created when it is omitted (or falsy).

    Note that despite the name, this method does not instantiate an L2:
    the L1 caches are connected directly to the crossbar in front of
    ``l3c``, which is stored on this CPU object as ``self.l3cache``.
    """
    self.addPrivateSplitL1Caches(ic, dc, iwc, dwc)
    self.toL3Bus = xbar if xbar else L3XBar()
    # Connect the ports currently listed as cached to the new crossbar.
    self.connectCachedPorts(self.toL3Bus)
    self.l3cache = l3c
    self.toL3Bus.master = self.l3cache.cpu_side
    # From here on the only cached port left to connect is the memory
    # side of the L3 cache.
    self._cached_ports = ['l3cache.mem_side']
Options.py 中增加l3cache选项:
parser.add_option("--l3cache", action="store_true")
4.配置好后用脚本命令执行
build/ARM/gem5.debug configs/example/l3se.py -n 8 --caches --l2cache --l3cache -c tests/test-progs/hello/bin/arm/linux/hello
5.看到程序输出 "Hello world!" 即运行成功,可以在config.ini和config.dot中查看配置及体系架构信息。