linux 锁-- atomic & per_cpu

本文详细介绍了Linux内核中的Atomic操作原理及其在ARM32与ARM64架构下的实现方式,并探讨了Per-CPU变量的设计理念及应用场景。

atomic引入背景

在 SMP 系统中(尤其是开启 preempt 的情况下),如果两个 task 同时修改同一个公共资源,就会面临临界区资源竞争问题,产生不符合预期的结果,因此需要加以解决。

典型问题描述

对于变量的操作: a =0; a++; 汇编是如下实现:

ldr   r2, [r3, #0]      @ R: r3 中是 a 的地址,读出 a 的旧值到 r2
adds  r2, r2, #1        @ M: r2 = r2 + 1
str   r2, [r3, #0]      @ W: 将新值写回 a 的地址

也就是说,一个 a++ 实际上需要三条指令来完成,分别对应上图的 R(Read,读)、M(Modify,改)、W(Write,写)。

这样,如果 task1 完成 R 之后、W 之前,task2 也对同一个变量执行了 R-M-W,那么两者都基于同一个旧值计算并各自写回,其中一次更新会丢失,产生不符合 task1 预期的结果。因此内核提供了 atomic 操作(在 arm 上基于独占访问指令实现)来解决这种问题。

atomic 实现

arm32 实现

/* arch/arm/include/asm/atomic.h */
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op, asm_op)                    \
    ATOMIC_OP(op, c_op, asm_op)                 \
    ATOMIC_FETCH_OP(op, c_op, asm_op)

#define ATOMIC_OP(op, c_op, asm_op)                 \                                                                                
static inline void atomic_##op(int i, atomic_t *v)          \
{                                   \
    unsigned long tmp;                      \
    int result;                         \
                                    \
    prefetchw(&v->counter);                     \
    __asm__ __volatile__("@ atomic_" #op "\n"           \
"1: ldrex   %0, [%3]\n"                     \     ①
"   " #asm_op " %0, %0, %4\n"                   \ ②
"   strex   %1, %0, [%3]\n"                     \ ③
"   teq %1, #0\n"                       \         ④
"   bne 1b"                         \             ⑤
    : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)       \
    : "r" (&v->counter), "Ir" (i)                   \
    : "cc");                            \
}                                   \

ATOMIC_OPS(add, +=, add)                     

这里选取了 atomic_add 来分析,上面的 #asm_op 就是 add 了,此时代码可以解析为:

  • Prefetch data
  • 将v->counter所在地址的数据加载到 result
  • result += i, 结果存放在 result 中
  • 将 result 写回 v->counter 所在地址,同时将 strex 的执行状态保存在 tmp(0 表示写入成功,非 0 表示独占访问失败、未写入)
  • 将 tmp 与 0 比较,如果不等于 0,说明写回失败,跳回标号 1 重新处理一遍

那么为什么仅仅使用了 ldrex 和 strex 就实现了 atomic 功能呢?

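实际上 ldrex 和 strex 在使用过程中使用了 monitor(独占监视器)的功能:ldrex 在读取数据的同时,会在 monitor 中把该地址标记为当前 CPU 的独占访问;strex 只有在该标记仍然有效(期间没有其它 CPU 写过该地址)时才会真正写入并返回 0,否则不写入并返回 1,由软件重试。这里选取蜗窝科技的介绍方式介绍: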

 arm64实现

/* arch/arm64/include/asm/atomic_lse.h */
#define ATOMIC64_OP(op, asm_op)                     \                                                                                
static inline void __lse_atomic64_##op(s64 i, atomic64_t *v)        \
{                                   \   
    asm volatile(                           \   
    __LSE_PREAMBLE                          \   
"   " #asm_op " %[i], %[v]\n"                   \   
    : [i] "+r" (i), [v] "+Q" (v->counter)               \   
    : "r" (v));                         \   
}

ATOMIC64_OP(andnot, stclr)
ATOMIC64_OP(or, stset)
ATOMIC64_OP(xor, steor)
ATOMIC64_OP(add, stadd)  // 定义了 __lse_atomic64_add 函数, asm_op 是 stadd

/* arch/arm64/include/asm/lse.h */
#define __lse_ll_sc_body(op, ...)                   \                                                                                
({                                  \   
    system_uses_lse_atomics() ?                 \   
        __lse_##op(__VA_ARGS__) :               \   
        __ll_sc_##op(__VA_ARGS__);              \   
})

/* arch/arm64/include/asm/atomic.h */

#define ATOMIC64_OP(op)                         \
static __always_inline void arch_##op(long i, atomic64_t *v)        \
{                                   \
    __lse_ll_sc_body(op, i, v);                 \
}                                                                                                                                    

ATOMIC64_OP(atomic64_andnot)
ATOMIC64_OP(atomic64_or)
ATOMIC64_OP(atomic64_xor)
ATOMIC64_OP(atomic64_add) //这里传入的 op 是 atomic64_add, 定义了 arch_atomic64_add
ATOMIC64_OP(atomic64_and)
ATOMIC64_OP(atomic64_sub)

 对于 arch_atomic64_add ,其又调用了 __lse_ll_sc_body(atomic64_add, i, v);

这样 atomic64_add 就有了定义:

/* lib/atomic64.c */
#define ATOMIC64_OPS(op, c_op)                      \
    ATOMIC64_OP(op, c_op)                       \
    ATOMIC64_OP_RETURN(op, c_op)                    \
    ATOMIC64_FETCH_OP(op, c_op)

ATOMIC64_OPS(add, +=)                                                                                                                
ATOMIC64_OPS(sub, -=)

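需要注意,lib/atomic64.c 中用宏定义的 atomic64_add 是基于自旋锁的通用实现,只用于没有 64 位原子指令、打开了 CONFIG_GENERIC_ATOMIC64 的架构;arm64 并不走这条路径,而是由 arch_atomic64_add 经 __lse_ll_sc_body 选择 __lse_atomic64_add 或 __ll_sc_atomic64_add。在支持 LSE 的 CPU 上,__lse_atomic64_add 通过 stadd 指令将 i 加到 atomic64_t 变量的 counter 上。stadd 是 ARMv8.1 LSE 扩展提供的原子操作指令,相对于基于 ldrex/strex(arm64 上对应 ldxr/stxr)的 LL/SC 循环,在性能上又进一步提升。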

atomic典型使用

atomic_t val;

atomic_set(&val, 10);

int read_val = atomic_read(&val);

per_cpu 引入背景

对于多个 CPU 共享(这里假设为 outer shareable)的内存而言,由于 cache 的 MESI 一致性机制,某个 CPU 修改数据时会发生如下变化:

  1. 假设原始的 CPU cache情况如下:

图中黄色的小球表示该数据在 Cache 中命中,且与 RAM 中的内容一致。

2.更改 CacheB 中内容

此时CacheB 内容被修改,内容发生变化。

3.Invalidate 其它cpu cache

由于 MESI 机制,CacheB 中的内容被修改后,硬件会自动 invalidate 其它 CPU cache 中对应的(outer shareable)内容。

这样会带来性能上的损耗,因为被invalidate 的内容,之后如果用到,还要重新加载。

假如内存中有这样一块区域,属于某个 CPU 自己独有,它的加载以及 Cache 操作不会影响到别的 CPU,这样就解决了上述面临的问题。因此 linux 中提出了 per_cpu 变量。

per_cpu 变量定义

#define __PCPU_ATTRS(sec)                       \
    __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \
    PER_CPU_ATTRIBUTES

#define DEFINE_PER_CPU_SECTION(type, name, sec)             \
    __PCPU_ATTRS(sec) __typeof__(type) name
#endif

#define DEFINE_PER_CPU(type, name)                  \
    DEFINE_PER_CPU_SECTION(type, name, "")

DEFINE_PER_CPU 定义了一个位于 PER_CPU_BASE_SECTION 段的变量,这是一个静态定义,通过 section 属性指定了它所在的段。其 section 定义如下:

#ifndef PER_CPU_BASE_SECTION
#ifdef CONFIG_SMP
#define PER_CPU_BASE_SECTION ".data..percpu"
#else
#define PER_CPU_BASE_SECTION ".data"                                                                                
#endif
#ifdef MODULE
#define PER_CPU_SHARED_ALIGNED_SECTION ""                                                                           
#define PER_CPU_ALIGNED_SECTION ""
#else
#define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned"
#define PER_CPU_ALIGNED_SECTION "..shared_aligned"
#endif

#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)           \
    DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
    ____cacheline_aligned_in_smp
    
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name)             \ 
    DEFINE_PER_CPU_SECTION(type, name, "..page_aligned")    \
    __aligned(PAGE_SIZE)

这两个宏同样是定义 per-cpu 变量,只是分别放在名字带 "..shared_aligned" 和 "..page_aligned" 后缀的 section 中,并附加了相应的对齐属性(____cacheline_aligned_in_smp 和 __aligned(PAGE_SIZE))。

那么为什么需要特殊的 Section呢?

对于kernel中的普通变量,经过了编译和链接后,会被放置到.data或者.bss段,系统在初始化的时候会准备好一切(例如clear bss),由于per cpu变量的特殊性,内核将这些变量放置到了其他的section,位于kernel address space中__per_cpu_start和__per_cpu_end之间,我们称之Per-CPU变量的原始变量。(参考蜗窝科技).

典型应用

DEFINE_PER_CPU(int, state);
int cpu = 0;
per_cpu(state, cpu) = 1;
int got_state = per_cpu(state, cpu);

帮我整理下面的help,配置参数的意义,可以设置的值,请详细介绍列出来:ubuntu@studyubuntu:~/gem5$ build/X86/gem5.opt configs/deprecated/example/se.py --help gem5 Simulator System. https://www.gem5.org gem5 is copyrighted software; use the --copyright option for details. gem5 version 25.0.0.1 gem5 compiled Oct 15 2025 14:07:26 gem5 started Oct 22 2025 14:01:40 gem5 executing on studyubuntu, pid 3588 command line: build/X86/gem5.opt configs/deprecated/example/se.py --help warn: The se.py script is deprecated. It will be removed in future releases of gem5. usage: se.py [-h] [-n NUM_CPUS] [--sys-voltage SYS_VOLTAGE] [--sys-clock SYS_CLOCK] [--list-mem-types] [--mem-type {CfiMemory,DDR3_1600_8x8,DDR3_2133_8x8,DDR4_2400_16x4,DDR4_2400_4x16,DDR4_2400_8x8,DDR5_4400_4x8,DDR5_6400_4x8,DDR5_8400_4x8,DRAMInterface,GDDR5_4000_2x32,HBM_1000_4H_1x128,HBM_1000_4H_1x64,HBM_2000_4H_1x64,HMC_2500_1x32,LPDDR2_S4_1066_1x32,LPDDR3_1600_1x32,LPDDR5_5500_1x16_8B_BL32,LPDDR5_5500_1x16_BG_BL16,LPDDR5_5500_1x16_BG_BL32,LPDDR5_6400_1x16_8B_BL32,LPDDR5_6400_1x16_BG_BL16,LPDDR5_6400_1x16_BG_BL32,NVMInterface,NVM_2400_1x64,QoSMemSinkInterface,SimpleMemory,WideIO_200_1x128}] [--mem-channels MEM_CHANNELS] [--mem-ranks MEM_RANKS] [--mem-size MEM_SIZE] [--enable-dram-powerdown] [--mem-channels-intlv MEM_CHANNELS_INTLV] [--memchecker] [--external-memory-system EXTERNAL_MEMORY_SYSTEM] [--tlm-memory TLM_MEMORY] [--caches] [--l2cache] [--num-dirs NUM_DIRS] [--num-l2caches NUM_L2CACHES] [--num-l3caches NUM_L3CACHES] [--l1d_size L1D_SIZE] [--l1i_size L1I_SIZE] [--l2_size L2_SIZE] [--l3_size L3_SIZE] [--l1d_assoc L1D_ASSOC] [--l1i_assoc L1I_ASSOC] [--l2_assoc L2_ASSOC] [--l3_assoc L3_ASSOC] [--cacheline_size CACHELINE_SIZE] [--ruby] [-m TICKS] [--rel-max-tick TICKS] [--maxtime MAXTIME] [-P PARAM] [--list-cpu-types] [--cpu-type 
{AtomicSimpleCPU,BaseAtomicSimpleCPU,BaseMinorCPU,BaseNonCachingSimpleCPU,BaseO3CPU,BaseTimingSimpleCPU,DerivO3CPU,NonCachingSimpleCPU,O3CPU,TimingSimpleCPU,X86AtomicSimpleCPU,X86KvmCPU,X86MinorCPU,X86NonCachingSimpleCPU,X86O3CPU,X86TimingSimpleCPU}] [--list-bp-types] [--list-indirect-bp-types] [--bp-type {BiModeBP,LTAGE,LocalBP,MultiperspectivePerceptron64KB,MultiperspectivePerceptron8KB,MultiperspectivePerceptronTAGE64KB,MultiperspectivePerceptronTAGE8KB,TAGE,TAGE_SC_L_64KB,TAGE_SC_L_8KB,TournamentBP}] [--indirect-bp-type {SimpleIndirectPredictor}] [--list-rp-types] [--list-hwp-types] [--l1i-hwp-type {AMPMPrefetcher,BOPPrefetcher,DCPTPrefetcher,IndirectMemoryPrefetcher,IrregularStreamBufferPrefetcher,MultiPrefetcher,PIFPrefetcher,SBOOEPrefetcher,STeMSPrefetcher,SignaturePathPrefetcher,SignaturePathPrefetcherV2,SlimAMPMPrefetcher,SmsPrefetcher,StridePrefetcher,TaggedPrefetcher}] [--l1d-hwp-type {AMPMPrefetcher,BOPPrefetcher,DCPTPrefetcher,IndirectMemoryPrefetcher,IrregularStreamBufferPrefetcher,MultiPrefetcher,PIFPrefetcher,SBOOEPrefetcher,STeMSPrefetcher,SignaturePathPrefetcher,SignaturePathPrefetcherV2,SlimAMPMPrefetcher,SmsPrefetcher,StridePrefetcher,TaggedPrefetcher}] [--l2-hwp-type {AMPMPrefetcher,BOPPrefetcher,DCPTPrefetcher,IndirectMemoryPrefetcher,IrregularStreamBufferPrefetcher,MultiPrefetcher,PIFPrefetcher,SBOOEPrefetcher,STeMSPrefetcher,SignaturePathPrefetcher,SignaturePathPrefetcherV2,SlimAMPMPrefetcher,SmsPrefetcher,StridePrefetcher,TaggedPrefetcher}] [--checker] [--cpu-clock CPU_CLOCK] [--smt] [--elastic-trace-en] [--inst-trace-file INST_TRACE_FILE] [--data-trace-file DATA_TRACE_FILE] [--dist] [--dist-sync-on-pseudo-op] [--is-switch] [--dist-rank DIST_RANK] [--dist-size DIST_SIZE] [--dist-server-name DIST_SERVER_NAME] [--dist-server-port DIST_SERVER_PORT] [--dist-sync-repeat DIST_SYNC_REPEAT] [--dist-sync-start DIST_SYNC_START] [--ethernet-linkspeed ETHERNET_LINKSPEED] [--ethernet-linkdelay ETHERNET_LINKDELAY] [-I MAXINSTS] [--work-item-id 
WORK_ITEM_ID] [--num-work-ids NUM_WORK_IDS] [--work-begin-cpu-id-exit WORK_BEGIN_CPU_ID_EXIT] [--work-end-exit-count WORK_END_EXIT_COUNT] [--work-begin-exit-count WORK_BEGIN_EXIT_COUNT] [--init-param INIT_PARAM] [--initialize-only] [--simpoint-profile] [--simpoint-interval SIMPOINT_INTERVAL] [--take-simpoint-checkpoints TAKE_SIMPOINT_CHECKPOINTS] [--restore-simpoint-checkpoint] [--take-checkpoints TAKE_CHECKPOINTS] [--max-checkpoints MAX_CHECKPOINTS] [--checkpoint-dir CHECKPOINT_DIR] [-r CHECKPOINT_RESTORE] [--checkpoint-at-end] [--work-begin-checkpoint-count WORK_BEGIN_CHECKPOINT_COUNT] [--work-end-checkpoint-count WORK_END_CHECKPOINT_COUNT] [--work-cpus-checkpoint-count WORK_CPUS_CHECKPOINT_COUNT] [--restore-with-cpu {AtomicSimpleCPU,BaseAtomicSimpleCPU,BaseMinorCPU,BaseNonCachingSimpleCPU,BaseO3CPU,BaseTimingSimpleCPU,DerivO3CPU,NonCachingSimpleCPU,O3CPU,TimingSimpleCPU,X86AtomicSimpleCPU,X86KvmCPU,X86MinorCPU,X86NonCachingSimpleCPU,X86O3CPU,X86TimingSimpleCPU}] [--repeat-switch REPEAT_SWITCH] [-s STANDARD_SWITCH] [-p PROG_INTERVAL] [-W WARMUP_INSTS] [--bench BENCH] [-F FAST_FORWARD] [-S] [--at-instruction] [--spec-input {ref,test,train,smred,mdred,lgred}] [--arm-iset {arm,thumb,aarch64}] [--stats-root STATS_ROOT] [--override-vendor-string OVERRIDE_VENDOR_STRING] [-c CMD] [-o OPTIONS] [-e ENV] [-i INPUT] [--output OUTPUT] [--errout ERROUT] [--chroot CHROOT] [--interp-dir INTERP_DIR] [--redirects REDIRECTS] [--wait-gdb] options: -h, --help show this help message and exit -n NUM_CPUS, --num-cpus NUM_CPUS --sys-voltage SYS_VOLTAGE Top-level voltage for blocks running at system power supply --sys-clock SYS_CLOCK Top-level clock for blocks running at system speed --list-mem-types List available memory types --mem-type 
{CfiMemory,DDR3_1600_8x8,DDR3_2133_8x8,DDR4_2400_16x4,DDR4_2400_4x16,DDR4_2400_8x8,DDR5_4400_4x8,DDR5_6400_4x8,DDR5_8400_4x8,DRAMInterface,GDDR5_4000_2x32,HBM_1000_4H_1x128,HBM_1000_4H_1x64,HBM_2000_4H_1x64,HMC_2500_1x32,LPDDR2_S4_1066_1x32,LPDDR3_1600_1x32,LPDDR5_5500_1x16_8B_BL32,LPDDR5_5500_1x16_BG_BL16,LPDDR5_5500_1x16_BG_BL32,LPDDR5_6400_1x16_8B_BL32,LPDDR5_6400_1x16_BG_BL16,LPDDR5_6400_1x16_BG_BL32,NVMInterface,NVM_2400_1x64,QoSMemSinkInterface,SimpleMemory,WideIO_200_1x128} type of memory to use --mem-channels MEM_CHANNELS number of memory channels --mem-ranks MEM_RANKS number of memory ranks per channel --mem-size MEM_SIZE Specify the physical memory size (single memory) --enable-dram-powerdown Enable low-power states in DRAMInterface --mem-channels-intlv MEM_CHANNELS_INTLV Memory channels interleave --memchecker --external-memory-system EXTERNAL_MEMORY_SYSTEM use external ports of this port_type for caches --tlm-memory TLM_MEMORY use external port for SystemC TLM cosimulation --caches --l2cache --num-dirs NUM_DIRS --num-l2caches NUM_L2CACHES --num-l3caches NUM_L3CACHES --l1d_size L1D_SIZE --l1i_size L1I_SIZE --l2_size L2_SIZE --l3_size L3_SIZE --l1d_assoc L1D_ASSOC --l1i_assoc L1I_ASSOC --l2_assoc L2_ASSOC --l3_assoc L3_ASSOC --cacheline_size CACHELINE_SIZE --ruby -m TICKS, --abs-max-tick TICKS Run to absolute simulated tick specified including ticks from a restored checkpoint --rel-max-tick TICKS Simulate for specified number of ticks relative to the simulation start tick (e.g. if restoring a checkpoint) --maxtime MAXTIME Run to the specified absolute simulated time in seconds -P PARAM, --param PARAM Set a SimObject parameter relative to the root node. An extended Python multi range slicing syntax can be used for arrays. For example: 'system.cpu[0,1,3:8:2].max_insts_all_threads = 42' sets max_insts_all_threads for cpus 0, 1, 3, 5 and 7 Direct parameters of the root object are not accessible, only parameters of its children. 
--list-cpu-types List available CPU types --cpu-type {AtomicSimpleCPU,BaseAtomicSimpleCPU,BaseMinorCPU,BaseNonCachingSimpleCPU,BaseO3CPU,BaseTimingSimpleCPU,DerivO3CPU,NonCachingSimpleCPU,O3CPU,TimingSimpleCPU,X86AtomicSimpleCPU,X86KvmCPU,X86MinorCPU,X86NonCachingSimpleCPU,X86O3CPU,X86TimingSimpleCPU} type of cpu to run with --list-bp-types List available branch predictor types --list-indirect-bp-types List available indirect branch predictor types --bp-type {BiModeBP,LTAGE,LocalBP,MultiperspectivePerceptron64KB,MultiperspectivePerceptron8KB,MultiperspectivePerceptronTAGE64KB,MultiperspectivePerceptronTAGE8KB,TAGE,TAGE_SC_L_64KB,TAGE_SC_L_8KB,TournamentBP} type of branch predictor to run with (if not set, use the default branch predictor of the selected CPU) --indirect-bp-type {SimpleIndirectPredictor} type of indirect branch predictor to run with --list-rp-types List available replacement policy types --list-hwp-types List available hardware prefetcher types --l1i-hwp-type {AMPMPrefetcher,BOPPrefetcher,DCPTPrefetcher,IndirectMemoryPrefetcher,IrregularStreamBufferPrefetcher,MultiPrefetcher,PIFPrefetcher,SBOOEPrefetcher,STeMSPrefetcher,SignaturePathPrefetcher,SignaturePathPrefetcherV2,SlimAMPMPrefetcher,SmsPrefetcher,StridePrefetcher,TaggedPrefetcher} type of hardware prefetcher to use with the L1 instruction cache. (if not set, use the default prefetcher of the selected cache) --l1d-hwp-type {AMPMPrefetcher,BOPPrefetcher,DCPTPrefetcher,IndirectMemoryPrefetcher,IrregularStreamBufferPrefetcher,MultiPrefetcher,PIFPrefetcher,SBOOEPrefetcher,STeMSPrefetcher,SignaturePathPrefetcher,SignaturePathPrefetcherV2,SlimAMPMPrefetcher,SmsPrefetcher,StridePrefetcher,TaggedPrefetcher} type of hardware prefetcher to use with the L1 data cache. 
(if not set, use the default prefetcher of the selected cache) --l2-hwp-type {AMPMPrefetcher,BOPPrefetcher,DCPTPrefetcher,IndirectMemoryPrefetcher,IrregularStreamBufferPrefetcher,MultiPrefetcher,PIFPrefetcher,SBOOEPrefetcher,STeMSPrefetcher,SignaturePathPrefetcher,SignaturePathPrefetcherV2,SlimAMPMPrefetcher,SmsPrefetcher,StridePrefetcher,TaggedPrefetcher} type of hardware prefetcher to use with the L2 cache. (if not set, use the default prefetcher of the selected cache) --checker --cpu-clock CPU_CLOCK Clock for blocks running at CPU speed --smt Only used if multiple programs are specified. If true, then the number of threads per cpu is same as the number of programs. --elastic-trace-en Enable capture of data dependency and instruction fetch traces using elastic trace probe. --inst-trace-file INST_TRACE_FILE Instruction fetch trace file input to Elastic Trace probe in a capture simulation and Trace CPU in a replay simulation --data-trace-file DATA_TRACE_FILE Data dependency trace file input to Elastic Trace probe in a capture simulation and Trace CPU in a replay simulation --dist Parallel distributed gem5 simulation. --dist-sync-on-pseudo-op Use a pseudo-op to start dist-gem5 synchronization. --is-switch Select the network switch simulator process for adistributed gem5 run --dist-rank DIST_RANK Rank of this system within the dist gem5 run. --dist-size DIST_SIZE Number of gem5 processes within the dist gem5 run. 
--dist-server-name DIST_SERVER_NAME Name of the message server host DEFAULT: localhost --dist-server-port DIST_SERVER_PORT Message server listen port DEFAULT: 2200 --dist-sync-repeat DIST_SYNC_REPEAT Repeat interval for synchronisation barriers among dist-gem5 processes DEFAULT: --ethernet-linkdelay --dist-sync-start DIST_SYNC_START Time to schedule the first dist synchronisation barrier DEFAULT:5200000000000t --ethernet-linkspeed ETHERNET_LINKSPEED Link speed in bps DEFAULT: 10Gbps --ethernet-linkdelay ETHERNET_LINKDELAY Link delay in seconds DEFAULT: 10us -I MAXINSTS, --maxinsts MAXINSTS Total number of instructions to simulate (default: run forever) --work-item-id WORK_ITEM_ID the specific work id for exit & checkpointing --num-work-ids NUM_WORK_IDS Number of distinct work item types --work-begin-cpu-id-exit WORK_BEGIN_CPU_ID_EXIT exit when work starts on the specified cpu --work-end-exit-count WORK_END_EXIT_COUNT exit at specified work end count --work-begin-exit-count WORK_BEGIN_EXIT_COUNT exit at specified work begin count --init-param INIT_PARAM Parameter available in simulation with m5 initparam --initialize-only Exit after initialization. Do not simulate time. Useful when gem5 is run as a library. 
--simpoint-profile Enable basic block profiling for SimPoints --simpoint-interval SIMPOINT_INTERVAL SimPoint interval in num of instructions --take-simpoint-checkpoints TAKE_SIMPOINT_CHECKPOINTS <simpoint file,weight file,interval-length,warmup- length> --restore-simpoint-checkpoint restore from a simpoint checkpoint taken with --take- simpoint-checkpoints --take-checkpoints TAKE_CHECKPOINTS <M,N> take checkpoints at tick M and every N ticks thereafter --max-checkpoints MAX_CHECKPOINTS the maximum number of checkpoints to drop --checkpoint-dir CHECKPOINT_DIR Place all checkpoints in this absolute directory -r CHECKPOINT_RESTORE, --checkpoint-restore CHECKPOINT_RESTORE restore from checkpoint <N> --checkpoint-at-end take a checkpoint at end of run --work-begin-checkpoint-count WORK_BEGIN_CHECKPOINT_COUNT checkpoint at specified work begin count --work-end-checkpoint-count WORK_END_CHECKPOINT_COUNT checkpoint at specified work end count --work-cpus-checkpoint-count WORK_CPUS_CHECKPOINT_COUNT checkpoint and exit when active cpu count is reached --restore-with-cpu {AtomicSimpleCPU,BaseAtomicSimpleCPU,BaseMinorCPU,BaseNonCachingSimpleCPU,BaseO3CPU,BaseTimingSimpleCPU,DerivO3CPU,NonCachingSimpleCPU,O3CPU,TimingSimpleCPU,X86AtomicSimpleCPU,X86KvmCPU,X86MinorCPU,X86NonCachingSimpleCPU,X86O3CPU,X86TimingSimpleCPU} cpu type for restoring from a checkpoint --repeat-switch REPEAT_SWITCH switch back and forth between CPUs with period <N> -s STANDARD_SWITCH, --standard-switch STANDARD_SWITCH switch from timing to Detailed CPU after warmup period of <N> -p PROG_INTERVAL, --prog-interval PROG_INTERVAL CPU Progress Interval -W WARMUP_INSTS, --warmup-insts WARMUP_INSTS Warmup period in total instructions (requires --standard-switch) --bench BENCH base names for --take-checkpoint and --checkpoint- restore -F FAST_FORWARD, --fast-forward FAST_FORWARD Number of instructions to fast forward before switching -S, --simpoint Use workload simpoints as an instruction offset for 
--checkpoint-restore or --take-checkpoint. --at-instruction Treat value of --checkpoint-restore or --take- checkpoint as a number of instructions. --spec-input {ref,test,train,smred,mdred,lgred} Input set size for SPEC CPU2000 benchmarks. --arm-iset {arm,thumb,aarch64} ARM instruction set. --stats-root STATS_ROOT If given, dump only stats of objects under the given SimObject. SimObjects are identified with Python notation as in: system.cpu[0].mmu. All elements of an array can be selected at once with: system.cpu[:].mmu. If given multiple times, dump stats that are present under any of the roots. If not given, dump all stats. --override-vendor-string OVERRIDE_VENDOR_STRING Override vendor string returned by CPUID instruction in X86. -c CMD, --cmd CMD The binary to run in syscall emulation mode. -o OPTIONS, --options OPTIONS The options to pass to the binary, use around the entire string -e ENV, --env ENV Initialize workload environment from text file. -i INPUT, --input INPUT Read stdin from a file. --output OUTPUT Redirect stdout to a file. --errout ERROUT Redirect stderr to a file. --chroot CHROOT The chroot option allows a user to alter the search path for processes running in SE mode. Normally, the search path would begin at the root of the filesystem (i.e. /). With chroot, a user can force the process to begin looking atsome other location (i.e. /home/user/rand_dir).The intended use is to trick sophisticated software which queries the __HOST__ filesystem for information or functionality. Instead of finding files on the __HOST__ filesystem, the process will find the user's replacment files. --interp-dir INTERP_DIR The interp-dir option is used for setting the interpreter's path. This will allow to load the guest dynamic linker/loader itself from the elf binary. The option points to the parent folder of the guest /lib in the host fs --redirects REDIRECTS A collection of one or more redirect paths to be used in syscall emulation.Usage: gem5.opt [...] 
--redirects /dir1=/path/to/host/dir1 --redirects /dir2=/path/to/host/dir2 --wait-gdb Wait for remote GDB to connect.
10-24
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值