About Cache: how the ARM kernel sets up the cpu_cache function table


//@arch/arm/kernel/setup.c
void __init setup_arch(char **cmdline_p)
+-- setup_processor(void)
    +-- struct proc_info_list *list;     // struct proc_info_list: definition below
        //locate processor in the list of supported processor types.  The linker builds this 
        //table for us from the entries in arch/arm/mm/proc-*.S
    +-- list = lookup_processor_type(read_cpuid_id());  // lookup_processor_type() is defined in arch/arm/kernel/head-common.S, see below
    +-- cpu_name = list->cpu_name;
    +-- __cpu_architecture = __get_cpu_architecture();
    +-- processor = *list->proc;
    +-- cpu_tlb = *list->tlb;            // the whole set of TLB-related functions comes from here.
    +-- cpu_user = *list->user;
    +-- cpu_cache = *list->cache;        // cpu_cache is taken from list, and list is obtained via the assembly routine lookup_processor_type()
    +-- elf_hwcap = list->elf_hwcap;
    +-- feat_v6_fixup();
    +-- cacheid_init();
    +-- cpu_init();


// struct proc_info_list is defined as follows:
//@arch/arm/include/asm/procinfo.h
##########################################
//Note!  struct processor is always defined if we're using MULTI_CPU, otherwise this entry 
//is unused, but still exists.
//NOTE! The following structure is defined by assembly language, NOT C code.  For more information, 
//check: arch/arm/mm/proc-*.S and arch/arm/kernel/head.S

struct proc_info_list {
        unsigned int            cpu_val;
        unsigned int            cpu_mask;
        unsigned long           __cpu_mm_mmu_flags;     /* used by head.S */
        unsigned long           __cpu_io_mmu_flags;     /* used by head.S */
        unsigned long           __cpu_flush;            /* used by head.S */
        const char              *arch_name;
        const char              *elf_name;
        unsigned int            elf_hwcap;
        const char              *cpu_name;
        struct processor        *proc;
        struct cpu_tlb_fns      *tlb;
        struct cpu_user_fns     *user;
        struct cpu_cache_fns    *cache;
};


@arch/arm/kernel/head-common.S
/*
 * This provides a C-API version of __lookup_processor_type
 */
ENTRY(lookup_processor_type)
        stmfd   sp!, {r4 - r6, r9, lr}
        mov     r9, r0
        bl      __lookup_processor_type
        mov     r0, r5
        ldmfd   sp!, {r4 - r6, r9, pc}
ENDPROC(lookup_processor_type)

/*
 * Read processor ID register (CP#15, CR0), and look up in the linker-built
 * supported processor list.  Note that we can't use the absolute addresses
 * for the __proc_info lists since we aren't running with the MMU on
 * (and therefore, we are not in the correct address space).  We have to
 * calculate the offset.
 *
 *      r9 = cpuid
 * Returns:
 *      r3, r4, r6 corrupted
 *      r5 = proc_info pointer in physical address space
 *      r9 = cpuid (preserved)
 */
        __CPUINIT
__lookup_processor_type:
        adr     r3, __lookup_processor_type_data
        ldmia   r3, {r4 - r6}
        sub     r3, r3, r4                      @ get offset between virt&phys
        add     r5, r5, r3                      @ convert virt addresses to
        add     r6, r6, r3                      @ physical address space
1:      ldmia   r5, {r3, r4}                    @ value, mask
        and     r4, r4, r9                      @ mask wanted bits
        teq     r3, r4
        beq     2f
        add     r5, r5, #PROC_INFO_SZ           @ sizeof(proc_info_list)
        cmp     r5, r6
        blo     1b
        mov     r5, #0                          @ unknown processor
2:      mov     pc, lr
ENDPROC(__lookup_processor_type)


/*
 * Look in <asm/procinfo.h> for information about the __proc_info structure.
 */
        .align  2
        .type   __lookup_processor_type_data, %object
__lookup_processor_type_data:
        .long   .                                                                @ -> r4
        .long   __proc_info_begin                                                @ -> r5
        .long   __proc_info_end                                                  @ -> r6
        .size   __lookup_processor_type_data, . - __lookup_processor_type_data
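

// Conceptually, the matching loop in __lookup_processor_type behaves like the
// hedged C sketch below (the real code additionally has to convert the table
// addresses to physical ones, since the MMU is still off when head.S calls it):

/* Sketch only -- assumes <asm/procinfo.h> for struct proc_info_list. */
static struct proc_info_list *lookup_processor_type_sketch(unsigned int cpuid)
{
        extern struct proc_info_list __proc_info_begin[], __proc_info_end[];
        struct proc_info_list *p;

        for (p = __proc_info_begin; p < __proc_info_end; p++)
                if ((cpuid & p->cpu_mask) == p->cpu_val)
                        return p;       /* r5 = matching proc_info entry */

        return NULL;                    /* r5 = 0: unknown processor */
}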




@arch/arm/kernel/vmlinux.lds.S
        ....
#define PROC_INFO                                                          \
        . = ALIGN(4);                                                      \
        VMLINUX_SYMBOL(__proc_info_begin) = .;                             \   
        *(.proc.info.init)                                                 \   
        VMLINUX_SYMBOL(__proc_info_end) = .;                


        ....
        .text : {         
        ....
        *(.got)                 /* Global offset table          */
        ARM_CPU_KEEP(PROC_INFO)
        ....

        _etext = .;                     /* End of text and rodata section */

        ....
        .init.proc.info : {
                ARM_CPU_DISCARD(PROC_INFO)
        }
        .init.arch.info : {
                __arch_info_begin = .;
                *(.arch.info.init)
                __arch_info_end = .;
        }
        .init.tagtable : {
                __tagtable_begin = .;
                *(.taglist.init)
                __tagtable_end = .;
        }

        ....
}
ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support")
ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")



// The definition of struct proc_info_list (repeated from above); note the cache field:
@arch/arm/include/asm/procinfo.h
struct proc_info_list {
        unsigned int            cpu_val;
        unsigned int            cpu_mask;
        unsigned long           __cpu_mm_mmu_flags;     /* used by head.S */
        unsigned long           __cpu_io_mmu_flags;     /* used by head.S */
        unsigned long           __cpu_flush;            /* used by head.S */
        const char              *arch_name;
        const char              *elf_name;
        unsigned int            elf_hwcap;
        const char              *cpu_name;
        struct processor        *proc;
        struct cpu_tlb_fns      *tlb;
        struct cpu_user_fns     *user;
        struct cpu_cache_fns    *cache;   // the set of cpu cache operation functions.
};


// struct cpu_cache_fns is defined as follows:
@arch/arm/include/asm/cacheflush.h
/*
 *	MM Cache Management
 *	===================
 *
 *	The arch/arm/mm/cache-*.S and arch/arm/mm/proc-*.S files
 *	implement these methods.
 *
 *	Start addresses are inclusive and end addresses are exclusive;
 *	start addresses should be rounded down, end addresses up.
 *
 *	See Documentation/cachetlb.txt for more information.
 *	Please note that the implementation of these, and the required
 *	effects are cache-type (VIVT/VIPT/PIPT) specific.
 *
 *	flush_icache_all()
 *
 *		Unconditionally clean and invalidate the entire icache.
 *		Currently only needed for cache-v6.S and cache-v7.S, see
 *		__flush_icache_all for the generic implementation.
 *
 *	flush_kern_all()
 *
 *		Unconditionally clean and invalidate the entire cache.
 *
 *	flush_user_all()
 *
 *		Clean and invalidate all user space cache entries
 *		before a change of page tables.
 *
 *	flush_user_range(start, end, flags)
 *
 *		Clean and invalidate a range of cache entries in the
 *		specified address space before a change of page tables.
 *		- start - user start address (inclusive, page aligned)
 *		- end   - user end address   (exclusive, page aligned)
 *		- flags - vma->vm_flags field
 *
 *	coherent_kern_range(start, end)
 *
 *		Ensure coherency between the Icache and the Dcache in the
 *		region described by start, end.  If you have non-snooping
 *		Harvard caches, you need to implement this function.
 *		- start  - virtual start address
 *		- end    - virtual end address
 *
 *	coherent_user_range(start, end)
 *
 *		Ensure coherency between the Icache and the Dcache in the
 *		region described by start, end.  If you have non-snooping
 *		Harvard caches, you need to implement this function.
 *		- start  - virtual start address
 *		- end    - virtual end address
 *
 *	flush_kern_dcache_area(kaddr, size)
 *
 *		Ensure that the data held in page is written back.
 *		- kaddr  - page address
 *		- size   - region size
 *
 *	DMA Cache Coherency
 *	===================
 *
 *	dma_flush_range(start, end)
 *
 *		Clean and invalidate the specified virtual address range.
 *		- start  - virtual start address
 *		- end    - virtual end address
 */

struct cpu_cache_fns {
	void (*flush_icache_all)(void);
	void (*flush_kern_all)(void);
	void (*flush_user_all)(void);
	void (*flush_user_range)(unsigned long, unsigned long, unsigned int);

	void (*coherent_kern_range)(unsigned long, unsigned long);
	int  (*coherent_user_range)(unsigned long, unsigned long);
	void (*flush_kern_dcache_area)(void *, size_t);

	void (*dma_map_area)(const void *, size_t, int);
	void (*dma_unmap_area)(const void *, size_t, int);

	void (*dma_flush_range)(const void *, const void *);
};
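

// A hedged usage sketch: to make a freshly written kernel buffer visible to the
// instruction stream, a caller goes through the coherent range operation described
// above (in practice via the __cpuc_coherent_kern_range wrapper shown further down).
// The helper name here is illustrative, not a kernel API:

static void sync_code_buffer(void *buf, size_t len)
{
        unsigned long start = (unsigned long)buf;       /* inclusive */
        unsigned long end   = start + len;              /* exclusive */

        /* Clean the D-cache and invalidate the I-cache over [start, end). */
        __cpuc_coherent_kern_range(start, end);
}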




@arch/arm/kernel/setup.c
#ifdef MULTI_CPU
struct processor processor __read_mostly;
#endif
#ifdef MULTI_TLB
struct cpu_tlb_fns cpu_tlb __read_mostly;
#endif
#ifdef MULTI_USER
struct cpu_user_fns cpu_user __read_mostly;
#endif
#ifdef MULTI_CACHE
struct cpu_cache_fns cpu_cache __read_mostly;
#endif
#ifdef CONFIG_OUTER_CACHE
struct outer_cache_fns outer_cache __read_mostly;
EXPORT_SYMBOL(outer_cache);
#endif

##########################################################
@arch/arm/include/asm/cacheflush.h

#ifdef MULTI_CACHE

extern struct cpu_cache_fns cpu_cache;

#define __cpuc_flush_icache_all         cpu_cache.flush_icache_all
#define __cpuc_flush_kern_all           cpu_cache.flush_kern_all
#define __cpuc_flush_user_all           cpu_cache.flush_user_all
#define __cpuc_flush_user_range         cpu_cache.flush_user_range
#define __cpuc_coherent_kern_range      cpu_cache.coherent_kern_range
#define __cpuc_coherent_user_range      cpu_cache.coherent_user_range
#define __cpuc_flush_dcache_area        cpu_cache.flush_kern_dcache_area

/*
 * These are private to the dma-mapping API.  Do not use directly.
 * Their sole purpose is to ensure that data held in the cache
 * is visible to DMA, or data written by DMA to system memory is
 * visible to the CPU.
 */
#define dmac_map_area                   cpu_cache.dma_map_area
#define dmac_unmap_area                 cpu_cache.dma_unmap_area
#define dmac_flush_range                cpu_cache.dma_flush_range

#else

extern void __cpuc_flush_icache_all(void);
extern void __cpuc_flush_kern_all(void);
extern void __cpuc_flush_user_all(void);
extern void __cpuc_flush_user_range(unsigned long, unsigned long, unsigned int);
extern void __cpuc_coherent_kern_range(unsigned long, unsigned long);
extern void __cpuc_coherent_user_range(unsigned long, unsigned long);
extern void __cpuc_flush_dcache_area(void *, size_t);

/*
 * These are private to the dma-mapping API.  Do not use directly.
 * Their sole purpose is to ensure that data held in the cache
 * is visible to DMA, or data written by DMA to system memory is
 * visible to the CPU.
 */
extern void dmac_map_area(const void *, size_t, int);
extern void dmac_unmap_area(const void *, size_t, int);
extern void dmac_flush_range(const void *, const void *);

#endif
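
// To make the MULTI_CACHE indirection concrete, here is a small, hedged user-space
// demo of the same pattern: a struct of function pointers is copied from a table at
// init time, and a generic macro name dispatches through it. All demo_* names are
// invented for the illustration; only the pattern mirrors the kernel code above.

#include <stdio.h>

struct demo_cache_fns {
        void (*flush_kern_all)(void);
};

static void v4wt_style_flush(void)   { puts("v4wt-style flush_kern_cache_all"); }
static void arm920_style_flush(void) { puts("arm920-style flush_kern_cache_all"); }

/* The "MULTI_CACHE" global, filled in once at boot (cf. cpu_cache in setup_processor). */
static struct demo_cache_fns demo_cache;
#define demo_cpuc_flush_kern_all demo_cache.flush_kern_all

int main(void)
{
        struct demo_cache_fns writethrough = { .flush_kern_all = v4wt_style_flush };
        struct demo_cache_fns writeback    = { .flush_kern_all = arm920_style_flush };
        int dcache_writethrough = 1;    /* stand-in for CONFIG_CPU_DCACHE_WRITETHROUGH */

        /* cf. cpu_cache = *list->cache; */
        demo_cache = dcache_writethrough ? writethrough : writeback;
        demo_cpuc_flush_kern_all();     /* indirect call through the copied table */
        return 0;
}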

//@arch/arm/mm/proc-arm920.S
        ...
        .align

        .section ".proc.info.init", #alloc, #execinstr   // __arm920_proc_info is placed in this section.

        .type   __arm920_proc_info,#object
__arm920_proc_info:
        .long   0x41009200
        .long   0xff00fff0
        .long   PMD_TYPE_SECT | \
                PMD_SECT_BUFFERABLE | \
                PMD_SECT_CACHEABLE | \
                PMD_BIT4 | \
                PMD_SECT_AP_WRITE | \
                PMD_SECT_AP_READ
        .long   PMD_TYPE_SECT | \
                PMD_BIT4 | \
                PMD_SECT_AP_WRITE | \
                PMD_SECT_AP_READ
        b       __arm920_setup
        .long   cpu_arch_name
        .long   cpu_elf_name
        .long   HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
        .long   cpu_arm920_name
        .long   arm920_processor_functions
        .long   v4wbi_tlb_fns
        .long   v4wb_user_fns
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
        .long   arm920_cache_fns
#else
        .long   v4wt_cache_fns
#endif
        .size   __arm920_proc_info, . - __arm920_proc_info
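
// Read as C, the record above corresponds roughly to the initializer below (a sketch
// only: the real object exists only as the assembly words above, the three MMU-flag
// words and the 'b __arm920_setup' slot are consumed by head.S rather than C, and the
// string labels are assembly symbols, not C declarations):

static const struct proc_info_list __arm920_proc_info_sketch = {
        .cpu_val   = 0x41009200,
        .cpu_mask  = 0xff00fff0,
        /* __cpu_mm_mmu_flags, __cpu_io_mmu_flags, __cpu_flush: used by head.S */
        .arch_name = cpu_arch_name,
        .elf_name  = cpu_elf_name,
        .elf_hwcap = HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB,
        .cpu_name  = cpu_arm920_name,
        .proc      = &arm920_processor_functions,
        .tlb       = &v4wbi_tlb_fns,
        .user      = &v4wb_user_fns,
        .cache     = &v4wt_cache_fns,   /* &arm920_cache_fns without CONFIG_CPU_DCACHE_WRITETHROUGH */
};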


// Next, find the definition of v4wt_cache_fns. Grepping for the symbol directly turns up nothing, so it must be generated by a macro.
// Combined with the comments on struct cpu_cache_fns, the v4wt function set appears to be implemented in cache-v4wt.S:
[zrlean@e4]$grep ENTRY cache-v4wt.S
ENTRY(v4wt_flush_icache_all)
ENTRY(v4wt_flush_user_cache_all)
ENTRY(v4wt_flush_kern_cache_all)
ENTRY(v4wt_flush_user_cache_range)
ENTRY(v4wt_coherent_kern_range)
ENTRY(v4wt_coherent_user_range)
ENTRY(v4wt_flush_kern_dcache_area)
ENTRY(v4wt_dma_unmap_area)
ENTRY(v4wt_dma_map_area)

// And at the end of arch/arm/mm/cache-v4wt.S there is the following definition:
__INITDATA

        @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
        define_cache_functions v4wt

// define_cache_functions here is a macro written in assembly:
@arch/arm/mm/proc-macros.S
...
.macro define_cache_functions name:req
        .align 2
        .type   \name\()_cache_fns, #object
ENTRY(\name\()_cache_fns)
        .long   \name\()_flush_icache_all
        .long   \name\()_flush_kern_cache_all
        .long   \name\()_flush_user_cache_all
        .long   \name\()_flush_user_cache_range
        .long   \name\()_coherent_kern_range
        .long   \name\()_coherent_user_range
        .long   \name\()_flush_kern_dcache_area
        .long   \name\()_dma_map_area
        .long   \name\()_dma_unmap_area
        .long   \name\()_dma_flush_range
        .size   \name\()_cache_fns, . - \name\()_cache_fns
.endm

// So 'define_cache_functions v4wt' expands to:
        .align 2
        .type   v4wt_cache_fns, #object
ENTRY(v4wt_cache_fns)
        .long   v4wt_flush_icache_all
        .long   v4wt_flush_kern_cache_all
        .long   v4wt_flush_user_cache_all
        .long   v4wt_flush_user_cache_range
        .long   v4wt_coherent_kern_range
        .long   v4wt_coherent_user_range
        .long   v4wt_flush_kern_dcache_area
        .long   v4wt_dma_map_area
        .long   v4wt_dma_unmap_area
        .long   v4wt_dma_flush_range
        .size   v4wt_cache_fns, . - v4wt_cache_fns

// And v4wt_cache_fns is exactly the struct cpu_cache_fns object that the cache field of __arm920_proc_info is initialized to point at.
// With that, all of the cache maintenance functions have been located.
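
// Putting it together (ARM920 built with CONFIG_CPU_DCACHE_WRITETHROUGH), assuming the
// reading above is right, the whole path looks like this:
setup_arch()
 +-- setup_processor()
     +-- list = lookup_processor_type(read_cpuid_id())   // matches 0x41009200/0xff00fff0 -> &__arm920_proc_info
     +-- cpu_cache = *list->cache                        // copies v4wt_cache_fns into the global cpu_cache
...
__cpuc_flush_kern_all()                                  // MULTI_CACHE: expands to cpu_cache.flush_kern_all()
 +-- v4wt_flush_kern_cache_all()                         // implemented in arch/arm/mm/cache-v4wt.S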