About Cache: how the ARM kernel sets up the cpu_cache function table


//@arch/arm/kernel/setup.c
void __init setup_arch(char **cmdline_p)
+-- setup_processor(void)
    +-- struct proc_info_list *list;     // struct proc_info_list: definition below
        //locate processor in the list of supported processor types.  The linker builds this 
        //table for us from the entries in arch/arm/mm/proc-*.S
    +-- list = lookup_processor_type(read_cpuid_id());  // lookup_processor_type() is defined in arch/arm/kernel/head-common.S, see below
    +-- cpu_name = list->cpu_name;
    +-- __cpu_architecture = __get_cpu_architecture();
    +-- processor = *list->proc;
    +-- cpu_tlb = *list->tlb;            // the whole set of TLB-related functions comes from here.
    +-- cpu_user = *list->user;
    +-- cpu_cache = *list->cache;        // cpu_cache is taken from list, and list is obtained via the assembly routine lookup_processor_type()
    +-- elf_hwcap = list->elf_hwcap;
    +-- feat_v6_fixup();
    +-- cacheid_init();
    +-- cpu_init();


// struct proc_info_list is defined as follows:
//@arch/arm/include/asm/procinfo.h
##########################################
//Note!  struct processor is always defined if we're using MULTI_CPU, otherwise this entry 
//is unused, but still exists.
//NOTE! The following structure is defined by assembly language, NOT C code.  For more information, 
//check: arch/arm/mm/proc-*.S and arch/arm/kernel/head.S

struct proc_info_list {
        unsigned int            cpu_val;
        unsigned int            cpu_mask;
        unsigned long           __cpu_mm_mmu_flags;     /* used by head.S */
        unsigned long           __cpu_io_mmu_flags;     /* used by head.S */
        unsigned long           __cpu_flush;            /* used by head.S */
        const char              *arch_name;
        const char              *elf_name;
        unsigned int            elf_hwcap;
        const char              *cpu_name;
        struct processor        *proc;
        struct cpu_tlb_fns      *tlb;
        struct cpu_user_fns     *user;
        struct cpu_cache_fns    *cache;
};


@arch/arm/kernel/head-common.S
/*
 * This provides a C-API version of __lookup_processor_type
 */
ENTRY(lookup_processor_type)
        stmfd   sp!, {r4 - r6, r9, lr}
        mov     r9, r0
        bl      __lookup_processor_type
        mov     r0, r5
        ldmfd   sp!, {r4 - r6, r9, pc}
ENDPROC(lookup_processor_type)

/*
 * Read processor ID register (CP#15, CR0), and look up in the linker-built
 * supported processor list.  Note that we can't use the absolute addresses
 * for the __proc_info lists since we aren't running with the MMU on
 * (and therefore, we are not in the correct address space).  We have to
 * calculate the offset.
 *
 *      r9 = cpuid
 * Returns:
 *      r3, r4, r6 corrupted
 *      r5 = proc_info pointer in physical address space
 *      r9 = cpuid (preserved)
 */
        __CPUINIT
__lookup_processor_type:
        adr     r3, __lookup_processor_type_data
        ldmia   r3, {r4 - r6}
        sub     r3, r3, r4                      @ get offset between virt&phys
        add     r5, r5, r3                      @ convert virt addresses to
        add     r6, r6, r3                      @ physical address space
1:      ldmia   r5, {r3, r4}                    @ value, mask
        and     r4, r4, r9                      @ mask wanted bits
        teq     r3, r4
        beq     2f
        add     r5, r5, #PROC_INFO_SZ           @ sizeof(proc_info_list)
        cmp     r5, r6
        blo     1b
        mov     r5, #0                          @ unknown processor
2:      mov     pc, lr
ENDPROC(__lookup_processor_type)


/*
 * Look in <asm/procinfo.h> for information about the __proc_info structure.
 */
        .align  2
        .type   __lookup_processor_type_data, %object
__lookup_processor_type_data:
        .long   .                                                                @ -> r4
        .long   __proc_info_begin                                                @ -> r5
        .long   __proc_info_end                                                  @ -> r6
        .size   __lookup_processor_type_data, . - __lookup_processor_type_data
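

// Conceptually, the matching loop in __lookup_processor_type behaves like the
// hedged C sketch below (the real code additionally has to convert the table
// addresses to physical ones, since the MMU is still off when head.S calls it):

/* Sketch only -- assumes <asm/procinfo.h> for struct proc_info_list. */
static struct proc_info_list *lookup_processor_type_sketch(unsigned int cpuid)
{
        extern struct proc_info_list __proc_info_begin[], __proc_info_end[];
        struct proc_info_list *p;

        for (p = __proc_info_begin; p < __proc_info_end; p++)
                if ((cpuid & p->cpu_mask) == p->cpu_val)
                        return p;       /* r5 = matching proc_info entry */

        return NULL;                    /* r5 = 0: unknown processor */
}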




@arch/arm/kernel/vmlinux.lds.S
        ....
#define PROC_INFO                                                          \
        . = ALIGN(4);                                                      \
        VMLINUX_SYMBOL(__proc_info_begin) = .;                             \   
        *(.proc.info.init)                                                 \   
        VMLINUX_SYMBOL(__proc_info_end) = .;                


        ....
        .text : {         
        ....
        *(.got)                 /* Global offset table          */
        ARM_CPU_KEEP(PROC_INFO)
        ....

        _etext = .;                     /* End of text and rodata section */

        ....
        .init.proc.info : {
                ARM_CPU_DISCARD(PROC_INFO)
        }
        .init.arch.info : {
                __arch_info_begin = .;
                *(.arch.info.init)
                __arch_info_end = .;
        }
        .init.tagtable : {
                __tagtable_begin = .;
                *(.taglist.init)
                __tagtable_end = .;
        }

        ....
}
ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support")
ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")



// The definition of struct proc_info_list (repeated from above); note the cache field:
@arch/arm/include/asm/procinfo.h
struct proc_info_list {
        unsigned int            cpu_val;
        unsigned int            cpu_mask;
        unsigned long           __cpu_mm_mmu_flags;     /* used by head.S */
        unsigned long           __cpu_io_mmu_flags;     /* used by head.S */
        unsigned long           __cpu_flush;            /* used by head.S */
        const char              *arch_name;
        const char              *elf_name;
        unsigned int            elf_hwcap;
        const char              *cpu_name;
        struct processor        *proc;
        struct cpu_tlb_fns      *tlb;
        struct cpu_user_fns     *user;
        struct cpu_cache_fns    *cache;   // the set of cpu cache operation functions.
};


// struct cpu_cache_fns is defined as follows:
@arch/arm/include/asm/cacheflush.h
/*
 *	MM Cache Management
 *	===================
 *
 *	The arch/arm/mm/cache-*.S and arch/arm/mm/proc-*.S files
 *	implement these methods.
 *
 *	Start addresses are inclusive and end addresses are exclusive;
 *	start addresses should be rounded down, end addresses up.
 *
 *	See Documentation/cachetlb.txt for more information.
 *	Please note that the implementation of these, and the required
 *	effects are cache-type (VIVT/VIPT/PIPT) specific.
 *
 *	flush_icache_all()
 *
 *		Unconditionally clean and invalidate the entire icache.
 *		Currently only needed for cache-v6.S and cache-v7.S, see
 *		__flush_icache_all for the generic implementation.
 *
 *	flush_kern_all()
 *
 *		Unconditionally clean and invalidate the entire cache.
 *
 *	flush_user_all()
 *
 *		Clean and invalidate all user space cache entries
 *		before a change of page tables.
 *
 *	flush_user_range(start, end, flags)
 *
 *		Clean and invalidate a range of cache entries in the
 *		specified address space before a change of page tables.
 *		- start - user start address (inclusive, page aligned)
 *		- end   - user end address   (exclusive, page aligned)
 *		- flags - vma->vm_flags field
 *
 *	coherent_kern_range(start, end)
 *
 *		Ensure coherency between the Icache and the Dcache in the
 *		region described by start, end.  If you have non-snooping
 *		Harvard caches, you need to implement this function.
 *		- start  - virtual start address
 *		- end    - virtual end address
 *
 *	coherent_user_range(start, end)
 *
 *		Ensure coherency between the Icache and the Dcache in the
 *		region described by start, end.  If you have non-snooping
 *		Harvard caches, you need to implement this function.
 *		- start  - virtual start address
 *		- end    - virtual end address
 *
 *	flush_kern_dcache_area(kaddr, size)
 *
 *		Ensure that the data held in page is written back.
 *		- kaddr  - page address
 *		- size   - region size
 *
 *	DMA Cache Coherency
 *	===================
 *
 *	dma_flush_range(start, end)
 *
 *		Clean and invalidate the specified virtual address range.
 *		- start  - virtual start address
 *		- end    - virtual end address
 */

struct cpu_cache_fns {
	void (*flush_icache_all)(void);
	void (*flush_kern_all)(void);
	void (*flush_user_all)(void);
	void (*flush_user_range)(unsigned long, unsigned long, unsigned int);

	void (*coherent_kern_range)(unsigned long, unsigned long);
	int  (*coherent_user_range)(unsigned long, unsigned long);
	void (*flush_kern_dcache_area)(void *, size_t);

	void (*dma_map_area)(const void *, size_t, int);
	void (*dma_unmap_area)(const void *, size_t, int);

	void (*dma_flush_range)(const void *, const void *);
};
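

// A hedged usage sketch: to make a freshly written kernel buffer visible to the
// instruction stream, a caller goes through the coherent range operation described
// above (in practice via the __cpuc_coherent_kern_range wrapper shown further down).
// The helper name here is illustrative, not a kernel API:

static void sync_code_buffer(void *buf, size_t len)
{
        unsigned long start = (unsigned long)buf;       /* inclusive */
        unsigned long end   = start + len;              /* exclusive */

        /* Clean the D-cache and invalidate the I-cache over [start, end). */
        __cpuc_coherent_kern_range(start, end);
}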




@arch/arm/kernel/setup.c
#ifdef MULTI_CPU
struct processor processor __read_mostly;
#endif
#ifdef MULTI_TLB
struct cpu_tlb_fns cpu_tlb __read_mostly;
#endif
#ifdef MULTI_USER
struct cpu_user_fns cpu_user __read_mostly;
#endif
#ifdef MULTI_CACHE
struct cpu_cache_fns cpu_cache __read_mostly;
#endif
#ifdef CONFIG_OUTER_CACHE
struct outer_cache_fns outer_cache __read_mostly;
EXPORT_SYMBOL(outer_cache);
#endif

##########################################################
@arch/arm/include/asm/cacheflush.h

#ifdef MULTI_CACHE

extern struct cpu_cache_fns cpu_cache;

#define __cpuc_flush_icache_all         cpu_cache.flush_icache_all
#define __cpuc_flush_kern_all           cpu_cache.flush_kern_all
#define __cpuc_flush_user_all           cpu_cache.flush_user_all
#define __cpuc_flush_user_range         cpu_cache.flush_user_range
#define __cpuc_coherent_kern_range      cpu_cache.coherent_kern_range
#define __cpuc_coherent_user_range      cpu_cache.coherent_user_range
#define __cpuc_flush_dcache_area        cpu_cache.flush_kern_dcache_area

/*
 * These are private to the dma-mapping API.  Do not use directly.
 * Their sole purpose is to ensure that data held in the cache
 * is visible to DMA, or data written by DMA to system memory is
 * visible to the CPU.
 */
#define dmac_map_area                   cpu_cache.dma_map_area
#define dmac_unmap_area                 cpu_cache.dma_unmap_area
#define dmac_flush_range                cpu_cache.dma_flush_range

#else

extern void __cpuc_flush_icache_all(void);
extern void __cpuc_flush_kern_all(void);
extern void __cpuc_flush_user_all(void);
extern void __cpuc_flush_user_range(unsigned long, unsigned long, unsigned int);
extern void __cpuc_coherent_kern_range(unsigned long, unsigned long);
extern void __cpuc_coherent_user_range(unsigned long, unsigned long);
extern void __cpuc_flush_dcache_area(void *, size_t);

/*
 * These are private to the dma-mapping API.  Do not use directly.
 * Their sole purpose is to ensure that data held in the cache
 * is visible to DMA, or data written by DMA to system memory is
 * visible to the CPU.
 */
extern void dmac_map_area(const void *, size_t, int);
extern void dmac_unmap_area(const void *, size_t, int);
extern void dmac_flush_range(const void *, const void *);

#endif
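
// To make the MULTI_CACHE indirection concrete, here is a small, hedged user-space
// demo of the same pattern: a struct of function pointers is copied from a table at
// init time, and a generic macro name dispatches through it. All demo_* names are
// invented for the illustration; only the pattern mirrors the kernel code above.

#include <stdio.h>

struct demo_cache_fns {
        void (*flush_kern_all)(void);
};

static void v4wt_style_flush(void)   { puts("v4wt-style flush_kern_cache_all"); }
static void arm920_style_flush(void) { puts("arm920-style flush_kern_cache_all"); }

/* The "MULTI_CACHE" global, filled in once at boot (cf. cpu_cache in setup_processor). */
static struct demo_cache_fns demo_cache;
#define demo_cpuc_flush_kern_all demo_cache.flush_kern_all

int main(void)
{
        struct demo_cache_fns writethrough = { .flush_kern_all = v4wt_style_flush };
        struct demo_cache_fns writeback    = { .flush_kern_all = arm920_style_flush };
        int dcache_writethrough = 1;    /* stand-in for CONFIG_CPU_DCACHE_WRITETHROUGH */

        /* cf. cpu_cache = *list->cache; */
        demo_cache = dcache_writethrough ? writethrough : writeback;
        demo_cpuc_flush_kern_all();     /* indirect call through the copied table */
        return 0;
}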

//@arch/arm/mm/proc-arm920.S
        ...
        .align

        .section ".proc.info.init", #alloc, #execinstr   // __arm920_proc_info is placed in this section.

        .type   __arm920_proc_info,#object
__arm920_proc_info:
        .long   0x41009200
        .long   0xff00fff0
        .long   PMD_TYPE_SECT | \
                PMD_SECT_BUFFERABLE | \
                PMD_SECT_CACHEABLE | \
                PMD_BIT4 | \
                PMD_SECT_AP_WRITE | \
                PMD_SECT_AP_READ
        .long   PMD_TYPE_SECT | \
                PMD_BIT4 | \
                PMD_SECT_AP_WRITE | \
                PMD_SECT_AP_READ
        b       __arm920_setup
        .long   cpu_arch_name
        .long   cpu_elf_name
        .long   HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
        .long   cpu_arm920_name
        .long   arm920_processor_functions
        .long   v4wbi_tlb_fns
        .long   v4wb_user_fns
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
        .long   arm920_cache_fns
#else
        .long   v4wt_cache_fns
#endif
        .size   __arm920_proc_info, . - __arm920_proc_info
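
// Read as C, the record above corresponds roughly to the initializer below (a sketch
// only: the real object exists only as the assembly words above, the three MMU-flag
// words and the 'b __arm920_setup' slot are consumed by head.S rather than C, and the
// string labels are assembly symbols, not C declarations):

static const struct proc_info_list __arm920_proc_info_sketch = {
        .cpu_val   = 0x41009200,
        .cpu_mask  = 0xff00fff0,
        /* __cpu_mm_mmu_flags, __cpu_io_mmu_flags, __cpu_flush: used by head.S */
        .arch_name = cpu_arch_name,
        .elf_name  = cpu_elf_name,
        .elf_hwcap = HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB,
        .cpu_name  = cpu_arm920_name,
        .proc      = &arm920_processor_functions,
        .tlb       = &v4wbi_tlb_fns,
        .user      = &v4wb_user_fns,
        .cache     = &v4wt_cache_fns,   /* &arm920_cache_fns without CONFIG_CPU_DCACHE_WRITETHROUGH */
};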


// Next, find the definition of v4wt_cache_fns. Grepping for the symbol directly turns up nothing, so it must be generated by a macro.
// Combined with the comments on struct cpu_cache_fns, the v4wt function set appears to be implemented in cache-v4wt.S:
[zrlean@e4]$grep ENTRY cache-v4wt.S
ENTRY(v4wt_flush_icache_all)
ENTRY(v4wt_flush_user_cache_all)
ENTRY(v4wt_flush_kern_cache_all)
ENTRY(v4wt_flush_user_cache_range)
ENTRY(v4wt_coherent_kern_range)
ENTRY(v4wt_coherent_user_range)
ENTRY(v4wt_flush_kern_dcache_area)
ENTRY(v4wt_dma_unmap_area)
ENTRY(v4wt_dma_map_area)

// And at the end of arch/arm/mm/cache-v4wt.S there is the following definition:
__INITDATA

        @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
        define_cache_functions v4wt

// define_cache_functions here is a macro written in assembly:
@arch/arm/mm/proc-macros.S
...
.macro define_cache_functions name:req
        .align 2
        .type   \name\()_cache_fns, #object
ENTRY(\name\()_cache_fns)
        .long   \name\()_flush_icache_all
        .long   \name\()_flush_kern_cache_all
        .long   \name\()_flush_user_cache_all
        .long   \name\()_flush_user_cache_range
        .long   \name\()_coherent_kern_range
        .long   \name\()_coherent_user_range
        .long   \name\()_flush_kern_dcache_area
        .long   \name\()_dma_map_area
        .long   \name\()_dma_unmap_area
        .long   \name\()_dma_flush_range
        .size   \name\()_cache_fns, . - \name\()_cache_fns
.endm

// So 'define_cache_functions v4wt' expands to:
        .align 2
        .type   v4wt_cache_fns, #object
ENTRY(v4wt_cache_fns)
        .long   v4wt_flush_icache_all
        .long   v4wt_flush_kern_cache_all
        .long   v4wt_flush_user_cache_all
        .long   v4wt_flush_user_cache_range
        .long   v4wt_coherent_kern_range
        .long   v4wt_coherent_user_range
        .long   v4wt_flush_kern_dcache_area
        .long   v4wt_dma_map_area
        .long   v4wt_dma_unmap_area
        .long   v4wt_dma_flush_range
        .size   v4wt_cache_fns, . - v4wt_cache_fns

// And v4wt_cache_fns is exactly the struct cpu_cache_fns object that the cache field of __arm920_proc_info is initialized to point at.
// With that, all of the cache maintenance functions have been located.
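
// Putting it together (ARM920 built with CONFIG_CPU_DCACHE_WRITETHROUGH), assuming the
// reading above is right, the whole path looks like this:
setup_arch()
 +-- setup_processor()
     +-- list = lookup_processor_type(read_cpuid_id())   // matches 0x41009200/0xff00fff0 -> &__arm920_proc_info
     +-- cpu_cache = *list->cache                        // copies v4wt_cache_fns into the global cpu_cache
...
__cpuc_flush_kern_all()                                  // MULTI_CACHE: expands to cpu_cache.flush_kern_all()
 +-- v4wt_flush_kern_cache_all()                         // implemented in arch/arm/mm/cache-v4wt.S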