一.为什么引入临时内存映射(temporary kernel mappings)
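在永久内存映射中我们看到,如果pkmap_page_table页表里面没有空的entry,那么就会导致这次映射被阻塞,所以我们说不能在一些原子的上下文情况下调用kmap()函数。而在临时内存映射中,不会去查找空闲的pte,也不会因为pte已经被用掉而睡眠等待,它采用的是覆盖的策略,所以它总是能成功地建立映射(注意:下文所列版本的kmap_atomic_prot()中带有BUG_ON(!pte_none(...))检查,即要求对应的pte在映射前已经被kunmap_atomic()清空,因此kmap_atomic()和kunmap_atomic()必须成对使用)。会不会被阻塞就是临时内存映射和永久内存映射一个最明显的区别。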
二. 临时内存映射
内核地址空间布局
FIXADDR_TOP = 0xfffff000:因为最后一个页面(0xfffff000--0xffffffff)被内核所保留。
enum fixed_addresses
{
# ifdef CONFIG_X86_32
FIX_HOLE,
FIX_VDSO,
# else
VSYSCALL_LAST_PAGE,
VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
+
( ( VSYSCALL_END- VSYSCALL_START)
> > PAGE_SHIFT)
- 1,
VSYSCALL_HPET,
# endif
FIX_DBGP_BASE,
FIX_EARLYCON_MEM_BASE,
# ifdef CONFIG_X86_LOCAL_APIC
FIX_APIC_BASE,
/* local (CPU) APIC) -- required for SMP or not */
# endif
# ifdef CONFIG_X86_IO_APIC
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0
+ MAX_IO_APICS - 1,
# endif
# ifdef CONFIG_X86_VISWS_APIC
FIX_CO_CPU,
/* Cobalt timer */
FIX_CO_APIC,
/* Cobalt APIC Redirection Table */
FIX_LI_PCIA,
/* Lithium PCI Bridge A */
FIX_LI_PCIB,
/* Lithium PCI Bridge B */
# endif
# ifdef CONFIG_X86_F00F_BUG
FIX_F00F_IDT,
/* Virtual mapping for IDT */
# endif
# ifdef CONFIG_X86_CYCLONE_TIMER
FIX_CYCLONE_TIMER,
/*cyclone timer register*/
# endif
# ifdef CONFIG_X86_32
FIX_KMAP_BEGIN,
/* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+ ( KM_TYPE_NR* NR_CPUS) - 1,
# ifdef CONFIG_PCI_MMCONFIG
FIX_PCIE_MCFG,
# endif
# endif
# ifdef CONFIG_PARAVIRT
FIX_PARAVIRT_BOOTMAP,
# endif
FIX_TEXT_POKE1,
/* reserve 2 pages for text_poke() */
FIX_TEXT_POKE0,
/* first page is last, because allocation is backward */
__end_of_permanent_fixed_addresses,
/*
* 256 temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
*
* We round it up to the next 256 pages boundary so that we
* can have a single pgd entry and a single pte table:
*/
# define NR_FIX_BTMAPS 64
# define FIX_BTMAPS_SLOTS 4
FIX_BTMAP_END = __end_of_permanent_fixed_addresses
+ 256 -
( __end_of_permanent_fixed_addresses
& 255) ,
FIX_BTMAP_BEGIN = FIX_BTMAP_END
+ NR_FIX_BTMAPS* FIX_BTMAPS_SLOTS
- 1,
# ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
FIX_OHCI1394_BASE,
# endif
# ifdef CONFIG_X86_32
FIX_WP_TEST,
# endif
# ifdef CONFIG_INTEL_TXT
FIX_TBOOT_BASE,
# endif
__end_of_fixed_addresses
} ;
/*
 * Extent of the permanent fixmap area: one page per permanent index,
 * ending at FIXADDR_TOP.  FIXADDR_SIZE is its size in bytes and
 * FIXADDR_START its lowest address.
 */
#define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
从FIXADDR_START到FIXADDR_TOP这段地址空间被称为固定映射线性空间,这段地址空间主要用来映射一些设备的内存和寄存器,像FIX_APIC_BASE是用来映射local apic的寄存器的。其中的FIX_KMAP_BEGIN到FIX_KMAP_END是用来供临时内存映射的使用。
这段空间的大小是 KM_TYPE_NR* NR_CPUS,也就是说,每个CPU都有独立的KM_TYPE_NR大小的空间,来看看KM_TYPE_NR是什么:
/*
 * KMAP_D(n): when __WITH_KM_FENCE is defined, emit a dummy enumerator
 * __KM_FENCE_<n> (followed by a comma) in front of each real km_type so
 * that consecutive real slots are separated by an unused one.  Otherwise
 * it expands to nothing.
 */
#ifdef __WITH_KM_FENCE
# define KMAP_D(n) __KM_FENCE_##n ,
#else
# define KMAP_D(n)
#endif

/*
 * Slot types for temporary (atomic) kernel mappings.  Each enumerator
 * names one per-CPU slot; KM_TYPE_NR, being last, is the number of slots
 * per CPU (19 when __WITH_KM_FENCE is not defined).
 */
enum km_type {
KMAP_D(0)	KM_BOUNCE_READ,
KMAP_D(1)	KM_SKB_SUNRPC_DATA,
KMAP_D(2)	KM_SKB_DATA_SOFTIRQ,
KMAP_D(3)	KM_USER0,
KMAP_D(4)	KM_USER1,
KMAP_D(5)	KM_BIO_SRC_IRQ,
KMAP_D(6)	KM_BIO_DST_IRQ,
KMAP_D(7)	KM_PTE0,
KMAP_D(8)	KM_PTE1,
KMAP_D(9)	KM_IRQ0,
KMAP_D(10)	KM_IRQ1,
KMAP_D(11)	KM_SOFTIRQ0,
KMAP_D(12)	KM_SOFTIRQ1,
KMAP_D(13)	KM_SYNC_ICACHE,
KMAP_D(14)	KM_SYNC_DCACHE,
/* UML specific, for copy_*_user - used in do_op_one_page */
KMAP_D(15)	KM_UML_USERCOPY,
KMAP_D(16)	KM_IRQ_PTE,
KMAP_D(17)	KM_NMI,
KMAP_D(18)	KM_NMI_PTE,
KMAP_D(19)	KM_TYPE_NR
};
每个枚举项都代表了一个pte,每个CPU都有KM_TYPE_NR个这样的pte供临时内核映射使用(按上面的枚举,在没有定义__WITH_KM_FENCE时KM_TYPE_NR等于19)。我们先来看看内核如何获得这些枚举项所对应的pte地址:
/*
 * Convert fixmap index x into its virtual address.  Slots are laid out
 * downward from FIXADDR_TOP, one page (1 << PAGE_SHIFT bytes) per index,
 * so index 0 maps to FIXADDR_TOP itself.
 */
#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
这个宏返回的是固定线性映射里面枚举项所对应的线性地址,FIXADDR_TOP=0xfffff000。以FIX_APIC_BASE为例,假设FIX_APIC_BASE等于4(具体取值取决于内核配置),那么__fix_to_virt( FIX_APIC_BASE )得到的线性地址就是0xfffff000-(4<<PAGE_SHIFT),在PAGE_SHIFT为12时即0xffffb000。这里我们看到,线性地址和枚举项的位置关系是相反的:越靠前的枚举项对应的线性地址越靠后。
下面的代码是完整的如何通过枚举项,获得对应的pte项所对应的线性地址。
/*
 * Return a pointer to the page-table entry for the kernel virtual address
 * @vaddr, found by chaining pgd_offset_k(), pud_offset(), pmd_offset()
 * and pte_offset_kernel().
 */
static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
{
	pte_t *pte;

	pte = pte_offset_kernel(
			pmd_offset(
				pud_offset(pgd_offset_k(vaddr), vaddr),
				vaddr),
			vaddr);

	return pte;
}
/*
 * Set up the state used by the atomic kmap code: remember the pte of the
 * first temporary-mapping slot (FIX_KMAP_BEGIN) in kmap_pte and set the
 * default protection kmap_prot to PAGE_KERNEL.
 */
static void __init kmap_init(void)
{
	/* Virtual address of the first kmap slot. */
	unsigned long first_slot_vaddr = __fix_to_virt(FIX_KMAP_BEGIN);

	/* Cache the first kmap pte; other slots are reached relative to it. */
	kmap_pte = kmap_get_fixmap_pte(first_slot_vaddr);
	kmap_prot = PAGE_KERNEL;
}
注意:kmap_pte是临时内存映射起始的页表项,接下来的其他页表项根据他们的相对位置差就可以求得。比如idx对应的pte=kmap_pte - idx,为什么是减不是加,看上面的说明(越靠前的枚举项对应的线性地址越靠后 )。
接下来我们看实际的建立映射的函数(kmap_atomic)
/*
 * Atomic kmaps are considerably cheaper than kmap()/kunmap(): they take
 * no global lock, whereas the kmap code has to do a global TLB
 * invalidation whenever the kmap pool wraps.
 *
 * The price is that sleeping is not allowed while an atomic kmap is held,
 * so they are only suitable for short, tight code paths.
 *
 * Maps @page with protection @prot into the calling CPU's slot of kind
 * @type and returns the virtual address to use.  For a page that is not
 * in high memory, page_address(page) is returned and no pte is touched.
 * Page faults are disabled on entry in both cases.
 */
void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
{
	unsigned long kaddr;
	enum fixed_addresses slot;

	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
	pagefault_disable();

	/* Only highmem pages need a mapping; any other page already has a linear address. */
	if (!PageHighMem(page))
		return page_address(page);

	debug_kmap_atomic(type);

	/* Slot offset: KM_TYPE_NR slots per CPU, selected by @type within the CPU's group. */
	slot = KM_TYPE_NR * smp_processor_id() + type;

	/* Virtual address through which the kernel will access the page. */
	kaddr = __fix_to_virt(FIX_KMAP_BEGIN + slot);

	/* The slot's pte must be empty before it is reused. */
	BUG_ON(!pte_none(*(kmap_pte - slot)));

	/*
	 * Install the mapping.  ptes are indexed downward from kmap_pte,
	 * hence the subtraction; afterwards the page is reachable via kaddr.
	 */
	set_pte(kmap_pte - slot, mk_pte(page, prot));

	return (void *)kaddr;
}
/*
 * Atomically map @page into the calling CPU's slot of kind @type using
 * the default protection kmap_prot; see kmap_atomic_prot().
 */
void *kmap_atomic(struct page *page, enum km_type type)
{
	void *kaddr = kmap_atomic_prot(page, type, kmap_prot);

	return kaddr;
}
解除映射也很简单,没有做什么特殊处理:
/*
 * Undo a kmap_atomic()/kmap_atomic_prot() mapping.
 *
 * @kvaddr: address previously returned by the mapping call (any offset
 *          within the page is accepted; it is masked with PAGE_MASK).
 * @type:   the same slot type that was used to create the mapping.
 *
 * If @kvaddr lies in this CPU's slot for @type, the pte is cleared and
 * flushed.  Otherwise nothing is unmapped; with CONFIG_DEBUG_HIGHMEM the
 * address is then checked to lie in [PAGE_OFFSET, high_memory).
 * Page faults are re-enabled in either case.
 */
void kunmap_atomic(void *kvaddr, enum km_type type)
{
	unsigned long vaddr = (unsigned long)kvaddr & PAGE_MASK;
	enum fixed_addresses idx = type + KM_TYPE_NR * smp_processor_id();

	/*
	 * Force other mappings to Oops if they try to access this pte
	 * without remapping it first.  Keeping stale mappings around is a
	 * bad idea also, in case the page changes cacheability attributes
	 * or becomes a protected page in a hypervisor.
	 */
	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN + idx))
		kpte_clear_flush(kmap_pte - idx, vaddr);
	else {
#ifdef CONFIG_DEBUG_HIGHMEM
		BUG_ON(vaddr < PAGE_OFFSET);
		BUG_ON(vaddr >= (unsigned long)high_memory);
#endif
	}

	pagefault_enable();
}
三. 总结
其实,不管是哪样的方式,原理都是一样的,都是先选定一个内核线性地址(永久内存映射使用的是固定映射区之外的永久映射区,临时内存映射使用的则是固定映射区内FIX_KMAP_BEGIN到FIX_KMAP_END之间的地址),然后再修改PTE项,使其指向相应的page。特别值得我们注意的是,因为kmap()会引起睡眠,所以它不能用于中断处理。但每一种映射方式都有自己的优点和缺点,这需要我们在写代码的时候仔细考虑了。
转自: http://blog.chinaunix.net/space.php?uid=21752164&do=blog&id=2689673