0. 引言
操作系统中与匿名页相对的是文件页,文件页的反向映射对比匿名页的反向映射更为简单。如果还不清楚匿名页反向映射逻辑的,请移步 匿名页反向映射
1. 文件页反向映射数据结构
struct file: 用户进程每open()一次文件,则会生成一个file对象,file->f_mapping指向该管理该文件页面对象
struct file {
union {
struct llist_node fu_llist;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
struct inode *f_inode; /* cached value */
const struct file_operations *f_op;
/*
* Protects f_ep_links, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
enum rw_hint f_write_hint;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
struct mutex f_pos_lock;
loff_t f_pos;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;
u64 f_version;
#ifdef CONFIG_SECURITY
void *f_security;
#endif
/* needed for tty driver, and maybe others */
void *private_data;
#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
struct list_head f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping; // 文件页缓冲区,文件页都放在该对象中进行管理
errseq_t f_wb_err;
errseq_t f_sb_err; /* for syncfs */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
struct address_space: 用来管理一个文件所有内容页面,一个真实的文件只对应一个address_space结构,其中address_space->i_pages是保存页面的结构,address_space->i_mmap是一个红黑树,红黑树上挂载着映射该文件的所有vma对象。
/**
* struct address_space - Contents of a cacheable, mappable object.
* @host: Owner, either the inode or the block_device.
* @i_pages: Cached pages.
* @gfp_mask: Memory allocation flags to use for allocating pages.
* @i_mmap_writable: Number of VM_SHARED mappings.
* @nr_thps: Number of THPs in the pagecache (non-shmem only).
* @i_mmap: Tree of private and shared mappings.
* @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
* @nrpages: Number of page entries, protected by the i_pages lock.
* @nrexceptional: Shadow or DAX entries, protected by the i_pages lock.
* @writeback_index: Writeback starts here.
* @a_ops: Methods.
* @flags: Error bits and flags (AS_*).
* @wb_err: The most recent error which has occurred.
* @private_lock: For use by the owner of the address_space.
* @private_list: For use by the owner of the address_space.
* @private_data: For use by the owner of the address_space.
*/
struct address_space {
struct inode *host;
struct xarray i_pages;
gfp_t gfp_mask;
atomic_t i_mmap_writable;
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
/* number of thp, only for non-shmem files */
atomic_t nr_thps;
#endif
struct rb_root_cached i_mmap;
struct rw_semaphore i_mmap_rwsem;
unsigned long nrpages;
unsigned long nrexceptional;
pgoff_t writeback_index;
const struct address_space_operations *a_ops;
unsigned long flags;
errseq_t wb_err;
spinlock_t private_lock;
struct list_head private_list;
void *private_data;
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
struct vm_area_struct: 用来描述用户进程的一段虚拟地址空间,其中vma->vm_start表示映射的起始虚拟地址,vma->vm_end表示映射的结束虚拟地址,vma->vm_file指向映射的文件对象,vma->vm_pgoff表示从文件哪里进行映射(以页面为单位)。
/*
* This struct describes a virtual memory area. There is one of these
* per VM-area/task. A VM area is any part of the process virtual memory
* space that has a special rule for the page-fault handlers (ie a shared
* library, the executable area etc).
*/
struct vm_area_struct {
/* The first cache line has the info for VMA tree walking. */
unsigned long vm_start; /* Our start address within vm_mm. */ // 映射的起始虚拟地址
unsigned long vm_end; /* The first byte after our end address
within vm_mm. */ // 映射的结束虚拟地址
/* linked list of VM areas per task, sorted by address */
struct vm_area_struct *vm_next, *vm_prev;
struct rb_node vm_rb;
/*
* Largest free memory gap in bytes to the left of this VMA.
* Either between this VMA and vma->vm_prev, or between one of the
* VMAs below us in the VMA rbtree and its ->vm_prev. This helps
* get_unmapped_area find a free area of the right size.
*/
unsigned long rb_subtree_gap;
/* Second cache line starts here. */
struct mm_struct *vm_mm; /* The address space we belong to. */
/*
* Access permissions of this VMA.
* See vmf_insert_mixed_prot() for discussion.
*/
pgprot_t vm_page_prot;
unsigned long vm_flags; /* Flags, see mm.h. */
/*
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree.
*/
struct {
struct rb_node rb;
unsigned long rb_subtree_last;
} shared;
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
* list, after a COW of one of the file pages. A MAP_SHARED vma
* can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
* or brk vma (with NULL file) can only be in an anon_vma list.
*/
struct list_head anon_vma_chain; /* Serialized by mmap_lock &
* page_table_lock */
struct anon_vma *anon_vma; /* Serialized by page_table_lock */
/* Function pointers to deal with this struct. */
const struct vm_operations_struct *vm_ops;
/* Information about our backing store: */
unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
units */ // 该vma从文件哪里开始映射,以页面为单位
struct file * vm_file; /* File we map to (can be NULL). */ // 映射的文件对象
void * vm_private_data; /* was vm_pte (shared mem) */
#ifdef CONFIG_SWAP
atomic_long_t swap_readahead_info;
#endif
#ifndef CONFIG_MMU
struct vm_region *vm_region; /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
struct mempolicy *vm_policy; /* NUMA policy for the VMA */
#endif
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
} __randomize_layout;
2. 数据结构关系图
3. 建立文件反向映射流程
我们都是通过一个path去打开一个文件,然后再通过mmap()去将文件映射到进程的虚拟地址空间,后续通过访存这块虚拟地址空间来达到读写文件的目的。
int fd = open("/path/to/your/file", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
// do something
munmap(ptr, size);
close(fd);
现在让我们从mmap()映射文件描述符这里看起。
arm64体系结构下,且内核是linux-5.10版本:
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
unsigned long, fd, unsigned long, off)
{
if (offset_in_page(off) != 0)
return -EINVAL;
return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT