前面2篇介绍了ext4磁盘上的布局,在使用过程中,内核需要频繁的访问某些数据结构,所以会把磁盘上面数据读出装在内存中相应的数据结构。
超级块
ext4在内存中的超级块结构定义如下:
/*
* fourth extended-fs super-block data in memory
*/
struct ext4_sb_info {
unsigned long s_desc_size; /* Size of a group descriptor in bytes */
unsigned long s_inodes_per_block;/* Number of inodes per block */
unsigned long s_blocks_per_group;/* Number of blocks in a group */
unsigned long s_clusters_per_group; /* Number of clusters in a group */
unsigned long s_inodes_per_group;/* Number of inodes in a group */
unsigned long s_itb_per_group; /* Number of inode table blocks per group */
unsigned long s_gdb_count; /* Number of group descriptor blocks */
unsigned long s_desc_per_block; /* Number of group descriptors per block */
ext4_group_t s_groups_count; /* Number of groups in the fs */
ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
unsigned long s_overhead; /* # of fs overhead clusters */
unsigned int s_cluster_ratio; /* Number of blocks per cluster */
unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
struct buffer_head **s_group_desc;
unsigned int s_mount_opt;
unsigned int s_mount_opt2;
unsigned int s_mount_flags;
unsigned int s_def_mount_opt;
ext4_fsblk_t s_sb_block;
atomic64_t s_resv_clusters;
kuid_t s_resuid;
kgid_t s_resgid;
unsigned short s_mount_state;
unsigned short s_pad;
int s_addr_per_block_bits;
int s_desc_per_block_bits;
int s_inode_size;
int s_first_ino;
unsigned int s_inode_readahead_blks;
unsigned int s_inode_goal;
spinlock_t s_next_gen_lock;
u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
struct percpu_counter s_freeclusters_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
struct percpu_counter s_dirtyclusters_counter;
struct blockgroup_lock *s_blockgroup_lock;
struct proc_dir_entry *s_proc;
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
/* Journaling */
struct journal_s *s_journal;
struct list_head s_orphan;
struct mutex s_orphan_lock;
unsigned long s_resize_flags; /* Flags indicating if there
is a resizer */
unsigned long s_commit_interval;
u32 s_max_batch_time;
u32 s_min_batch_time;
struct block_device *journal_bdev;
#ifdef CONFIG_QUOTA
char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
struct rb_root system_blks;
#ifdef EXTENTS_STATS
/* ext4 extents stats */
unsigned long s_ext_min;
unsigned long s_ext_max;
unsigned long s_depth_max;
spinlock_t s_ext_stats_lock;
unsigned long s_ext_blocks;
unsigned long s_ext_extents;
#endif
/* for buddy allocator */
struct ext4_group_info ***s_group_info;
struct inode *s_buddy_cache;
spinlock_t s_md_lock;
unsigned short *s_mb_offsets;
unsigned int *s_mb_maxs;
unsigned int s_group_info_size;
/* tunables */
unsigned long s_stripe;
unsigned int s_mb_stream_request;
unsigned int s_mb_max_to_scan;
unsigned int s_mb_min_to_scan;
unsigned int s_mb_stats;
unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc;
unsigned int s_max_dir_size_kb;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
unsigned long s_mb_last_start;
/* stats for buddy allocator */
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
atomic_t s_bal_success; /* we found long enough chunks */
atomic_t s_bal_allocated; /* in blocks */
atomic_t s_bal_ex_scanned; /* total extents scanned */
atomic_t s_bal_goals; /* goal hits */
atomic_t s_bal_breaks; /* too long searches */
atomic_t s_bal_2orders; /* 2^order hits */
spinlock_t s_bal_lock;
unsigned long s_mb_buddies_generated;
unsigned long long s_mb_generation_time;
atomic_t s_mb_lost_chunks;
atomic_t s_mb_preallocated;
atomic_t s_mb_discarded;
atomic_t s_lock_busy;
/* locality groups */
struct ext4_locality_group __percpu *s_locality_groups;
/* for write statistics */
unsigned long s_sectors_written_start;
u64 s_kbytes_written;
/* the size of zero-out chunk */
unsigned int s_extent_max_zeroout_kb;
unsigned int s_log_groups_per_flex;
struct flex_groups *s_flex_groups;
ext4_group_t s_flex_groups_allocated;
/* workqueue for reserved extent conversions (buffered io) */
struct workqueue_struct *rsv_conversion_wq;
/* timer for periodic error stats printing */
struct timer_list s_err_report;
/* Lazy inode table initialization info */
struct ext4_li_request *s_li_request;
/* Wait multiplier for lazy initialization thread */
unsigned int s_li_wait_mult;
/* Kernel thread for multiple mount protection */
struct task_struct *s_mmp_tsk;
/* record the last minlen when FITRIM is called. */
atomic_t s_last_trim_minblks;
/* Reference to checksum algorithm driver via cryptoapi */
struct crypto_shash *s_chksum_driver;
/* Precomputed FS UUID checksum for seeding other checksums */
__u32 s_csum_seed;
/* Reclaim extents from extent status tree */
struct shrinker s_es_shrinker;
struct list_head s_es_list; /* List of inodes with reclaimable extents */
long s_es_nr_inode;
struct ext4_es_stats s_es_stats;
struct mb_cache *s_mb_cache;
spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */
struct ratelimit_state s_err_ratelimit_state;
struct ratelimit_state s_warning_ratelimit_state;
struct ratelimit_state s_msg_ratelimit_state;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
/* Encryption */
uint32_t s_file_encryption_mode;
uint32_t s_dir_encryption_mode;
#endif
};
内存中的超级块结构与磁盘中的超级块结构大体一致,在驱动初始化时,内核会把磁盘上面的ext4文件系统数据读出装入到内存中的磁盘数据结构体中,由于内核频繁使用这些结构数据,所以这些数据是常驻内存的。
结构体成员s_sbh指向磁盘超级块缓冲区头部;
结构体成员s_group_desc指向磁盘组描述符缓冲区头部;
结构体成员s_es指向磁盘超级块结构的内存首地址;
ext4_sb_info的建立是在ext4_fill_super函数中完成的,代码如下:
struct ext4_sb_info {
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
struct buffer_head **s_group_desc;
};
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
struct ext4_sb_info *sbi;内存超级块
struct buffer_head *bh;磁盘超级块逻辑数据
struct ext4_super_block *es = NULL;磁盘超级块
bh = sb_bread_unmovable(sb, logical_sb_block)读出磁盘超级块数据到缓冲区
es = (struct ext4_super_block *) (bh->b_data + offset);数据装载到磁盘超级块内存数据结构
sbi->s_sbh = bh;VFS与内存和磁盘超级块的联系
sbi->s_es = es;
sb->s_fs_info = sbi;
sbi->s_sb = sb;
sbi->s_group_desc = ext4_kvmalloc(db_count *
sizeof(struct buffer_head *),
GFP_KERNEL);
sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);读出组描述符数据
inode
ext4内存中inode数据结构如下:
/*
* fourth extended file system inode data in memory
*/
struct ext4_inode_info {
__le32 i_data[15]; /* unconverted */
__u32 i_dtime;
ext4_fsblk_t i_file_acl;
/*
* i_block_group is the number of the block group which contains
* this file's inode. Constant across the lifetime of the inode,
* it is ued for making block allocation decisions - we try to
* place a file's data blocks near its inode block, and new inodes
* near to their parent directory's inode.
*/
ext4_group_t i_block_group;
ext4_lblk_t i_dir_start_lookup;
#if (BITS_PER_LONG < 64)
unsigned long i_state_flags; /* Dynamic state flags */
#endif
unsigned long i_flags;
/*
* Extended attributes can be read independently of the main file
* data. Taking i_mutex even when reading would cause contention
* between readers of EAs and writers of regular file data, so
* instead we synchronize on xattr_sem when reading or changing
* EAs.
*/
struct rw_semaphore xattr_sem;
struct list_head i_orphan; /* unlinked but open inodes */
/*
* i_disksize keeps track of what the inode size is ON DISK, not
* in memory. During truncate, i_size is set to the new size by
* the VFS prior to calling ext4_truncate(), but the filesystem won't
* set i_disksize to 0 until the truncate is actually under way.
*
* The intent is that i_disksize always represents the blocks which
* are used by this file. This allows recovery to restart truncate
* on orphans if we crash during truncate. We actually write i_disksize
* into the on-disk inode when writing inodes out, instead of i_size.
*
* The only time when i_disksize and i_size may be different is when
* a truncate is in progress. The only things which change i_disksize
* are ext4_get_block (growth) and ext4_truncate (shrinkth).
*/
loff_t i_disksize;
/*
* i_data_sem is for serialising ext4_truncate() against
* ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
* data tree are chopped off during truncate. We can't do that in
* ext4 because whenever we perform intermediate commits during
* truncate, the inode and all the metadata blocks *must* be in a
* consistent state which allows truncation of the orphans to restart
* during recovery. Hence we must fix the get_block-vs-truncate race
* by other means, so we have i_data_sem.
*/
struct rw_semaphore i_data_sem;
/*
* i_mmap_sem is for serializing page faults with truncate / punch hole
* operations. We have to make sure that new page cannot be faulted in
* a section of the inode that is being punched. We cannot easily use
* i_data_sem for this since we need protection for the whole punch
* operation and i_data_sem ranks below transaction start so we have
* to occasionally drop it.
*/
struct rw_semaphore i_mmap_sem;
struct inode vfs_inode;
struct jbd2_inode *jinode;
spinlock_t i_raw_lock; /* protects updates to the raw inode */
/*
* File creation time. Its function is same as that of
* struct timespec i_{a,c,m}time in the generic inode.
*/
struct timespec i_crtime;
/* mballoc */
struct list_head i_prealloc_list;
spinlock_t i_prealloc_lock;
/* extents status tree */
struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock;
struct list_head i_es_list;
unsigned int i_es_all_nr; /* protected by i_es_lock */
unsigned int i_es_shk_nr; /* protected by i_es_lock */
ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for
extents to shrink. Protected by
i_es_lock */
/* ialloc */
ext4_group_t i_last_alloc_group;
/* allocation reservation info for delalloc */
/* In case of bigalloc, these refer to clusters rather than blocks */
unsigned int i_reserved_data_blocks;
unsigned int i_reserved_meta_blocks;
unsigned int i_allocated_meta_blocks;
ext4_lblk_t i_da_metadata_calc_last_lblock;
int i_da_metadata_calc_len;
/* on-disk additional length */
__u16 i_extra_isize;
char i_crypt_policy_flags;
/* Indicate the inline data space. */
u16 i_inline_off;
u16 i_inline_size;
#ifdef CONFIG_QUOTA
/* quota space reservation, managed internally by quota code */
qsize_t i_reserved_quota;
#endif
/* Lock protecting lists below */
spinlock_t i_completed_io_lock;
/*
* Completed IOs that need unwritten extents handling and have
* transaction reserved
*/
struct list_head i_rsv_conversion_list;
/*
* Completed IOs that need unwritten extents handling and don't have
* transaction reserved
*/
atomic_t i_ioend_count; /* Number of outstanding io_end structs */
atomic_t i_unwritten; /* Nr. of inflight conversions pending */
struct work_struct i_rsv_conversion_work;
spinlock_t i_block_reservation_lock;
/*
* Transactions that contain inode's metadata needed to complete
* fsync and fdatasync, respectively.
*/
tid_t i_sync_tid;
tid_t i_datasync_tid;
#ifdef CONFIG_QUOTA
struct dquot *i_dquot[MAXQUOTAS];
#endif
/* Precomputed uuid+inum+igen checksum for seeding inode checksums */
__u32 i_csum_seed;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
/* Encryption params */
struct ext4_encryption_key i_encryption_key;
#endif
};
结构体成员与磁盘上类似,具体怎么用还不知。
磁盘上的bmap和imap也会读取到内存中,用什么数据结构存储的呢?什么时候读的?
struct inode vfs_inode;这个vfs_inode是虚拟文件系统的inode结构,下一篇介绍。