从文件系统的角度来看,挂载一个文件系统本质上也是在操作一个文件。每个文件系统对应一个super_block,那么super_block和inode之间是什么关系?下面逐步分析。
从mount挂载命令开始,需要先注册,然后再装载系统。分为如下步骤:
1,注册文件系统
/*
 * register_filesystem - add a filesystem type to the global list.
 *
 * Returns 0 on success, -EBUSY if the type is already linked into the
 * list or a filesystem with the same name is already registered.
 */
int register_filesystem(struct file_system_type * fs)
{
	struct file_system_type **slot;
	int err = 0;

	/* Filesystem names must not contain a dot. */
	BUG_ON(strchr(fs->name, '.'));

	/* A non-NULL next link means fs is already on the list. */
	if (fs->next)
		return -EBUSY;

	write_lock(&file_systems_lock);
	slot = find_filesystem(fs->name, strlen(fs->name));
	if (!*slot)
		*slot = fs;		/* link into the empty tail slot */
	else
		err = -EBUSY;		/* duplicate name */
	write_unlock(&file_systems_lock);

	return err;
}
/*
 * find_filesystem - locate the list slot for a filesystem name.
 *
 * Walks the singly linked file_systems list and returns the address of
 * the link that either points at the matching entry or at the NULL
 * tail (so the caller can insert there). Caller holds file_systems_lock.
 */
static struct file_system_type **find_filesystem(const char *name, unsigned len)
{
	struct file_system_type **link = &file_systems;

	while (*link) {
		const char *cur = (*link)->name;

		if (strlen(cur) == len && strncmp(cur, name, len) == 0)
			break;
		link = &(*link)->next;
	}
	return link;
}
/* Head of the singly linked list of all registered filesystem types. */
static struct file_system_type *file_systems;
/*
 * file_system_type - describes one filesystem type (ext4, proc, ...).
 * One instance per registered filesystem; chained through ->next into
 * the global file_systems list.
 */
struct file_system_type {
/* filesystem name, e.g. "ext4"; must not contain '.' */
const char *name;
/* FS_* usage flags */
int fs_flags;
/* read the superblock at mount time and return the root dentry */
struct dentry *(*mount) (struct file_system_type *, int,
const char *, void *);
/* release the superblock when the filesystem is unmounted */
void (*kill_sb) (struct super_block *);
/* owning module; meaningful only when loaded as a module */
struct module *owner;
/* next registered type in the singly linked file_systems list */
struct file_system_type * next;
/* head of the list of superblocks of this type (see sb->s_instances) */
struct hlist_head fs_supers;
/* lockdep class keys for locks of superblocks/inodes of this type */
struct lock_class_key s_lock_key;
struct lock_class_key s_umount_key;
struct lock_class_key s_vfs_rename_key;
struct lock_class_key i_lock_key;
struct lock_class_key i_mutex_key;
struct lock_class_key i_mutex_dir_key;
};
/*
 * dentry - directory entry: binds one pathname component to its inode.
 * A dentry with d_inode == NULL is a "negative" dentry (name known not
 * to exist).
 */
struct dentry {
/* RCU lookup touched fields */
unsigned int d_flags; /* protected by d_lock */
seqcount_t d_seq; /* per dentry seqlock */
struct hlist_bl_node d_hash; /* lookup hash list */
struct dentry *d_parent; /* parent directory */
struct qstr d_name;
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative */
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
/* Ref lookup also touches following */
unsigned int d_count; /* protected by d_lock */
spinlock_t d_lock; /* per dentry lock */
const struct dentry_operations *d_op;
struct super_block *d_sb; /* The root of the dentry tree */
/* d_sb: superblock of the filesystem this dentry belongs to */
unsigned long d_time; /* used by d_revalidate */
void *d_fsdata; /* fs-specific data */
struct list_head d_lru; /* LRU list */
/*
* d_child and d_rcu can share memory
*/
union {
struct list_head d_child; /* child of parent list */
struct rcu_head d_rcu;
} d_u;
struct list_head d_subdirs; /* our children */
struct list_head d_alias; /* inode alias list */
};
1)注册流程:函数find_filesystem返回一个 file_system_type** 类型的值,传入参数是名称和长度。函数中定义一个二级指针p,初始指向全局指针变量file_systems的地址;file_systems本身是个指针,指向一个file_system_type类型实例,则*p就是当前链表节点,p=&(*p)->next沿系统链表依次查找。用strlen先判断当前节点的名称长度是否与传入的len相同,并且名称内容也相同;如果没有找到,p最终指向链表尾部的NULL指针。该二级指针返回给register_filesystem中的p,若*p非空说明同名文件系统已注册,返回-EBUSY;否则令*p = fs(fs也是个指针),把新文件系统挂到链表尾部。注意整个查找和插入都是在write_lock(&file_systems_lock)的保护下进行的——先加写锁,再查找、插入,最后解锁。
2)file_system_type: name是一个系统名称,fs_flags是使用的标志,owner是一个指向module结构的指针(仅当文件系统以模块形式加载时才有意义)。next是个单链表。fs_supers是表头。mount是读取dentry结构的函数,这里面包括了超级块等成员。
3)dentry是干嘛的?
dentry是目录项,用于描述文件的逻辑属性,目录也是个文件;inode是索引节点,代表物理意义上的文件。
我们去掉该结构中所有的链表和锁,还有rcu等一些暂时无关的东西,那么能保留下来的就只有下面这么一个简化版的dentry:
/*
 * Simplified dentry: the same structure with lists, locks and RCU
 * bookkeeping stripped, keeping only the naming/topology fields.
 */
struct dentry {
struct qstr d_name; /* full (possibly long) name */
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative */
struct dentry *d_parent; /* parent directory */
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
const struct dentry_operations *d_op;
struct list_head d_child; /* child of parent list */
struct super_block *d_sb; /* The root of the dentry tree */
};
d_name保存文件名称,d_iname用于内嵌保存较短的名称——短名直接存放在dentry结构体内部,省去额外的内存分配,两者并不冲突。dentry->d_inode指向相应的inode索引节点。dentry->d_parent和d_child把各个dentry连成树状结构。dentry->d_sb则指向该目录项所属文件系统的超级块。
4)那超级块又是个什么东西?
一个超级块对应着一个文件系统,文件系统的装载操作始于超级块的读取。超级块是文件系统的核心结构,保存了文件系统所有的特征数据。
超级块的获取通过file_system_type对象中的mount函数指针来读取,在内核中跟踪如下:
file_system_type bd_type = {.mount = bd_mount} ----> bd_mount()---->mount_pseudo()---->sget()最终由sget函数调用alloc_super()获取一个超级块并进行初始化。
super_block结构体(简化)
/* Global list head linking every super_block in the system (fs/super.c). */
LIST_HEAD(super_blocks);
/*
 * super_block - in-core representation of one mounted filesystem.
 * Reading the superblock is the first step of mounting; it holds all
 * the characteristic data of the filesystem.
 */
struct super_block {
/* links all superblocks in the system into one list; the list head is
the global variable super_blocks, defined in fs/super.c */
struct list_head s_list; /* Keep this first */
/* device number of the backing block device, e.g. /dev/hda1 is 0x301 */
dev_t s_dev; /* search index; _not_ kdev_t */
unsigned char s_dirt;
/* filesystem block size; these two fields express the same
information in different forms (log2 bits vs. bytes) */
unsigned char s_blocksize_bits;
unsigned long s_blocksize;
/* largest file size this filesystem can handle */
loff_t s_maxbytes; /* Max file size */
/* points at the file_system_type instance carrying the generic
type information for this filesystem */
struct file_system_type *s_type;
/* superblock operation table: generic interfaces whose
implementations are supplied by the low-level filesystem code */
const struct super_operations *s_op;
/* mount flags */
unsigned long s_flags;
/* magic number distinguishing this filesystem type from others */
unsigned long s_magic;
/* dentry of the root directory of this filesystem */
struct dentry *s_root;
/* extended-attribute handlers */
const struct xattr_handler **s_xattr;
/* all inodes of this filesystem */
struct list_head s_inodes; /* all inodes */
/* list of file structures: every open file on this filesystem */
struct list_head s_files;
/* block device holding the filesystem's data */
struct block_device *s_bdev;
/* list node: superblocks of the same filesystem type are chained
together (list head is file_system_type.fs_supers) */
struct hlist_node s_instances;
char s_id[32]; /* Informational name */
/* catch-all pointer to filesystem-private data */
void *s_fs_info; /* Filesystem private info */
/* maximum number of hard links */
unsigned int s_max_links;
fmode_t s_mode;
const struct dentry_operations *s_d_op; /* default d_op for dentries */
};
通过上边超级块的结构,就可以知道这个文件系统的信息,第一个链表负责把所有的超级块连起来。包括了描述符,file_system_type,inode,dentry,root,s_magic(区别于其它文件系统的标识),files等等。如此定义了一个文件系统。
内核中的结构体普遍内嵌list_head成员,这是内核通用链表的设计:链表节点不直接保存整个结构体,而是把一个很小的list_head嵌入结构体中,遍历时只沿着链表指针走,再根据该成员在结构体内的固定偏移(container_of宏)反推出包含它的结构体地址,从而取出完整的super_block。s_list旁的注释"Keep this first"表示把它放在第一个成员位置,此时偏移为0,换算最为直接。其他结构体中的list_head同样位于固定偏移处,这种层层嵌套的链表体现了Linux内核的分层设计思想。
关于超级块的super_operations方法集合,inode部分主要是对inode的操作,也就是文件系统操作文件;write_super将超级块写入存储介质,比如sdcard,put_super将超级块的私有信息从内存中移除,文件系统卸载时会用到这个回调函数。sync_fs将文件系统数据与底层块设备上的数据同步。show_options用于proc文件系统,用以显示文件系统装载的选项。
/*
 * super_operations - methods the VFS invokes on a superblock.
 * The inode entries allocate / write back / evict inodes; put_super
 * releases superblock-private data at unmount; sync_fs flushes
 * filesystem data to the underlying block device; show_options prints
 * the mount options for /proc.
 */
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
void (*destroy_inode)(struct inode *);
void (*dirty_inode) (struct inode *, int flags);
int (*write_inode) (struct inode *, struct writeback_control *wbc);
int (*drop_inode) (struct inode *);
void (*evict_inode) (struct inode *);
/* called at unmount to release private superblock data */
void (*put_super) (struct super_block *);
/* write the superblock back to the storage medium */
void (*write_super) (struct super_block *);
/* synchronize in-memory filesystem data with the block device */
int (*sync_fs)(struct super_block *sb, int wait);
int (*freeze_fs) (struct super_block *);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
void (*umount_begin) (struct super_block *);
/* show mount options via /proc */
int (*show_options)(struct seq_file *, struct dentry *);
int (*show_devname)(struct seq_file *, struct dentry *);
int (*show_path)(struct seq_file *, struct dentry *);
int (*show_stats)(struct seq_file *, struct dentry *);
#ifdef CONFIG_QUOTA
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
#endif
int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
int (*nr_cached_objects)(struct super_block *);
void (*free_cached_objects)(struct super_block *, int);
};
关于super_block,inode,dentry的概念,至此很清晰了,每一个super_block是一个文件系统,下面的文件是通过inode和dentry来联合描述的。
2,装载文件系统
注意点是:装载新文件系统必须执行的任务、装载点的数据结构。
每装载一个文件系统,对应一个vfsmount结构的实例:
/*
 * vfsmount - one instance per mounted filesystem, describing where and
 * how it is mounted.
 */
struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */
/* root dentry of the mounted filesystem's dentry tree */
struct super_block *mnt_sb; /* pointer to superblock */
/* superblock of the mounted filesystem; a filesystem instance only
comes into being once its superblock has been obtained */
int mnt_flags;
/* per-mount flags (read-only etc.); the MNT_* values are defined
in mount.h */
};
vfsmount和dentry通过path(路径)整合到一起:path.mnt指向所在的挂载实例vfsmount,path.dentry指向该路径对应的目录项。对于挂载点而言,新挂载的文件系统的根节点就是path.mnt->mnt_root。
/*
 * path - pairs a vfsmount with a dentry to identify one point in the
 * mounted filesystem hierarchy.
 */
struct path {
struct vfsmount *mnt; /* mount the dentry lives under */
struct dentry *dentry; /* the directory entry itself */
};
/*
 * nameidata - state of a pathname lookup as it walks the tree.
 */
struct nameidata {
struct path path; /* current position of the walk */
struct qstr last; /* last path component processed */
struct path root; /* root used for this lookup */
struct inode *inode; /* path.dentry.d_inode */
unsigned int flags; /* LOOKUP_* flags */
unsigned seq; /* sequence count used during rcu-walk */
int last_type; /* type of the last component */
unsigned depth; /* current symlink nesting depth */
char *saved_names[MAX_NESTED_LINKS + 1]; /* symlink name stack */
/* Intent data */
union {
struct open_intent open;
} intent;
};
如下做了一个相关的图,左边inode进程相关,右边文件系统相关
这是自己画的,若发现错误,欢迎指正。

装载文件系统的过程可以参考 linux内核sys_mount()分析
mount的系统调用是sys_mount, 在内核中是SYSCALL_DEFINE5.
以下是整个调用过程需要调用的函数:
/*
 * sys_mount - entry point of the mount(2) system call.
 * Copies the user-space arguments (type, mountpoint, device, options)
 * into kernel memory, then hands off to do_mount(). The goto ladder
 * frees whatever was allocated before the failure point, in reverse
 * order of allocation.
 */
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
char __user *, type, unsigned long, flags, void __user *, data)
{
int ret;
char *kernel_type;
char *kernel_dir;
char *kernel_dev;
unsigned long data_page;
/* copy the filesystem type string from user space */
ret = copy_mount_string(type, &kernel_type);
if (ret < 0)
goto out_type;
/* copy the mountpoint path from user space */
kernel_dir = getname(dir_name);
if (IS_ERR(kernel_dir)) {
ret = PTR_ERR(kernel_dir);
goto out_dir;
}
/* copy the device name from user space */
ret = copy_mount_string(dev_name, &kernel_dev);
if (ret < 0)
goto out_dev;
/* copy one page of filesystem-specific option data */
ret = copy_mount_options(data, &data_page);
if (ret < 0)
goto out_data;
ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
(void *) data_page);
free_page(data_page);
out_data:
kfree(kernel_dev);
out_dev:
putname(kernel_dir);
out_dir:
kfree(kernel_type);
out_type:
return ret;
}
/*
 * do_mount - carry out the mount once arguments are in kernel space.
 * Looks up the mountpoint, separates the per-mountpoint MNT_* flags
 * from the MS_* command flags, then dispatches to the matching
 * operation (remount, bind, change-type, move, or new mount).
 */
long do_mount(char *dev_name, char *dir_name, char *type_page,
unsigned long flags, void *data_page)
{
struct path path;
int retval = 0;
int mnt_flags = 0;
/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
flags &= ~MS_MGC_MSK;
/* Basic sanity checks */
if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
return -EINVAL;
/* guarantee the option page is NUL-terminated */
if (data_page)
((char *)data_page)[PAGE_SIZE - 1] = 0;
/* ... and get the mountpoint */
retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
if (retval)
return retval;
retval = security_sb_mount(dev_name, &path,
type_page, flags, data_page);
if (retval)
goto dput_out;
/* Default to relatime unless overriden */
if (!(flags & MS_NOATIME))
mnt_flags |= MNT_RELATIME;
/* Separate the per-mountpoint flags */
if (flags & MS_NOSUID)
mnt_flags |= MNT_NOSUID;
if (flags & MS_NODEV)
mnt_flags |= MNT_NODEV;
if (flags & MS_NOEXEC)
mnt_flags |= MNT_NOEXEC;
if (flags & MS_NOATIME)
mnt_flags |= MNT_NOATIME;
if (flags & MS_NODIRATIME)
mnt_flags |= MNT_NODIRATIME;
if (flags & MS_STRICTATIME)
mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
/* strip the flags already folded into mnt_flags plus kernel-internal ones */
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
MS_STRICTATIME);
/* dispatch on the requested operation */
if (flags & MS_REMOUNT)
retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
data_page);
else if (flags & MS_BIND)
retval = do_loopback(&path, dev_name, flags & MS_REC);
else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
retval = do_change_type(&path, flags);
else if (flags & MS_MOVE)
retval = do_move_mount(&path, dev_name);
else
retval = do_new_mount(&path, type_page, flags, mnt_flags,
dev_name, data_page);
dput_out:
path_put(&path);
return retval;
}
kern_path ----> do_path_lookup ----> path_lookupat
/*
 * path_lookupat - resolve a pathname into nd->path.
 * Initializes the walk (path_init), walks every intermediate component
 * (link_path_walk), resolves the final component plus any trailing
 * symlinks (lookup_last/follow_link), then finalizes the result with
 * complete_walk.
 */
static int path_lookupat(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
struct file *base = NULL;
struct path path;
int err;
/*
* Path walking is largely split up into 2 different synchronisation
* schemes, rcu-walk and ref-walk (explained in
* Documentation/filesystems/path-lookup.txt). These share much of the
* path walk code, but some things particularly setup, cleanup, and
* following mounts are sufficiently divergent that functions are
* duplicated. Typically there is a function foo(), and its RCU
* analogue, foo_rcu().
*
* -ECHILD is the error number of choice (just to avoid clashes) that
* is returned if some aspect of an rcu-walk fails. Such an error must
* be handled by restarting a traditional ref-walk (which will always
* be able to complete).
*/
err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
if (unlikely(err))
return err;
current->total_link_count = 0;
err = link_path_walk(name, nd);
if (!err && !(flags & LOOKUP_PARENT)) {
err = lookup_last(nd, &path);
/* err > 0 means the final component was a symlink: follow it */
while (err > 0) {
void *cookie;
struct path link = path;
nd->flags |= LOOKUP_PARENT;
err = follow_link(&link, nd, &cookie);
if (!err)
err = lookup_last(nd, &path);
put_link(nd, &link, cookie);
}
}
if (!err)
err = complete_walk(nd);
/* a LOOKUP_DIRECTORY result must actually be a directory */
if (!err && nd->flags & LOOKUP_DIRECTORY) {
if (!nd->inode->i_op->lookup) {
path_put(&nd->path);
err = -ENOTDIR;
}
}
if (base)
fput(base);
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
path_put(&nd->root);
nd->root.mnt = NULL;
}
return err;
}
/*
 * do_new_mount - create a brand-new mount of filesystem `type` and
 * attach it at `path`. Requires CAP_SYS_ADMIN; on attach failure the
 * freshly created mount is dropped again.
 */
static int do_new_mount(struct path *path, char *type, int flags,
int mnt_flags, char *name, void *data)
{
	struct vfsmount *mounted;
	int ret;

	if (!type)
		return -EINVAL;

	/* we need capabilities... */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* read the superblock and build the vfsmount */
	mounted = do_kern_mount(type, flags, name, data);
	if (IS_ERR(mounted))
		return PTR_ERR(mounted);

	/* splice it into the mount tree at path */
	ret = do_add_mount(real_mount(mounted), path, mnt_flags);
	if (ret)
		mntput(mounted);
	return ret;
}
/*
 * do_add_mount - add a newly created mount to the namespace at path.
 * Rejects stacking the same filesystem twice on the same mountpoint
 * and mounting on top of a symlink.
 */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
int err;
/* these flags are managed by the kernel, not settable by the caller */
mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
err = lock_mount(path);
if (err)
return err;
err = -EINVAL;
if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt)))
goto unlock;
/* Refuse the same filesystem on the same mount point */
err = -EBUSY;
if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
path->mnt->mnt_root == path->dentry)
goto unlock;
/* a symlink can never serve as a mount root */
err = -EINVAL;
if (S_ISLNK(newmnt->mnt.mnt_root->d_inode->i_mode))
goto unlock;
newmnt->mnt.mnt_flags = mnt_flags;
err = graft_tree(newmnt, path);
unlock:
unlock_mount(path);
return err;
}
static int graft_tree(struct mount *mnt, struct path *path)
{
if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
return -EINVAL;
if (S_ISDIR(path->dentry->d_inode->i_mode) !=
S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
return -ENOTDIR;
if (d_unlinked(path->dentry))
return -ENOENT;
return attach_recursive_mnt(mnt, path, NULL);
}
/*
 * attach_recursive_mnt - attach source_mnt (and its submounts) beneath
 * the mountpoint described by path. A non-NULL parent_path means the
 * mount is being moved rather than newly attached. For a shared
 * destination the mount is first propagated to the peer group, then
 * the whole tree is committed under vfsmount_lock.
 */
static int attach_recursive_mnt(struct mount *source_mnt,
struct path *path, struct path *parent_path)
{
LIST_HEAD(tree_list);
struct mount *dest_mnt = real_mount(path->mnt);
struct dentry *dest_dentry = path->dentry;
struct mount *child, *p;
int err;
/* shared destination: the new mounts need peer group ids */
if (IS_MNT_SHARED(dest_mnt)) {
err = invent_group_ids(source_mnt, true);
if (err)
goto out;
}
/* build copies for every member of the destination's peer group */
err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
if (err)
goto out_cleanup_ids;
br_write_lock(vfsmount_lock);
if (IS_MNT_SHARED(dest_mnt)) {
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
set_mnt_shared(p);
}
if (parent_path) {
/* moving an existing mount: detach, then re-attach at path */
detach_mnt(source_mnt, parent_path);
attach_mnt(source_mnt, path);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
/* fresh mount: record the mountpoint and commit to the tree */
mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
commit_tree(source_mnt);
}
/* commit the propagated copies as well */
list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
list_del_init(&child->mnt_hash);
commit_tree(child);
}
br_write_unlock(vfsmount_lock);
return 0;
out_cleanup_ids:
if (IS_MNT_SHARED(dest_mnt))
cleanup_group_ids(source_mnt, NULL);
out:
return err;
}
/*
 * mnt_set_mountpoint - record where a child mount is attached.
 * Pins the parent mount and the mountpoint dentry, and marks the
 * dentry as mounted (DCACHE_MOUNTED) under its d_lock.
 */
void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry,
struct mount *child_mnt)
{
mnt_add_count(mnt, 1); /* essentially, that's mntget */
child_mnt->mnt_mountpoint = dget(dentry); /* pin the mountpoint dentry */
child_mnt->mnt_parent = mnt;
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_MOUNTED;
spin_unlock(&dentry->d_lock);
}
本文深入探讨Linux系统中文件系统的挂载流程,包括挂载命令的内部机制、超级块的作用、dentry与inode的关系,以及vfsmount结构在挂载过程中的作用。通过分析sys_mount系统调用,揭示了挂载新文件系统所需的关键步骤。
683

被折叠的 条评论
为什么被折叠?



