mount系统调用的实现

最新推荐文章于 2023-07-29 20:30:00 发布

原创最新推荐文章于 2023-07-29 20:30:00 发布 · 2.2k 阅读

1 ·

CC 4.0 BY-SA版权

文章标签：

#mount #vfsmount #do_mount #vfs_kern_mount #do_add_mount

kernel 专栏收录该内容

9 篇文章

订阅专栏

本文详细介绍了Linux内核中`do_mount`函数的实现过程，包括`do_remount`, `do_new_mount`等关键步骤。通过`kern_path`获取挂载点信息，`security_sb_mount`进行安全检查，根据`flags`决定执行哪个分支，最终通过`do_kern_mount`和`do_add_mount`完成文件系统的挂载操作。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

mount系统调用在内核中对应的服务函数为do_mount函数.下面就简单总结一下该函数的实现.
/*
 * do_mount - kernel-side service routine behind the mount system call.
 *
 * As shown in this excerpt: resolves dir_name to a struct path, runs the
 * security_sb_mount() hook, then dispatches on flags to exactly one of
 * do_remount / do_loopback / do_change_type / do_move_mount / do_new_mount.
 * The path obtained from kern_path() is released with path_put() on every
 * exit taken after the lookup succeeded.
 *
 * Returns the result of the selected helper, or the error from
 * kern_path() / security_sb_mount() if either of those fails first.
 */
long do_mount(char *dev_name, char *dir_name, char *type_page,
          unsigned long flags, void *data_page)
{
   struct path path;
   int retval = 0;
   int mnt_flags = 0;

   // Argument checking, omitted in this excerpt.
   ......

   /* kern_path() looks up the mount point (to be covered in a separate
      write-up). For now it is enough to know that once it succeeds, the
      mount point's vfsmount, dentry and inode are all available via path. */
   retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
   if (retval)
       return retval;   /* lookup failed: no path reference to drop */

   retval = security_sb_mount(dev_name, &path,
                   type_page, flags, data_page);
   if (retval)
       goto dput_out;

   /* Pick the helper to run based on flags. An ordinary mount ends up in
      do_new_mount(), which is by far the most commonly taken branch. */
   if (flags & MS_REMOUNT)
       retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
                    data_page);
   else if (flags & MS_BIND)
       retval = do_loopback(&path, dev_name, flags & MS_REC);
   else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
       retval = do_change_type(&path, flags);
   else if (flags & MS_MOVE)
       retval = do_move_mount(&path, dev_name);
   else
       retval = do_new_mount(&path, type_page, flags, mnt_flags,
                      dev_name, data_page);// the usual path; described in detail below
dput_out:
   path_put(&path);
   return retval;
}

/*
 * do_new_mount - create a mount for filesystem type `type` and attach it
 * at `path`.
 *
 * Returns -EINVAL when no type is given, -EPERM when the caller lacks
 * CAP_SYS_ADMIN, the error encoded in the pointer returned by
 * do_kern_mount() if that fails, and otherwise whatever do_add_mount()
 * returns.
 */
static int do_new_mount(struct path *path, char *type, int flags,
           int mnt_flags, char *name, void *data)
{
   struct vfsmount *new_mnt;
   int err;

   if (!type) {
       err = -EINVAL;
   } else if (!capable(CAP_SYS_ADMIN)) {
       /* mounting requires administrative capability */
       err = -EPERM;
   } else {
       /*
        * Build the vfsmount for the requested filesystem; the call is
        * made with the big kernel lock held.
        */
       lock_kernel();
       new_mnt = do_kern_mount(type, flags, name, data);
       unlock_kernel();

       if (IS_ERR(new_mnt)) {
           err = PTR_ERR(new_mnt);
       } else {
           /* Hook the freshly created vfsmount into the mount tree. */
           err = do_add_mount(new_mnt, path, mnt_flags, NULL);
       }
   }

   return err;
}

/*
 * do_kern_mount - look up the filesystem type named by `fstype` and build
 * a vfsmount for it through vfs_kern_mount().
 *
 * Returns ERR_PTR(-ENODEV) when get_fs_type() finds nothing; otherwise
 * returns the vfs_kern_mount() result, passed through fs_set_subtype()
 * when the type has FS_HAS_SUBTYPE set and the superblock has no
 * s_subtype yet. The type obtained from get_fs_type() is handed to
 * put_filesystem() before returning.
 */
struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data)
{
   struct vfsmount *mnt;
   struct file_system_type *type;

   /* The type object is what vfs_kern_mount() uses to obtain the superblock. */
   type = get_fs_type(fstype);
   if (!type)
       return ERR_PTR(-ENODEV);

   mnt = vfs_kern_mount(type, flags, name, data);
   if (!IS_ERR(mnt)) {
       if (type->fs_flags & FS_HAS_SUBTYPE) {
           if (!mnt->mnt_sb->s_subtype)
               mnt = fs_set_subtype(mnt, fstype);
       }
   }

   put_filesystem(type);
   return mnt;
}

/*
 * get_fs_type - find the registered filesystem type matching `name`.
 *
 * Only the part of `name` before the first '.' (or all of it when there
 * is no '.') is used for the lookup. If the first __get_fs_type() lookup
 * fails, request_module() is asked to load a module of that name and, if
 * that returns 0, the lookup is retried once.
 *
 * When `name` contains a '.', a type without FS_HAS_SUBTYPE is rejected:
 * it is passed to put_filesystem() and NULL is returned.
 */
struct file_system_type *get_fs_type(const char *name)
{
   const char *subtype_sep = strchr(name, '.');
   struct file_system_type *found;
   int base_len;

   if (subtype_sep)
       base_len = subtype_sep - name;
   else
       base_len = strlen(name);

   found = __get_fs_type(name, base_len);
   if (!found) {
       /* Not registered yet: try loading a module, then look again. */
       if (request_module("%.*s", base_len, name) == 0)
           found = __get_fs_type(name, base_len);
   }

   /* No subtype requested, or nothing found: hand back the result as is. */
   if (!subtype_sep || !found)
       return found;

   if (found->fs_flags & FS_HAS_SUBTYPE)
       return found;

   /* A subtype was requested but this type does not support one. */
   put_filesystem(found);
   return NULL;
}

/*
 * vfs_kern_mount - allocate a vfsmount and have the filesystem type fill
 * in its superblock via type->get_sb().
 *
 * Returns ERR_PTR(-ENODEV) when type is NULL. On the success path shown
 * here the vfsmount is returned with mnt_mountpoint/mnt_parent pointing at
 * itself as placeholders. The error-handling labels (out,
 * out_free_secdata) and two middle sections are elided in this excerpt.
 */
struct vfsmount * vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
   struct vfsmount *mnt;
   char *secdata = NULL;
   int error;

   if (!type)
       return ERR_PTR(-ENODEV);

   error = -ENOMEM;
   mnt = alloc_vfsmnt(name);// allocate and initialise the vfsmount; nothing special here
   if (!mnt)
       goto out;

   ......

   // Call the filesystem type's get_sb() to obtain the superblock. Each
   // filesystem has its own implementation, but they basically all call
   // sget(), which first walks the file_system_type's fs_supers list looking
   // for a superblock matching `data`. If none is found, alloc_super()
   // allocates and initialises a new super_block, associates it with `data`,
   // and puts it on two lists: the global super_blocks list and the type's
   // fs_supers list; the super_block is then returned. get_sb() next calls
   // fill_super() (also filesystem-specific), which basically reads the
   // on-disk superblock from the block device and uses it to populate the
   // super_block just obtained (filesystem parameters and the like), and
   // sets up the root inode and dentry. Once fill_super() is done, get_sb()
   // usually calls simple_set_mnt() to tie the vfsmount allocated above to
   // the freshly filled super_block, and then returns.
   error = type->get_sb(type, flags, name, data, mnt);
   if (error < 0)
       goto out_free_secdata;

   ......

   // Set the vfsmount's mountpoint and parent. This looks odd at first: how
   // can the mount point of the filesystem being mounted be its own root,
   // and how can it be its own parent? These are only temporary values;
   // they are corrected later, when do_add_mount() runs.
   mnt->mnt_mountpoint = mnt->mnt_root;
   mnt->mnt_parent = mnt;
   up_write(&mnt->mnt_sb->s_umount);
   free_secdata(secdata);
   return mnt;// hand back the fully set-up vfsmount

   一些错误处理,这里就不关注了......
}

到了这里vfsmount已经创建好了,要加载的文件系统的超级块也获取到了,但是vfsmount并没有和
挂载点建立联系,最后一步就是调用函数do_add_mount将vfsmount添加到mount tree中,建立vfsmount
和挂载点的联系.下面看一下函数do_add_mount.
/*
 * do_add_mount - attach an already-created vfsmount at `path`.
 *
 * Runs under namespace_sem held for writing. Stores mnt_flags in newmnt,
 * grafts it onto the tree with graft_tree(), and, when fslist is non-NULL,
 * appends newmnt to that list via its mnt_expire link.
 *
 * Returns 0 on success. On graft_tree() failure the error is returned and
 * newmnt is released with mntput().
 */
int do_add_mount(struct vfsmount *newmnt, struct path *path,
       int mnt_flags, struct list_head *fslist)
{
   int err;

   down_write(&namespace_sem);

   // Assorted sanity checks that normally do not trigger; omitted in this excerpt.
   ......

   newmnt->mnt_flags = mnt_flags;
   if ((err = graft_tree(newmnt, path)))// graft_tree() does the main work of do_add_mount; see below
       goto unlock;

   if (fslist) /* add to the specified expiration list */
       list_add_tail(&newmnt->mnt_expire, fslist);

   up_write(&namespace_sem);
   return 0;

unlock:
   up_write(&namespace_sem);
   mntput(newmnt);
   return err;
}

/*
 * graft_tree - attach mnt at the location described by path.
 *
 * In the portion shown, returns -ENOENT when d_unlinked() reports
 * path->dentry as unlinked, and otherwise the result of
 * attach_recursive_mnt(). Checks before and code after the attach step
 * are elided in this excerpt.
 */
static int graft_tree(struct vfsmount *mnt, struct path *path)
{
   int err;

   // Assorted checks, omitted in this excerpt.
   ......

   err = -ENOENT;
   if (!d_unlinked(path->dentry))
       err = attach_recursive_mnt(mnt, path, NULL);// associate mnt with path

   ......

   return err;
}

/*
 * attach_recursive_mnt - attach source_mnt at path, committing any mounts
 * that propagate_mnt() collected on tree_list as well.
 *
 * When parent_path is non-NULL, source_mnt is detached from it and
 * attached at path; when it is NULL, source_mnt gets its mountpoint set
 * and is committed to the tree as a new mount.
 *
 * Returns 0 on success, or the error from invent_group_ids() /
 * propagate_mnt(). On propagate_mnt() failure, group ids are cleaned up
 * again if the destination mount is shared.
 */
static int attach_recursive_mnt(struct vfsmount *source_mnt,
           struct path *path, struct path *parent_path)
{
   LIST_HEAD(tree_list);
   struct vfsmount *dest_mnt = path->mnt;
   struct dentry *dest_dentry = path->dentry;
   struct vfsmount *child, *p;
   int err;

   if (IS_MNT_SHARED(dest_mnt)) {
       err = invent_group_ids(source_mnt, true);
       if (err)
           goto out;
   }
   err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
   if (err)
       goto out_cleanup_ids;

   if (IS_MNT_SHARED(dest_mnt)) {
       for (p = source_mnt; p; p = next_mnt(p, source_mnt))
           set_mnt_shared(p);
   }
   // The code above deals with shared mounts and related propagation; it
   // seems rarely used and was not studied in detail by the article's author.

   spin_lock(&vfsmount_lock);
   // On the new-mount path parent_path is passed in as NULL, so the else branch runs.
   if (parent_path) {
       detach_mnt(source_mnt, parent_path);
       attach_mnt(source_mnt, path);
       touch_mnt_namespace(parent_path->mnt->mnt_ns);
   } else {
       // As noted for vfs_kern_mount(), the vfsmount's mount point was only
       // a placeholder until now; this is where it is really set.
       mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
       /* Add the vfsmount to a number of lists, establishing the
          parent/child relationship between filesystems, and insert it
          into the mount_hashtable hash table so it can be looked up later. */
       commit_tree(source_mnt);
   }

   /* _safe iteration: each entry is unlinked from tree_list inside the loop. */
   list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
       list_del_init(&child->mnt_hash);
       commit_tree(child);
   }
   spin_unlock(&vfsmount_lock);
   return 0;

out_cleanup_ids:
   if (IS_MNT_SHARED(dest_mnt))
       cleanup_group_ids(source_mnt, NULL);
out:
   return err;
}

/*
 * mnt_set_mountpoint - record where child_mnt is mounted.
 *
 * Sets child_mnt's parent to mnt and its mountpoint to dentry, going
 * through mntget()/dget() for each, and increments dentry->d_mounted.
 * NOTE(review): the visible caller invokes this with vfsmount_lock held;
 * confirm whether that is a requirement for all callers.
 */
void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
           struct vfsmount *child_mnt)
{
   child_mnt->mnt_parent = mntget(mnt);
   child_mnt->mnt_mountpoint = dget(dentry);// this is where the mount point is really set
   dentry->d_mounted++;
}

do_mount函数就总结到这里,其实只分析了最常见的代码执行路径,基本上把mount操作的原理说清楚了.
其他的情况,以后有时间再分析.