参考文章
http://www.ibm.com/developerworks/cn/linux/l-vfs/
http://blog.youkuaiyun.com/new_abc/article/details/7689137
http://blog.youkuaiyun.com/new_abc/article/details/7712715
a.
rootfs注册和挂载后,会先建立几个目录,之后会在目录中挂载实际的根文件系统。
如prepare_namespace()->mount_root()->do_mount_root()->sys_mount(name, "/root", fs, flags, data);该执行流程尝试把参数name指定的设备文件挂载到rootfs的/root目录下。
b.
建立目录的主要工作是创建目录项dentry结构和索引节点inode结构,并将其关联到父目录项和父索引节点中。
1.rootfs创建目录的函数如下:
init/noinitramfs.c
rootfs_initcall(default_rootfs);

/*
 * Populate the initial rootfs with the minimum it needs: a /dev directory,
 * the /dev/console character device node (device number 5:1) and a /root
 * directory, created in that order.
 *
 * Returns 0 when all three steps succeed. Otherwise a warning is logged and
 * the negative error of the first failing step is returned; later steps are
 * not attempted.
 */
static int __init default_rootfs(void)
{
	int err;

	err = sys_mkdir("/dev", 0755);
	if (err >= 0)
		err = sys_mknod((const char __user *) "/dev/console",
				S_IFCHR | S_IRUSR | S_IWUSR,
				new_encode_dev(MKDEV(5, 1)));
	if (err >= 0)
		err = sys_mkdir("/root", 0700);

	if (err < 0) {
		printk(KERN_WARNING "Failed to create a rootfs\n");
		return err;
	}
	return 0;
}
该函数先创建"/dev"目录,之后在该目录下创建设备文件"/dev/console",最后创建"/root"目录。本文分析"/dev"目录的建立过程,"/dev/console"放在下篇文章中分析。
2.sys_mkdir()是系统调用,在fs/namei.c中定义。
/*
 * mkdir system call: create a directory named by pathname with permission
 * bits mode. It simply forwards to sys_mkdirat(), passing AT_FDCWD as the
 * directory file descriptor, and returns whatever sys_mkdirat() returns.
 */
SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode)
{
return sys_mkdirat(AT_FDCWD, pathname, mode);
}
pathname是路径名,mode是新目录的权限。如果dfd参数为AT_FDCWD,那么在进行路径查找时,当要创建的目录名不是以"/"开头(即不是绝对路径)时,会从当前工作目录开始查找,具体见path_init()函数。sys_mkdir()会调用另一个系统调用sys_mkdirat()。
/*
 * mkdirat system call: create the directory named by pathname, resolved
 * relative to dfd, with permission bits mode.
 *
 * Returns 0 on success or a negative error code from whichever step failed.
 */
SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode)
{
int error = 0;
char * tmp;
struct dentry *dentry;
struct nameidata nd;
/*
 * Look up the parent of the directory to be created. On success the parent
 * is left in nd.path, the final name component in nd.last, and tmp holds
 * the pathname string that must be released with putname() below.
 */
error = user_path_parent(dfd, pathname, &nd, &tmp);
if (error)
goto out_err;
/*
 * Find the dentry for the directory to be created; if none exists yet,
 * one is created and linked to the parent dentry.
 */
dentry = lookup_create(&nd, 1);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_unlock;
/* Mask the mode with the umask unless IS_POSIXACL() holds for the parent inode. */
if (!IS_POSIXACL(nd.path.dentry->d_inode))
mode &= ~current_umask();
error = mnt_want_write(nd.path.mnt);//not covered in this walkthrough
if (error)
goto out_dput;
error = security_path_mkdir(&nd.path, dentry, mode);//not covered in this walkthrough
if (error)
goto out_drop_write;
/*
 * vfs_mkdir() is not shown here; per the surrounding article it invokes the
 * mkdir operation of the parent directory's inode, which creates the new
 * inode and ties it to this dentry and to the parent.
 */
error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
/* The labels below undo the steps above in reverse order of acquisition. */
out_drop_write:
mnt_drop_write(nd.path.mnt);
out_dput:
dput(dentry);
out_unlock:
/*
 * NOTE(review): the parent's i_mutex is presumably taken inside
 * lookup_create() (not shown); it is released here even when
 * lookup_create() returned an error pointer.
 */
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
path_put(&nd.path);
putname(tmp);
out_err:
return error;
}
3.首先分析user_path_parent()
/*
 * Resolve the parent directory of a user-supplied path.
 *
 * The path is first fetched with getname() and then looked up with
 * LOOKUP_PARENT, so that nd describes the directory containing the last
 * component.
 *
 * On success returns 0 and stores the fetched string in *name; the caller
 * then owns it and must release it with putname(). On failure the string is
 * released here, *name is left untouched and the error code is returned.
 */
static int user_path_parent(int dfd, const char __user *path,
			struct nameidata *nd, char **name)
{
	char *kname = getname(path);
	int rc;

	if (IS_ERR(kname))
		return PTR_ERR(kname);

	rc = do_path_lookup(dfd, kname, LOOKUP_PARENT, nd);
	if (rc) {
		/* lookup failed: nobody else will free the name */
		putname(kname);
		return rc;
	}

	*name = kname;
	return rc;
}
LOOKUP_PARENT表示查找路径中最后一个分量所在的目录。该函数设置LOOKUP_PARENT后,调用do_path_lookup()。
/*
 * Look up a path name.
 *
 * @dfd:   base directory descriptor handed to path_init()
 * @name:  path string to resolve
 * @flags: lookup flags stored into nd by path_init()
 * @nd:    receives the result of the lookup
 *
 * path_init() picks the starting directory and path_walk() then resolves
 * the components. After a successful walk the resulting dentry is passed to
 * audit_inode() when auditing is active and the dentry has an inode.
 * Any reference still held in nd->root is dropped before returning.
 *
 * Returns 0 on success or the error from path_init()/path_walk().
 */
static int do_path_lookup(int dfd, const char *name,
				unsigned int flags, struct nameidata *nd)
{
	int rc;

	rc = path_init(dfd, name, flags, nd);
	if (rc == 0)
		rc = path_walk(name, nd);

	if (unlikely(rc == 0 && !audit_dummy_context() &&
		     nd->path.dentry && nd->path.dentry->d_inode))
		audit_inode(name, nd->path.dentry);

	if (nd->root.mnt != NULL) {
		path_put(&nd->root);
		nd->root.mnt = NULL;
	}

	return rc;
}
do_path_lookup()函数完成路径名的查找工作,这个函数接受四个参数:
dfd:使用的基目录;name:指向要解析的文件路径名的指针;flags:标志的值,表示将会怎样访问查找的文件;nd:nameidata数据结构的地址,这个结构存放了查找操作的结果。该函数主要包含path_init()和path_walk()两个函数:前者确定查找的起始目录,即根目录、当前目录或某个指定的目录;后者则在目录中逐级查找,并将查找结果保存在nd中。
3.1确定查找的起始目录
/*
 * Choose the directory a path lookup starts from and record it in nd->path,
 * taking a reference on it with path_get():
 *   - name begins with '/': the root that set_root() stores in nd->root;
 *   - dfd == AT_FDCWD: the current process's working directory (fs->pwd);
 *   - otherwise: the path of the open file that descriptor dfd refers to.
 * nd->last_type, nd->flags, nd->depth and nd->root.mnt are reset first.
 *
 * Returns 0 on success; -EBADF when fget_light() finds no file for dfd,
 * -ENOTDIR when that file is not a directory, or the error returned by
 * file_permission(file, MAY_EXEC).
 */
static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
{
int retval = 0;
int fput_needed;
struct file *file;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags;
nd->depth = 0;
nd->root.mnt = NULL;
if (*name=='/') {
/* Absolute path: start from the root. */
set_root(nd);
nd->path = nd->root;
/* nd->path is a second copy of the same path, so take a reference for it. */
path_get(&nd->root);
} else if (dfd == AT_FDCWD) {
/* Relative path with AT_FDCWD: start from the working directory. */
struct fs_struct *fs = current->fs;
/* fs->pwd is copied and referenced while fs->lock is held for reading. */
read_lock(&fs->lock);
nd->path = fs->pwd;
path_get(&fs->pwd);
read_unlock(&fs->lock);
} else {
/* Relative path with an explicit descriptor: start from that file's path. */
struct dentry *dentry;
file = fget_light(dfd, &fput_needed);
retval = -EBADF;
if (!file)
goto out_fail;
dentry = file->f_path.dentry;
retval = -ENOTDIR;
if (!S_ISDIR(dentry->d_inode->i_mode))
goto fput_fail;
retval = file_permission(file, MAY_EXEC);
if (retval)
goto fput_fail;
nd->path = file->f_path;
path_get(&file->f_path);
/* nd->path now holds its own reference; release the file obtained above. */
fput_light(file, fput_needed);
}
return 0;
fput_fail:
fput_light(file, fput_needed);
out_fail:
return retval;
}
该函数判断是否是绝对路径,是则调用set_root()将nd的root字段设置为current->fs->root,随后将nd->path也设置为current->fs->root。若不是,则判断dfd是否为AT_FDCWD,即当前目录,是则将nd->path设置为current->fs->pwd。否则,dfd就代表某个文件描述符,根据该描述符找到对应的file->f_path结构,它就是查找的起始目录。
本例是绝对路径,所以nd->path.dentry和nd->path.mnt就是rootfs挂载时分配的dentry和mnt,见<<rootfs文件系统的注册和挂载>>。
3.2开始查找
/*
 * Resolve name starting from the directory already stored in nd.
 * Resets the current task's total_link_count and then delegates to
 * link_path_walk(), returning its result.
 */
static int path_walk(const char *name, struct nameidata *nd)
{
/* Reset the counter used to guard against endless recursion through symbolic links. */
current->total_link_count = 0;
return link_path_walk(name, nd);
}
/*
 * Wrapper around __link_path_walk() that retries once on -ESTALE.
 *
 * A referenced copy of the starting path is kept so that, if the first walk
 * fails with -ESTALE, nd->path can be restored and the walk repeated with
 * LOOKUP_REVAL set. Returns the result of the last walk performed.
 */
static __always_inline int link_path_walk(const char *name, struct nameidata *nd)
{
struct path save = nd->path;
int result;
/* make sure the stuff we saved doesn't go away */
path_get(&save);
result = __link_path_walk(name, nd);
if (result == -ESTALE) {
/* nd->path had been dropped */
nd->path = save;
path_get(&nd->path);
/* The first attempt failed with -ESTALE: set LOOKUP_REVAL and walk again. */
nd->flags |= LOOKUP_REVAL;
result = __link_path_walk(name, nd);
}
path_put(&save);
return result;
}
__link_path_walk()函数要处理软链接、挂载点、不同flags等各种情况,比较复杂,这里只分析和建立"/dev"有关的代码,其他的代码以后再分析。
/*
 * Walk the path string name one component at a time, starting from the
 * directory already stored in nd->path.
 *
 * Intermediate components are looked up with do_lookup() and must have a
 * lookup operation (otherwise -ENOTDIR); inodes with a follow_link
 * operation are resolved through do_follow_link(). The last component is
 * handled separately:
 *   - with LOOKUP_PARENT set, it is not looked up at all; it is stored in
 *     nd->last with its kind in nd->last_type, and nd->path stays at the
 *     parent directory;
 *   - otherwise it is looked up and nd->path is advanced to it.
 *
 * Returns 0 on success or a negative error code. On the error paths that
 * leave the loop with `break`, the reference held in nd->path is dropped.
 */
static int __link_path_walk(const char *name, struct nameidata *nd)
{
struct path next;
struct inode *inode;
int err;
unsigned int lookup_flags = nd->flags;
/* Skip leading slashes; a path consisting only of slashes needs no walk. */
while (*name=='/')
name++;
if (!*name)
goto return_reval;
inode = nd->path.dentry->d_inode;// inode of the starting directory
if (nd->depth)
lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
/* At this point we know we have a real path component. */
for(;;) {
unsigned long hash;
struct qstr this;
unsigned int c;
nd->flags |= LOOKUP_CONTINUE;
/* Permission check on the current directory inode; any error ends the walk. */
err = exec_permission_lite(inode);
if (err)
break;
/*
 * Fill in this.name, this.len and this.hash for the current component.
 * For the "/dev" example, this.name points at "dev" and this.len is 3
 * once this section has run.
 */
this.name = name;
c = *(const unsigned char *)name;
hash = init_name_hash();
do {
name++;
hash = partial_name_hash(c, hash);
c = *(const unsigned char *)name;
} while (c && (c != '/'));
this.len = name - (const char *) this.name;
this.hash = end_name_hash(hash);
/* remove trailing slashes? */
/*
 * In the "/dev" example c is NUL here (the whole path has been consumed),
 * so control jumps to last_component.
 */
if (!c)
goto last_component;
while (*++name == '/');
if (!*name)
goto last_with_slashes;
/*
 * "." and ".." are special - ".." especially so because it has
 * to be able to know about the current root directory and
 * parent relationships.
 */
if (this.name[0] == '.') switch (this.len) {
default:
break;
case 2:
if (this.name[1] != '.')
break;
follow_dotdot(nd);
inode = nd->path.dentry->d_inode;
/* fallthrough */
case 1:
continue;
}
/*
 * See if the low-level filesystem might want
 * to use its own hash..
 */
if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
&this);
if (err < 0)
break;
}
/* This does the actual lookups.. */
err = do_lookup(nd, &this, &next);
if (err)
break;
err = -ENOENT;
inode = next.dentry->d_inode;
if (!inode)
goto out_dput;
if (inode->i_op->follow_link) {
err = do_follow_link(&next, nd);
if (err)
goto return_err;
err = -ENOENT;
inode = nd->path.dentry->d_inode;
if (!inode)
break;
} else
path_to_nameidata(&next, nd);
/* An intermediate component must provide a lookup operation. */
err = -ENOTDIR;
if (!inode->i_op->lookup)
break;
continue;
/* here ends the main loop */
last_with_slashes:
lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
/* Clear LOOKUP_CONTINUE iff it was previously unset */
nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
/*
 * The mkdir example passes LOOKUP_PARENT (only the parent directory is
 * wanted), so it jumps to lookup_parent.
 */
if (lookup_flags & LOOKUP_PARENT)
goto lookup_parent;
if (this.name[0] == '.') switch (this.len) {
default:
break;
case 2:
if (this.name[1] != '.')
break;
follow_dotdot(nd);
inode = nd->path.dentry->d_inode;
/* fallthrough */
case 1:
goto return_reval;
}
if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
&this);
if (err < 0)
break;
}
err = do_lookup(nd, &this, &next);
if (err)
break;
inode = next.dentry->d_inode;
if (follow_on_final(inode, lookup_flags)) {
err = do_follow_link(&next, nd);
if (err)
goto return_err;
inode = nd->path.dentry->d_inode;
} else
path_to_nameidata(&next, nd);
err = -ENOENT;
if (!inode)
break;
if (lookup_flags & LOOKUP_DIRECTORY) {
err = -ENOTDIR;
if (!inode->i_op->lookup)
break;
}
goto return_base;
lookup_parent:
/*
 * The parent directory ("/" in the example) is still in nd->path.
 * The final component is saved in nd->last, and nd->last_type records
 * whether it is an ordinary name (LAST_NORM), "." (LAST_DOT) or
 * ".." (LAST_DOTDOT).
 */
nd->last = this;
nd->last_type = LAST_NORM;
if (this.name[0] != '.')
goto return_base;
if (this.len == 1)
nd->last_type = LAST_DOT;
else if (this.len == 2 && this.name[1] == '.')
nd->last_type = LAST_DOTDOT;
else
goto return_base;
return_reval:
/*
 * We bypassed the ordinary revalidation routines.
 * We may need to check the cached dentry for staleness.
 */
if (nd->path.dentry && nd->path.dentry->d_sb &&
(nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
err = -ESTALE;
/* Note: we do not d_invalidate() */
if (!nd->path.dentry->d_op->d_revalidate(
nd->path.dentry, nd))
break;
}
return_base:
return 0;
out_dput:
path_put_conditional(&next, nd);
break;
}
/* Reached by every `break` that leaves the for loop: drop nd->path, return err. */
path_put(&nd->path);
return_err:
return err;
}
follow_dotdot(),do_lookup(),do_follow_link(),path_to_nameidata()等函数本文中没有涉及到,就先不分析了,以后遇到再分析。
4.随后调用lookup_create()创建dentry结构,该函数的调用流程如下:
lookup_create()->lookup_hash()->__lookup_hash()
/*
 * Find, or create, the dentry called name under the parent dentry base.
 *
 * The cache is consulted first via cached_lookup(). On a miss a new dentry
 * is allocated with d_alloc() and handed to the parent inode's lookup
 * operation.
 *
 * Returns the dentry on success, or an ERR_PTR() value: the negative result
 * of the parent's d_hash operation, -ENOENT if the parent is a dead
 * directory, or -ENOMEM if d_alloc() fails. The lookup operation's own
 * return value is passed through when it is non-NULL.
 */
static struct dentry *__lookup_hash(struct qstr *name,
struct dentry *base, struct nameidata *nd)
{
struct dentry *dentry;
struct inode *inode;
int err;
inode = base->d_inode;
/*
 * See if the low-level filesystem might want
 * to use its own hash..
 */
/*
 * If the parent dentry supplies its own d_hash operation, call it on the
 * name first; a negative result aborts the lookup with that error.
 */
if (base->d_op && base->d_op->d_hash) {
err = base->d_op->d_hash(base, name);
dentry = ERR_PTR(err);
if (err < 0)
goto out;
}
/* Search the dentry hash table for an entry whose parent is base and whose name is name. */
dentry = cached_lookup(base, name, nd);
if (!dentry) {
struct dentry *new;
/* Don't create child dentry for a dead directory. */
dentry = ERR_PTR(-ENOENT);
if (IS_DEADDIR(inode))
goto out;
/* Not cached: allocate a new dentry under base. */
new = d_alloc(base, name);
dentry = ERR_PTR(-ENOMEM);
if (!new)
goto out;
/*
 * Call the parent inode's lookup operation on the new dentry. A NULL
 * return means the new dentry itself is the result; otherwise the returned
 * value is used and the new dentry is released with dput().
 * NOTE(review): the original article states that lookup usually calls
 * d_rehash() to add the dentry to the hash table - not visible here.
 */
dentry = inode->i_op->lookup(inode, new, nd);
if (!dentry)
dentry = new;
else
dput(new);
}
out:
return dentry;
}
/*
 * Allocate and initialise a dentry called name, optionally under parent.
 *
 * The new dentry starts with a reference count of 1, the DCACHE_UNHASHED
 * flag and no inode. When parent is non-NULL the dentry takes a reference
 * on it, inherits its d_sb and is added to parent->d_subdirs.
 *
 * Returns the new dentry, or NULL if either allocation fails.
 */
struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
struct dentry *dentry;
char *dname;
dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
if (!dentry)
return NULL;
/*
 * Pick storage for the name: names longer than DNAME_INLINE_LEN-1 get a
 * separately kmalloc'ed buffer, shorter ones use the dentry's inline
 * d_iname array. In the mkdir walkthrough the name passed in is nd->last,
 * i.e. the directory being created.
 */
if (name->len > DNAME_INLINE_LEN-1) {
dname = kmalloc(name->len + 1, GFP_KERNEL);
if (!dname) {
kmem_cache_free(dentry_cache, dentry);
return NULL;
}
} else {
dname = dentry->d_iname;
}
dentry->d_name.name = dname;
dentry->d_name.len = name->len;
dentry->d_name.hash = name->hash;
memcpy(dname, name->name, name->len);
dname[name->len] = 0;
atomic_set(&dentry->d_count, 1);
dentry->d_flags = DCACHE_UNHASHED;
spin_lock_init(&dentry->d_lock);
dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
dentry->d_op = NULL;
dentry->d_fsdata = NULL;
dentry->d_mounted = 0;
INIT_HLIST_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
INIT_LIST_HEAD(&dentry->d_alias);
/* d_parent points at the parent dentry (with a reference); d_sb is copied from the parent. */
if (parent) {
dentry->d_parent = dget(parent);
dentry->d_sb = parent->d_sb;
} else {
INIT_LIST_HEAD(&dentry->d_u.d_child);
}
spin_lock(&dcache_lock);
/* Link the new dentry into the parent's d_subdirs list. */
if (parent)
list_add(&dentry->d_u.d_child, &parent->d_subdirs);
dentry_stat.nr_dentry++;
spin_unlock(&dcache_lock);
return dentry;
}
5.最后创建inode结构
vfs_mkdir()会调用父索引节点中的mkdir()函数,即rootfs文件系统中的mkdir()函数。
fs/ramfs/inode.c
/*
 * Inode operations table for ramfs directories. The mkdir and mknod entries
 * are the ramfs_mkdir() and ramfs_mknod() functions examined in this
 * walkthrough; most of the remaining entries are simple_* helpers.
 */
static const struct inode_operations ramfs_dir_inode_operations = {
.create = ramfs_create,
.lookup = simple_lookup,
.link = simple_link,
.unlink = simple_unlink,
.symlink = ramfs_symlink,
.mkdir = ramfs_mkdir,
.rmdir = simple_rmdir,
.mknod = ramfs_mknod,
.rename = simple_rename,
};
/*
 * Create a directory in ramfs.
 *
 * The work is done by ramfs_mknod() with S_IFDIR added to the mode and a
 * device number of 0. On success the parent directory's link count is
 * incremented. Returns 0 on success or the error from ramfs_mknod().
 */
static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
{
	int rc;

	rc = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0);
	if (rc)
		return rc;

	inc_nlink(dir);
	return rc;
}
/*
 * Create a new ramfs inode for dentry inside directory dir.
 *
 * @dir:    inode of the parent directory
 * @dentry: dentry that will be bound to the new inode
 * @mode:   file type and permission bits of the new inode
 * @dev:    device number passed on to ramfs_get_inode()
 *
 * If the parent has S_ISGID set, the new inode inherits the parent's group,
 * and a new directory also inherits the S_ISGID bit. The dentry is bound to
 * the inode, given an extra reference, and the parent's mtime and ctime are
 * updated.
 *
 * Returns 0 on success, or -ENOSPC if no inode could be obtained.
 */
static int
ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
{
	struct inode *new_inode = ramfs_get_inode(dir->i_sb, mode, dev);

	if (!new_inode)
		return -ENOSPC;

	if (dir->i_mode & S_ISGID) {
		new_inode->i_gid = dir->i_gid;
		if (S_ISDIR(mode))
			new_inode->i_mode |= S_ISGID;
	}

	d_instantiate(dentry, new_inode);
	dget(dentry);	/* Extra count - pin the dentry in core */
	dir->i_mtime = dir->i_ctime = CURRENT_TIME;

	return 0;
}
ramfs_get_inode()主要是分配一个inode,并进行一些初始化,其中的i_sb会指向父inode的i_sb。
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
if (inode)
list_add(&dentry->d_alias, &inode->i_dentry);
dentry->d_inode = inode;
fsnotify_d_instantiate(dentry, inode);
}
最后画出建立"/dev"目录之后的数据结构图。
图比较乱,可以去掉里面的i_sb、d_sb,因为它们都指向超级块sb。去掉后,目录层次就会清晰些。