sys_open分析,从文件名找到文件信息(namei)

从文件名找到文件信息(namei)
 
本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝,转载,转载时请保持文档的完整性,严禁用于任何商业用途。
msn: yfydz_no1@hotmail.com
来源:http://yfydz.cublog.cn

1. 前言

inode是类Unix系统的文件系统的基本索引方法,每个文件都对应一个inode,再通过inode找到文件中的实际数据,因此根据文件路径名找到具体的inode节点就是一个很重要的处理步骤。系统会缓存用过的每个文件或目录对应的dentry结构, 从该结构可以指向相应的inode, 每次打开文件, 都会最终对应到文件的inode,中间查找过程称为namei。

本文介绍Linux下的路径到文件指针的转换过程,内核版本为2.6.19.2。

虚拟文件系统的转换源代码在fs/namei.c中,具体和文件系统相关的部分在fs/*/namei.c文件中。

2. 引子

由于这种转换是一个中间过程,在具体分析namei处理前,先看看系统的调用顺序是如何进入转换的:
当用户空间程序用open系统调用打开一个文件时,内核对应的处理是sys_open:
/* fs/open.c */
/*
 * open(2) system-call entry point.
 *
 * Adds O_LARGEFILE to flags when force_o_largefile() is true, then passes
 * the request to do_sys_open() with AT_FDCWD as the base directory.
 * Returns do_sys_open()'s result unchanged.
 */
asmlinkage long sys_open(const char __user *filename, int flags, int mode)
{
 long fd;

 if (force_o_largefile())
  flags |= O_LARGEFILE;

 fd = do_sys_open(AT_FDCWD, filename, flags, mode);
 /* avoid REGPARM breakage on x86: */
 prevent_tail_call(fd);
 return fd;
}
真正的打开函数是do_sys_open:
/* fs/open.c */
// dfd为AT_FDCWD
/*
 * Open filename relative to dfd and return a file descriptor.
 *
 * @dfd:      base directory descriptor (AT_FDCWD when called from sys_open)
 * @filename: user-space pointer to the path name
 * @flags:    open flags
 * @mode:     creation mode
 *
 * Returns the new descriptor (>= 0), or a negative error taken from
 * getname(), get_unused_fd() or do_filp_open().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
 struct file *filp;
 /* kernel copy of the user-supplied path name */
 char *kname = getname(filename);
 int fd = PTR_ERR(kname);

 if (IS_ERR(kname))
  return fd;

 /* reserve a descriptor number; no inode is involved yet */
 fd = get_unused_fd();
 if (fd >= 0) {
  /* turn the path name into an open struct file */
  filp = do_filp_open(dfd, kname, flags, mode);
  if (!IS_ERR(filp)) {
   fsnotify_open(filp->f_dentry);
   fd_install(fd, filp);
  } else {
   /* open failed: hand the descriptor back, report the error */
   put_unused_fd(fd);
   fd = PTR_ERR(filp);
  }
 }
 putname(kname);
 return fd;
}

// 文件打开
/*
 * Resolve filename through open_namei() and convert the resulting
 * nameidata into a struct file.
 *
 * open_namei() expects the low access bits of its flag argument in the
 * 00/01/10/11 form described in its header comment, so the flags are
 * bumped by one when (flags + 1) has an O_ACCMODE bit set.
 *
 * Returns the struct file from nameidata_to_filp(), or ERR_PTR(error)
 * when open_namei() fails.
 */
static struct file *do_filp_open(int dfd, const char *filename, int flags,
     int mode)
{
 /* a full structure on the stack, not a pointer */
 struct nameidata nd;
 int lookup_flags = flags;
 int err;

 if ((lookup_flags + 1) & O_ACCMODE)
  lookup_flags++;

 /* name -> nameidata; nd receives the lookup result */
 err = open_namei(dfd, filename, lookup_flags, mode, &nd);
 if (err)
  return ERR_PTR(err);

 /* nameidata -> struct file, using the caller's original flags */
 return nameidata_to_filp(&nd, flags);
}

因此重点函数是open_namei函数, 实现了从文件名到inode的转换, 也是namei的处理入口.

在分析open_namei之前, 先看一下getname: 它通过kmem_cache(names_cachep)分配一块内核缓冲区, 用来保存从用户空间拷贝进来的路径名:
// 文件名转换, 从用户空间拷贝到内核空间
/* fs/namei.c */
/*
 * Copy a user-space path name into a kernel buffer.
 *
 * The buffer comes from __getname(). The definitions quoted in the
 * original notes (include/linux/fs.h) are:
 *   #define __getname() kmem_cache_alloc(names_cachep, SLAB_KERNEL)
 *   #define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
 * with the cache created as
 *   names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
 *    SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 *
 * Returns the buffer on success, ERR_PTR(-ENOMEM) when __getname()
 * returns NULL, or ERR_PTR() of do_getname()'s negative result.
 */
char * getname(const char __user * filename)
{
 char *kname = __getname();
 char *result;

 if (!kname) {
  result = ERR_PTR(-ENOMEM);
 } else {
  /* the actual copy from user space */
  int err = do_getname(filename, kname);

  if (err < 0) {
   /* copy failed: give the buffer back and report the error */
   __putname(kname);
   result = ERR_PTR(err);
  } else {
   result = kname;
  }
 }
 /* audit hook; per the original notes it is empty without CONFIG_AUDITSYSCALL */
 audit_getname(result);
 return result;
}

/*
 * Copy the user-space string at filename into page.
 *
 * Unless the current address limit is KERNEL_DS, the pointer must lie
 * below TASK_SIZE, and the copy length is clamped so it cannot run past
 * TASK_SIZE.
 *
 * Returns 0 on success, -EFAULT for a pointer at or above TASK_SIZE,
 * -ENOENT for an empty string, -ENAMETOOLONG when the copy filled the
 * whole limit, or the negative value returned by strncpy_from_user().
 */
static int do_getname(const char __user *filename, char *page)
{
 unsigned long limit = PATH_MAX;
 int copied;

 if (!segment_eq(get_fs(), KERNEL_DS)) {
  unsigned long uaddr = (unsigned long) filename;

  if (uaddr >= TASK_SIZE)
   return -EFAULT;
  if (TASK_SIZE - uaddr < PATH_MAX)
   limit = TASK_SIZE - uaddr;
 }

 /* copy the user-supplied name into the kernel buffer */
 copied = strncpy_from_user(page, filename, limit);
 if (copied < 0)
  return copied;
 if (copied == 0)
  return -ENOENT;
 return copied < limit ? 0 : -ENAMETOOLONG;
}
 
3. namei相关数据结构
/* include/linux/namei.h */
// Working state of a path lookup; the lookup functions below fill it in
// and open_namei() hands it back to its caller.
struct nameidata {
// current position on the path
 struct dentry *dentry;
// mount (vfsmount) that goes with dentry
 struct vfsmount *mnt;
// last file or directory name component of the path name
 struct qstr last;
 unsigned int flags;
 int  last_type;
// depth counter (the original notes call it "directory depth"; presumably
// the symlink nesting level, given saved_names[MAX_NESTED_LINKS + 1] — TODO confirm)
 unsigned depth;
 char *saved_names[MAX_NESTED_LINKS + 1]; // 9 (array size per the original notes)
 /* Intent data */
// intent-specific data
 union {
// open intent; carries the pointer to the file being opened
  struct open_intent open;
 } intent;
};

// Open-intent data carried inside struct nameidata (nd->intent.open).
struct open_intent {
// open flags
 int flags;
// creation mode
 int create_mode;
// file pointer
 struct file *file;
};

// Path: an intermediate helper structure that simply pairs a mount point
// (vfsmount) with a dentry
struct path {
 struct vfsmount *mnt;
 struct dentry *dentry;
};

/* include/linux/dcache.h */
// Directory entry; instances are kept in the system's dentry cache
struct dentry {
 atomic_t d_count;
 unsigned int d_flags;  /* protected by d_lock */
 spinlock_t d_lock;  /* per dentry lock */
 struct inode *d_inode;  /* Where the name belongs to - NULL is
      * negative */
 /*
  * The next three fields are touched by __d_lookup.  Place them here
  * so they all fit in a cache line.
  */
 struct hlist_node d_hash; /* lookup hash list */
 struct dentry *d_parent; /* parent directory */
 struct qstr d_name;
 struct list_head d_lru;  /* LRU list */
 /*
  * d_child and d_rcu can share memory
  */
 union {
  struct list_head d_child; /* child of parent list */
   struct rcu_head d_rcu;
 } d_u;
 struct list_head d_subdirs; /* our children */
 struct list_head d_alias; /* inode alias list */
 unsigned long d_time;  /* used by d_revalidate */
 struct dentry_operations *d_op;
 struct super_block *d_sb; /* The root of the dentry tree */
 void *d_fsdata;   /* fs-specific data */
#ifdef CONFIG_PROFILING
 struct dcookie_struct *d_cookie; /* cookie, if any */
#endif
 int d_mounted;
 unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
};

/* include/linux/fs.h */
// Open-file structure; a pointer to one of these is what opening a file
// ultimately produces
struct file {
 /*
  * fu_list becomes invalid after file_free is called and queued via
  * fu_rcuhead for RCU freeing
  */
 union {
  struct list_head fu_list;
  struct rcu_head  fu_rcuhead;
 } f_u;
// dentry of the file
 struct dentry  *f_dentry;
// mount (vfsmount) the file lives on
 struct vfsmount         *f_vfsmnt;
// file operations
 const struct file_operations *f_op;
 atomic_t  f_count;
 unsigned int   f_flags;
 mode_t   f_mode;
 loff_t   f_pos;
 struct fown_struct f_owner;
 unsigned int  f_uid, f_gid;
 struct file_ra_state f_ra;
 unsigned long  f_version;
#ifdef CONFIG_SECURITY
 void   *f_security;
#endif
 /* needed for tty driver, and maybe others */
 void   *private_data;
#ifdef CONFIG_EPOLL
 /* Used by fs/eventpoll.c to link all the hooks to this file */
 struct list_head f_ep_links;
 spinlock_t  f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
 struct address_space *f_mapping;
};
 
4. namei操作

4.1 open_namei

/* fs/namei.c */
/*
 * open_namei()
 *
 * namei for open - this is in fact almost the whole open-routine.
 *
 * Note that the low bits of "flag" aren't the same as in the open
 * system call - they are 00 - no permissions needed
 *     01 - read permission needed
 *     10 - write permission needed
 *     11 - read/write permissions needed
 * which is a lot more logical, and also allows the "no perm" needed
 * for symlinks (where the permissions are checked later).
 * SMP-safe
 */
// Returns 0 with nd filled in on success, or a negative error after
// releasing what was acquired.
int open_namei(int dfd, const char *pathname, int flag,
  int mode, struct nameidata *nd)
{
 int acc_mode, error;
 struct path path;
 struct dentry *dir;
 int count = 0;
// ACC_MODE as quoted by the original notes (octal escapes restored; the notes
// showed '/' in place of '\' — confirm against fs/namei.c):
// #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
// access mode derived from the open flags
 acc_mode = ACC_MODE(flag);
 /* O_TRUNC implies we need access checks for write permissions */
// O_TRUNC: add MAY_WRITE
 if (flag & O_TRUNC)
  acc_mode |= MAY_WRITE;
 /* Allow the LSM permission hook to distinguish append
    access from general write access. */
// O_APPEND: add MAY_APPEND
 if (flag & O_APPEND)
  acc_mode |= MAY_APPEND;
 /*
  * The simplest case - just a plain lookup.
  */
// no O_CREAT: nothing has to be created
 if (!(flag & O_CREAT)) {
// look pathname up directly; the resulting dentry and mount are stored in nd
  error = path_lookup_open(dfd, pathname, lookup_flags(flag),
      nd, flag);
  if (error)
   return error;
  goto ok;
 }
 /*
  * Create - we need to know the parent.
  */
// O_CREAT: look up with LOOKUP_PARENT plus create intent; the result is stored in nd
 error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
 if (error)
  return error;
 /*
  * We have the parent and last component. First of all, check
  * that we are not asked to creat(2) an obvious directory - that
  * will not do.
  */
 error = -EISDIR;
// the last component must be LAST_NORM and must end the string; otherwise -EISDIR
 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
  goto exit;
// dir is nd->dentry after the LOOKUP_PARENT lookup, i.e. the parent directory
 dir = nd->dentry;
// clear the LOOKUP_PARENT flag
 nd->flags &= ~LOOKUP_PARENT;
 mutex_lock(&dir->d_inode->i_mutex);
// fill in path. The original notes ask whether dir and path.dentry are the same:
// dir is the parent, while lookup_hash(nd) presumably resolves nd->last inside it,
// so path.dentry would be the final component — TODO confirm against lookup_hash()
 path.dentry = lookup_hash(nd);
 path.mnt = nd->mnt;
do_last:
// bail out if path.dentry is an error pointer
 error = PTR_ERR(path.dentry);
 if (IS_ERR(path.dentry)) {
  mutex_unlock(&dir->d_inode->i_mutex);
  goto exit;
 }
// bail out if nd->intent.open.file, the file pointer eventually returned, is an error pointer
 if (IS_ERR(nd->intent.open.file)) {
  mutex_unlock(&dir->d_inode->i_mutex);
  error = PTR_ERR(nd->intent.open.file);
  goto exit_dput;
 }
 /* Negative dentry, just create the file */
 if (!path.dentry->d_inode) {
// create the new file through open_namei_create(), then return
  error = open_namei_create(nd, &path, flag, mode);
  if (error)
   goto exit;
  return 0;
 }
// from here on the file already exists
 /*
  * It already exists.
  */
 mutex_unlock(&dir->d_inode->i_mutex);
 audit_inode_update(path.dentry->d_inode);
 error = -EEXIST;
// O_EXCL demands that the file did not exist, so an existing file is an error
 if (flag & O_EXCL)
  goto exit_dput;
 if (__follow_mount(&path)) {
  error = -ELOOP;
  if (flag & O_NOFOLLOW)
   goto exit_dput;
 }
 error = -ENOENT;
 if (!path.dentry->d_inode)
  goto exit_dput;
// the inode provides a follow_link operation: handle it as a link at do_link
 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
  goto do_link;
// copy path's dentry and mount into nd
 path_to_nameidata(&path, nd);
 error = -EISDIR;
// a directory is rejected with -EISDIR
 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
  goto exit;
ok:
// final pre-open checks on nd's dentry and inode via may_open()
 error = may_open(nd, acc_mode, flag);
 if (error)
  goto exit;
 return 0;
// error exits: release what was acquired and return the error
exit_dput:
 dput_path(&path, nd);
exit:
 if (!IS_ERR(nd->intent.open.file))
  release_open_intent(nd);
 path_release(nd);
 return error;
// link handling: resolve the link, then loop back to do_last; loops are cut off below
do_link:
 error = -ELOOP;
 if (flag & O_NOFOLLOW)
  goto exit_dput;
 /*
  * This is subtle. Instead of calling do_follow_link() we do the
  * thing by hands. The reason is that this way we have zero link_count
  * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
  * After that we have the parent and last component, i.e.
  * we are in the same situation as after the first path_walk().
  * Well, almost - if the last component is normal we get its copy
  * stored in nd->last.name and we will have to putname() it when we
  * are done. Procfs-like symlinks just set LAST_BIND.
  */
// set LOOKUP_PARENT again for the walk done while following the link
 nd->flags |= LOOKUP_PARENT;
 error = security_inode_follow_link(path.dentry, nd);
 if (error)
  goto exit_dput;
// follow the link
 error = __do_follow_link(&path, nd);
 if (error) {
  /* Does someone understand code flow here? Or it is only
   * me so stupid? Anathema to whoever designed this non-sense
   * with "intent.open".
   */
  release_open_intent(nd);
  return error;
 }
 nd->flags &= ~LOOKUP_PARENT;
// inspect the type of the last component
 if (nd->last_type == LAST_BIND)
  goto ok;
 error = -EISDIR;
 if (nd->last_type != LAST_NORM)
  goto exit;
 if (nd->last.name[nd->last.len]) {
  __putname(nd->last.name);
  goto exit;
 }
 error = -ELOOP;
// loop guard: fail with -ELOOP once count (bumped on every pass) has reached 32
 if (count++==32) {
  __putname(nd->last.name);
  goto exit;
 }
 dir = nd->dentry;
 mutex_lock(&dir->d_inode->i_mutex);
// refresh path's dentry and mount
 path.dentry = lookup_hash(nd);
 path.mnt = nd->mnt;
 __putname(nd->last.name);
 goto do_last;
}

4.2  path_lookup_open和path_lookup_create

这两个函数找到路径名对应的挂接点和dentry结构, 赋值到nameidata结构中; create版本只是额外带上LOOKUP_CREATE标志和创建模式(open_namei调用它时用LOOKUP_PARENT查找父目录), 文件不存在时真正的创建由后面的open_namei_create完成:
/**
 * path_lookup_open - look a path up with open intent, no creation
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 *
 * Forwards to __path_lookup_intent_open() with a create mode of 0 and
 * returns its result.
 */
int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
  struct nameidata *nd, int open_flags)
{
 /* plain open: no create mode */
 const int create_mode = 0;

 return __path_lookup_intent_open(dfd, name, lookup_flags, nd,
   open_flags, create_mode);
}

/**
 * path_lookup_create - look a path up with open + create intent
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 * @create_mode: create intent flags
 *
 * Adds LOOKUP_CREATE to @lookup_flags, forwards to
 * __path_lookup_intent_open() and returns its result.
 */
static int path_lookup_create(int dfd, const char *name,
         unsigned int lookup_flags, struct nameidata *nd,
         int open_flags, int create_mode)
{
 unsigned int create_flags = lookup_flags | LOOKUP_CREATE;

 return __path_lookup_intent_open(dfd, name, create_flags,
   nd, open_flags, create_mode);
}

这两个函数都是调用__path_lookup_intent_open, 只是参数不同,create中加入了LOOKUP_CREATE标志和create_mode:

/*
 * Common body of path_lookup_open() and path_lookup_create().
 *
 * Takes an empty struct file, stores it with the open flags and create
 * mode in nd->intent.open, then runs do_path_lookup() with LOOKUP_OPEN
 * added to the lookup flags.
 *
 * Returns 0 on success, -ENFILE when no empty struct file is available,
 * or a negative error from the lookup or from the intent's file pointer.
 */
static int __path_lookup_intent_open(int dfd, const char *name,
  unsigned int lookup_flags, struct nameidata *nd,
  int open_flags, int create_mode)
{
 struct file *empty = get_empty_filp();
 int rc;

 /* file table is full */
 if (!empty)
  return -ENFILE;

 /* this is the file pointer that will eventually be handed back */
 nd->intent.open.file = empty;
 nd->intent.open.flags = open_flags;
 nd->intent.open.create_mode = create_mode;

 /* the actual walk over the path name */
 rc = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);

 /* the intent's file pointer is examined before rc */
 if (!IS_ERR(nd->intent.open.file)) {
  /* file pointer is fine; drop the intent if the lookup failed */
  if (rc != 0)
   release_open_intent(nd);
  return rc;
 }

 /* file pointer is an error: if the lookup itself succeeded, release
  * the path and report the file pointer's error instead */
 if (rc == 0) {
  rc = PTR_ERR(nd->intent.open.file);
  path_release(nd);
 }
 return rc;
}

// Path lookup: choose the starting dentry/mount for name (root, current
// working directory, or the directory open on dfd), then walk the path
// with link_path_walk().
/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
static int fastcall do_path_lookup(int dfd, const char *name,
    unsigned int flags, struct nameidata *nd)
{
 int retval = 0;
 int fput_needed;
 struct file *file;
// filesystem context of the current process (root, pwd, altroot and their mounts)
 struct fs_struct *fs = current->fs;
// default last_type; it stays LAST_ROOT when the name is nothing but slashes
 nd->last_type = LAST_ROOT; /* if there are only slashes... */
 nd->flags = flags;
 nd->depth = 0;
// pick the starting mount and dentry, taking a reference on each (mntget/dget)
 if (*name=='/') {
// absolute path
  read_lock(&fs->lock);
  if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
// an alternate root is set and LOOKUP_NOALT was not requested:
// try the lookup under altroot/altrootmnt first
   nd->mnt = mntget(fs->altrootmnt);
   nd->dentry = dget(fs->altroot);
   read_unlock(&fs->lock);
   if (__emul_lookup_dentry(name,nd))
    goto out; /* found in altroot */
   read_lock(&fs->lock);
  }
// start from the process root: reference rootmnt and store it in nd
  nd->mnt = mntget(fs->rootmnt);
// reference the root dentry and store it in nd
  nd->dentry = dget(fs->root);
  read_unlock(&fs->lock);
 } else if (dfd == AT_FDCWD) {
// the sys_open path lands here: a relative path starting at the
// current working directory
  read_lock(&fs->lock);
// reference pwdmnt and store it in nd
  nd->mnt = mntget(fs->pwdmnt);
// reference pwd and store it in nd
  nd->dentry = dget(fs->pwd);
  read_unlock(&fs->lock);
 } else {
  struct dentry *dentry;
// relative to the directory open on dfd. Per the original notes,
// fget_light() skips the reference count when the fd table is not
// shared — TODO confirm against fget_light()
  file = fget_light(dfd, &fput_needed);
  retval = -EBADF;
  if (!file)
   goto out_fail;
  dentry = file->f_dentry;
  retval = -ENOTDIR;
  if (!S_ISDIR(dentry->d_inode->i_mode))
   goto fput_fail;
// MAY_EXEC permission is required on that directory
  retval = file_permission(file, MAY_EXEC);
  if (retval)
   goto fput_fail;
// reference the file's mount and dentry and store them in nd
  nd->mnt = mntget(file->f_vfsmnt);
  nd->dentry = dget(dentry);
// release the light reference
  fput_light(file, fput_needed);
 }
// reset the per-task count of links followed
 current->total_link_count = 0;
// walk the path; this is the core of the lookup
 retval = link_path_walk(name, nd);
out:
 if (likely(retval == 0)) {
// the common case: the path resolved
  if (unlikely(!audit_dummy_context() && nd && nd->dentry &&
    nd->dentry->d_inode))
   audit_inode(name, nd->dentry->d_inode);
 }
out_fail:
 return retval;
fput_fail:
 fput_light(file, fput_needed);
 goto out_fail;
}

do_path_lookup调用的核心函数是link_path_walk:

/*
 * Wrapper around __link_path_walk() that retries pathname resolution
 * when the underlying file system reports ESTALE.
 *
 * The whole path is retried once from the saved starting point with
 * LOOKUP_REVAL set, forcing real lookup requests instead of relying
 * on the dcache.
 */
int fastcall link_path_walk(const char *name, struct nameidata *nd)
{
 /* snapshot of the starting point, kept for a possible retry */
 struct nameidata saved = *nd;
 int err;

 /* make sure the stuff we saved doesn't go away */
 dget(saved.dentry);
 mntget(saved.mnt);

 err = __link_path_walk(name, nd);
 if (err != -ESTALE)
  goto drop_saved;

 /* stale handle: restore the snapshot and walk again with LOOKUP_REVAL */
 *nd = saved;
 dget(nd->dentry);
 mntget(nd->mnt);
 nd->flags |= LOOKUP_REVAL;
 err = __link_path_walk(name, nd);

drop_saved:
 dput(saved.dentry);
 mntput(saved.mnt);
 return err;
}

真正的名称解析函数__link_path_walk:
/*
 * Name resolution.
 * This is the basic name resolution function, turning a pathname into
 * the final dentry. We expect &apos;base&apos; to be positive and a directory.
 *
 * Returns 0 and nd will have valid dentry and mnt on success.
 * Returns error and drops reference to input namei data on failure.
 */
static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
{
 struct path next;
 struct inode *inode;
 int err;
 unsigned int lookup_flags = nd->flags;
// 去掉起始多余的"/", 同时也说明系统可以允许你输入多个"/"而不报错
 while (*name==&apos;/&apos;)
  name++;
// 空路径
 if (!*name)
  goto return_reval;
// 路径对应的inode
 inode = nd->dentry->d_inode;
 if (nd->depth)
  lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
 /* At this point we know we have a real path component. */
 for(;;) {
// 循环处理,每个循环提取文件路径的一个目录名, &apos;/&apos;分隔
  unsigned long hash;
  struct qstr this;
  unsigned int c;
  nd->flags |= LOOKUP_CONTINUE;
// 检查文件权限, 包括读写执行权限, 用户/组/其他权限, 返回0为合法
  err = exec_permission_lite(inode, nd);
  if (err == -EAGAIN)
// EAGAIN表示该inode正在被操作, 检查其执行权限
// 而对于普通文件检查结果将是错误
   err = vfs_permission(nd, MAY_EXEC);
// 出错中断循环
   if (err)
   break;
// 填充quickstring结构
  this.name = name;
// name的第一个字符的数值
  c = *(const unsigned char *)name;
// 计算文件名的hash, 不包括&apos;/&apos;
  hash = init_name_hash();
  do {
   name++;
   hash = partial_name_hash(c, hash);
   c = *(const unsigned char *)name;
  } while (c && (c != &apos;/&apos;));
// 目录(如果有的话)的名称长度
  this.len = name - (const char *) this.name;
// hash
  this.hash = end_name_hash(hash);
  /* remove trailing slashes? */
// c为0表示是最后的具体文件名了
  if (!c)
   goto last_component;
// 跳过中间的&apos;/&apos;
  while (*++name == &apos;/&apos;);
// 到名称尾, 说明文件名最后一个字符是&apos;/&apos;
  if (!*name)
   goto last_with_slashes;
  /*
   * "." and ".." are special - ".." especially so because it has
   * to be able to know about the current root directory and
   * parent relationships.
   */
// 如果第一个字符是&apos;.&apos;
  if (this.name[0] == &apos;.&apos;) switch (this.len) {
   default:
// 是一个一&apos;.&apos;开头的文件或目录名称
    break;
   case 2:
// 第2 个字符不是".", 是普通文件或路径名
    if (this.name[1] != &apos;.&apos;)
     break;
// 以".."开头, 是父目录, 更新nd为父目录nameidata数据, inode相应更新重新循环
    follow_dotdot(nd);
    inode = nd->dentry->d_inode;
    /* fallthrough */
   case 1:
// 以&apos;.&apos;开头的当前目录, 忽略, 重新循环
    continue;
  }
  /*
   * See if the low-level filesystem might want
   * to use its own hash..
   */
// 底层FS实现中有自己的HASH算法
  if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
   err = nd->dentry->d_op->d_hash(nd->dentry, &this);
   if (err < 0)
    break;
  }
  /* This does the actual lookups.. */
// 根据文件/目录名进行具体的查找
  err = do_lookup(nd, &this, &next);
  if (err)
   break;
  err = -ENOENT;
// inode更新为本级文件目录的inode
  inode = next.dentry->d_inode;
// 找不到inode, 转错误处理
  if (!inode)
   goto out_dput;
  err = -ENOTDIR;
  if (!inode->i_op)
   goto out_dput;
  if (inode->i_op->follow_link) {
// 处理符号链接, 在其中考虑了递归互相链接的异常处理
   err = do_follow_link(&next, nd);
   if (err)
    goto return_err;
   err = -ENOENT;
// 更新inode为实际的inode
   inode = nd->dentry->d_inode;
   if (!inode)
    break;
   err = -ENOTDIR;
   if (!inode->i_op)
    break;
  } else
// nd中得到下一级路径信息
   path_to_nameidata(&next, nd);
  err = -ENOTDIR;
  if (!inode->i_op->lookup)
   break;
// 继续循环找下一目录文件名称
  continue;
  /* here ends the main loop */
// 最后的文件名了, 处理和前面类似
last_with_slashes:
// 最后一个字符是&apos;/&apos;, 是一个目录
  lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
  /* Clear LOOKUP_CONTINUE iff it was previously unset */
  nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
  if (lookup_flags & LOOKUP_PARENT)
   goto lookup_parent;
  if (this.name[0] == &apos;.&apos;) switch (this.len) {
   default:
    break;
   case 2:
// 文件名不是"..", 继续
    if (this.name[1] != &apos;.&apos;)
     break;
// 文件名是"..", 到父目录
    follow_dotdot(nd);
    inode = nd->dentry->d_inode;
    /* fallthrough */
   case 1:
// 文件名就是".", 跳到返回处理
    goto return_reval;
  }
// 一般文件处理
// 底层FS实现中有自己的HASH算法
  if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
   err = nd->dentry->d_op->d_hash(nd->dentry, &this);
   if (err < 0)
    break;
  }
// 查找最后的文件名
  err = do_lookup(nd, &this, &next);
  if (err)
   break;
  inode = next.dentry->d_inode;
  if ((lookup_flags & LOOKUP_FOLLOW)
      && inode && inode->i_op && inode->i_op->follow_link) {
   err = do_follow_link(&next, nd);
   if (err)
    goto return_err;
   inode = nd->dentry->d_inode;
  } else
// 更新nameidata中的mnt, dentry值
   path_to_nameidata(&next, nd);
  err = -ENOENT;
  if (!inode)
   break;
  if (lookup_flags & LOOKUP_DIRECTORY) {
   err = -ENOTDIR;
   if (!inode->i_op || !inode->i_op->lookup)
    break;
  }
  goto return_base;
lookup_parent:
// 复制当前quickstring结构this信息到nd的last中
// 类型为LAST_NORM
  nd->last = this;
  nd->last_type = LAST_NORM;
  if (this.name[0] != &apos;.&apos;)
   goto return_base;
  if (this.len == 1)
   nd->last_type = LAST_DOT;
  else if (this.len == 2 && this.name[1] == &apos;.&apos;)
   nd->last_type = LAST_DOTDOT;
  else
   goto return_base;
return_reval:
// 返回
  /*
   * We bypassed the ordinary revalidation routines.
   * We may need to check the cached dentry for staleness.
   */
  if (nd->dentry && nd->dentry->d_sb &&
      (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
   err = -ESTALE;
   /* Note: we do not d_invalidate() */
   if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
    break;
  }
return_base:
  return 0;
out_dput:
  dput_path(&next, nd);
  break;
 }
// 到这里属于出错了
 path_release(nd);
return_err:
 return err;
}
 
/*
 *  It's more convoluted than I'd like it to be, but... it's still fairly
 *  small and for now I'd prefer to have fast path as straight as possible.
 *  It _is_ time-critical.
 *
 *  Looks name up under nd->dentry: first in the dentry hash through
 *  __d_lookup(), revalidating the hit when the dentry has a d_revalidate
 *  operation, and otherwise (or when revalidation returns NULL) through
 *  real_lookup(). On success path receives the mount and dentry, and
 *  __follow_mount() is applied to it.
 *
 *  Returns 0 on success or the error encoded in the dentry pointer.
 */
static int do_lookup(struct nameidata *nd, struct qstr *name,
       struct path *path)
{
 struct vfsmount *mnt = nd->mnt;
 /* cached child of nd->dentry called name, if there is one */
 struct dentry *found = __d_lookup(nd->dentry, name);

 /* cache hit on a dentry that wants revalidation */
 if (found && found->d_op && found->d_op->d_revalidate) {
  found = do_revalidate(found, nd);
  if (IS_ERR(found))
   return PTR_ERR(found);
 }

 /*
  * Cache miss, or revalidation returned NULL: do the real lookup.
  * real_lookup() repeats the cached lookup under the directory's
  * i_mutex (see its own comment) before it asks the filesystem.
  */
 if (!found) {
  found = real_lookup(nd->dentry, name, nd);
  if (IS_ERR(found))
   return PTR_ERR(found);
 }

 /* hand back the mount and dentry */
 path->mnt = mnt;
 path->dentry = found;
 __follow_mount(path);
 return 0;
}

/*
 * This is called when everything else fails, and we actually have
 * to go to the low-level filesystem to find out what we should do..
 *
 * We get the directory semaphore, and after getting that we also
 * make sure that nobody added the entry to the dcache in the meantime..
 * SMP-safe
 *
 * Returns the dentry for name under parent, or an ERR_PTR() value:
 * -ENOMEM when no dentry could be allocated, -ENOENT when revalidating
 * a cached dentry returned NULL, or whatever ->lookup() returned.
 */
static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
{
 struct inode *dir = parent->d_inode;
 struct dentry *result;
 struct dentry *fresh;

 mutex_lock(&dir->i_mutex);
 /*
  * First re-do the cached lookup just in case it was created
  * while we waited for the directory semaphore..
  *
  * FIXME! This could use version numbering or similar to
  * avoid unnecessary cache lookups.
  *
  * The "dcache_lock" is purely to protect the RCU list walker
  * from concurrent renames at this point (we mustn't get false
  * negatives from the RCU list walk here, unlike the optimistic
  * fast walk).
  *
  * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
  */
 result = d_lookup(parent, name);
 if (result) {
  /*
   * Uhhuh! Nasty case: the cache was re-populated while
   * we waited on the semaphore. Need to revalidate.
   */
  mutex_unlock(&dir->i_mutex);
  if (result->d_op && result->d_op->d_revalidate) {
   result = do_revalidate(result, nd);
   if (!result)
    result = ERR_PTR(-ENOENT);
  }
  return result;
 }

 /* still not cached: allocate a new dentry and ask the filesystem */
 fresh = d_alloc(parent, name);
 if (!fresh) {
  result = ERR_PTR(-ENOMEM);
 } else {
  /* filesystem-specific lookup through the directory inode */
  result = dir->i_op->lookup(dir, fresh, nd);
  if (result)
   /* ->lookup() returned something of its own (e.g. an
    * error pointer): drop our dentry and return that */
   dput(fresh);
  else
   /* NULL from ->lookup(): our new dentry is the result */
   result = fresh;
 }
 mutex_unlock(&dir->i_mutex);
 return result;
}

小结一下函数调用顺序:
path_lookup_open    path_lookup_create
     |                     |
     V                     V
   __path_lookup_intent_open
               |
               V
        do_path_lookup
               |
               V
        link_path_walk
               |
               V
      __link_path_walk
               |
               V
           do_lookup
               |
               V
          real_lookup

这些函数操作都属于虚拟文件系统操作, 对所有类型的文件系统都适用, 而从各个FS的具体实现才能看出差异和相关标志的作用.

4.3 open_namei_create

/*
 * Create the file described by path inside the directory nd->dentry.
 *
 * The mode is masked with the process umask unless the directory inode
 * is IS_POSIXACL. After vfs_create() the directory's i_mutex is
 * released, and nd is moved from the directory to the new dentry, whether
 * or not the create succeeded.
 *
 * Returns vfs_create()'s error, otherwise the result of may_open().
 */
static int open_namei_create(struct nameidata *nd, struct path *path,
    int flag, int mode)
{
 /* nd currently points at the parent directory */
 struct dentry *parent = nd->dentry;
 int err;

 if (!IS_POSIXACL(parent->d_inode))
  mode &= ~current->fs->umask;

 err = vfs_create(parent->d_inode, path->dentry, mode, nd);
 mutex_unlock(&parent->d_inode->i_mutex);

 /* swap nd over from the directory to the created entry */
 dput(nd->dentry);
 nd->dentry = path->dentry;

 /* Don't check for write permission, don't truncate */
 if (!err)
  err = may_open(nd, 0, flag & ~O_TRUNC);
 return err;
}

4.4 path_to_nameidata

/*
 * Move nd on to the location described by path.
 *
 * The reference on nd's old dentry is dropped; the reference on its old
 * mount is dropped only when path names a different mount. nd then takes
 * over path's mount and dentry.
 */
static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
{
 /* release the dentry nd was holding */
 dput(nd->dentry);

 /* release the old mount too, but only if the mount changes */
 if (nd->mnt != path->mnt) {
  mntput(nd->mnt);
 }

 /* adopt the new location */
 nd->mnt = path->mnt;
 nd->dentry = path->dentry;
}

5. 结论

打开文件的最终目的是生成一个struct file结构的指针, 该结构中保存了文件对应的dentry指针(文件名记录在dentry中)、挂接点(vfsmount)等信息; 而struct nameidata只是查找过程中的中间结构, 用来保存路径解析的结果, 最后由它转换得到所需的struct file。


本文来自CSDN博客,转载请标明出处:http://blog.csdn.net/air_snake/archive/2008/07/22/2690554.aspx

/* * linux/fs/open.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/string.h> #include <linux/mm.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/fsnotify.h> #include <linux/module.h> #include <linux/tty.h> #include <linux/namei.h> #include <linux/backing-dev.h> #include <linux/capability.h> #include <linux/securebits.h> #include <linux/security.h> #include <linux/mount.h> #include <linux/fcntl.h> #include <linux/slab.h> #include <asm/uaccess.h> #include <linux/fs.h> #include <linux/personality.h> #include <linux/pagemap.h> #include <linux/syscalls.h> #include <linux/rcupdate.h> #include <linux/audit.h> #include <linux/falloc.h> #include <linux/fs_struct.h> #include <linux/ima.h> #include <linux/dnotify.h> #include <linux/compat.h> #include "internal.h" int do_truncate2(struct vfsmount *mnt, struct dentry *dentry, loff_t length, unsigned int time_attrs, struct file *filp) { int ret; struct iattr newattrs; /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. 
*/ if (length < 0) return -EINVAL; newattrs.ia_size = length; newattrs.ia_valid = ATTR_SIZE | time_attrs; if (filp) { newattrs.ia_file = filp; newattrs.ia_valid |= ATTR_FILE; } /* Remove suid, sgid, and file capabilities on truncate too */ ret = dentry_needs_remove_privs(dentry); if (ret < 0) return ret; if (ret) newattrs.ia_valid |= ret | ATTR_FORCE; mutex_lock(&dentry->d_inode->i_mutex); /* Note any delegations or leases have already been broken: */ ret = notify_change2(mnt, dentry, &newattrs, NULL); mutex_unlock(&dentry->d_inode->i_mutex); return ret; } int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, struct file *filp) { return do_truncate2(NULL, dentry, length, time_attrs, filp); } long vfs_truncate(struct path *path, loff_t length) { struct inode *inode; struct vfsmount *mnt; long error; inode = path->dentry->d_inode; mnt = path->mnt; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ if (S_ISDIR(inode->i_mode)) return -EISDIR; if (!S_ISREG(inode->i_mode)) return -EINVAL; error = mnt_want_write(path->mnt); if (error) goto out; error = inode_permission2(mnt, inode, MAY_WRITE); if (error) goto mnt_drop_write_and_out; error = -EPERM; if (IS_APPEND(inode)) goto mnt_drop_write_and_out; error = get_write_access(inode); if (error) goto mnt_drop_write_and_out; /* * Make sure that there are no leases. get_write_access() protects * against the truncate racing with a lease-granting setlease(). 
*/ error = break_lease(inode, O_WRONLY); if (error) goto put_write_and_out; error = locks_verify_truncate(inode, NULL, length); if (!error) error = security_path_truncate(path); if (!error) error = do_truncate2(mnt, path->dentry, length, 0, NULL); put_write_and_out: put_write_access(inode); mnt_drop_write_and_out: mnt_drop_write(path->mnt); out: return error; } EXPORT_SYMBOL_GPL(vfs_truncate); static long do_sys_truncate(const char __user *pathname, loff_t length) { unsigned int lookup_flags = LOOKUP_FOLLOW; struct path path; int error; if (length < 0) /* sorry, but loff_t says... */ return -EINVAL; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (!error) { error = vfs_truncate(&path, length); path_put(&path); } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) { return do_sys_truncate(path, length); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length) { return do_sys_truncate(path, length); } #endif static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct inode *inode; struct dentry *dentry; struct vfsmount *mnt; struct fd f; int error; error = -EINVAL; if (length < 0) goto out; error = -EBADF; f = fdget(fd); if (!f.file) goto out; /* explicitly opened as large or we are on 64-bit box */ if (f.file->f_flags & O_LARGEFILE) small = 0; dentry = f.file->f_path.dentry; mnt = f.file->f_path.mnt; inode = dentry->d_inode; error = -EINVAL; if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) goto out_putf; error = -EINVAL; /* Cannot ftruncate over 2^31 bytes without large file support */ if (small && length > MAX_NON_LFS) goto out_putf; error = -EPERM; if (IS_APPEND(inode)) goto out_putf; sb_start_write(inode->i_sb); error = locks_verify_truncate(inode, f.file, length); if (!error) error = security_path_truncate(&f.file->f_path); if (!error) error 
= do_truncate2(mnt, dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); sb_end_write(inode->i_sb); out_putf: fdput(f); out: return error; } SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length) { return do_sys_ftruncate(fd, length, 1); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length) { return do_sys_ftruncate(fd, length, 1); } #endif /* LFS versions of truncate are only needed on 32 bit machines */ #if BITS_PER_LONG == 32 SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length) { return do_sys_truncate(path, length); } SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length) { return do_sys_ftruncate(fd, length, 0); } #endif /* BITS_PER_LONG == 32 */ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); long ret; if (offset < 0 || len <= 0) return -EINVAL; /* Return error if mode is not supported */ if (mode & ~FALLOC_FL_SUPPORTED_MASK) return -EOPNOTSUPP; /* Punch hole and zero range are mutually exclusive */ if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) == (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; /* Punch hole must have keep size set */ if ((mode & FALLOC_FL_PUNCH_HOLE) && !(mode & FALLOC_FL_KEEP_SIZE)) return -EOPNOTSUPP; /* Collapse range should only be used exclusively. */ if ((mode & FALLOC_FL_COLLAPSE_RANGE) && (mode & ~FALLOC_FL_COLLAPSE_RANGE)) return -EINVAL; /* Insert range should only be used exclusively. 
*/ if ((mode & FALLOC_FL_INSERT_RANGE) && (mode & ~FALLOC_FL_INSERT_RANGE)) return -EINVAL; if (!(file->f_mode & FMODE_WRITE)) return -EBADF; /* * We can only allow pure fallocate on append only files */ if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode)) return -EPERM; if (IS_IMMUTABLE(inode)) return -EPERM; /* * We cannot allow any fallocate operation on an active swapfile */ if (IS_SWAPFILE(inode)) return -ETXTBSY; /* * Revalidate the write permissions, in case security policy has * changed since the files were opened. */ ret = security_file_permission(file, MAY_WRITE); if (ret) return ret; if (S_ISFIFO(inode->i_mode)) return -ESPIPE; /* * Let individual file system decide if it supports preallocation * for directories or not. */ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) return -ENODEV; /* Check for wrap through zero too */ if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) return -EFBIG; if (!file->f_op->fallocate) return -EOPNOTSUPP; sb_start_write(inode->i_sb); ret = file->f_op->fallocate(file, mode, offset, len); /* * Create inotify and fanotify events. * * To keep the logic simple always create events if fallocate succeeds. * This implies that events are even created if the file size remains * unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE. */ if (ret == 0) fsnotify_modify(file); sb_end_write(inode->i_sb); return ret; } EXPORT_SYMBOL_GPL(vfs_fallocate); SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) { struct fd f = fdget(fd); int error = -EBADF; if (f.file) { error = vfs_fallocate(f.file, mode, offset, len); fdput(f); } return error; } /* * access() needs to use the real uid/gid, not the effective uid/gid. * We do this by temporarily clearing all FS-related capabilities and * switching the fsuid/fsgid around to the real ones. 
*/ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) { const struct cred *old_cred; struct cred *override_cred; struct path path; struct inode *inode; struct vfsmount *mnt; int res; unsigned int lookup_flags = LOOKUP_FOLLOW; if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; override_cred = prepare_creds(); if (!override_cred) return -ENOMEM; override_cred->fsuid = override_cred->uid; override_cred->fsgid = override_cred->gid; if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ kuid_t root_uid = make_kuid(override_cred->user_ns, 0); if (!uid_eq(override_cred->uid, root_uid)) cap_clear(override_cred->cap_effective); else override_cred->cap_effective = override_cred->cap_permitted; } /* * The new set of credentials can *only* be used in * task-synchronous circumstances, and does not need * RCU freeing, unless somebody then takes a separate * reference to it. * * NOTE! This is _only_ true because this credential * is used purely for override_creds() that installs * it as the subjective cred. Other threads will be * accessing ->real_cred, not the subjective cred. * * If somebody _does_ make a copy of this (using the * 'get_current_cred()' function), that will clear the * non_rcu field, because now that other user may be * expecting RCU freeing. But normal thread-synchronous * cred accesses will keep things non-RCY. */ override_cred->non_rcu = 1; old_cred = override_creds(override_cred); retry: res = user_path_at(dfd, filename, lookup_flags, &path); if (res) goto out; inode = d_backing_inode(path.dentry); mnt = path.mnt; if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { /* * MAY_EXEC on regular files is denied if the fs is mounted * with the "noexec" flag. 
*/ res = -EACCES; if (path_noexec(&path)) goto out_path_release; } res = inode_permission2(mnt, inode, mode | MAY_ACCESS); /* SuS v2 requires we report a read only fs too */ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) goto out_path_release; /* * This is a rare case where using __mnt_is_readonly() * is OK without a mnt_want/drop_write() pair. Since * no actual write to the fs is performed here, we do * not need to telegraph to that to anyone. * * By doing this, we accept that this access is * inherently racy and know that the fs may change * state before we even see this result. */ if (__mnt_is_readonly(path.mnt)) res = -EROFS; out_path_release: path_put(&path); if (retry_estale(res, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: revert_creds(old_cred); put_cred(override_cred); return res; } SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) { return sys_faccessat(AT_FDCWD, filename, mode); } SYSCALL_DEFINE1(chdir, const char __user *, filename) { struct path path; int error; unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; retry: error = user_path_at(AT_FDCWD, filename, lookup_flags, &path); if (error) goto out; error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; set_fs_pwd(current->fs, &path); dput_and_out: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: return error; } SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct fd f = fdget_raw(fd); struct inode *inode; struct vfsmount *mnt; int error = -EBADF; error = -EBADF; if (!f.file) goto out; inode = file_inode(f.file); mnt = f.file->f_path.mnt; error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) goto out_putf; error = inode_permission2(mnt, inode, MAY_EXEC | MAY_CHDIR); if (!error) set_fs_pwd(current->fs, &f.file->f_path); out_putf: fdput(f); out: return error; } SYSCALL_DEFINE1(chroot, const char __user *, filename) { struct path path; int 
error; unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; retry: error = user_path_at(AT_FDCWD, filename, lookup_flags, &path); if (error) goto out; error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; error = -EPERM; if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT)) goto dput_and_out; error = security_path_chroot(&path); if (error) goto dput_and_out; set_fs_root(current->fs, &path); error = 0; dput_and_out: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: return error; } static int chmod_common(struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; struct iattr newattrs; int error; error = mnt_want_write(path->mnt); if (error) return error; retry_deleg: mutex_lock(&inode->i_mutex); error = security_path_chmod(path, mode); if (error) goto out_unlock; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); out_unlock: mutex_unlock(&inode->i_mutex); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } mnt_drop_write(path->mnt); return error; } SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) { struct fd f = fdget(fd); int err = -EBADF; if (f.file) { audit_file(f.file); err = chmod_common(&f.file->f_path, mode); fdput(f); } return err; } SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode) { struct path path; int error; unsigned int lookup_flags = LOOKUP_FOLLOW; retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (!error) { error = chmod_common(&path, mode); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } } return error; } SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) { 
return sys_fchmodat(AT_FDCWD, filename, mode); } static int chown_common(struct path *path, uid_t user, gid_t group) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; int error; struct iattr newattrs; kuid_t uid; kgid_t gid; uid = make_kuid(current_user_ns(), user); gid = make_kgid(current_user_ns(), group); retry_deleg: newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { if (!uid_valid(uid)) return -EINVAL; newattrs.ia_valid |= ATTR_UID; newattrs.ia_uid = uid; } if (group != (gid_t) -1) { if (!gid_valid(gid)) return -EINVAL; newattrs.ia_valid |= ATTR_GID; newattrs.ia_gid = gid; } if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; mutex_lock(&inode->i_mutex); error = security_path_chown(path, uid, gid); if (!error) error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); mutex_unlock(&inode->i_mutex); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } return error; } SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag) { struct path path; int error = -EINVAL; int lookup_flags; if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) goto out; lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 
0 : LOOKUP_FOLLOW; if (flag & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; error = mnt_want_write(path.mnt); if (error) goto out_release; error = chown_common(&path, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: return error; } SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) { return sys_fchownat(AT_FDCWD, filename, user, group, 0); } SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) { return sys_fchownat(AT_FDCWD, filename, user, group, AT_SYMLINK_NOFOLLOW); } SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) { struct fd f = fdget(fd); int error = -EBADF; if (!f.file) goto out; error = mnt_want_write_file(f.file); if (error) goto out_fput; audit_file(f.file); error = chown_common(&f.file->f_path, user, group); mnt_drop_write_file(f.file); out_fput: fdput(f); out: return error; } int open_check_o_direct(struct file *f) { /* NB: we're sure to have correct a_ops only after f_op->open */ if (f->f_flags & O_DIRECT) { if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) return -EINVAL; } return 0; } static int do_dentry_open(struct file *f, struct inode *inode, int (*open)(struct inode *, struct file *), const struct cred *cred) { static const struct file_operations empty_fops = {}; int error; f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; path_get(&f->f_path); f->f_inode = inode; f->f_mapping = inode->i_mapping; if (unlikely(f->f_flags & O_PATH)) { f->f_mode = FMODE_PATH; f->f_op = &empty_fops; return 0; } if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { error = get_write_access(inode); if (unlikely(error)) goto cleanup_file; error = __mnt_want_write(f->f_path.mnt); if (unlikely(error)) { put_write_access(inode); goto cleanup_file; } 
f->f_mode |= FMODE_WRITER; } /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ if (S_ISREG(inode->i_mode)) f->f_mode |= FMODE_ATOMIC_POS; f->f_op = fops_get(inode->i_fop); if (unlikely(WARN_ON(!f->f_op))) { error = -ENODEV; goto cleanup_all; } error = security_file_open(f, cred); if (error) goto cleanup_all; error = break_lease(inode, f->f_flags); if (error) goto cleanup_all; if (!open) open = f->f_op->open; if (open) { error = open(inode, f); if (error) goto cleanup_all; } if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(inode); if ((f->f_mode & FMODE_READ) && likely(f->f_op->read || f->f_op->read_iter)) f->f_mode |= FMODE_CAN_READ; if ((f->f_mode & FMODE_WRITE) && likely(f->f_op->write || f->f_op->write_iter)) f->f_mode |= FMODE_CAN_WRITE; f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); return 0; cleanup_all: fops_put(f->f_op); if (f->f_mode & FMODE_WRITER) { put_write_access(inode); __mnt_drop_write(f->f_path.mnt); } cleanup_file: path_put(&f->f_path); f->f_path.mnt = NULL; f->f_path.dentry = NULL; f->f_inode = NULL; return error; } /** * finish_open - finish opening a file * @file: file pointer * @dentry: pointer to dentry * @open: open callback * @opened: state of open * * This can be used to finish opening a file passed to i_op->atomic_open(). * * If the open callback is set to NULL, then the standard f_op->open() * filesystem callback is substituted. * * NB: the dentry reference is _not_ consumed. If, for example, the dentry is * the return value of d_splice_alias(), then the caller needs to perform dput() * on it after finish_open(). * * On successful return @file is a fully instantiated open file. After this, if * an error occurs in ->atomic_open(), it needs to clean up with fput(). * * Returns zero on success or -errno if the open failed. 
*/ int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *), int *opened) { int error; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ file->f_path.dentry = dentry; error = do_dentry_open(file, d_backing_inode(dentry), open, current_cred()); if (!error) *opened |= FILE_OPENED; return error; } EXPORT_SYMBOL(finish_open); /** * finish_no_open - finish ->atomic_open() without opening the file * * @file: file pointer * @dentry: dentry or NULL (as returned from ->lookup()) * * This can be used to set the result of a successful lookup in ->atomic_open(). * * NB: unlike finish_open() this function does consume the dentry reference and * the caller need not dput() it. * * Returns "1" which must be the return value of ->atomic_open() after having * called this function. */ int finish_no_open(struct file *file, struct dentry *dentry) { file->f_path.dentry = dentry; return 1; } EXPORT_SYMBOL(finish_no_open); char *file_path(struct file *filp, char *buf, int buflen) { return d_path(&filp->f_path, buf, buflen); } EXPORT_SYMBOL(file_path); /** * vfs_open - open the file at the given path * @path: path to open * @file: newly allocated file with f_flag initialized * @cred: credentials to use */ int vfs_open(const struct path *path, struct file *file, const struct cred *cred) { struct inode *inode = vfs_select_inode(path->dentry, file->f_flags); if (IS_ERR(inode)) return PTR_ERR(inode); file->f_path = *path; return do_dentry_open(file, inode, NULL, cred); } struct file *dentry_open(const struct path *path, int flags, const struct cred *cred) { int error; struct file *f; validate_creds(cred); /* We must always pass in a valid mount pointer. 
*/ BUG_ON(!path->mnt); f = get_empty_filp(); if (!IS_ERR(f)) { f->f_flags = flags; error = vfs_open(path, f, cred); if (!error) { /* from now on we need fput() to dispose of f */ error = open_check_o_direct(f); if (error) { fput(f); f = ERR_PTR(error); } } else { put_filp(f); f = ERR_PTR(error); } } return f; } EXPORT_SYMBOL(dentry_open); static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) { int lookup_flags = 0; int acc_mode; /* * Clear out all open flags we don't know about so that we don't report * them in fcntl(F_GETFD) or similar interfaces. */ flags &= VALID_OPEN_FLAGS; if (flags & (O_CREAT | __O_TMPFILE)) op->mode = (mode & S_IALLUGO) | S_IFREG; else op->mode = 0; /* Must never be set by userspace */ flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC; /* * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only * check for O_DSYNC if the need any syncing at all we enforce it's * always set instead of having to deal with possibly weird behaviour * for malicious applications setting only __O_SYNC. */ if (flags & __O_SYNC) flags |= O_DSYNC; if (flags & __O_TMPFILE) { if ((flags & O_TMPFILE_MASK) != O_TMPFILE) return -EINVAL; acc_mode = MAY_OPEN | ACC_MODE(flags); if (!(acc_mode & MAY_WRITE)) return -EINVAL; } else if (flags & O_PATH) { /* * If we have O_PATH in the open flag. Then we * cannot have anything other than the below set of flags */ flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; acc_mode = 0; } else { acc_mode = MAY_OPEN | ACC_MODE(flags); } op->open_flag = flags; /* O_TRUNC implies we need access checks for write permissions */ if (flags & O_TRUNC) acc_mode |= MAY_WRITE; /* Allow the LSM permission hook to distinguish append access from general write access. */ if (flags & O_APPEND) acc_mode |= MAY_APPEND; op->acc_mode = acc_mode; op->intent = flags & O_PATH ? 
0 : LOOKUP_OPEN; if (flags & O_CREAT) { op->intent |= LOOKUP_CREATE; if (flags & O_EXCL) op->intent |= LOOKUP_EXCL; } if (flags & O_DIRECTORY) lookup_flags |= LOOKUP_DIRECTORY; if (!(flags & O_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; op->lookup_flags = lookup_flags; return 0; } /** * file_open_name - open file and return file pointer * * @name: struct filename containing path to open * @flags: open flags as per the open(2) second argument * @mode: mode for the new file if O_CREAT is set, else ignored * * This is the helper to open a file from kernelspace if you really * have to. But in generally you should not do this, so please move * along, nothing to see here.. */ struct file *file_open_name(struct filename *name, int flags, umode_t mode) { struct open_flags op; int err = build_open_flags(flags, mode, &op); return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op); } /** * filp_open - open file and return file pointer * * @filename: path to open * @flags: open flags as per the open(2) second argument * @mode: mode for the new file if O_CREAT is set, else ignored * * This is the helper to open a file from kernelspace if you really * have to. But in generally you should not do this, so please move * along, nothing to see here.. 
*/ struct file *filp_open(const char *filename, int flags, umode_t mode) { struct filename *name = getname_kernel(filename); struct file *file = ERR_CAST(name); if (!IS_ERR(name)) { file = file_open_name(name, flags, mode); putname(name); } return file; } EXPORT_SYMBOL(filp_open); struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, const char *filename, int flags, umode_t mode) { struct open_flags op; int err = build_open_flags(flags, mode, &op); if (err) return ERR_PTR(err); return do_file_open_root(dentry, mnt, filename, &op); } EXPORT_SYMBOL(file_open_root); long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) { struct open_flags op; int fd = build_open_flags(flags, mode, &op); struct filename *tmp; if (fd) return fd; tmp = getname(filename); if (IS_ERR(tmp)) return PTR_ERR(tmp); fd = get_unused_fd_flags(flags); if (fd >= 0) { struct file *f = do_filp_open(dfd, tmp, &op); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); } else { fsnotify_open(f); fd_install(fd, f); } } putname(tmp); return fd; } SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) { if (force_o_largefile()) flags |= O_LARGEFILE; return do_sys_open(AT_FDCWD, filename, flags, mode); } SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode) { if (force_o_largefile()) flags |= O_LARGEFILE; return do_sys_open(dfd, filename, flags, mode); } #ifndef __alpha__ /* * For backward compatibility? Maybe this should be moved * into arch/i386 instead? */ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode) { return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); } #endif /* * "id" is the POSIX thread ID. We use the * files pointer for this.. 
*/ int filp_close(struct file *filp, fl_owner_t id) { int retval = 0; if (!file_count(filp)) { printk(KERN_ERR "VFS: Close: file count is 0\n"); return 0; } if (filp->f_op->flush) retval = filp->f_op->flush(filp, id); if (likely(!(filp->f_mode & FMODE_PATH))) { dnotify_flush(filp, id); locks_remove_posix(filp, id); } fput(filp); return retval; } EXPORT_SYMBOL(filp_close); /* * Careful here! We test whether the file pointer is NULL before * releasing the fd. This ensures that one clone task can't release * an fd while another clone is opening it. */ SYSCALL_DEFINE1(close, unsigned int, fd) { int retval = __close_fd(current->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || retval == -ERESTARTNOINTR || retval == -ERESTARTNOHAND || retval == -ERESTART_RESTARTBLOCK)) retval = -EINTR; return retval; } EXPORT_SYMBOL(sys_close); /* * This routine simulates a hangup on the tty, to arrange that users * are given clean terminals at login time. */ SYSCALL_DEFINE0(vhangup) { if (capable(CAP_SYS_TTY_CONFIG)) { tty_vhangup_self(); return 0; } return -EPERM; } /* * Called when an inode is about to be open. * We use this to disallow opening large files on 32bit systems if * the caller didn't specify O_LARGEFILE. On 64bit systems we force * on this flag in sys_open. */ int generic_file_open(struct inode * inode, struct file * filp) { if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) return -EOVERFLOW; return 0; } EXPORT_SYMBOL(generic_file_open); /* * This is used by subsystems that don't want seekable * file descriptors. The function is not supposed to ever fail, the only * reason it returns an 'int' and not 'void' is so that it can be plugged * directly into file_operations structure. 
*/ int nonseekable_open(struct inode *inode, struct file *filp) { filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); return 0; } EXPORT_SYMBOL(nonseekable_open); /* * stream_open is used by subsystems that want stream-like file descriptors. * Such file descriptors are not seekable and don't have notion of position * (file.f_pos is always 0). Contrary to file descriptors of other regular * files, .read() and .write() can run simultaneously. * * stream_open never fails and is marked to return int so that it could be * directly used as file_operations.open . */ int stream_open(struct inode *inode, struct file *filp) { filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS); filp->f_mode |= FMODE_STREAM; return 0; } EXPORT_SYMBOL(stream_open); 问题1: 以上是mtk方案的一个sdk文件 open.c 使用以下,访问文件没有问题。 snprintf(cfgFile, MAX_LINE_LEN-1, "%s/dev_config.json", g_cfgPath); // cfgFile = /etc/xxxxxxxxxxxxxxxxxx/dev_config.json pFile = filp_open(cfgFile, O_RDONLY, 0); if (IS_ERR(pFile)) { PRINT_ERR("Fail to Open File %s", cfgFile); set_fs(origFs); return -1; } /* * linux/fs/open.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/string.h> #include <linux/mm.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/fsnotify.h> #include <linux/module.h> #include <linux/tty.h> #include <linux/namei.h> #include <linux/backing-dev.h> #include <linux/capability.h> #include <linux/securebits.h> #include <linux/security.h> #include <linux/mount.h> #include <linux/fcntl.h> #include <linux/slab.h> #include <asm/uaccess.h> #include <linux/fs.h> #include <linux/personality.h> #include <linux/pagemap.h> #include <linux/syscalls.h> #include <linux/rcupdate.h> #include <linux/audit.h> #include <linux/falloc.h> #include <linux/fs_struct.h> #include <linux/ima.h> #include <linux/dnotify.h> #include <linux/compat.h> #include "internal.h" int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, struct file *filp) { int ret; struct 
iattr newattrs; /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ if (length < 0) return -EINVAL; newattrs.ia_size = length; newattrs.ia_valid = ATTR_SIZE | time_attrs; if (filp) { newattrs.ia_file = filp; newattrs.ia_valid |= ATTR_FILE; } /* Remove suid, sgid, and file capabilities on truncate too */ ret = dentry_needs_remove_privs(dentry); if (ret < 0) return ret; if (ret) newattrs.ia_valid |= ret | ATTR_FORCE; mutex_lock(&dentry->d_inode->i_mutex); /* Note any delegations or leases have already been broken: */ ret = notify_change(dentry, &newattrs, NULL); mutex_unlock(&dentry->d_inode->i_mutex); return ret; } long vfs_truncate(struct path *path, loff_t length) { struct inode *inode; long error; inode = path->dentry->d_inode; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ if (S_ISDIR(inode->i_mode)) return -EISDIR; if (!S_ISREG(inode->i_mode)) return -EINVAL; error = mnt_want_write(path->mnt); if (error) goto out; error = inode_permission(inode, MAY_WRITE); if (error) goto mnt_drop_write_and_out; error = -EPERM; if (IS_APPEND(inode)) goto mnt_drop_write_and_out; error = get_write_access(inode); if (error) goto mnt_drop_write_and_out; /* * Make sure that there are no leases. get_write_access() protects * against the truncate racing with a lease-granting setlease(). */ error = break_lease(inode, O_WRONLY); if (error) goto put_write_and_out; error = locks_verify_truncate(inode, NULL, length); if (!error) error = security_path_truncate(path); if (!error) error = do_truncate(path->dentry, length, 0, NULL); put_write_and_out: put_write_access(inode); mnt_drop_write_and_out: mnt_drop_write(path->mnt); out: return error; } EXPORT_SYMBOL_GPL(vfs_truncate); static long do_sys_truncate(const char __user *pathname, loff_t length) { unsigned int lookup_flags = LOOKUP_FOLLOW; struct path path; int error; if (length < 0) /* sorry, but loff_t says... 
*/ return -EINVAL; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (!error) { error = vfs_truncate(&path, length); path_put(&path); } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) { return do_sys_truncate(path, length); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length) { return do_sys_truncate(path, length); } #endif static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct inode *inode; struct dentry *dentry; struct fd f; int error; error = -EINVAL; if (length < 0) goto out; error = -EBADF; f = fdget(fd); if (!f.file) goto out; /* explicitly opened as large or we are on 64-bit box */ if (f.file->f_flags & O_LARGEFILE) small = 0; dentry = f.file->f_path.dentry; inode = dentry->d_inode; error = -EINVAL; if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) goto out_putf; error = -EINVAL; /* Cannot ftruncate over 2^31 bytes without large file support */ if (small && length > MAX_NON_LFS) goto out_putf; error = -EPERM; if (IS_APPEND(inode)) goto out_putf; sb_start_write(inode->i_sb); error = locks_verify_truncate(inode, f.file, length); if (!error) error = security_path_truncate(&f.file->f_path); if (!error) error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); sb_end_write(inode->i_sb); out_putf: fdput(f); out: return error; } SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length) { return do_sys_ftruncate(fd, length, 1); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length) { return do_sys_ftruncate(fd, length, 1); } #endif /* LFS versions of truncate are only needed on 32 bit machines */ #if BITS_PER_LONG == 32 SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length) { return do_sys_truncate(path, length); } SYSCALL_DEFINE2(ftruncate64, unsigned int, 
fd, loff_t, length) { return do_sys_ftruncate(fd, length, 0); } #endif /* BITS_PER_LONG == 32 */ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); long ret; if (offset < 0 || len <= 0) return -EINVAL; /* Return error if mode is not supported */ if (mode & ~FALLOC_FL_SUPPORTED_MASK) return -EOPNOTSUPP; /* Punch hole and zero range are mutually exclusive */ if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) == (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; /* Punch hole must have keep size set */ if ((mode & FALLOC_FL_PUNCH_HOLE) && !(mode & FALLOC_FL_KEEP_SIZE)) return -EOPNOTSUPP; /* Collapse range should only be used exclusively. */ if ((mode & FALLOC_FL_COLLAPSE_RANGE) && (mode & ~FALLOC_FL_COLLAPSE_RANGE)) return -EINVAL; /* Insert range should only be used exclusively. */ if ((mode & FALLOC_FL_INSERT_RANGE) && (mode & ~FALLOC_FL_INSERT_RANGE)) return -EINVAL; if (!(file->f_mode & FMODE_WRITE)) return -EBADF; /* * We can only allow pure fallocate on append only files */ if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode)) return -EPERM; if (IS_IMMUTABLE(inode)) return -EPERM; /* * We cannot allow any fallocate operation on an active swapfile */ if (IS_SWAPFILE(inode)) return -ETXTBSY; /* * Revalidate the write permissions, in case security policy has * changed since the files were opened. */ ret = security_file_permission(file, MAY_WRITE); if (ret) return ret; if (S_ISFIFO(inode->i_mode)) return -ESPIPE; /* * Let individual file system decide if it supports preallocation * for directories or not. */ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) return -ENODEV; /* Check for wrap through zero too */ if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) return -EFBIG; if (!file->f_op->fallocate) return -EOPNOTSUPP; sb_start_write(inode->i_sb); ret = file->f_op->fallocate(file, mode, offset, len); /* * Create inotify and fanotify events. 
* * To keep the logic simple always create events if fallocate succeeds. * This implies that events are even created if the file size remains * unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE. */ if (ret == 0) fsnotify_modify(file); sb_end_write(inode->i_sb); return ret; } EXPORT_SYMBOL_GPL(vfs_fallocate); SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) { struct fd f = fdget(fd); int error = -EBADF; if (f.file) { error = vfs_fallocate(f.file, mode, offset, len); fdput(f); } return error; } /* * access() needs to use the real uid/gid, not the effective uid/gid. * We do this by temporarily clearing all FS-related capabilities and * switching the fsuid/fsgid around to the real ones. */ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) { const struct cred *old_cred; struct cred *override_cred; struct path path; struct inode *inode; int res; unsigned int lookup_flags = LOOKUP_FOLLOW; if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; override_cred = prepare_creds(); if (!override_cred) return -ENOMEM; override_cred->fsuid = override_cred->uid; override_cred->fsgid = override_cred->gid; if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ kuid_t root_uid = make_kuid(override_cred->user_ns, 0); if (!uid_eq(override_cred->uid, root_uid)) cap_clear(override_cred->cap_effective); else override_cred->cap_effective = override_cred->cap_permitted; } old_cred = override_creds(override_cred); retry: res = user_path_at(dfd, filename, lookup_flags, &path); if (res) goto out; inode = d_backing_inode(path.dentry); if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { /* * MAY_EXEC on regular files is denied if the fs is mounted * with the "noexec" flag. 
*/ res = -EACCES; if (path_noexec(&path)) goto out_path_release; } res = inode_permission(inode, mode | MAY_ACCESS); /* SuS v2 requires we report a read only fs too */ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) goto out_path_release; /* * This is a rare case where using __mnt_is_readonly() * is OK without a mnt_want/drop_write() pair. Since * no actual write to the fs is performed here, we do * not need to telegraph to that to anyone. * * By doing this, we accept that this access is * inherently racy and know that the fs may change * state before we even see this result. */ if (__mnt_is_readonly(path.mnt)) res = -EROFS; out_path_release: path_put(&path); if (retry_estale(res, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: revert_creds(old_cred); put_cred(override_cred); return res; } SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) { return sys_faccessat(AT_FDCWD, filename, mode); } SYSCALL_DEFINE1(chdir, const char __user *, filename) { struct path path; int error; unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; retry: error = user_path_at(AT_FDCWD, filename, lookup_flags, &path); if (error) goto out; error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; set_fs_pwd(current->fs, &path); dput_and_out: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: return error; } SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct fd f = fdget_raw(fd); struct inode *inode; int error = -EBADF; error = -EBADF; if (!f.file) goto out; inode = file_inode(f.file); error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) goto out_putf; error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); if (!error) set_fs_pwd(current->fs, &f.file->f_path); out_putf: fdput(f); out: return error; } SYSCALL_DEFINE1(chroot, const char __user *, filename) { struct path path; int error; unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 
retry: error = user_path_at(AT_FDCWD, filename, lookup_flags, &path); if (error) goto out; error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; error = -EPERM; if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT)) goto dput_and_out; error = security_path_chroot(&path); if (error) goto dput_and_out; set_fs_root(current->fs, &path); error = 0; dput_and_out: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: return error; } static int chmod_common(struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; struct iattr newattrs; int error; error = mnt_want_write(path->mnt); if (error) return error; retry_deleg: mutex_lock(&inode->i_mutex); error = security_path_chmod(path, mode); if (error) goto out_unlock; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; error = notify_change(path->dentry, &newattrs, &delegated_inode); out_unlock: mutex_unlock(&inode->i_mutex); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } mnt_drop_write(path->mnt); return error; } SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) { struct fd f = fdget(fd); int err = -EBADF; if (f.file) { audit_file(f.file); err = chmod_common(&f.file->f_path, mode); fdput(f); } return err; } SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode) { struct path path; int error; unsigned int lookup_flags = LOOKUP_FOLLOW; retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (!error) { error = chmod_common(&path, mode); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } } return error; } SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) { return sys_fchmodat(AT_FDCWD, filename, mode); } static int chown_common(struct path *path, 
uid_t user, gid_t group) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; int error; struct iattr newattrs; kuid_t uid; kgid_t gid; uid = make_kuid(current_user_ns(), user); gid = make_kgid(current_user_ns(), group); retry_deleg: newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { if (!uid_valid(uid)) return -EINVAL; newattrs.ia_valid |= ATTR_UID; newattrs.ia_uid = uid; } if (group != (gid_t) -1) { if (!gid_valid(gid)) return -EINVAL; newattrs.ia_valid |= ATTR_GID; newattrs.ia_gid = gid; } if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; mutex_lock(&inode->i_mutex); error = security_path_chown(path, uid, gid); if (!error) error = notify_change(path->dentry, &newattrs, &delegated_inode); mutex_unlock(&inode->i_mutex); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } return error; } SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag) { struct path path; int error = -EINVAL; int lookup_flags; if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) goto out; lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 
0 : LOOKUP_FOLLOW; if (flag & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; error = mnt_want_write(path.mnt); if (error) goto out_release; error = chown_common(&path, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: return error; } SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) { return sys_fchownat(AT_FDCWD, filename, user, group, 0); } SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) { return sys_fchownat(AT_FDCWD, filename, user, group, AT_SYMLINK_NOFOLLOW); } SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) { struct fd f = fdget(fd); int error = -EBADF; if (!f.file) goto out; error = mnt_want_write_file(f.file); if (error) goto out_fput; audit_file(f.file); error = chown_common(&f.file->f_path, user, group); mnt_drop_write_file(f.file); out_fput: fdput(f); out: return error; } int open_check_o_direct(struct file *f) { /* NB: we're sure to have correct a_ops only after f_op->open */ if (f->f_flags & O_DIRECT) { #ifdef CONFIG_DIRECT_IO if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) #endif return -EINVAL; } return 0; } static int do_dentry_open(struct file *f, struct inode *inode, int (*open)(struct inode *, struct file *), const struct cred *cred) { static const struct file_operations empty_fops = {}; int error; f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; path_get(&f->f_path); f->f_inode = inode; f->f_mapping = inode->i_mapping; if (unlikely(f->f_flags & O_PATH)) { f->f_mode = FMODE_PATH; f->f_op = &empty_fops; return 0; } if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { error = get_write_access(inode); if (unlikely(error)) goto cleanup_file; error = __mnt_want_write(f->f_path.mnt); if (unlikely(error)) { 
put_write_access(inode); goto cleanup_file; } f->f_mode |= FMODE_WRITER; } /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ if (S_ISREG(inode->i_mode)) f->f_mode |= FMODE_ATOMIC_POS; f->f_op = fops_get(inode->i_fop); if (unlikely(WARN_ON(!f->f_op))) { error = -ENODEV; goto cleanup_all; } error = security_file_open(f, cred); if (error) goto cleanup_all; error = break_lease(inode, f->f_flags); if (error) goto cleanup_all; if (!open) open = f->f_op->open; if (open) { error = open(inode, f); if (error) goto cleanup_all; } if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(inode); if ((f->f_mode & FMODE_READ) && likely(f->f_op->read || f->f_op->read_iter)) f->f_mode |= FMODE_CAN_READ; if ((f->f_mode & FMODE_WRITE) && likely(f->f_op->write || f->f_op->write_iter)) f->f_mode |= FMODE_CAN_WRITE; f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); return 0; cleanup_all: fops_put(f->f_op); if (f->f_mode & FMODE_WRITER) { put_write_access(inode); __mnt_drop_write(f->f_path.mnt); } cleanup_file: path_put(&f->f_path); f->f_path.mnt = NULL; f->f_path.dentry = NULL; f->f_inode = NULL; return error; } /** * finish_open - finish opening a file * @file: file pointer * @dentry: pointer to dentry * @open: open callback * @opened: state of open * * This can be used to finish opening a file passed to i_op->atomic_open(). * * If the open callback is set to NULL, then the standard f_op->open() * filesystem callback is substituted. * * NB: the dentry reference is _not_ consumed. If, for example, the dentry is * the return value of d_splice_alias(), then the caller needs to perform dput() * on it after finish_open(). * * On successful return @file is a fully instantiated open file. After this, if * an error occurs in ->atomic_open(), it needs to clean up with fput(). * * Returns zero on success or -errno if the open failed. 
*/ int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *), int *opened) { int error; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ file->f_path.dentry = dentry; error = do_dentry_open(file, d_backing_inode(dentry), open, current_cred()); if (!error) *opened |= FILE_OPENED; return error; } EXPORT_SYMBOL(finish_open); /** * finish_no_open - finish ->atomic_open() without opening the file * * @file: file pointer * @dentry: dentry or NULL (as returned from ->lookup()) * * This can be used to set the result of a successful lookup in ->atomic_open(). * * NB: unlike finish_open() this function does consume the dentry reference and * the caller need not dput() it. * * Returns "1" which must be the return value of ->atomic_open() after having * called this function. */ int finish_no_open(struct file *file, struct dentry *dentry) { file->f_path.dentry = dentry; return 1; } EXPORT_SYMBOL(finish_no_open); char *file_path(struct file *filp, char *buf, int buflen) { return d_path(&filp->f_path, buf, buflen); } EXPORT_SYMBOL(file_path); /** * vfs_open - open the file at the given path * @path: path to open * @file: newly allocated file with f_flag initialized * @cred: credentials to use */ int vfs_open(const struct path *path, struct file *file, const struct cred *cred) { struct inode *inode = vfs_select_inode(path->dentry, file->f_flags); if (IS_ERR(inode)) return PTR_ERR(inode); file->f_path = *path; return do_dentry_open(file, inode, NULL, cred); } struct file *dentry_open(const struct path *path, int flags, const struct cred *cred) { int error; struct file *f; validate_creds(cred); /* We must always pass in a valid mount pointer. 
*/ BUG_ON(!path->mnt); f = get_empty_filp(); if (!IS_ERR(f)) { f->f_flags = flags; error = vfs_open(path, f, cred); if (!error) { /* from now on we need fput() to dispose of f */ error = open_check_o_direct(f); if (error) { fput(f); f = ERR_PTR(error); } } else { put_filp(f); f = ERR_PTR(error); } } return f; } EXPORT_SYMBOL(dentry_open); static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) { int lookup_flags = 0; int acc_mode; if (flags & (O_CREAT | __O_TMPFILE)) op->mode = (mode & S_IALLUGO) | S_IFREG; else op->mode = 0; /* Must never be set by userspace */ flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC; /* * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only * check for O_DSYNC if the need any syncing at all we enforce it's * always set instead of having to deal with possibly weird behaviour * for malicious applications setting only __O_SYNC. */ if (flags & __O_SYNC) flags |= O_DSYNC; if (flags & __O_TMPFILE) { if ((flags & O_TMPFILE_MASK) != O_TMPFILE) return -EINVAL; acc_mode = MAY_OPEN | ACC_MODE(flags); if (!(acc_mode & MAY_WRITE)) return -EINVAL; } else if (flags & O_PATH) { /* * If we have O_PATH in the open flag. Then we * cannot have anything other than the below set of flags */ flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; acc_mode = 0; } else { acc_mode = MAY_OPEN | ACC_MODE(flags); } op->open_flag = flags; /* O_TRUNC implies we need access checks for write permissions */ if (flags & O_TRUNC) acc_mode |= MAY_WRITE; /* Allow the LSM permission hook to distinguish append access from general write access. */ if (flags & O_APPEND) acc_mode |= MAY_APPEND; op->acc_mode = acc_mode; op->intent = flags & O_PATH ? 
0 : LOOKUP_OPEN; if (flags & O_CREAT) { op->intent |= LOOKUP_CREATE; if (flags & O_EXCL) op->intent |= LOOKUP_EXCL; } if (flags & O_DIRECTORY) lookup_flags |= LOOKUP_DIRECTORY; if (!(flags & O_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; op->lookup_flags = lookup_flags; return 0; } /** * file_open_name - open file and return file pointer * * @name: struct filename containing path to open * @flags: open flags as per the open(2) second argument * @mode: mode for the new file if O_CREAT is set, else ignored * * This is the helper to open a file from kernelspace if you really * have to. But in generally you should not do this, so please move * along, nothing to see here.. */ struct file *file_open_name(struct filename *name, int flags, umode_t mode) { struct open_flags op; int err = build_open_flags(flags, mode, &op); return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op); } /** * filp_open - open file and return file pointer * * @filename: path to open * @flags: open flags as per the open(2) second argument * @mode: mode for the new file if O_CREAT is set, else ignored * * This is the helper to open a file from kernelspace if you really * have to. But in generally you should not do this, so please move * along, nothing to see here.. 
*/ struct file *filp_open(const char *filename, int flags, umode_t mode) { struct filename *name = getname_kernel(filename); struct file *file = ERR_CAST(name); if (!IS_ERR(name)) { file = file_open_name(name, flags, mode); putname(name); } return file; } EXPORT_SYMBOL(filp_open); struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, const char *filename, int flags, umode_t mode) { struct open_flags op; int err = build_open_flags(flags, mode, &op); if (err) return ERR_PTR(err); return do_file_open_root(dentry, mnt, filename, &op); } EXPORT_SYMBOL(file_open_root); long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) { struct open_flags op; int fd = build_open_flags(flags, mode, &op); struct filename *tmp; if (fd) return fd; tmp = getname(filename); if (IS_ERR(tmp)) return PTR_ERR(tmp); fd = get_unused_fd_flags(flags); if (fd >= 0) { struct file *f = do_filp_open(dfd, tmp, &op); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); } else { fsnotify_open(f); fd_install(fd, f); } } putname(tmp); return fd; } SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) { if (force_o_largefile()) flags |= O_LARGEFILE; return do_sys_open(AT_FDCWD, filename, flags, mode); } SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode) { if (force_o_largefile()) flags |= O_LARGEFILE; return do_sys_open(dfd, filename, flags, mode); } #ifndef __alpha__ /* * For backward compatibility? Maybe this should be moved * into arch/i386 instead? */ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode) { return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); } #endif /* * "id" is the POSIX thread ID. We use the * files pointer for this.. 
*/ int filp_close(struct file *filp, fl_owner_t id) { int retval = 0; if (!file_count(filp)) { printk(KERN_ERR "VFS: Close: file count is 0\n"); return 0; } if (filp->f_op->flush) retval = filp->f_op->flush(filp, id); if (likely(!(filp->f_mode & FMODE_PATH))) { dnotify_flush(filp, id); locks_remove_posix(filp, id); } fput(filp); return retval; } EXPORT_SYMBOL(filp_close); /* * Careful here! We test whether the file pointer is NULL before * releasing the fd. This ensures that one clone task can't release * an fd while another clone is opening it. */ SYSCALL_DEFINE1(close, unsigned int, fd) { int retval = __close_fd(current->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || retval == -ERESTARTNOINTR || retval == -ERESTARTNOHAND || retval == -ERESTART_RESTARTBLOCK)) retval = -EINTR; return retval; } EXPORT_SYMBOL(sys_close); /* * This routine simulates a hangup on the tty, to arrange that users * are given clean terminals at login time. */ SYSCALL_DEFINE0(vhangup) { if (capable(CAP_SYS_TTY_CONFIG)) { tty_vhangup_self(); return 0; } return -EPERM; } /* * Called when an inode is about to be open. * We use this to disallow opening large files on 32bit systems if * the caller didn't specify O_LARGEFILE. On 64bit systems we force * on this flag in sys_open. */ int generic_file_open(struct inode * inode, struct file * filp) { if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) return -EOVERFLOW; return 0; } EXPORT_SYMBOL(generic_file_open); /* * This is used by subsystems that don't want seekable * file descriptors. The function is not supposed to ever fail, the only * reason it returns an 'int' and not 'void' is so that it can be plugged * directly into file_operations structure. 
*/ int nonseekable_open(struct inode *inode, struct file *filp) { filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); return 0; } EXPORT_SYMBOL(nonseekable_open);

问题1:以上是 QCA 方案 SDK 中的 open.c 文件。使用下面的代码打开文件时会报错:

[ 15.669739] [Error _btn_parse_conf:1007] Fail to Open File /etc/xxxxxxxxxxxxxxxxxx/dev_config.json

snprintf(cfgFile, MAX_LINE_LEN-1, "%s/dev_config.json", g_cfgPath);
// cfgFile = /etc/xxxxxxxxxxxxxxxxxx/dev_config.json
pFile = filp_open(cfgFile, O_RDONLY, 0);
if (IS_ERR(pFile)) {
    PRINT_ERR("Fail to Open File %s", cfgFile);
    set_fs(origFs);
    return -1;
}

请告诉我哪里出了问题。
最新发布
12-19
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值