http://www.th7.cn/system/lin/2011/11/23/20649.shtml
内核回调 之 一"读"到底
小小地跟踪下read函数,从 ssize_t read(int fd, void *buf, size_t count) 到DATASHEET 一调到底,见证内核的分层模块化。
--内核服务例程开始提供服务--
--fs/read_write.c-- SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) { struct file *file; ssize_t ret = -EBADF; int fput_needed; file = fget_light(fd, &fput_needed); if (file) { loff_t pos = file_pos_read(file); ret = vfs_read(file, buf, count, &pos); //--> file_pos_write(file, pos); fput_light(file, fput_needed); } return ret; }
--进入vfs层--
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { ... ... if (ret >= 0) { count = ret; if (file->f_op->read) ret = file->f_op->read(file, buf, count, pos); //--> ... ... return ret; }
发现回调函数,file->f_op->read
static const struct file_operations yaffs_file_operations = { .read = do_sync_read, //--> .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .flush = yaffs_file_flush, .fsync = yaffs_sync_object, .sendfile = generic_file_sendfile,}
又是何时给回调函数挂上的钩子?
file = fget_light(fd, &fput_needed)
通过fd得出file。那么是谁把fd与file联系起来的?是read之前的open。
--fs/open.c-- 669 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 670 struct file *f, 671 int (*open)(struct inode *, struct file *), 672 const struct cred *cred) 673 { 684 inode = dentry->d_inode; 704 f->f_op = fops_get(inode->i_fop); //--> 734 return f; 735 }
我们发现:f->f_op等同了inode->i_fop。
/* include/linux/fs.h (lines 1862-1866) */
/* Alas, no aliases. Too much hassle with bringing module.h everywhere */
/*
 * fops_get(fops): evaluates to fops when fops is non-NULL and
 * try_module_get() on its owner succeeds, otherwise to NULL.
 */
#define fops_get(fops) \
	(((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
/* fops_put(fops): calls module_put() on the owner when fops is non-NULL. */
#define fops_put(fops) \
	do { if (fops) module_put((fops)->owner); } while(0)
继续追踪inode,发现这么一个函数:yaffs_fill_inode_from_obj 。
inode->i_fop在这里赋值。函数名说的很清楚,通过yaffs_obj结构体来填充inode。填充inode就是该函数的使命。
--fs/yaffs2/yaffs_vfs.c-- 1273 static void yaffs_fill_inode_from_obj(struct inode *inode, 1274 struct yaffs_obj *obj)
1354 case S_IFREG: /* file */ 1355 inode->i_op = &yaffs_file_inode_operations; 1356 inode->i_fop = &yaffs_file_operations; 1357 inode->i_mapping->a_ops = 1358 &yaffs_file_address_operations; 1359 break;
1378 }
看来这里还给inode->i_op 和 inode->i_mapping->a_ops 挂上了钩子,以后会用到。
紧接之前回调到do_sync_read。
ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { ... ... for (;;) { ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); --> if (ret != -EIOCBRETRY) break; wait_on_retry_sync_kiocb(&kiocb); } ... ... }
这里又转向了aio_read?aio 即异步 I/O(asynchronous I/O)。原有的read是同步读取,异步读写接口是后来才以补丁形式加入内核的;于是同步的 do_sync_read 转而调用 aio_read,并在其返回 -EIOCBRETRY 时等待后重试。
From: Marco Stornelli <marco.stornelli@gmail.com>
If a filesystem in the file operations specifies for read and write operations only do_sync_read and do_sync_write without init aio_read and aio_write, there will be a kernel oops, because the vfs code check the presence of (to read for example) read OR aio_read method, then it calls read if it's pointer is not null. It's not sufficient because if the read function is actually a do_sync_read, it calls aio_read but without checking the presence. I think a BUG_ON check can be more useful.
Signed-off-by: Marco Stornelli <marco.stornelli@gmail.com>
回调到generic_file_aio_read。
1292 ssize_t 1293 generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, 1294 unsigned long nr_segs, loff_t pos) 1295 {
1369 do_generic_file_read(filp, ppos, &desc, file_read_actor); -->
1380 }
开始真正的读操作。
不管是读还是写,都会优先考虑cache。若无法命中,再进行传统意义上的actual read。
975 /** 976 * do_generic_file_read - generic file read routine 977 * @filp: the file to read 978 * @ppos: current file position 979 * @desc: read_descriptor 980 * @actor: read method 981 * 982 * This is a generic file read routine, and uses the 983 * mapping->a_ops->readpage() function for the actual low-level stuff. 984 * 985 * This is really ugly. But the goto's actually try to clarify some 986 * of the logic when it comes to error handling etc. 987 */ 988 static void do_generic_file_read(struct file *filp, loff_t *ppos, 989 read_descriptor_t *desc, read_actor_t actor) 990 { 991 struct address_space *mapping = filp->f_mapping;
1134 /* Start the actual read. The read will unlock the page. */ 1135 error = mapping->a_ops->readpage(filp, page); //假设cache没有命中,就继续向下读,没有命中真是一种糟糕的情况
1203 }
又见回调。
static struct address_space_operations yaffs_file_address_operations = { .readpage = yaffs_readpage, //--> .writepage = yaffs_writepage, #if (YAFFS_USE_WRITE_BEGIN_END > 0) .write_begin = yaffs_write_begin, .write_end = yaffs_write_end, #else .prepare_write = yaffs_prepare_write, .commit_write = yaffs_commit_write, #endif };
struct address_space_operations 为struct address_space的操作函数。
struct address_space 用于管理文件(struct inode)映射到内存的页面(struct page);
与之对应,address_space_operations 就是用来操作该文件映射到内存的页面,比如把内存中的修改写回文件、从文件中读入数据到页面缓冲等。
也就是说address_space结构与文件的对应:一个具体的文件在打开后,内核会在内存中为之建立一个struct inode结构,其中的i_mapping域指向一个address_space结构。这样,一个文件就对应一个address_space结构,一个 address_space与一个偏移量能够确定一个page cache 或swap cache中的一个页面。因此,当要寻址某个数据时,很容易根据给定的文件及数据在文件内的偏移量而找到相应的页面。
-- fs/yaffs2/yaffs_vfs.c -- static int yaffs_readpage(struct file *f, struct page *pg) { int ret; yaffs_trace(YAFFS_TRACE_OS, "yaffs_readpage"); ret = yaffs_readpage_unlock(f, pg); //--> yaffs_trace(YAFFS_TRACE_OS, "yaffs_readpage done"); return ret; }
看样子要读page:
/*
 * Reads the page with yaffs_readpage_nolock(), then calls UnlockPage() on
 * it whatever the read returned, and passes the read's result back to the
 * caller.
 */
static int yaffs_readpage_unlock(struct file *f, struct page *pg)
{
	int ret = yaffs_readpage_nolock(f, pg);	/* --> followed next */

	UnlockPage(pg);
	return ret;
}
这里出现了file指针,我们知道file是个vfs逻辑上的概念,可能多个file对应一个inode。看来逻辑file就快转化为相应的文件物理地址。
static int yaffs_readpage_nolock(struct file *f, struct page *pg) { ... ... /* FIXME: Can kmap fail? */ pg_buf = kmap(pg); /******************************** void *kmap(struct page *page) { might_sleep(); if (!PageHighMem(page)) return page_address(page); return kmap_high(page);, //将高端内存页映射到内核地址空间,返回映射的虚拟地址 } ********************************/
yaffs_gross_lock(dev);
ret = yaffs_file_rd(obj, pg_buf,
pg->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE); //-->
yaffs_gross_unlock(dev);
... ...
}
--文件系统:yaffs--
终于进入了yaffs_guts.c文件,该文件涉及到了yaffs的操作细节。
-- fs/yaffs2/yaffs_guts.c -- int yaffs_file_rd(struct yaffs_obj *in, u8 * buffer, loff_t offset, int n_bytes) { ... ... int n = n_bytes; //page size: 4k(arm) while (n > 0) { yaffs_addr_to_chunk(dev, offset, &chunk, &start); //chunk = (u32) (addr >> dev->chunk_shift); chunk++; /* OK now check for the curveball where the start and end are in * the same chunk. */ if ((start + n) < dev->data_bytes_per_chunk) n_copy = n; else n_copy = dev->data_bytes_per_chunk - start; cache = yaffs_find_chunk_cache(in, chunk);
/* If the chunk is already in the cache or it is less than
* a whole chunk or we're using inband tags then use the cache
* (if there is caching) else bypass the cache.
*/
if (cache || n_copy != dev->data_bytes_per_chunk ||
dev->param.inband_tags) {
/*先考虑是否在高缓中*/ } else { /* A full chunk. Read directly into the buffer. */ yaffs_rd_data_obj(in, chunk, buffer); //--> } n -= n_copy; offset += n_copy; buffer += n_copy; n_done += n_copy; } return n_done; }
-->
/*inode_chunk:yaffs中的chunk下标*/ static int yaffs_rd_data_obj(struct yaffs_obj *in, int inode_chunk, u8 * buffer) { int nand_chunk = yaffs_find_chunk_in_file(in, inode_chunk, NULL); if (nand_chunk >= 0) return yaffs_rd_chunk_tags_nand(in->my_dev, nand_chunk, buffer, NULL); //--> else { ... ... } }
有必要了解下:
int nand_chunk = yaffs_find_chunk_in_file(in, inode_chunk, NULL);
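第二个参数inode_chunk是逻辑地址,即文件内以chunk为单位排序后的下标。chunk的大小由 dev->data_bytes_per_chunk 决定:默认(yaffs1)为512字节,yaffs2 下 total_bytes_per_chunk 取自 mtd->writesize。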
YAFFS2文件系统使用树结点结构来完成逻辑chunk地址与物理地址的映射。显然,经过此函数的处理,就找到了逻辑下标(inode_chunk)对应的物理下标(nand_chunk)。树结点用Tnode表示。
关于Tnode,涉及对yaffs实现细节的理解。总之,yaffs作为文件系统就要管理物理页面,物理页面对应着逻辑chunk;出于文件查找、文件扩展等效率方面的考虑,采用了树这种数据结构。具体可参考有关yaffs的论文。
-->
--fs/yaffs2/yaffs_nand.c--
int yaffs_rd_chunk_tags_nand(struct yaffs_dev *dev, int nand_chunk, u8 *buffer, struct yaffs_ext_tags *tags) { ... ... if (dev->param.read_chunk_tags_fn) result = dev->param.read_chunk_tags_fn(dev, realigned_chunk, buffer, tags); //--> ... ... }
一个看似不一样的回调,赋值在这里:
--yaffs_vfs.c--static struct super_block *yaffs_internal_read_super(int yaffs_version, struct super_block *sb, void *data, int silent){ param->read_chunk_tags_fn = nandmtd2_read_chunk_tags;}
进入yaffs_mtdif2.c文件,看来要到mtd层咯。
--fs/yaffs2/yaffs_mtdif2.c--
int nandmtd2_read_chunk_tags(struct yaffs_dev *dev, int nand_chunk, u8 *data, struct yaffs_ext_tags *tags) { ... ... if (dev->param.inband_tags || (data && !tags)) retval = mtd->read(mtd, addr, dev->param.total_bytes_per_chunk, &dummy, data); //-->进入mtd层 else if (tags) { ops.mode = MTD_OOB_AUTO; ops.ooblen = packed_tags_size; ops.len = data ? dev->data_bytes_per_chunk : packed_tags_size; ops.ooboffs = 0; ops.datbuf = data; ops.oobbuf = yaffs_dev_to_lc(dev)->spare_buffer; retval = mtd->read_oob(mtd, addr, &ops); } ... ... }
在进入mtd层之前,简单的提下yaffs中的各种回调是在何时挂好的呢?yaffs又是如何注册进的内核?
--------------------
注册文件系统:
--------------------
--fs/yaffs2/yaffs_vfs.c-- static int __init init_yaffs_fs(void) { int error = 0; struct file_system_to_install *fsinst; ... ... fsinst = fs_to_install; //创建文件系统进程入口 /*********************************************************** static struct file_system_to_install fs_to_install[] = { {&yaffs_fs_type, 0}, {&yaffs2_fs_type, 0}, {NULL, 0} }; ************************************************************/ while (fsinst->fst && !error) { error = register_filesystem(fsinst->fst); //注册文件系统 if (!error) fsinst->installed = 1; fsinst++; } ... ... return error; }
注册文件系统后,yaffs2挂上fs list,开始超级块操作:
/*
 * Filesystem type descriptor registered under the name "yaffs2".
 * .get_sb points at yaffs2_read_super, .kill_sb at kill_block_super, and
 * .fs_flags is FS_REQUIRES_DEV.
 */
static struct file_system_type yaffs2_fs_type = {
.owner = THIS_MODULE,
.name = "yaffs2",
.get_sb = yaffs2_read_super, //-->
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
读取超级块。
/*
 * .get_sb callback of yaffs2_fs_type.
 *
 * Forwards all of its arguments to get_sb_bdev(), supplying
 * yaffs2_internal_read_super_mtd as the fill_super callback, and returns
 * get_sb_bdev()'s result.
 */
static int yaffs2_read_super(struct file_system_type *fs,
int flags, const char *dev_name, void *data,
struct vfsmount *mnt)
{
return get_sb_bdev(fs, flags, dev_name, data,
yaffs2_internal_read_super_mtd, mnt); //-->
}
----> mount_bdev函数中调用 fill_super, 也就是 yaffs2_internal_read_super_mtd
int get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int), struct vfsmount *mnt) { struct dentry *root; //获得了超级块指针,如果s->s_root,也就是目录挂载点为空的化,那就要填充超级块 root = mount_bdev(fs_type, flags, dev_name, data, fill_super); //--> if (IS_ERR(root)) return PTR_ERR(root); mnt->mnt_root = root; mnt->mnt_sb = root->d_sb; return 0; }
----> 再看一眼 yaffs2_read_super:传给 get_sb_bdev 的 fill_super 回调就是 yaffs2_internal_read_super_mtd,由它负责填充超级块
/*
 * Passes yaffs2_internal_read_super_mtd to get_sb_bdev() as the fill_super
 * callback, together with the unchanged mount arguments, and returns
 * get_sb_bdev()'s result.
 */
static int yaffs2_read_super(struct file_system_type *fs,
int flags, const char *dev_name, void *data,
struct vfsmount *mnt)
{
return get_sb_bdev(fs, flags, dev_name, data,
yaffs2_internal_read_super_mtd, mnt); //-->
}
---->
/*
 * fill_super callback for yaffs2: calls yaffs_internal_read_super() with
 * yaffs_version 2 and converts its pointer result into an int, 0 when the
 * result is non-NULL and -EINVAL when it is NULL.
 */
static int yaffs2_internal_read_super_mtd(struct super_block *sb, void *data, int silent) { return yaffs_internal_read_super(2, sb, data, silent) ? 0 : -EINVAL; }
----> 填充传入的super block:为 yaffs_dev 和 yaffs_linux_context kmalloc出空间,填充各字段后返回 sb。
static struct super_block *yaffs_internal_read_super(int yaffs_version,
struct super_block *sb,
void *data, int silent)
{
int n_blocks;
struct inode *inode = NULL;
struct dentry *root;
struct yaffs_dev *dev = 0;
char devname_buf[BDEVNAME_SIZE + 1];
struct mtd_info *mtd;
int err;
char *data_str = (char *)data;
struct yaffs_linux_context *context = NULL;
struct yaffs_param *param;
int read_only = 0;
struct yaffs_options options;
unsigned mount_id;
int found;
struct yaffs_linux_context *context_iterator;
struct list_head *l;
if (!sb) {
printk(KERN_INFO "yaffs: sb is NULL\n");
return NULL;
}
sb->s_magic = YAFFS_MAGIC;
sb->s_op = &yaffs_super_ops; //super_block层的操作函数
sb->s_flags |= MS_NOATIME;
read_only = ((sb->s_flags & MS_RDONLY) != 0);
#ifdef YAFFS_COMPILE_EXPORTFS
sb->s_export_op = &yaffs_export_ops;
#endif
... ...
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
... ...
/* Check it's an mtd device..... */
if (MAJOR(sb->s_dev) != MTD_BLOCK_MAJOR)
return NULL; /* This isn't an mtd device */
/* --判断是mtd,开始mtd相关操作-- */
/* Get the device */
mtd = get_mtd_device(NULL, MINOR(sb->s_dev)); //MTD_BLOCK_MAJOR
if (!mtd) {
yaffs_trace(YAFFS_TRACE_ALWAYS,
"yaffs: MTD device #%u doesn't appear to exist",
MINOR(sb->s_dev));
return NULL;
}
/* Check it's NAND */
if (mtd->type != MTD_NANDFLASH) {
yaffs_trace(YAFFS_TRACE_ALWAYS,
"yaffs: MTD device is not NAND it's type %d",
mtd->type);
return NULL;
}
/* 获得mtd之后,检查mtd各项及操作函数 */
/* OK, so if we got here, we have an MTD that's NAND and looks
* like it has the right capabilities
* Set the struct yaffs_dev up for mtd
*/
if (!read_only && !(mtd->flags & MTD_WRITEABLE)) {
read_only = 1;
printk(KERN_INFO
"yaffs: mtd is read only, setting superblock read only\n"
);
sb->s_flags |= MS_RDONLY;
}
/* struct yaffs_dev */
dev = kmalloc(sizeof(struct yaffs_dev), GFP_KERNEL);
context = kmalloc(sizeof(struct yaffs_linux_context), GFP_KERNEL);
... ...
dev->os_context = context;
INIT_LIST_HEAD(&(context->context_list));
context->dev = dev;
context->super = sb;
dev->read_only = read_only;
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 5, 0))
sb->s_fs_info = dev;
#else
sb->u.generic_sbp = dev;
#endif
dev->driver_context = mtd;
/* struct yaffs_param */ param->name = mtd->name; /* Set up the memory size parameters.... */ n_blocks = YCALCBLOCKS(mtd->size, (YAFFS_CHUNKS_PER_BLOCK * YAFFS_BYTES_PER_CHUNK)); /**************************************************************** #define YAFFS_BYTES_PER_SPARE 16 #define YAFFS_BYTES_PER_CHUNK 512 #define YAFFS_CHUNK_SIZE_SHIFT 9 #define YAFFS_CHUNKS_PER_BLOCK 32 #define YAFFS_BYTES_PER_BLOCK (YAFFS_CHUNKS_PER_BLOCK*YAFFS_BYTES_PER_CHUNK) ****************************************************************/ param->start_block = 0; param->end_block = n_blocks - 1; param->chunks_per_block = YAFFS_CHUNKS_PER_BLOCK; //32 param->total_bytes_per_chunk = YAFFS_BYTES_PER_CHUNK; //512 param->n_reserved_blocks = 5; param->n_caches = (options.no_cache) ? 0 : 10; param->inband_tags = options.inband_tags; #ifdef CONFIG_YAFFS_DISABLE_LAZY_LOAD param->disable_lazy_load = 1; #endif #ifdef CONFIG_YAFFS_XATTR param->enable_xattr = 1; #endif /** * struct yaffs_param 的填充 * 包括yaffs所有信息,操作函数等。 */ /* ... and the functions. */ if (yaffs_version == 2) { param->write_chunk_tags_fn = nandmtd2_write_chunk_tags; param->read_chunk_tags_fn = nandmtd2_read_chunk_tags; param->bad_block_fn = nandmtd2_mark_block_bad; param->query_block_fn = nandmtd2_query_block; yaffs_dev_to_lc(dev)->spare_buffer = kmalloc(mtd->oobsize, GFP_NOFS); param->is_yaffs2 = 1; #if (LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 17)) param->total_bytes_per_chunk = mtd->writesize; param->chunks_per_block = mtd->erasesize / mtd->writesize; #else param->total_bytes_per_chunk = mtd->oobblock; param->chunks_per_block = mtd->erasesize / mtd->oobblock; #endif n_blocks = YCALCBLOCKS(mtd->size, mtd->erasesize); param->start_block = 0; param->end_block = n_blocks - 1; } else { ... ... } ... ... err = yaffs_guts_initialise(dev); //对dev赋值并检验。yaffs_gut.c --> ... ... 
if (!inode) return NULL; /*索引节点操作的接口函数*/ inode->i_op = &yaffs_dir_inode_operations; inode->i_fop = &yaffs_dir_operations; yaffs_trace(YAFFS_TRACE_OS, "yaffs_read_super: got root inode"); root = d_alloc_root(inode); yaffs_trace(YAFFS_TRACE_OS, "yaffs_read_super: d_alloc_root done"); if (!root) { iput(inode); return NULL; } sb->s_root = root; sb->s_dirt = !dev->is_checkpointed; yaffs_trace(YAFFS_TRACE_ALWAYS, "yaffs_read_super: is_checkpointed %d", dev->is_checkpointed); yaffs_trace(YAFFS_TRACE_OS, "yaffs_read_super: done"); return sb; }
----> 完成对dev赋值并检验
int yaffs_guts_initialise(struct yaffs_dev *dev)
{
... ...
if (!init_failed && !yaffs_init_blocks(dev))
yaffs_init_tnodes_and_objs(dev);
if (!init_failed && !yaffs_create_initial_dir(dev))
init_failed = 1;
... ...
}
--mtd层--
mtd->read( ) 正式进入mtd层的地界,又是在何时挂上具体的操作函数?
nand_scan,nand驱动中相当面熟的函数。即使看不到它,十有八九也会看到它封装的nand_scan_ident和nand_scan_tail。
打开nand_scan_tail瞧一瞧:
int nand_scan_tail(struct mtd_info *mtd)
{
... ...
case NAND_ECC_HW:
/* Use standard hwecc read page function ? */
if (!chip->ecc.read_page)
chip->ecc.read_page = nand_read_page_hwecc;
if (!chip->ecc.write_page)
chip->ecc.write_page = nand_write_page_hwecc;
if (!chip->ecc.read_page_raw)
chip->ecc.read_page_raw = nand_read_page_raw;
if (!chip->ecc.write_page_raw)
chip->ecc.write_page_raw = nand_write_page_raw;
if (!chip->ecc.read_oob)
chip->ecc.read_oob = nand_read_oob_std;
if (!chip->ecc.write_oob)
chip->ecc.write_oob = nand_write_oob_std;
... ...
/* Fill in remaining MTD driver data */
mtd->type = MTD_NANDFLASH;
mtd->flags = (chip->options & NAND_ROM) ? MTD_CAP_ROM : MTD_CAP_NANDFLASH;
mtd->erase = nand_erase;
mtd->point = NULL;
mtd->unpoint = NULL;
mtd->read = nand_read; //-->
mtd->write = nand_write;
mtd->panic_write = panic_nand_write;
mtd->read_oob = nand_read_oob;
mtd->write_oob = nand_write_oob;
mtd->sync = nand_sync;
mtd->lock = NULL;
mtd->unlock = NULL;
mtd->suspend = nand_suspend;
mtd->resume = nand_resume;
mtd->block_isbad = nand_block_isbad;
mtd->block_markbad = nand_block_markbad;
mtd->writebufsize = mtd->writesize;
/* propagate ecc.layout to mtd_info */
mtd->ecclayout = chip->ecc.layout;
/* Check, if we should skip the bad block table scan */
if (chip->options & NAND_SKIP_BBTSCAN)
return 0;
/* Build bad block table */
return chip->scan_bbt(mtd);
}
也正如注释所言:
/** * nand_scan_tail - [NAND Interface] Scan for the NAND device * @mtd: MTD device structure * * This is the second phase of the normal nand_scan() function. It * fills out all the uninitialized function pointers with the defaults * and scans for a bad block table if appropriate. */
nand驱动中调用nand_scan,便为该nandFlash设备的mtd层提供了上层接口。
--drivers/mtd/nand/nand_base.c -- static int nand_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, uint8_t *buf) { struct nand_chip *chip = mtd->priv; int ret; /* Do not allow reads past end of device */ if ((from + len) > mtd->size) return -EINVAL; if (!len) return 0; nand_get_device(chip, mtd, FL_READING); chip->ops.len = len; chip->ops.datbuf = buf; chip->ops.oobbuf = NULL; ret = nand_do_read_ops(mtd, from, &chip->ops); //Read data with ECC--> *retlen = chip->ops.retlen; nand_release_device(mtd); return ret; }
---->
static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { while(1) { ... ... /* Now read the page into the buffer */ if (unlikely(ops->mode == MTD_OOB_RAW)) ret = chip->ecc.read_page_raw(mtd, chip, bufpoi, page); else if (!aligned && NAND_SUBPAGE_READ(chip) && !oob) ret = chip->ecc.read_subpage(mtd, chip, col, bytes, bufpoi); else ret = chip->ecc.read_page(mtd, chip, bufpoi, page); //--> if (ret < 0) break; ... ... }
我X,又见回调!不过在之前的nand_scan_tail中已挂上了nand_read_page_hwecc。
--drivers/mtd/nand/nand_base.c -- static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int page) { int i, eccsize = chip->ecc.size; int eccbytes = chip->ecc.bytes; int eccsteps = chip->ecc.steps; uint8_t *p = buf; uint8_t *ecc_calc = chip->buffers->ecccalc; uint8_t *ecc_code = chip->buffers->ecccode; uint32_t *eccpos = chip->ecc.layout->eccpos; /* 最终导向具体nand驱动中的xxx_read*/ for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { chip->ecc.hwctl(mtd, NAND_ECC_READ); chip->read_buf(mtd, p, eccsize); chip->ecc.calculate(mtd, p, &ecc_calc[i]); } chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); for (i = 0; i < chip->ecc.total; i++) ecc_code[i] = chip->oob_poi[eccpos[i]]; eccsteps = chip->ecc.steps; p = buf; for (i = 0 ; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { int stat; stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]); if (stat < 0) mtd->ecc_stats.failed++; else mtd->ecc_stats.corrected += stat; } return 0; }
--硬件驱动层--
这里以davinci_nand.c为例,DaVinci 是TI的一款架构。
部分代码:
^^^^^^^^^^^ info->dev = &pdev->dev; info->base = base; info->vaddr = vaddr; ^^^^^^^^^^^ info->mtd.priv = &info->chip; info->mtd.name = dev_name(&pdev->dev); info->mtd.owner = THIS_MODULE; info->mtd.dev.parent = &pdev->dev; ^^^^^^^^^^^ info->chip.IO_ADDR_R = vaddr; info->chip.IO_ADDR_W = vaddr; info->chip.chip_delay = 0; info->chip.select_chip = nand_davinci_select_chip; /* options such as NAND_USE_FLASH_BBT or 16-bit widths */ info->chip.options = pdata->options; info->chip.bbt_td = pdata->bbt_td; info->chip.bbt_md = pdata->bbt_md; /* Set address of hardware control function */ info->chip.cmd_ctrl = nand_davinci_hwcontrol; info->chip.dev_ready = nand_davinci_dev_ready; /* Speed up buffer I/O */ info->chip.read_buf = nand_davinci_read_buf; //读 --> info->chip.write_buf = nand_davinci_write_buf; ^^^^^^^^^^^ info->ioaddr = (uint32_t __force) vaddr; info->current_cs = info->ioaddr; info->core_chipsel = pdev->id; info->mask_chipsel = pdata->mask_chipsel; /* use nandboot-capable ALE/CLE masks by default */ info->mask_ale = pdata->mask_ale ? : MASK_ALE; info->mask_cle = pdata->mask_cle ? : MASK_CLE;
这部分的思路很清晰,就是填充info指向的代表nand设备的结构体,其中包含了对函数指针chip.read_buf 的赋值。
其中包含了两个结构体:mtd_info 和 nand_chip。
驱动代码中出现了nand_scan_ident和nand_scan_tail,也就是mtd层挂钩子的过程。
/* Scan to find existence of the device(s) */ ret = nand_scan_ident(&info->mtd, pdata->mask_chipsel ? 2 : 1); if (ret < 0) { dev_dbg(&pdev->dev, "no NAND chip(s) found\n"); goto err_scan; }
... ...
ret = nand_scan_tail(&info->mtd);
if (ret < 0)
goto err_scan;
驱动的结尾调用add_mtd_device函数向内核注册mtd设备。
-->
nand_davinci_read_buf是要我们自己去实现,参考使用nand的时序图,比如:

具体的说就是gpio的控制。
小小的看下该回调的函数:
/*
 * chip->read_buf implementation of the DaVinci NAND driver.
 *
 * Reads from chip->IO_ADDR_R into buf using the widest access that the
 * low bits of the buffer address and of len allow: 32-bit accesses
 * (len >> 2 of them) when both are multiples of 4, 16-bit accesses
 * (len >> 1) when both are multiples of 2, and byte accesses (len)
 * otherwise.
 */
static void nand_davinci_read_buf(struct mtd_info *mtd, uint8_t *buf, int len) { struct nand_chip *chip = mtd->priv; if ((0x03 & ((unsigned)buf)) == 0 && (0x03 & len) == 0) ioread32_rep(chip->IO_ADDR_R, buf, len >> 2); else if ((0x01 & ((unsigned)buf)) == 0 && (0x01 & len) == 0) ioread16_rep(chip->IO_ADDR_R, buf, len >> 1); else ioread8_rep(chip->IO_ADDR_R, buf, len); }
按缓冲区地址和长度的对齐情况选择相应位宽,从 chip->IO_ADDR_R 读出数据,最后调到io口最底层的“原子”操作 __raw_readsb、__raw_readsw、__raw_readsl。
/* ioread32_rep() expands directly to __raw_readsl() with the same three arguments. */
#define ioread32_rep(p,d,c) __raw_readsl(p,d,c)
/* * Generic IO read/write. These perform native-endian accesses. Note * that some architectures will want to re-define __raw_{read,write}w. */ extern void __raw_readsb(const void __iomem *addr, void *data, int bytelen); extern void __raw_readsw(const void __iomem *addr, void *data, int wordlen); extern void __raw_readsl(const void __iomem *addr, void *data, int longlen);
__raw_readsl 这类基础的“原子”函数,用汇编实现是必须的。下面以同族的 __raw_readsb(arch/arm/lib/io-readsb.S)为例:
/*
 *  linux/arch/arm/lib/io-readsb.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

@ Register usage as visible in the code below: every ldrb reads from [r0],
@ results are stored through r1 (post-incremented), and r2 is the remaining
@ byte count that the subs/tst/ands instructions test.

.Linsb_align:	rsb	ip, ip, #4
		cmp	ip, r2
		movgt	ip, r2
		cmp	ip, #2
		ldrb	r3, [r0]
		strb	r3, [r1], #1
		ldrgeb	r3, [r0]
		strgeb	r3, [r1], #1
		ldrgtb	r3, [r0]
		strgtb	r3, [r1], #1
		subs	r2, r2, ip
		bne	.Linsb_aligned

ENTRY(__raw_readsb)
		teq	r2, #0		@ do we have to check for the zero len?
		moveq	pc, lr
		ands	ip, r1, #3
		bne	.Linsb_align

.Linsb_aligned:	stmfd	sp!, {r4 - r6, lr}

		subs	r2, r2, #16
		bmi	.Linsb_no_16

.Linsb_16_lp:	ldrb	r3, [r0]
		ldrb	r4, [r0]
		ldrb	r5, [r0]
		mov	r3, r3, put_byte_0
		ldrb	r6, [r0]
		orr	r3, r3, r4, put_byte_1
		ldrb	r4, [r0]
		orr	r3, r3, r5, put_byte_2
		ldrb	r5, [r0]
		orr	r3, r3, r6, put_byte_3
		ldrb	r6, [r0]
		mov	r4, r4, put_byte_0
		ldrb	ip, [r0]
		orr	r4, r4, r5, put_byte_1
		ldrb	r5, [r0]
		orr	r4, r4, r6, put_byte_2
		ldrb	r6, [r0]
		orr	r4, r4, ip, put_byte_3
		ldrb	ip, [r0]
		mov	r5, r5, put_byte_0
		ldrb	lr, [r0]
		orr	r5, r5, r6, put_byte_1
		ldrb	r6, [r0]
		orr	r5, r5, ip, put_byte_2
		ldrb	ip, [r0]
		orr	r5, r5, lr, put_byte_3
		ldrb	lr, [r0]
		mov	r6, r6, put_byte_0
		orr	r6, r6, ip, put_byte_1
		ldrb	ip, [r0]
		orr	r6, r6, lr, put_byte_2
		orr	r6, r6, ip, put_byte_3
		stmia	r1!, {r3 - r6}

		subs	r2, r2, #16
		bpl	.Linsb_16_lp

		tst	r2, #15
		ldmeqfd	sp!, {r4 - r6, pc}

.Linsb_no_16:	tst	r2, #8
		beq	.Linsb_no_8

		ldrb	r3, [r0]
		ldrb	r4, [r0]
		ldrb	r5, [r0]
		mov	r3, r3, put_byte_0
		ldrb	r6, [r0]
		orr	r3, r3, r4, put_byte_1
		ldrb	r4, [r0]
		orr	r3, r3, r5, put_byte_2
		ldrb	r5, [r0]
		orr	r3, r3, r6, put_byte_3
		ldrb	r6, [r0]
		mov	r4, r4, put_byte_0
		ldrb	ip, [r0]
		orr	r4, r4, r5, put_byte_1
		orr	r4, r4, r6, put_byte_2
		orr	r4, r4, ip, put_byte_3
		stmia	r1!, {r3, r4}

.Linsb_no_8:	tst	r2, #4
		beq	.Linsb_no_4

		ldrb	r3, [r0]
		ldrb	r4, [r0]
		ldrb	r5, [r0]
		ldrb	r6, [r0]
		mov	r3, r3, put_byte_0
		orr	r3, r3, r4, put_byte_1
		orr	r3, r3, r5, put_byte_2
		orr	r3, r3, r6, put_byte_3
		str	r3, [r1], #4

.Linsb_no_4:	ands	r2, r2, #3
		ldmeqfd	sp!, {r4 - r6, pc}

		cmp	r2, #2
		ldrb	r3, [r0]
		strb	r3, [r1], #1
		ldrgeb	r3, [r0]
		strgeb	r3, [r1], #1
		ldrgtb	r3, [r0]
		strgtb	r3, [r1]

		ldmfd	sp!, {r4 - r6, pc}
ENDPROC(__raw_readsb)
OK,流水账完毕,粗略的浏览,过程中的每一部分都是一门学问,展开来去那就是一篇篇的论文。借一校友的社区签名:“好的论文就像一个美女,研读论文的过程就是脱衣服的过程。”
本文详细跟踪了Linux内核中read函数的执行流程,从用户空间的read调用开始,深入到文件系统层(VFS)、YAFFS文件系统层,直至硬件层的读取操作。解析了内核服务例程、文件操作结构体、超级块、内存映射、页面缓存等关键组件的作用,揭示了内核如何高效地进行文件读取操作。
168

被折叠的 条评论
为什么被折叠?



