linux 0.11 内核学习 -- buffer.c

最新推荐文章于 2022-11-07 21:04:29 发布

iteye_11486

最新推荐文章于 2022-11-07 21:04:29 发布

阅读量97

点赞数

文章标签：运维数据结构与算法开发工具

/*
 * buffer.c 程序用于对高速缓冲区(池)进行操作和管理。高速缓冲
 * 区位于内核代码和主内存区之间。
 *
 * |---|---|------------------|---------------------|-------------------|
 * |   |   |      * * *       |       buffer        |                   |
 * |---|---|------------------|---------------------|-------------------|
 *       |                              /|\
 *       |------------------------------------|
 *      \|/
 *  buffer_head (list)
 */

/*
 *  linux/fs/buffer.c
 */

/*
 *  'buffer.c' implements the buffer-cache functions. Race-conditions have
 * been avoided by NEVER letting an interrupt change a buffer (except for the
 * data, of course), but instead letting the caller do it. NOTE! As interrupts
 * can wake up a caller, some cli-sti sequences are needed to check for
 * sleep-on-calls. These should be extremely quick, though (I hope).
 */

/*
 * NOTE! There is one discordant note here: checking floppies for
 * disk change. This is where it fits best, I think, as it should
 * invalidate changed floppy-disk-caches.
 */

#include <stdarg.h>

#include <linux/config.h>

#include <linux/sched.h>

#include <linux/kernel.h>

#include <asm/system.h>

#include <asm/io.h>

/* File-scope state of the buffer cache. */

extern int end; // symbol produced by the linker (ld) that marks the end of the program; it stands for a memory address

/* First buffer head; the array of buffer heads starts right at &end. */
struct buffer_head * start_buffer = (struct buffer_head *) &end;

/* Hash buckets, indexed with hash(dev, block); each bucket heads a b_next/b_prev chain. */
struct buffer_head * hash_table[NR_HASH];

/* Head of the circular, doubly linked free list (b_next_free/b_prev_free). */
static struct buffer_head * free_list;

/* Tasks sleeping in getblk() until some buffer is released; woken by brelse(). */
static struct task_struct * buffer_wait = NULL;

/* Number of buffers; counted up by buffer_init(). */
int NR_BUFFERS = 0;

/*
 * Wait until the given buffer is unlocked.
 *
 * The test-and-sleep loop is bracketed by cli()/sti(): as the file
 * header notes, interrupts can wake up a caller, so the check of
 * b_lock and the call to sleep_on() must not be separated by one.
 */
static inline void wait_on_buffer(struct buffer_head * bh)
{
	cli();
	for (;;) {
		if (!bh->b_lock)
			break;
		/* still locked: sleep on this buffer's wait queue and re-check */
		sleep_on(&bh->b_wait);
	}
	sti();
}

/*
 * Synchronise the in-memory data with the devices (storage media).
 *
 * Always returns 0.
 */
int sys_sync(void)
{
	int i;
	struct buffer_head * bh;

	/*
	 * sync_inodes() brings the inodes in inode_table in line with the
	 * disk, but only by way of the buffer cache. Syncing therefore has
	 * two stages:
	 *   1. data structures -> buffer-cache blocks, done by the code that
	 *      owns the data structure;
	 *   2. buffer-cache blocks -> disk blocks, done by the buffer code.
	 * sync_inodes() never talks to the disk itself; it only gets as far
	 * as the buffers.
	 *
	 *   inode ------- buffer cache --------- disk
	 */
	sync_inodes();		/* write out inodes into buffers */
	bh = start_buffer;
	for (i=0 ; i<NR_BUFFERS ; i++,bh++) {
		wait_on_buffer(bh);		/* wait for the buffer to be unlocked */
		if (bh->b_dirt)
			ll_rw_block(WRITE,bh);	/* issue a block write request */
	}
	return 0;
}

/*
 * Issue a write request for every dirty buffer that belongs to dev.
 * b_dev is re-checked after wait_on_buffer(), since the buffer may have
 * been reassigned while we slept.
 */
static void sync_dev_buffers(int dev)
{
	struct buffer_head * cur = start_buffer;
	int n;

	for (n = 0; n < NR_BUFFERS; n++, cur++) {
		if (cur->b_dev == dev) {
			wait_on_buffer(cur);
			if (cur->b_dev == dev && cur->b_dirt)
				ll_rw_block(WRITE, cur);
		}
	}
}

/*
 * Synchronise the buffer cache with one device.
 *
 * Three steps: write out the buffers of dev that are dirty now
 * (buffer cache -> medium), let sync_inodes() put the inode data into
 * the buffer cache (inode -> buffer cache), then write out the buffers
 * of dev once more (buffer cache -> medium).
 *
 * Always returns 0.
 */
int sync_dev(int dev)
{
	sync_dev_buffers(dev);
	sync_inodes();
	sync_dev_buffers(dev);
	return 0;
}

/*
 * Invalidate every buffer in the cache that belongs to the given device
 * by clearing its b_uptodate and b_dirt flags.
 */
void inline invalidate_buffers(int dev)
{
	int i;
	struct buffer_head * bh;

	bh = start_buffer;
	for (i=0 ; i<NR_BUFFERS ; i++,bh++) {
		if (bh->b_dev != dev)
			continue;
		wait_on_buffer(bh);
		/* re-check: the buffer may have changed owner while we slept */
		if (bh->b_dev == dev)
			bh->b_uptodate = bh->b_dirt = 0;
	}
}

/*
 * This routine checks whether a floppy has been changed, and
 * invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 *
 * NOTE! Although currently this is only for floppies, the idea is
 * that any additional removable block-device will use this routine,
 * and that mount/open needn't know that floppies/whatever are
 * special.
 *
 * 该子程序检查一个软盘是否已经被更换，如果已经更换就使高速缓冲中与该软驱
 * 对应的所有缓冲区无效。该子程序相对来说较慢，所以我们要尽量少使用它。
 */

/*
 * Check whether the disk in a drive has been changed and, if so, drop
 * everything cached for that device.
 *
 * Only devices with major number 2 (the floppy, per the original
 * annotation) are handled, and only when floppy_change() reports a
 * change for the drive selected by the low two bits of dev.
 */
void check_disk_change(int dev)
{
	int slot;

	/* floppy_change() is not consulted at all for other majors */
	if (MAJOR(dev) != 2 || !floppy_change(dev & 0x03))
		return;

	/* the disk was changed: release the super block(s) of this device */
	for (slot = 0; slot < NR_SUPER; slot++) {
		if (super_block[slot].s_dev == dev)
			put_super(super_block[slot].s_dev);
	}
	invalidate_inodes(dev);		/* inodes of the device are no longer valid */
	invalidate_buffers(dev);	/* neither are its cached blocks */
}

/*
 * Hash function: XOR of device and block number, reduced modulo NR_HASH.
 * The arguments are parenthesized so that expression arguments
 * (e.g. `a | b`) are combined as a whole and not split by precedence.
 */
#define _hashfn(dev,block) (((unsigned)((dev)^(block)))%NR_HASH)
/* The hash_table bucket for (dev, block); usable as an lvalue. */
#define hash(dev,block) hash_table[_hashfn(dev,block)]

/*
 * Unlink a buffer head from both lists it lives on: its hash chain
 * (b_next/b_prev) and the free list (b_next_free/b_prev_free).
 *
 * Panics if either free-list link of the buffer is NULL.
 */
static inline void remove_from_queues(struct buffer_head * bh)
{
/* remove from hash-queue */
	if (bh->b_next)
		bh->b_next->b_prev = bh->b_prev;
	if (bh->b_prev)
		bh->b_prev->b_next = bh->b_next;
	/*
	 * If the buffer is the first one of its chain, make the hash table
	 * entry point to the next buffer of that chain:
	 *
	 *   |------|--------|------|-------|-------|
	 *   |      |        |      |       |       |   hash_table
	 *   |------|--------|------|-------|-------|
	 *      |
	 *     \|/
	 *   a linked list (bh is its first block)
	 */
	if (hash(bh->b_dev,bh->b_blocknr) == bh)
		hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
/* remove from free list */
	if (!(bh->b_prev_free) || !(bh->b_next_free))
		panic("Free block list corrupted");
	bh->b_prev_free->b_next_free = bh->b_next_free;
	bh->b_next_free->b_prev_free = bh->b_prev_free;
	/* if the free list head is this buffer, advance the head */
	if (free_list == bh)
		free_list = bh->b_next_free;
}

/*
 * Append a buffer head to the tail of the free list and, if it has a
 * device, put it at the front of the matching hash chain.
 */
static inline void insert_into_queues(struct buffer_head * bh)
{
/* put at end of free list */
	bh->b_next_free = free_list;
	bh->b_prev_free = free_list->b_prev_free;
	free_list->b_prev_free->b_next_free = bh;
	free_list->b_prev_free = bh;
/* put the buffer in new hash-queue if it has a device */
	bh->b_prev = NULL;
	bh->b_next = NULL;
	if (!bh->b_dev)
		return;
	bh->b_next = hash(bh->b_dev,bh->b_blocknr);
	hash(bh->b_dev,bh->b_blocknr) = bh;
	/*
	 * The bucket may have been empty, in which case b_next is NULL and
	 * there is no old head whose back pointer needs updating.
	 */
	if (bh->b_next)
		bh->b_next->b_prev = bh;
}

/*
 * Look up the buffer head for (dev, block) in the hash table.
 *
 * Walks the chain of the bucket selected by hash(dev, block); several
 * (dev, block) pairs may share one bucket, so every entry is compared.
 * Returns the matching buffer head, or NULL if there is none.
 */
static struct buffer_head * find_buffer(int dev, int block)
{
	struct buffer_head * cand = hash(dev,block);

	while (cand) {
		if (cand->b_dev == dev && cand->b_blocknr == block)
			break;
		cand = cand->b_next;
	}
	return cand;
}

/*
 * Why like this, I hear you say... The reason is race-conditions.
 * As we don't lock buffers (unless we are reading them, that is),
 * something might happen to it while we sleep (ie a read-error
 * will force it bad). This shouldn't really happen currently, but
 * the code is ready.
 */

/*
 * Find the buffer head for (dev, block) in the hash table and take a
 * reference on it.
 *
 * Returns the buffer head with b_count incremented and the buffer
 * unlocked, or NULL if the block is not in the cache.
 */
struct buffer_head * get_hash_table(int dev, int block)
{
	struct buffer_head * bh;

	for (;;) {
		if (!(bh=find_buffer(dev,block)))
			return NULL;
		bh->b_count++;		/* take a reference */
		wait_on_buffer(bh);	/* may sleep until the buffer is unlocked */
		/* still the same block after the sleep? then we are done */
		if (bh->b_dev == dev && bh->b_blocknr == block)
			return bh;
		/* device or block number changed while we slept: undo and retry */
		bh->b_count--;
	}
}

/*
 * Ok, this is getblk, and it isn't very clear, again to hinder
 * race-conditions. Most of the code is seldom used, (ie repeating),
 * so it should be much more efficient than it looks.
 *
 * The algorithm is changed: hopefully better, and an elusive bug removed.
 */

/*
 * A weight used by getblk() to compare candidate buffers: the dirty flag
 * is shifted left by one and the lock flag is added, so the dirty flag
 * weighs more than the lock flag. getblk() prefers the lower value.
 */
#define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)

/*
 * Get the buffer for (dev, block) from the buffer cache.
 *
 * If get_hash_table() finds the block, that buffer is returned as is.
 * Otherwise the free list is scanned for an unused buffer (b_count == 0)
 * with the lowest BADNESS; the buffer is written out if it is dirty,
 * re-keyed to (dev, block) and re-inserted into the free list and the
 * hash table. The returned buffer holds a reference for the caller; a
 * freshly claimed buffer has b_count == 1 and b_uptodate == 0.
 *
 * May sleep; every time it could have slept, the conditions are checked
 * again and the whole search is restarted if they no longer hold.
 */
struct buffer_head * getblk(int dev,int block)
{
	struct buffer_head * tmp, * bh;

repeat:
	/* already in the cache? (leaves bh == NULL when it is not) */
	if ((bh = get_hash_table(dev,block)))
		return bh;
	/* not cached: scan the free list for the most suitable unused buffer */
	tmp = free_list;
	do {
		if (tmp->b_count)	/* in use: skip it */
			continue;
		if (!bh || BADNESS(tmp)<BADNESS(bh)) {
			bh = tmp;
			/* neither dirty nor locked: cannot do better */
			if (!BADNESS(tmp))
				break;
		}
/* and repeat until we find something good */
	} while ((tmp = tmp->b_next_free) != free_list);
	/* every buffer is in use: sleep until one is released, then retry */
	if (!bh) {
		sleep_on(&buffer_wait);
		goto repeat;
	}
	wait_on_buffer(bh);
	if (bh->b_count)	/* somebody grabbed it while we slept */
		goto repeat;
	while (bh->b_dirt) {
		sync_dev(bh->b_dev);	/* write the device's dirty buffers out */
		wait_on_buffer(bh);
		if (bh->b_count)	/* grabbed by somebody else again */
			goto repeat;
	}
/* NOTE!! While we slept waiting for this block, somebody else might */
/* already have added "this" block to the cache. check it */
	if (find_buffer(dev,block))
		goto repeat;
/* OK, FINALLY we know that this buffer is the only one of it's kind, */
/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
	bh->b_count=1;
	bh->b_dirt=0;
	bh->b_uptodate=0;
	/* unlink under the old key, re-key, then link under the new key */
	remove_from_queues(bh);
	bh->b_dev=dev;
	bh->b_blocknr=block;
	insert_into_queues(bh);
	return bh;
}

/* 释放指定的缓冲区 */

void brelse(struct buffer_head * buf)

{

// 等待该缓冲区解锁。引用计数递减1。唤醒等待空闲缓冲区的进程

if (!buf)

return;

wait_on_buffer(buf);

if (!(buf->b_count--))

panic("Trying to free free buffer");

wake_up(&buffer_wait);

}

/*
 * bread() reads a specified block and returns the buffer that contains
 * it. It returns NULL if the block was unreadable.
 *
 * 从设备上读取指定的数据块并返回含有数据的缓冲区。如果指定的块不存在
 * 则返回NULL.
 */

/*
 * Read one block of a device through the buffer cache.
 *
 * Returns the buffer holding the block (with the reference taken by
 * getblk()), or NULL if the data is still not up to date after the read
 * request, in which case the buffer has been released again.
 */
struct buffer_head * bread(int dev,int block)
{
	struct buffer_head * bh;

	if (!(bh=getblk(dev,block)))
		panic("bread: getblk returned NULL\n");
	/* data already valid: use it directly */
	if (bh->b_uptodate)
		return bh;
	/* otherwise issue a read request and wait for the buffer to unlock */
	ll_rw_block(READ,bh);
	wait_on_buffer(bh);
	if (bh->b_uptodate)
		return bh;
	/* the read failed: give the buffer back */
	brelse(bh);
	return NULL;
}

/*
 * Copy one block (BLOCK_SIZE bytes, moved as BLOCK_SIZE/4 longs with
 * "rep movsl") from address `from` to address `to`. "cld" makes the
 * copy run towards increasing addresses.
 *
 * NOTE(review): the clobber list names the same registers as the input
 * operands; newer GCC versions may reject that - confirm against the
 * compiler in use before changing it.
 */
#define COPYBLK(from,to) \
__asm__("cld\n\t" \
	"rep\n\t" \
	"movsl\n\t" \
	::"c" (BLOCK_SIZE/4),"S" (from),"D" (to) \
	:"cx","di","si")

/*
 * bread_page reads four buffers into memory at the desired address. It's
 * a function of its own, as there is some speed to be got by reading them
 * all at the same time, not waiting for one to be read, and then another
 * etc.
 */

/*
 * Read one page (four blocks) of a device into memory at `address`.
 *
 * b[] holds the four block numbers; an entry of 0 is skipped, but its
 * BLOCK_SIZE slot in the destination is still stepped over. All read
 * requests are issued first, and only then are the buffers waited for
 * and copied, so the reads can proceed together.
 */
void bread_page(unsigned long address,int dev,int b[4])
{
	struct buffer_head * bh[4];
	int i;

	/* pass 1: get the four buffer heads and start the reads */
	for (i=0 ; i<4 ; i++)
		if (b[i]) {
			if ((bh[i] = getblk(dev,b[i])))
				if (!bh[i]->b_uptodate)
					ll_rw_block(READ,bh[i]);
		} else
			bh[i] = NULL;
	/* pass 2: wait for each buffer, copy valid data out and release it */
	for (i=0 ; i<4 ; i++,address += BLOCK_SIZE)
		if (bh[i]) {
			wait_on_buffer(bh[i]);
			if (bh[i]->b_uptodate)
				COPYBLK((unsigned long) bh[i]->b_data,address);
			brelse(bh[i]);
		}
}

/*
 * Ok, breada can be used as bread, but additionally to mark other
 * blocks for reading as well. End the argument list with a negative
 * number.
 *
 * breada 可以象bread 一样使用，但会另外预读一些块。该函数参数列表
 * 需要使用一个负数来表明参数列表的结束。
 */

/*
 * Read block `first` of a device like bread(), and additionally request
 * read-ahead for the further block numbers passed as variable arguments.
 * The argument list must end with a negative number.
 *
 * Returns the buffer of block `first`, or NULL if its data is not up to
 * date after the read (the buffer is released in that case). The
 * read-ahead buffers are not waited for; the reference getblk() took on
 * each of them is dropped right away.
 */
struct buffer_head * breada(int dev,int first, ...)
{
	va_list args;
	struct buffer_head * bh, *tmp;

	va_start(args,first);
	if (!(bh=getblk(dev,first)))
		panic("bread: getblk returned NULL\n");
	if (!bh->b_uptodate)
		ll_rw_block(READ,bh);
	/* walk the remaining block numbers until the negative terminator */
	while ((first=va_arg(args,int))>=0) {
		tmp=getblk(dev,first);
		if (tmp) {
			/* the read-ahead request is for tmp, not for bh */
			if (!tmp->b_uptodate)
				ll_rw_block(READA,tmp);
			tmp->b_count--;
		}
	}
	va_end(args);
	wait_on_buffer(bh);	/* only the first block is waited for */
	if (bh->b_uptodate)
		return bh;
	brelse(bh);
	return (NULL);
}

/* 缓冲区初始化函数，参数buffer_end 是指定的缓冲区内存的末端 */

void buffer_init(long buffer_end)

{

struct buffer_head * h = start_buffer;

void * b;

int i;

/* 获得缓冲区的高端，存放在变量b中 */

if (buffer_end == 1<<20)

b = (void *) (640*1024);

else

b = (void *) buffer_end;

* h(start_buffer) b(buffer_end)

* /|/ /|/

* |-buffer_head--|----|***|-----buffer-------|----------------|

while ( (b -= BLOCK_SIZE) >= ((void *) (h+1)) ) // 直到无法分配

{

h->b_dev = 0;

h->b_dirt = 0;

h->b_count = 0;

h->b_lock = 0;

h->b_uptodate = 0;

h->b_wait = NULL;

/* 散列hash_table之后没有链表 */

h->b_next = NULL;

h->b_prev = NULL;

h->b_data = (char *) b;

/* 形成free list */

h->b_prev_free = h-1;

h->b_next_free = h+1;

h++;

NR_BUFFERS++;

if (b == (void *) 0x100000)

b = (void *) 0xA0000;

}

/* 形成双向链表 */

h--;

free_list = start_buffer;

free_list->b_prev_free = h;

h->b_next_free = free_list;

// 初始化hash 表（哈希表、散列表），置表中所有的指针为NULL

for (i=0;i<NR_HASH;i++)

hash_table[i]=NULL;

* 在该函数buffer_init调用之后，首先建立的是如下的两个内存结构：

* |-------------|

* | | hash_table[0->size] = NULL

* | * * * |

* |-------------|

* h(start_buffer) b(buffer_end)

* /|/ /|/

* |-buffer_head--|----|***|-----buffer-------|----------------|

* 现在如果是产生第一个一个新的请求的话，在函数getblk中首先是调用函数

* get_hash_table，在该函数中调用函数buffer_head，返回NULL（该哈希表中

* 不存在相应的缓冲块），然后寻找到一块，并相应的修改数据。

* |-------------|

* |-------------| hash_table[x] != NULL即是刚刚插入的一项

* | * * * |

* |-------------|

* h(start_buffer) b(buffer_end)

* /|/ /|/

* |-buffer_head--|----|***|-----buffer-------|----------------|

* 已经分配b_count != 0

* 下面就是继续这个过程

}

参考《linux内核完全注释》和网上相关资料