由于写 LAB8 时调试文件系统并发 bug 遇到了很大困难,这里决定读读 xv6 文件系统源码,来帮助调试
先运行 make qemu
,看看编译日志中哪里出现了 fs.img:
猜测:mkfs 是一个命令,他会把后边的一堆参数都编译到 fs.img。
后面那一堆参数会成为 fs.img 中的 “用户程序”。
再搜 mkfs:
这里可以看到 mkfs 其实是 mkfs.c 的直接编译产物
解析 mkfs.c
从注释来看,可知 xv6 磁盘结构的数据排布:
// Disk layout:
// [ boot block | sb block | log | inode blocks | free bit map | data blocks ]
// 磁盘结构:启动块,超级块,日志块,inode块,位图块,数据块
超级块的结构如下(定义于 fs.h):
// Disk layout:
// [ boot block | super block | log | inode blocks |
// free bit map | data blocks]
//
// mkfs computes the super block and builds an initial file system. The
// super block describes the disk layout:
struct superblock {
uint magic; // Must be FSMAGIC
uint size; // Size of file system image (blocks)
uint nblocks; // Number of data blocks
uint ninodes; // Number of inodes.
uint nlog; // Number of log blocks
uint logstart; // Block number of first log block
uint inodestart; // Block number of first inode block
uint bmapstart; // Block number of first free map block
};
磁盘上的 inode 定义如下:
// On-disk inode structure
struct dinode {
short type; // File type
short major; // Major device number (T_DEVICE only)
short minor; // Minor device number (T_DEVICE only)
short nlink; // Number of links to inode in file system
uint size; // Size of file (bytes)
uint addrs[NDIRECT+1]; // Data block addresses
};
阅读 mkfs.c 源码,可以理解这个代码在做什么:
1.写超级块到 fs.img
2.写根目录 inode 到 fs.img
3.写 . 和 … 节点 inodes 到 / 目录
4.把命令行参数 fs.img 后边的所有文件拷贝、写入到 fs.img 的根目录内
注释后的源码如下:
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>
#define stat xv6_stat // avoid clash with host struct stat
#include "kernel/types.h"
#include "kernel/fs.h"
#include "kernel/stat.h"
#include "kernel/param.h"
#ifndef static_assert
#define static_assert(a, b) do { switch (0) case 0: case (a): ; } while (0)
#endif
#define NINODES 200
// Disk layout:
// [ boot block | sb block | log | inode blocks | free bit map | data blocks ]
// 磁盘结构:启动块,超级块,日志块,inode块,位图块,数据块
int nbitmap = FSSIZE/(BSIZE*8) + 1; // 一共有 FSSIZE 个块需要记录,也就需要 FSSIZE 个 bits 需要记录。这里计算存放 FSSIZE 个 bits 所需的块数量
int ninodeblocks = NINODES / IPB + 1; // xv6 文件系统提供 200 个 inode,这里计算存放所有 inodes 需要的磁盘块数
int nlog = LOGSIZE; // 日志部分包含 30 个磁盘块
int nmeta; // Number of meta blocks (boot, sb, nlog, inode, bitmap)
int nblocks; // Number of data blocks
// 文件系统文件描述符
int fsfd;
struct superblock sb;
char zeroes[BSIZE];
uint freeinode = 1;
uint freeblock;
void balloc(int);
void wsect(uint, void*);
void winode(uint, struct dinode*);
void rinode(uint inum, struct dinode *ip);
void rsect(uint sec, void *buf);
uint ialloc(ushort type);
void iappend(uint inum, void *p, int n);
void die(const char *);
// convert to riscv byte order
ushort
xshort(ushort x)
{
ushort y;
uchar *a = (uchar*)&y;
a[0] = x;
a[1] = x >> 8;
return y;
}
uint
xint(uint x)
{
uint y;
uchar *a = (uchar*)&y;
a[0] = x;
a[1] = x >> 8;
a[2] = x >> 16;
a[3] = x >> 24;
return y;
}
int
main(int argc, char *argv[])
{
int i, cc, fd;
uint rootino, inum, off;
struct dirent de;
char buf[BSIZE];
struct dinode din;
static_assert(sizeof(int) == 4, "Integers must be 4 bytes!");
if(argc < 2){
fprintf(stderr, "Usage: mkfs fs.img files...\n");
exit(1);
}
assert((BSIZE % sizeof(struct dinode)) == 0);
assert((BSIZE % sizeof(struct dirent)) == 0);
// 打开 fs.img,文件描述符为 fsfd (全局变量)
fsfd = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0666);
// 打不开就报错
if(fsfd < 0)
die(argv[1]);
// 1 fs block = 1 disk sector
// 元数据块数量 = 启动块 + 超级块 + 日志块 + inode块 + bitmap块
nmeta = 2 + nlog + ninodeblocks + nbitmap;
// 数据块数量 = FS总数量 - 元数据块数量
nblocks = FSSIZE - nmeta;
// 记录超级块元数据
sb.magic = FSMAGIC;
sb.size = xint(FSSIZE);
sb.nblocks = xint(nblocks);
sb.ninodes = xint(NINODES);
sb.nlog = xint(nlog);
sb.logstart = xint(2);
sb.inodestart = xint(2+nlog);
sb.bmapstart = xint(2+nlog+ninodeblocks);
printf("nmeta %d (boot, super, log blocks %u inode blocks %u, bitmap blocks %u) blocks %d total %d\n",
nmeta, nlog, ninodeblocks, nbitmap, nblocks, FSSIZE);
// 计算第一个 datablock 的位置
freeblock = nmeta; // the first free block that we can allocate
// 对文件系统所有的块写入全零
for(i = 0; i < FSSIZE; i++)
wsect(i, zeroes);
// 对文件系统第一个 sector 写入 superblock 的内容
memset(buf, 0, sizeof(buf));
memmove(buf, &sb, sizeof(sb));
wsect(1, buf);
// 为根目录分配一个 inode (会写入磁盘)
rootino = ialloc(T_DIR);
assert(rootino == ROOTINO);
// 为根目录写入 . inode
bzero(&de, sizeof(de));
de.inum = xshort(rootino);
strcpy(de.name, ".");
iappend(rootino, &de, sizeof(de));
// 为根目录写入 .. inode
bzero(&de, sizeof(de));
de.inum = xshort(rootino);
strcpy(de.name, "..");
iappend(rootino, &de, sizeof(de));
// 把命令行参数后面跟着的所有文件拷贝到 fs.img 里,分配 inode
for(i = 2; i < argc; i++){
// get rid of "user/"
char *shortname;
if(strncmp(argv[i], "user/", 5) == 0)
shortname = argv[i] + 5;
else
shortname = argv[i];
assert(index(shortname, '/') == 0);
if((fd = open(argv[i], 0)) < 0)
die(argv[i]);
// Skip leading _ in name when writing to file system.
// The binaries are named _rm, _cat, etc. to keep the
// build operating system from trying to execute them
// in place of system binaries like rm and cat.
if(shortname[0] == '_')
shortname += 1;
inum = ialloc(T_FILE);
bzero(&de, sizeof(de));
de.inum = xshort(inum);
strncpy(de.name, shortname, DIRSIZ);
iappend(rootino, &de, sizeof(de));
while((cc = read(fd, buf, sizeof(buf))) > 0)
iappend(inum, buf, cc);
close(fd);
}
// fix size of root inode dir
// 配置好跟文件系统的 inode 的元数据
rinode(rootino, &din);
off = xint(din.size);
off = ((off/BSIZE) + 1) * BSIZE;
din.size = xint(off);
winode(rootino, &din);
balloc(freeblock);
exit(0);
}
void
wsect(uint sec, void *buf)
{
if(lseek(fsfd, sec * BSIZE, 0) != sec * BSIZE)
die("lseek");
if(write(fsfd, buf, BSIZE) != BSIZE)
die("write");
}
void
winode(uint inum, struct dinode *ip)
{
char buf[BSIZE];
uint bn;
struct dinode *dip;
bn = IBLOCK(inum, sb);
rsect(bn, buf);
dip = ((struct dinode*)buf) + (inum % IPB);
*dip = *ip;
wsect(bn, buf);
}
void
rinode(uint inum, struct dinode *ip)
{
char buf[BSIZE];
uint bn;
struct dinode *dip;
bn = IBLOCK(inum, sb);
rsect(bn, buf);
dip = ((struct dinode*)buf) + (inum % IPB);
*ip = *dip;
}
void
rsect(uint sec, void *buf)
{
if(lseek(fsfd, sec * BSIZE, 0) != sec * BSIZE)
die("lseek");
if(read(fsfd, buf, BSIZE) != BSIZE)
die("read");
}
uint
ialloc(ushort type)
{
uint inum = freeinode++;
struct dinode din;
bzero(&din, sizeof(din));
din.type = xshort(type);
din.nlink = xshort(1);
din.size = xint(0);
winode(inum, &din);
return inum;
}
void
balloc(int used)
{
uchar buf[BSIZE];
int i;
printf("balloc: first %d blocks have been allocated\n", used);
assert(used < BSIZE*8);
bzero(buf, BSIZE);
for(i = 0; i < used; i++){
buf[i/8] = buf[i/8] | (0x1 << (i%8));
}
printf("balloc: write bitmap block at sector %d\n", sb.bmapstart);
wsect(sb.bmapstart, buf);
}
#define min(a, b) ((a) < (b) ? (a) : (b))
void
iappend(uint inum, void *xp, int n)
{
char *p = (char*)xp;
uint fbn, off, n1;
struct dinode din;
char buf[BSIZE];
uint indirect[NINDIRECT];
uint x;
rinode(inum, &din);
off = xint(din.size);
// printf("append inum %d at off %d sz %d\n", inum, off, n);
while(n > 0){
fbn = off / BSIZE;
assert(fbn < MAXFILE);
if(fbn < NDIRECT){
if(xint(din.addrs[fbn]) == 0){
din.addrs[fbn] = xint(freeblock++);
}
x = xint(din.addrs[fbn]);
} else {
if(xint(din.addrs[NDIRECT]) == 0){
din.addrs[NDIRECT] = xint(freeblock++);
}
rsect(xint(din.addrs[NDIRECT]), (char*)indirect);
if(indirect[fbn - NDIRECT] == 0){
indirect[fbn - NDIRECT] = xint(freeblock++);
wsect(xint(din.addrs[NDIRECT]), (char*)indirect);
}
x = xint(indirect[fbn-NDIRECT]);
}
n1 = min(n, (fbn + 1) * BSIZE - off);
rsect(x, buf);
bcopy(p, buf + off - (fbn * BSIZE), n1);
wsect(x, buf);
n -= n1;
off += n1;
p += n1;
}
din.size = xint(off);
winode(inum, &din);
}
// 报错
void
die(const char *s)
{
perror(s);
exit(1);
}