lab3
页表初始化过程:
空闲物理页由一组run结构体串成的链表(freelist)来管理:每个run结构体就存放在它所代表的那个空闲页的起始位置,里面只有一个指向下一个空闲页的next指针。
main函数调用kinit,初始化物理页。kinit调用了freerange,把内核镜像末尾(end)到PHYSTOP之间的物理页全部释放掉,加入到freelist(由run组成的链表)中。(因为内核的虚拟地址和物理地址是直接映射的,所以这段范围就是end到PHYSTOP)
freerange又调用了kfree(void *p)函数,这个函数的作用就是释放p这一页:先用memset把从p开始、PGSIZE大小的空间的每个字节都填成1(填充垃圾数据,便于暴露对已释放内存的悬空引用),然后把p转换为struct run *类型,用头插法把它加入到freelist中,也就是p->next的值是原先freelist的头指针。这样从end到PHYSTOP对应的物理页就全被初始化,并被保存到freelist中去了。
然后main又调用kvminit。kvminit首先通过kalloc创建内核的root page-table page。然后调用kvmmap install the translations that the kernel needs(即建立内核所需的各段地址映射)。kvmmap实际是调用mappages实现这个功能。
// Create PTEs in pagetable for the virtual address range [va, va+size),
// mapping it to physical addresses starting at pa with permissions perm.
// va and size need not be page-aligned: the range is widened to whole
// pages by rounding va and the last byte's address down to page boundaries.
//
// Returns 0 on success, or -1 if walk() returns 0 for some page; in that
// case the pages already mapped by this call are left in place.
// Panics if size is 0 or if a page in the range is already mapped.
int
mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)
{
  uint64 a, last;
  pte_t *pte;

  // With size == 0, last would be the page *before* a (or wrap around),
  // so the a == last exit test below would not fire where expected.
  if(size == 0)
    panic("mappages: size");

  a = PGROUNDDOWN(va);
  last = PGROUNDDOWN(va + size - 1);
  for(;;){
    // walk(..., 1) asks for missing intermediate levels to be created.
    if((pte = walk(pagetable, a, 1)) == 0)
      return -1;
    if(*pte & PTE_V)
      panic("remap");
    *pte = PA2PTE(pa) | perm | PTE_V;
    if(a == last)
      break;
    a += PGSIZE;
    pa += PGSIZE;
  }
  return 0;
}
mappages的作用是为从虚拟地址va开始的size个字节逐页创建页表项,把它们映射到从pa开始的物理地址。walk返回的是最后一级页表中对应页表项的地址(alloc参数为1时,缺失的中间页表页会被分配出来),然后mappages把walk返回的这一项写成由物理地址pa转换成的PTE,并加上权限位perm和有效位PTE_V。
然后main调用kvminithart,把kernel pagetable的地址写入satp寄存器并刷新TLB,从此地址翻译开始生效。
然后main调用procinit,为每个进程分配kernel stack。
一些点
在内核部分,虚拟地址和物理地址是直接映射的(虚拟地址等于物理地址)。页表项(PTE)和物理地址的区别是:PTE里存的是物理页号(物理地址右移12位)再左移10位,低10位用来存放标志位。
Print a page table
这个实验比较简单,参照freewalk的代码就好了。
// Recursively print the valid entries of one page-table page.
// depth is the level of this page (0 for the root); pages deeper than
// level 2 are not printed. Each entry is printed with depth+1 ".."
// markers, followed by its index, the raw PTE and the physical address
// it refers to.
void
vmp(pagetable_t pagetable, int depth)
{
  int idx, pad;

  if(depth > 2)
    return;

  for(idx = 0; idx < 512; idx++){
    pte_t entry = pagetable[idx];

    // Skip slots that are not in use.
    if(!(entry & PTE_V))
      continue;

    // One ".. " per level above this one; the final ".." comes from
    // the format string below.
    for(pad = depth; pad > 0; pad--)
      printf(".. ");

    uint64 pa = PTE2PA(entry);
    printf("..%d: pte %p pa %p\n", idx, entry, pa);

    // Descend; the depth check at the top stops the recursion once the
    // last level has been printed.
    vmp((pagetable_t)pa, depth + 1);
  }
}
// Print a page table: first a header line with the address of the root
// page-table page, then its valid entries via vmp(), starting at depth 0.
void
vmprint(pagetable_t pagetable){
printf("page table %p\n",pagetable);
vmp(pagetable,0);
}
A kernel page table per process
这个实验和下个实验要做的是使内核能直接解析用户指针。利用的是内核在虚拟地址较高的部分开始,而进程从0开始。这个实验要做的就是为每个进程创建一个kernel pagetable,然后在运行这个进程的时候,把系统的kernel pagetable切换为进程的kernel pagetable。
这个实验做的时候挺难的,现在写起复盘好像还好。
// Per-process state.
struct proc {
struct spinlock lock;
// p->lock must be held when using these:
enum procstate state; // Process state
struct proc *parent; // Parent process
void *chan; // If non-zero, sleeping on chan
int killed; // If non-zero, have been killed
int xstate; // Exit status to be returned to parent's wait
int pid; // Process ID
// these are private to the process, so p->lock need not be held.
uint64 kstack; // Virtual address of kernel stack
uint64 sz; // Size of process memory (bytes)
pagetable_t pagetable; // User page table
pagetable_t kernel_pagetable; // Per-process kernel page table (loaded into satp by scheduler())
struct trapframe *trapframe; // data page for trampoline.S
struct context context; // swtch() here to run process
struct file *ofile[NOFILE]; // Open files
struct inode *cwd; // Current directory
char name[16]; // Process name (debugging)
};
根据提示,首先要做的是往proc 结构体加个 kernel pagetable。
pagetable_t
proc_kernel_pagetable(){
pagetable_t kpagetable = (pagetable_t)kalloc();
memset(kpagetable, 0 ,PGSIZE);
mappages(kpagetable, UART0, PGSIZE, UART0,PTE_R |PTE_W);
mappages(kpagetable,VIRTIO0, PGSIZE, VIRTIO0, PTE_R | PTE_W);
//mappages(kpagetable, CLINT, 0x10000, CLINT, PTE_R | PTE_W);
mappages(kpagetable, PLIC, 0x400000, PLIC, PTE_R | PTE_W);
mappages(kpagetable, KERNBASE, (uint64)etext-KERNBASE, KERNBASE, PTE_R | PTE_X );
mappages(kpagetable,(uint64)etext, PHYSTOP-(uint64)etext, (uint64)etext, PTE_R | PTE_W);
mappages(kpagetable,TRAMPOLINE, PGSIZE, (uint64)trampoline, PTE_R | PTE_X);
return kpagetable;
}
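然后参照kvminit,写一个初始化进程的kernel pagetable的函数。注释掉的那行(CLINT)是因为在第三个实验里不能映射:CLINT的地址低于PLIC,而0到PLIC这段低地址要留给用户地址空间的映射。要注意mappages里面的标志位参数:内核代码段和trampoline这两处是PTE_R | PTE_X(可读可执行),其余才是PTE_R | PTE_W;如果全写成读写,内核代码无法执行,会卡在一个地方,进不了shell。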
// Look in the process table for an UNUSED proc.
// If found, initialize the state required to run in the kernel
// (pid, per-process kernel page table, kernel stack, trapframe,
// empty user page table, initial context) and return with p->lock held.
// If there are no free procs, or a recoverable memory allocation fails,
// return 0 with no lock held.
// Panics if the kernel stack cannot be allocated or mapped.
static struct proc*
allocproc(void)
{
  struct proc *p;

  for(p = proc; p < &proc[NPROC]; p++) {
    acquire(&p->lock);
    if(p->state == UNUSED) {
      goto found;
    } else {
      release(&p->lock);
    }
  }
  return 0;

found:
  p->pid = allocpid();

  // Per-process kernel page table.
  p->kernel_pagetable = proc_kernel_pagetable();
  if(p->kernel_pagetable == 0){
    release(&p->lock);
    return 0;
  }

  // Kernel stack: one page, mapped only in this process's kernel page
  // table, two pages below TRAMPOLINE.
  // Failure here stays fatal: freeproc() expects the stack to be mapped
  // when it tears the kernel page table down.
  char *pa = kalloc();
  if(pa == 0)
    panic("stack kalloc");
  uint64 va = (TRAMPOLINE - 2*PGSIZE);
  if(mappages(p->kernel_pagetable, va, PGSIZE, (uint64)pa, PTE_R | PTE_W) != 0)
    panic("allocproc: kstack map");
  p->kstack = va;

  // Allocate a trapframe page.
  if((p->trapframe = (struct trapframe *)kalloc()) == 0){
    // Release the kernel page table and kernel stack set up above
    // instead of leaking them.
    freeproc(p);
    release(&p->lock);
    return 0;
  }

  // An empty user page table.
  p->pagetable = proc_pagetable(p);
  if(p->pagetable == 0){
    freeproc(p);
    release(&p->lock);
    return 0;
  }

  // Set up new context to start executing at forkret,
  // which returns to user space.
  memset(&p->context, 0, sizeof(p->context));
  p->context.ra = (uint64)forkret;
  p->context.sp = p->kstack + PGSIZE;

  return p;
}
接下来就是改allocproc函数,主要有两个:
1.初始化进程的kernel pagetable
2.修改kernel stack。因为每个进程的kernel stack是映射在kernel pagetable的,所以这里需要把procinit的部分功能移到这来。
// Scheduler loop; never returns.
// Repeatedly scans the process table for a RUNNABLE process, marks it
// RUNNING, installs that process's kernel page table in satp, and
// swtch()es to it. When control comes back from swtch(), kvminithart()
// is called before continuing the scan.
void
scheduler(void)
{
struct proc *p;
struct cpu *c = mycpu();
c->proc = 0;
for(;;){
// Avoid deadlock by ensuring that devices can interrupt.
intr_on();
int found = 0;
for(p = proc; p < &proc[NPROC]; p++) {
acquire(&p->lock);
if(p->state == RUNNABLE) {
// Switch to chosen process. It is the process's job
// to release its lock and then reacquire it
// before jumping back to us.
p->state = RUNNING;
c->proc = p;
// Run the process on its own kernel page table: load it into satp
// and flush the TLB before switching to the process's context.
w_satp(MAKE_SATP(p->kernel_pagetable));
sfence_vma();
swtch(&c->context, &p->context);
// Back in the scheduler. kvminithart() is defined elsewhere;
// presumably it reinstalls the global kernel page table so the
// scheduler no longer depends on p's table -- confirm against vm.c.
kvminithart();
// Process is done running for now.
// It should have changed its p->state before coming back.
c->proc = 0;
found = 1;
}
release(&p->lock);
}
#if !defined (LAB_FS)
// Nothing was runnable in this pass: wait for an interrupt.
if(found == 0) {
intr_on();
asm volatile("wfi");
}
#else
;
#endif
}
}
修改scheduler,在进程调度的时候切换kernel pagetable。
// Tear down a per-process kernel page table.
// Removes the device, kernel text/data and trampoline mappings and the
// mirrored user mappings for [0, sz) without freeing the physical pages
// behind them, then unmaps AND frees the kernel stack page at va `stack`,
// and finally hands the now leaf-free table to freewalk().
// (CLINT is not mapped by proc_kernel_pagetable(), so it is not unmapped.)
void
free_kernel_pagetable(pagetable_t pagetable, uint64 stack, uint64 sz)
{
  uint64 text_start = KERNBASE;
  uint64 data_start = (uint64)etext;

  uint64 plic_pages = 0x400000 / PGSIZE;
  uint64 text_pages = (data_start - text_start) / PGSIZE;
  uint64 data_pages = (PHYSTOP - data_start) / PGSIZE;
  uint64 user_pages = PGROUNDUP(sz) / PGSIZE;

  // Shared mappings: drop the PTEs only (do_free == 0).
  uvmunmap(pagetable, UART0, 1, 0);
  uvmunmap(pagetable, VIRTIO0, 1, 0);
  uvmunmap(pagetable, PLIC, plic_pages, 0);
  uvmunmap(pagetable, text_start, text_pages, 0);
  uvmunmap(pagetable, TRAMPOLINE, 1, 0);
  uvmunmap(pagetable, data_start, data_pages, 0);

  // Mirrored user pages: the memory belongs to the user page table.
  uvmunmap(pagetable, 0, user_pages, 0);

  // The kernel stack page is owned by this table: free it too.
  uvmunmap(pagetable, stack, 1, 1);

  freewalk(pagetable);
}
// Free a proc structure and the data hanging from it: the trapframe
// page, the user page table with its user pages, and the per-process
// kernel page table together with the kernel stack mapped in it.
// Every freed resource is cleared in the proc so that nothing dangles
// and a repeated call is harmless. Callers in this file invoke it with
// p->lock held.
static void
freeproc(struct proc *p)
{
  if(p->trapframe)
    kfree((void*)p->trapframe);
  p->trapframe = 0;

  if(p->pagetable)
    proc_freepagetable(p->pagetable, p->sz);
  p->pagetable = 0;

  // Must run before p->sz is cleared: the mirrored user range [0, sz)
  // has to be unmapped from the kernel page table as well.
  if(p->kernel_pagetable)
    free_kernel_pagetable(p->kernel_pagetable, p->kstack, p->sz);
  // The table and the stack page are gone; don't keep stale references.
  p->kernel_pagetable = 0;
  p->kstack = 0;

  p->sz = 0;
  p->pid = 0;
  p->parent = 0;
  p->name[0] = 0;
  p->chan = 0;
  p->killed = 0;
  p->xstate = 0;
  p->state = UNUSED;
}
// Build the initial user page table for process p.
// The table holds no user memory yet; it only maps the trampoline page
// at TRAMPOLINE and p's trapframe page at TRAPFRAME.
// Returns the new page table, or 0 if any step fails (everything set up
// so far is released again).
pagetable_t
proc_pagetable(struct proc *p)
{
  pagetable_t upt;
  int rc;

  // Start from an empty page table.
  upt = uvmcreate();
  if(upt == 0)
    return 0;

  // Trampoline code (used on the way to/from user space) goes at the
  // highest user virtual address. Only the supervisor uses it, so the
  // mapping carries no PTE_U.
  rc = mappages(upt, TRAMPOLINE, PGSIZE, (uint64)trampoline, PTE_R | PTE_X);
  if(rc < 0){
    uvmfree(upt, 0);
    return 0;
  }

  // The trapframe sits just below TRAMPOLINE, for trampoline.S.
  rc = mappages(upt, TRAPFRAME, PGSIZE, (uint64)(p->trapframe), PTE_R | PTE_W);
  if(rc < 0){
    // Undo the trampoline mapping before freeing the table pages.
    uvmunmap(upt, TRAMPOLINE, 1, 0);
    uvmfree(upt, 0);
    return 0;
  }

  return upt;
}
然后就是释放进程的时候也要free 进程的kernel pagetable。因为这里贴的代码是做完了lab3的代码,所以有些内容和下个实验有关系。
Simplify copyin/copyinstr
这个部分就是在之前实验的基础上,把进程用户页表(pagetable)里的映射同步添加到该进程的kernel pagetable上。要注意设置了PTE_U的页在内核态(S模式)下不能访问,所以把页表项复制到kernel pagetable时要清掉PTE_U标志位。
// Set up the first user process.
// Allocates a proc, loads initcode into one user page at address 0,
// mirrors that page into the process's kernel page table (without
// PTE_U), prepares the trapframe for the first return to user space and
// marks the process RUNNABLE.
// Panics if the process or the kernel-side mapping cannot be set up,
// since the system cannot start without it.
void
userinit(void)
{
  struct proc *p;
  pte_t *upte, *kpte;

  p = allocproc();
  if(p == 0)
    panic("userinit: allocproc");
  initproc = p;

  // allocate one user page and copy init's instructions
  // and data into it.
  uvminit(p->pagetable, initcode, sizeof(initcode));
  p->sz = PGSIZE;

  // Mirror user page 0 into the per-process kernel page table.
  // PTE_U is cleared in the copy so the page is treated as a
  // supervisor page there.
  upte = walk(p->pagetable, 0, 0);
  kpte = walk(p->kernel_pagetable, 0, 1);
  if(upte == 0 || kpte == 0)
    panic("userinit: kernel pagetable");
  *kpte = (*upte) & (~PTE_U);

  // prepare for the very first "return" from kernel to user.
  p->trapframe->epc = 0;      // user program counter
  p->trapframe->sp = PGSIZE;  // user stack pointer

  safestrcpy(p->name, "initcode", sizeof(p->name));
  p->cwd = namei("/");

  p->state = RUNNABLE;

  release(&p->lock);
}
首先修改userinit。
int
fork(void)
{
int i, pid;
struct proc *np;
struct proc *p = myproc();
pte_t *pte,*kernelpte;
// Allocate process.
if((np = allocproc()) == 0){
return -1;
}
// Copy user memory from parent to child.
if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
freeproc(np);
release(&np->lock);
return -1;
}
np->sz = p->sz;
np->parent = p;
//lab3 paet3
for(int j=0;j< p->sz; j+=PGSIZE){
kernelpte = walk(np->kernel_pagetable,j,1);
pte = walk(np->pagetable,j,0);
*kernelpte = (*pte) & (~PTE_U);
}
// copy saved user registers.
*(np->trapframe) = *(p->trapframe);
// Cause fork to return 0 in the child.
np->trapframe->a0 = 0;
// increment reference counts on open file descriptors.
for(i = 0; i < NOFILE; i++)
if(p->ofile[i])
np->ofile[i] = filedup(p->ofile[i]);
np->cwd = idup(p->cwd);
safestrcpy(np->name, p->name, sizeof(p->name));
pid = np->pid;
np->state = RUNNABLE;
release(&np->lock);
return pid;
}
int
exec(char *path, char **argv)
{
char *s, *last;
int i, off;
uint64 argc, sz = 0, sp, ustack[MAXARG+1], stackbase;
struct elfhdr elf;
struct inode *ip;
struct proghdr ph;
pagetable_t pagetable = 0, oldpagetable;
struct proc *p = myproc();
begin_op();
if((ip = namei(path)) == 0){
end_op();
return -1;
}
ilock(ip);
// Check ELF header
if(readi(ip, 0, (uint64)&elf, 0, sizeof(elf)) != sizeof(elf))
goto bad;
if(elf.magic != ELF_MAGIC)
goto bad;
if((pagetable = proc_pagetable(p)) == 0)
goto bad;
// Load program into memory.
for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
if(readi(ip, 0, (uint64)&ph, off, sizeof(ph)) != sizeof(ph))
goto bad;
if(ph.type != ELF_PROG_LOAD)
continue;
if(ph.memsz < ph.filesz)
goto bad;
if(ph.vaddr + ph.memsz < ph.vaddr)
goto bad;
uint64 sz1;
if((sz1 = uvmalloc(pagetable, sz, ph.vaddr + ph.memsz)) == 0)
goto bad;
if(sz1>=PLIC)
goto bad;
sz = sz1;
if(ph.vaddr % PGSIZE != 0)
goto bad;
if(loadseg(pagetable, ph.vaddr, ip, ph.off, ph.filesz) < 0)
goto bad;
}
iunlockput(ip);
end_op();
ip = 0;
p = myproc();
uint64 oldsz = p->sz;
// Allocate two pages at the next page boundary.
// Use the second as the user stack.
sz = PGROUNDUP(sz);
uint64 sz1;
if((sz1 = uvmalloc(pagetable, sz, sz + 2*PGSIZE)) == 0)
goto bad;
sz = sz1;
uvmclear(pagetable, sz-2*PGSIZE);
sp = sz;
stackbase = sp - PGSIZE;
//lab3 part3
// Push argument strings, prepare rest of stack in ustack.
for(argc = 0; argv[argc]; argc++) {
if(argc >= MAXARG)
goto bad;
sp -= strlen(argv[argc]) + 1;
sp -= sp % 16; // riscv sp must be 16-byte aligned
if(sp < stackbase)
goto bad;
if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
goto bad;
ustack[argc] = sp;
}
ustack[argc] = 0;
// push the array of argv[] pointers.
sp -= (argc+1) * sizeof(uint64);
sp -= sp % 16;
if(sp < stackbase)
goto bad;
if(copyout(pagetable, sp, (char *)ustack, (argc+1)*sizeof(uint64)) < 0)
goto bad;
pte_t *pte, *kernel_pte;
uvmunmap(p->kernel_pagetable,0,PGROUNDUP(oldsz)/PGSIZE,0);
for(int j=0;j<sz;j+=PGSIZE){
pte = walk(pagetable, j,0);
kernel_pte = walk(p->kernel_pagetable, j, 1);
*kernel_pte = ((*pte) & (~PTE_U));
}
// arguments to user main(argc, argv)
// argc is returned via the system call return
// value, which goes in a0.
p->trapframe->a1 = sp;
// Save program name for debugging.
for(last=s=path; *s; s++)
if(*s == '/')
last = s+1;
safestrcpy(p->name, last, sizeof(p->name));
// Commit to the user image.
oldpagetable = p->pagetable;
p->pagetable = pagetable;
p->sz = sz;
p->trapframe->epc = elf.entry; // initial program counter = main
p->trapframe->sp = sp; // initial stack pointer
proc_freepagetable(oldpagetable, oldsz);
if(p->pid==1)vmprint(p->pagetable);
return argc; // this ends up in a0, the first argument to main(argc, argv)
bad:
if(pagetable)
proc_freepagetable(pagetable, sz);
if(ip){
iunlockput(ip);
end_op();
}
return -1;
}
然后是fork和exec,这两个函数要做的操作本质是一样的。
uint64
sys_sbrk(void)
{
int addr;
int n;
pte_t *pte,*kernel_pte;
struct proc *p = myproc();
if(argint(0, &n) < 0)
return -1;
addr = p->sz;
if(growproc(n) < 0)
return -1;
if(n>0){
for(int j=addr;j<addr+n;j+=PGSIZE){
pte=walk(p->pagetable,j,0);
kernel_pte = walk(p->kernel_pagetable,j,1);
*kernel_pte = (*pte) & ~PTE_U;
}}
else{
//for(int j=addr-PGSIZE;j>=addr+n;j-=PGSIZE)
// uvmunmap(p->kernel_pagetable,j,1,0 );
uvmunmap(p->kernel_pagetable,PGROUNDUP(addr+n),-n/PGSIZE,0);
}
return addr;
}
然后修改 sbrk,n是正数增加映射,负数减少映射。
// Copy from user to kernel.
// Copy len bytes to dst from virtual address srcva in a given page table.
// The work is delegated to copyin_new() (defined elsewhere) and its
// result is returned unchanged.
int
copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
{
// Previous implementation, kept for reference only: it translated the
// source one page at a time with walkaddr() and copied page by page.
/*
uint64 n, va0, pa0;
while(len > 0){
va0 = PGROUNDDOWN(srcva);
pa0 = walkaddr(pagetable, va0);
if(pa0 == 0)
return -1;
n = PGSIZE - (srcva - va0);
if(n > len)
n = len;
memmove(dst, (void *)(pa0 + (srcva - va0)), n);
copyin_new(pagetable, dst, srcva, n);
len -= n;
dst += n;
srcva = va0 + PGSIZE;
}
return 0;
*/
return copyin_new(pagetable, dst, srcva, len);
}
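最后就是修改copyin()和copyinstr()。这里我一开始没太搞懂的是:为什么可以直接return copyin_new(pagetable, dst, srcva, len);,而不需要像注释掉的memmove(dst, (void *)(pa0 + (srcva - va0)), n);那样每一个PGSIZE处理一次?原因是:之前内核页表里没有用户地址的映射,内核不能直接解引用用户指针,只能用walkaddr按页把用户虚拟地址手工翻译成物理地址,而相邻虚拟页对应的物理页不一定连续,所以必须逐页拷贝;现在用户映射已经加进了进程的kernel pagetable,翻译由硬件(MMU)完成,用户虚拟地址是连续的,一次memmove就够了。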