系列文章 : [6.1810] 跟著 MIT 6.1810 學習基礎作業系統觀念
// Put the calling process to sleep on channel chan.
// The condition lock lk is released while the process sleeps and is
// held again by the time sleep() returns.
void
sleep(void *chan, struct spinlock *lk)
{
  struct proc *self = myproc();

  // self->lock must be held to change self->state and to call sched().
  // wakeup() also takes the process lock before inspecting a process,
  // so once self->lock is held no wakeup can slip by unnoticed and lk
  // may safely be dropped.
  acquire(&self->lock);  //DOC: sleeplock1
  release(lk);

  // Record what we are waiting for, mark ourselves asleep, switch away.
  self->chan = chan;
  self->state = SLEEPING;
  sched();

  // Back from sched(): no longer waiting on any channel.
  self->chan = 0;

  // Drop the process lock first, then take the caller's lock back.
  release(&self->lock);
  acquire(lk);
}
// Make every process that is sleeping on channel chan RUNNABLE.
// The calling process itself is never examined.
// Caller should hold the condition lock.
void
wakeup(void *chan)
{
  for(int i = 0; i < NPROC; i++) {
    struct proc *cand = &proc[i];

    // Skip the caller; its lock is not taken here.
    if(cand == myproc())
      continue;

    // state and chan are only read and written under the process lock.
    acquire(&cand->lock);
    if(cand->chan == chan && cand->state == SLEEPING)
      cand->state = RUNNABLE;
    release(&cand->lock);
  }
}
// Translate ELF segment permission flags into PTE permission bits.
// Flag bit 0x1 selects PTE_X and flag bit 0x2 selects PTE_W; every
// other flag bit is ignored. Returns 0 when neither bit is set.
int flags2perm(int flags)
{
  int exec_perm = (flags & 0x1) ? PTE_X : 0;
  int write_perm = (flags & 0x2) ? PTE_W : 0;

  return exec_perm | write_perm;
}
flags2perm 會把 ELF program header 中 讀/寫/執行 權限的 flag,轉換成 xv6-riscv PTE ( page table entry,用於 MMU ) 權限的 flag。
ELF flags 的意義 ( 可以在 kernel/elf.h 看到 )
RISC-V PTE Flags ( 可以在 kernel/riscv.h 看到 )
// Load an ELF program segment into pagetable at virtual address va.
// va must be page-aligned
// and the pages from va to va+sz must already be mapped.
//
// Reads sz bytes from inode ip, starting at file offset `offset`, in
// chunks of at most PGSIZE bytes. Each chunk is read into the address
// that walkaddr() reports for va+i in pagetable.
//
// Panics if walkaddr() returns 0 for any page in the range.
// Returns 0 on success, -1 on failure (a readi() call returned something
// other than the requested byte count).
static int
loadseg(pagetable_t pagetable, uint64 va, struct inode *ip, uint offset, uint sz)
{
disk-hardware 載入到特定的 user-space-virtual-address。 uint i, n;
uint64 pa;
// One iteration per page of the segment.
for(i = 0; i < sz; i += PGSIZE){
// Look up the address backing this page of the segment.
pa = walkaddr(pagetable, va + i);
// The caller promised the range is mapped, so 0 here is treated as fatal.
if(pa == 0)
panic("loadseg: address should exist");
// The last chunk may be shorter than a full page.
if(sz - i < PGSIZE)
n = sz - i;
else
n = PGSIZE;
// NOTE(review): the second argument 0 presumably tells readi() that the
// destination is not a user virtual address -- confirm against readi().
if(readi(ip, 0, (uint64)pa, offset+i, n) != n)
return -1;
}
return 0;
}
walkaddr(pagetable, va + i)
return 0
//
// the implementation of the exec() system call
//
// Replaces the calling process's user image with the ELF program at path:
//   1. open the file and check its ELF header;
//   2. obtain a new page table from proc_pagetable();
//   3. allocate memory for, and load, every ELF_PROG_LOAD segment;
//   4. allocate USERSTACK+1 pages above the image: the lowest one is
//      passed to uvmclear() as a stack guard, the rest are the user stack;
//   5. copy the argv strings and then the argv[] pointer array onto the
//      new stack;
//   6. commit: install the new page table, size, entry pc and sp in p,
//      then free the old page table.
//
// Returns argc on success and -1 on failure. Every write to p happens
// after the last failure check, so a failed call leaves p unmodified.
// On failure the bad: path frees the new page table (if one was created)
// and unlocks/releases the inode and ends the fs operation (if ip is
// still set).
//
int
kexec(char *path, char **argv)
{
char *s, *last;
int i, off;
// ustack[] collects the new-stack address of each copied argv string,
// followed by a 0 terminator, before being copied out as argv[].
uint64 argc, sz = 0, sp, ustack[MAXARG], stackbase;
struct elfhdr elf;
struct inode *ip;
struct proghdr ph;
// pagetable stays 0 until proc_pagetable() succeeds, which is how bad:
// knows whether there is anything to free.
pagetable_t pagetable = 0, oldpagetable;
struct proc *p = myproc();
(arg) char *path
(arg) char **argv
begin_op();
// Open the executable file.
// This failure is handled inline: ip is 0 here, so bad: would skip
// the end_op() that must pair with begin_op().
if((ip = namei(path)) == 0){
end_op();
return -1;
}
ilock(ip);
// Read the ELF header.
if(readi(ip, 0, (uint64)&elf, 0, sizeof(elf)) != sizeof(elf))
goto bad;
這邊會去讀取該 ELF 檔案的 header ( struct-elfhdr ),取得該 ELF 檔案的 metadata。
// Is this really an ELF file?
if(elf.magic != ELF_MAGIC)
goto bad;
檢查 header 內的 magic value 是否為合法值,若不是合法值,表示該檔案不是一個合法的 ELF 檔案,需到 bad 標籤,表示該次 kexec 執行失敗。
// Get the page table that the new image will be built in.
if((pagetable = proc_pagetable(p)) == 0)
goto bad;
// Load program into memory.
// Visit elf.phnum program headers starting at file offset elf.phoff;
// off advances by sizeof(ph) for each header.
for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
if(readi(ip, 0, (uint64)&ph, off, sizeof(ph)) != sizeof(ph))
goto bad;
// Only ELF_PROG_LOAD headers are loaded; anything else is skipped.
if(ph.type != ELF_PROG_LOAD)
continue;
// Reject a segment whose size in the file exceeds its size in memory.
if(ph.memsz < ph.filesz)
goto bad;
// Reject a segment whose end address wraps around.
if(ph.vaddr + ph.memsz < ph.vaddr)
goto bad;
// loadseg() requires a page-aligned start address.
if(ph.vaddr % PGSIZE != 0)
goto bad;
// Grow the image from sz up to the end of this segment. sz is only
// updated on success, so bad: frees with the last good size.
uint64 sz1;
if((sz1 = uvmalloc(pagetable, sz, ph.vaddr + ph.memsz, flags2perm(ph.flags))) == 0)
goto bad;
sz = sz1;
// Only filesz bytes are read from the file, although memsz bytes
// were allocated above.
if(loadseg(pagetable, ph.vaddr, ip, ph.off, ph.filesz) < 0)
goto bad;
}
elf.phoff
elf.phnum
off
continue 跳過這個 program header。但不會包含未初始化的資料 ( e.g. int array[10000] ),理由很簡單,包含初始化的資料根本是浪費空間,只要知道該 array 的大小就足夠了。flags2perm(ph.flags)。 iunlockput(ip);
end_op();
// The inode is released; clear ip so bad: does not release it again.
ip = 0;
bad 標籤那邊,會需要依照這個值來看要不要 iunlockput。 p = myproc();
// Remember the old image size: it is needed to free the old page table
// after the commit.
uint64 oldsz = p->sz;
// Allocate some pages at the next page boundary.
// Make the first inaccessible as a stack guard.
// Use the rest as the user stack.
sz = PGROUNDUP(sz);
uint64 sz1;
if((sz1 = uvmalloc(pagetable, sz, sz + (USERSTACK+1)*PGSIZE, PTE_W)) == 0)
goto bad;
sz = sz1;
// The address passed here is the lowest page of the region just allocated.
uvmclear(pagetable, sz-(USERSTACK+1)*PGSIZE);
// sp starts at the top of the new region; stackbase is USERSTACK pages
// below it, i.e. the lowest address the stack contents may reach.
sp = sz;
stackbase = sp - USERSTACK*PGSIZE;
// NOTE(review): p was already assigned from myproc() above; this second
// assignment looks redundant.
p = myproc();
+ 1 是為了 Guard Page,當有人讀/寫超出 stack 的空間,並對 Guard Page 進行 讀/寫 的話,就會觸發 page fault,讓我們知道發生了 buffer overflow。uvmalloc 回傳 0 表示 uvmalloc 失敗,該 kexec 執行失敗,進入 bad 標籤。guard page
往低位擴展。Guard Page 開始的地方 // Copy argument strings into new stack, remember their
// addresses in ustack[].
for(argc = 0; argv[argc]; argc++) {
if(argc >= MAXARG)
goto bad;
// Reserve room for the string plus its terminating NUL byte.
sp -= strlen(argv[argc]) + 1;
sp -= sp % 16; // riscv sp must be 16-byte aligned
// Going below stackbase means the arguments do not fit in the stack.
if(sp < stackbase)
goto bad;
if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
goto bad;
ustack[argc] = sp;
}
// NOTE(review): if argv holds exactly MAXARG strings the loop exits with
// argc == MAXARG and this store writes ustack[MAXARG], one past the end
// of the array -- confirm that callers always pass fewer than MAXARG.
ustack[argc] = 0;
MAXARG ( 預設為 32 )+1 是為了 null-terminator ( string ending charactor \0 )argv[argc] 指向的字串 在 user-stack 空出的一個空間。+1 代表 null-terminator。 // push a copy of ustack[], the array of argv[] pointers.
// argc+1 entries: one pointer per argument plus the 0 terminator.
sp -= (argc+1) * sizeof(uint64);
sp -= sp % 16;
if(sp < stackbase)
goto bad;
if(copyout(pagetable, sp, (char *)ustack, (argc+1)*sizeof(uint64)) < 0)
goto bad;
// a0 and a1 contain arguments to user main(argc, argv)
// argc is returned via the system call return
// value, which goes in a0.
// sp now points at the argv[] array that was just copied out.
p->trapframe->a1 = sp;
// Save program name for debugging.
// last ends up pointing just past the final '/' in path, or at the
// start of path when it contains no '/'.
for(last=s=path; *s; s++)
if(*s == '/')
last = s+1;
safestrcpy(p->name, last, sizeof(p->name));
ustack。p->trapframe->a0 (link)int main(int argc, char *argv[])
int argc 就會是 p->trapframe->a0
char *argv[] 就會是 p->trapframe->a1
// Commit to the user image.
// No failure is possible from here on; the old page table is freed only
// after the new one has been installed.
oldpagetable = p->pagetable;
p->pagetable = pagetable;
p->sz = sz;
p->trapframe->epc = elf.entry; // initial program counter = main
p->trapframe->sp = sp; // initial stack pointer
proc_freepagetable(oldpagetable, oldsz);
return argc; // this ends up in a0, the first argument to main(argc, argv)
w_sepc(p->trapframe->epc); ,這讓我們可以控制從 kernel space 回到 user-space 的時候,program counter 該跳到哪裡。於是這邊換成 ELF 的 entry point,在我們從 kernel space 回到 user space 的時候,就會在 user space 的 ELF entry point 開始執行。kernel/syscall.c/syscall,會把這個回傳值放到 p->trapframe->a0。program counter 會被設定成 ELF 的 entry-point ( main function ),並且把 p->trapframe->a0 視為第一個參數 (argc),並把 p->trapframe->a1 視為第二個參數 ( char *argv[] )。 bad:
// Failure path: release whatever had been acquired when the error occurred.
// pagetable is non-zero only once proc_pagetable() has succeeded.
if(pagetable)
proc_freepagetable(pagetable, sz);
// ip is non-zero only while the inode is still locked and the fs
// operation is still open.
if(ip){
iunlockput(ip);
end_op();
}
return -1;
}
如果已經配置了新的 pagetable 的話,要歸還回去。
這邊稍微總結一下 kexec 所發生的事情。
- 讀取 ELF header ( struct elfhdr ),驗證這是否是一個合法的 ELF 檔案。
- 透過 proc_pagetable 建立一個新的 pagetable。
- 讀取每個 program header ( struct proghdr ),並把全部需要載入到 RAM 裡面的 section,載入到 RAM 裡面,並在新的 pagetable 加上這些區段的 virtual-address -> physical address 映射。
- 配置 user stack 以及 Guard Page。
- 把參數複製到 user stack,argc 與 argv 會分別透過 p->trapframe->a0 以及 p->trapframe->a1 傳給 main。
// Create a new process, copying the parent.
// Sets up child kernel stack to return as if from fork() system call.
//
// Returns the child's pid to the caller, or -1 if allocproc() or
// uvmcopy() fails. The child's saved a0 is set to 0, so the child sees
// 0 as the return value of fork.
int
kfork(void)
{
這個 function 會複製當前的 user process ( parent ),創造出一個相同的 user process ( child )。
int i, pid;
struct proc *np;
struct proc *p = myproc();
// Allocate process.
if((np = allocproc()) == 0){
return -1;
}
allocproc 回傳 struct-proc 的時候,會是已經拿到 struct-proc->lock 的狀態,用以避免其他人會去使用這個還在中間狀態的新的 struct-proc。 // Copy user memory from parent to child.
// On failure undo the allocation. np->lock is released here, so it is
// held on return from allocproc().
if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
freeproc(np);
release(&np->lock);
return -1;
}
np->sz = p->sz;
因為目前 parent-process 跟 child-process 的 pagetable 一模一樣,所以 size 自然而然也是一樣
// copy saved user registers.
*(np->trapframe) = *(p->trapframe);
// Cause fork to return 0 in the child.
np->trapframe->a0 = 0;
// increment reference counts on open file descriptors.
// Only slots that are open in the parent are duplicated.
for(i = 0; i < NOFILE; i++)
if(p->ofile[i])
np->ofile[i] = filedup(p->ofile[i]);
// The child starts in the parent's current working directory.
np->cwd = idup(p->cwd);
safestrcpy(np->name, p->name, sizeof(p->name));
// Read the pid while np->lock is still held.
pid = np->pid;
current working directory 與 parent-process 相同。 release(&np->lock);
// np->parent is written under wait_lock rather than np->lock. np->lock
// is dropped before wait_lock is taken and re-acquired afterwards --
// presumably to respect a lock-ordering rule; confirm.
acquire(&wait_lock);
np->parent = p;
release(&wait_lock);
// Marking the child RUNNABLE is the final step, after every other
// field has been filled in.
acquire(&np->lock);
np->state = RUNNABLE;
release(&np->lock);
return pid;
}