[6.1810][code] xv6 的 FileSystem (九) : System Call (二)

xv6-riscv

wtommy_fdgkhdkgh 2026-05-27 10:27:53 ‧ 156 瀏覽

分享至

系列文章 : [6.1810] 跟著 MIT 6.1810 學習基礎作業系統觀念

大綱

kernel/sysfile.c/isdirempty
kernel/sysfile.c/sys_link
kernel/sysfile.c/sys_unlink
kernel/sysfile.c/sys_mkdir
kernel/sysfile.c/sys_mknod
kernel/sysfile.c/sys_chdir
kernel/sysfile.c/sys_exec
kernel/sysfile.c/sys_pipe

kernel/sysfile.c/isdirempty

// Is the directory dp empty except for "." and ".." ?
static int
isdirempty(struct inode *dp)
{

(arg) struct inode *dp
- 這個 struct-inode 代表著我們想要查看的資料夾 ( directory )
return value
- 資料夾裡尚有檔案 : 0
- 資料夾是空的 : 1

  int off;
  struct dirent de;

  for (off = 2 * sizeof(de); off < dp->size; off += sizeof(de)) {
    if (readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
      panic("isdirempty: readi");
    if (de.inum != 0)
      return 0;
  }
  return 1;
}

off = 2 * sizeof(de); off < dp->size; off += sizeof(de)
- off = 2 * sizeof(de) : 前兩個 directory entry 代表 “.” 以及 “..”，在檢查資料夾是否為空的時候，不需要檢查這兩個 entry，所以在這邊要跳過。
- 這個 for-loop 會遍尋這個 directory 的所有 directory-entry。
readi(dp, 0, (uint64)&de, off, sizeof(de))
- 把某個特定的 directory entry 從 disk-hardware 讀取出來

kernel/sysfile.c/sys_link

// Create the path new as a link to the same inode as old.
uint64
sys_link(void)

這個 function 實作了 link 的系統呼叫。它會 create 一個新的 hard link，這個新的 hard link 會指向一個已經存在了的檔案 ( inode )。

這個 function 成功了的話，舊的檔案名稱 ( syscall 第 0 個參數 )，以及新的檔案名稱 ( syscall 第 1 個參數 ) 會指向 disk-hardware 上的同一個檔案。

return value
- value == 0 : 成功
- value == -1 : 失敗

{
  char name[DIRSIZ], new[MAXPATH], old[MAXPATH];
  struct inode *dp, *ip;

  if (argstr(0, old, MAXPATH) < 0 || argstr(1, new, MAXPATH) < 0)
    return -1;

argstr(0, old, MAXPATH)
- 提取 syscall 第 0 個 parameter，並視為字串，意思是舊有的檔案路徑。
argstr(1, new, MAXPATH)
- 提取 syscall 第 1 個 parameter，並視為字串，意思是新的檔案路徑。

  begin_op();

一段 transaction 的開始。

  if ((ip = namei(old)) == 0) {
    end_op();
    return -1;
  }

namei : 從路徑 ( 字串 ) 取得相對應的 struct-inode

  ilock(ip);
  if (ip->type == T_DIR) {
    iunlockput(ip);
    end_op();
    return -1;
  }

ilock : 鎖住 struct-inode，並在必要時從 disk-hardware 載入 struct-inode 的資訊
假如該 struct-inode 的型態是 directory，就會判定該 function 失敗，並 return -1。

  ip->nlink++;
  iupdate(ip);
  iunlock(ip);

ip->nlink++ : 新增一個 hard link，意味著該 struct-inode 會有多一個 reference。
iupdate(ip) : 把針對 struct-inode metadata 的更新，寫入到 disk-hardware

  if ((dp = nameiparent(new, name)) == 0)
    goto bad;
  ilock(dp);
  if (dp->dev != ip->dev || dirlink(dp, name, ip->inum) < 0) {
    iunlockput(dp);
    goto bad;
  }
  iunlockput(dp);

nameiparent(new, name)
- 假設新的路徑是 “/abc/def/fff”
- return value : 會是 “/abc/def” 的 struct-inode
- (arg) name : 會是 “fff”
ilock(dp)
- 從 disk-hardware 讀取 dp 的資訊
dirlink(dp, name, ip->inum)
- 在 struct-inode(dp) 的 directory entries 加上 inode number 為 ip->inum 的節點，並且該節點的名稱為 name
iunlockput(dp)
- 針對 dp 的操作結束了，可以釋放掉資源。

  iput(ip);

  end_op();

  return 0;

iput
- 對 struct-inode 的使用結束了，可以將 struct-inode->ref - 1
end_op
- 標示這一個 transaction 已經結束了
return 0
- 代表這次的 function 成功了

bad:
  ilock(ip);
  ip->nlink--;
  iupdate(ip);
  iunlockput(ip);
  end_op();
  return -1;
}

假如失敗的話，需要把剛剛修改的狀態復原。

ilock : 把 ip 的資訊從 disk-hardware 載入到 RAM
ip->nlink-- : 因為剛剛有 ip->nlink++，所以失敗的話，需要把這個狀態減回來
iupdate(ip) : 把新的資訊更新到 disk-hardware
end_op() : 結束這個 transaction

kernel/sysfile.c/sys_unlink

uint64
sys_unlink(void)
{

這個 function 實作了 unlink 系統呼叫。這個 function 可以用來從檔案系統中 unlink 一個檔案。

假如刪除後沒有任何 link 指向目標檔案，且沒有任何 process 正在 open 這個檔案的話，那這個檔案在 disk-hardware 中的資料就會被刪除。

例如說某個檔案在 “/abc/bcd/fff” 有一個 link，“/efg/hed/aaa” 也有一個 link ( 兩個路徑皆指向 disk-hardware 裡面的同樣的 blocks )。當我們 unlink(“/abc/bcd/fff”) 的時候，並不會把目標檔案在 disk-hardware 內佔用的空間清除，並且 “/efg/hed/aaa” 這個 link 也不會受到影響。

return value
- 成功 : 0
- 失敗 : -1

  struct inode *ip, *dp;
  struct dirent de;
  char name[DIRSIZ], path[MAXPATH];
  uint off;

  if (argstr(0, path, MAXPATH) < 0)
    return -1;

argstr : 把 syscall 第 0 個參數視為字串的 address ( 該 address 是 user-space 的 virtual address )，並把該字串複製到 kernel space 的 path[MAXPATH] 陣列裡

  begin_op();
  if ((dp = nameiparent(path, name)) == 0) {
    end_op();
    return -1;
  }

  ilock(dp);

begin_op
- 開始一段 transaction
nameiparent
- 給予 path ( e.g. “/abc/def/ggg” )，回傳 “/abc/def” 的 struct-inode，並用 (arg) name 回傳 “ggg”
ilock
- 從 disk-hardware 載入 struct-inode 的資訊。

  // Cannot unlink "." or "..".
  if (namecmp(name, ".") == 0 || namecmp(name, "..") == 0)
    goto bad;

不可以 unlink “.” 或是 “..”，這邊會檢查給予的 name 是否為這兩種禁止的情況。

  if ((ip = dirlookup(dp, name, &off)) == 0)
    goto bad;
  ilock(ip);

  if (ip->nlink < 1)
    panic("unlink: nlink < 1");
  if (ip->type == T_DIR && !isdirempty(ip)) {
    iunlockput(ip);
    goto bad;
  }

dirlookup(dp, name, &off)
- 從 struct-inode(dp) 的 directory entries 裡面去尋找有沒有檔案名稱為 (arg) name 的 struct-inode，有的話便回傳該 struct-inode，並且會用 (arg) off 回傳該 directory entry 在 struct-inode(dp)->directory-entries 內的偏移量。
ip->type == T_DIR && !isdirempty(ip)
- 假如我們現在想要刪除一個資料夾 ( T_DIR )，但該資料夾裡面卻還有檔案 !! 這時候會直接宣判這個 function 失敗。

  memset(&de, 0, sizeof(de));
  if (writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
    panic("unlink: writei");

memset(&de, 0, sizeof(de))
- 初始化這個 directory entry，因為現在我們想要清空這個 directory entry，所以全部都設為 0。
writei(dp, 0, (uint64)&de, off, sizeof(de))
- 剛剛的 dirlookup 有讓我們拿到目標 directory entry 在 struct-inode(dp)->directory-entries 內的偏移量。
- 把目標 directory entry 清空。

  if (ip->type == T_DIR) {
    dp->nlink--;
    iupdate(dp);
  }
  iunlockput(dp);

假如 ip->type == T_DIR，則在建立這個 struct-inode 的時候，會有 “..” 指向 parent struct-inode。所以當我們要刪掉一個 T_DIR 的時候，需要把 parent 的 nlink 減回去。
iupdate : 把更改後的 metadata 儲存回到 disk-hardware。

  ip->nlink--;
  iupdate(ip);
  iunlockput(ip);

  end_op();

  return 0;

把 ip->nlink 減去 1，代表又少了一個 link。
return 0 代表 unlink 成功。

bad:
  iunlockput(dp);
  end_op();
  return -1;
}

假如發生意外，就退出這個 function，並 return -1 代表 unlink 失敗。

kernel/sysfile.c/sys_mkdir

uint64
sys_mkdir(void)
{

給予一個特定的檔案路徑，並嘗試在該路徑建立一個資料夾。

  char path[MAXPATH];
  struct inode *ip;

  begin_op();
  if (argstr(0, path, MAXPATH) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0) {
    end_op();
    return -1;
  }
  iunlockput(ip);
  end_op();
  return 0;
}

argstr(0, path, MAXPATH)
- 把 syscall 第 0 個參數視為字串，這裡的意思是想要新增資料夾的路徑。
(ip = create(path, T_DIR, 0, 0))
- (arg) char *path : 想要新增檔案的路徑
- (arg) short type : 這邊設為 T_DIR，意思是我們想新增的檔案類型是資料夾。
- (arg) short major : major device number，只有在 T_DEVICE 時有意義。因為這邊的類型是 T_DIR，所以這個參數其實沒意義，直接給個 0。
- (arg) short minor : minor device number，只有在 T_DEVICE 時有意義。給 0 的原因跟 major 相同。

kernel/sysfile.c/sys_mknod

uint64
sys_mknod(void)
{

給予一個特定的檔案路徑，並嘗試在該路徑建立一個 T_DEVICE 類型的檔案，讓我們可以透過這個檔案跟特定的周邊裝置 ( peripheral device ) 互動，例如 Uart

  struct inode *ip;
  char path[MAXPATH];
  int major, minor;

  begin_op();
  argint(1, &major);
  argint(2, &minor);
  if ((argstr(0, path, MAXPATH)) < 0 ||
      (ip = create(path, T_DEVICE, major, minor)) == 0) {
    end_op();
    return -1;
  }
  iunlockput(ip);
  end_op();
  return 0;
}

argint(1, &major)
- 把 syscall 第 1 個 argument 視為 integer。
- 根據 xv6-riscv 的設計，會依賴 major 去呼叫相對應的 device 的 read/write function。 major 即是 struct devsw 陣列的 index。
argint(2, &minor)
- 把 syscall 第 2 個 argument 視為 integer。
- TODO : 目前還不知道，在 xv6-riscv 裡面，minor number 會有怎樣的功能 … 我猜是讓特定 device 的 driver 去分辨同類型 device 的不同 instance。例如說一個 platform 裡面有兩個 Uart，可以用 major number 來表示這兩個 Uart 要使用相同的 read/write function，並且用 minor number 來分辨目前是哪一個 Uart 呼叫了 read/write function。
argstr(0, path, MAXPATH)
- 把第 0 參數視為字串，這裡的意思是想要創建檔案的路徑。
create(path, T_DEVICE, major, minor)
- 創建一個 type 為 T_DEVICE 的檔案，並且設定相對應的 major 以及 minor。

kernel/sysfile.c/sys_chdir

uint64
sys_chdir(void)
{

這個 function 實作了 chdir 系統呼叫，可以用來改變呼叫的 process 的當前 working directory。

return value
- 成功 : 0
- 失敗 : -1

  char path[MAXPATH];
  struct inode *ip;
  struct proc *p = myproc();

  begin_op();
  if (argstr(0, path, MAXPATH) < 0 || (ip = namei(path)) == 0) {
    end_op();
    return -1;
  }

argstr(0, path, MAXPATH)
- 提取 syscall 第 0 個參數，並視為字串。這裡的字串的意義是想要前往的資料夾。
ip = namei(path)
- 提取特定 path 的 struct-inode。

  ilock(ip);
  if (ip->type != T_DIR) {
    iunlockput(ip);
    end_op();
    return -1;
  }

ip->type != T_DIR : 假如我們想要前往的 struct-inode 不是資料夾，那這個 function 就宣告失敗。

  iunlock(ip);
  iput(p->cwd);
  end_op();
  p->cwd = ip;
  return 0;
}

p->cwd = ip
- 更改該 process 的 current working directory 到目標 struct-inode。
return 0
- 表示該 function 成功!

kernel/sysfile.c/sys_exec

uint64
sys_exec(void)
{

this function
- 把目前程式的 memory image ( e.g. code section, data section, stack… ) 替換成新的 program 的資料。而這個新的 program 會從 executable file 載入。
return value
- 成功 : 開始執行新的 program
- 執行失敗 : -1

  char path[MAXPATH], *argv[MAXARG];
  int i;
  uint64 uargv, uarg;

  argaddr(1, &uargv);
  if (argstr(0, path, MAXPATH) < 0) {
    return -1;
  }

char *argv[MAXARG]
- argv 是一個字串陣列，該陣列的每個元素都是指向一個字串的指標。
argaddr(1, &uargv)
- 把 syscall 第 1 個參數視為 address，並放到 uargv 裡。
- uargv 代表的是 user space 給予的 argv，也是一個字串陣列
argstr(0, path, MAXPATH)
- 把 syscall 第 0 個參數視為字串，這裡是可執行檔的路徑。

  memset(argv, 0, sizeof(argv));
  for (i = 0;; i++) {
    if (i >= NELEM(argv)) {
      goto bad;
    }
    if (fetchaddr(uargv + sizeof(uint64) * i, (uint64 *)&uarg) < 0) {
      goto bad;
    }
    if (uarg == 0) {
      argv[i] = 0;
      break;
    }
    argv[i] = kalloc();
    if (argv[i] == 0)
      goto bad;
    if (fetchstr(uarg, argv[i], PGSIZE) < 0)
      goto bad;
  }

memset(argv, 0, sizeof(argv));
- 把字串陣列清空
i >= NELEM(argv)
- NELEM 是一個 macro，會計算這個陣列有幾個元素。
- 因為這個陣列的大小是靜態宣告的，所以可以藉由這個 macro 來計算出有幾個元素，假如這個陣列是動態 allocate 的，就不能使用這個 macro。
fetchaddr(uint64 addr, uint64 *ip)
- 會從當前 process 的 user space virtual address : addr 的地方，載入一個 uint64_t
- addr : user space 的 virtual address
- ip : 載入的 uint64_t 會放在這裡
- return value
  - 失敗 : -1
  - 成功 : 0
fetchaddr(uargv + sizeof(uint64) * i, (uint64 *)&uarg)
- 從 user space 的 argv 陣列載入第 i 個元素 ( 也就是指向第 i 個字串的 user-space 指標 )，並把它放在 uarg。
uarg == 0
- 該指標是 null pointer，表示我們已經把所有參數都載入完畢。
argv[i] == 0
- 表示 allocate 失敗，目前已經沒有多餘的 RAM 了，跳到 bad 標籤。
fetchstr(uarg, argv[i], PGSIZE)
- uarg 是指向一個字串的指標
- argv[i] 是透過 kalloc 所 allocate 出來的字串指標。
- 把 uarg 所指向的字串載入到 argv[i] 這個字串指標。

  int ret = kexec(path, argv);

執行給定路徑的檔案。
- 失敗 : return-value == -1
- 成功 : return-value != -1

  for (i = 0; i < NELEM(argv) && argv[i] != 0; i++)
    kfree(argv[i]);

把每一個剛剛 allocate 出來的字串指標釋放掉。

  return ret;

正常的跳出 function

bad:
  for (i = 0; i < NELEM(argv) && argv[i] != 0; i++)
    kfree(argv[i]);
  return -1;
}

假如該 function 有地方出錯了，就釋放掉所有字串指標，並回傳 -1

kernel/sysfile.c/sys_pipe

uint64
sys_pipe(void)
{

this function
- 這個 function 可以建立一個 pipe，pipe 可以用於 inter-process communication ( IPC )。一個 process 可以對 pipe 進行寫入，另外一個 process 可以對 pipe 進行讀取。
return value
- 成功 : 0
- 失敗 : -1

  uint64 fdarray; // user pointer to array of two integers
  struct file *rf, *wf;
  int fd0, fd1;
  struct proc *p = myproc();

  argaddr(0, &fdarray);

argaddr : 會把 syscall 第 0 個參數視為 address，該 address 指向一個有著兩個 int 的陣列。

  if (pipealloc(&rf, &wf) < 0)
    return -1;

int pipealloc(struct file **f0, struct file **f1)
- f0 : 唯讀的 struct-file
- f1 : 唯寫的 struct-file

  fd0 = -1;
  if ((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0) {
    if (fd0 >= 0)
      p->ofile[fd0] = 0;
    fileclose(rf);
    fileclose(wf);
    return -1;
  }

假如 fdalloc 失敗，就把資源釋放，並 return -1。

  if (copyout(p->pagetable, fdarray, (char *)&fd0, sizeof(fd0)) < 0 ||
      copyout(p->pagetable, fdarray + sizeof(fd0), (char *)&fd1, sizeof(fd1)) <
        0) {
    p->ofile[fd0] = 0;
    p->ofile[fd1] = 0;
    fileclose(rf);
    fileclose(wf);
    return -1;
  }
  return 0;
}

int copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
- copy data from kernel to user
- pagetable_t pagetable
  - 目標 process 的 page table。因為這邊希望從 kernel 複製資料到 user space virtual address，所以需要該 process 的 page table，以得知 user-space-virtual-address 所代表的 physical address 是在哪裡。
- uint64 dstva
  - 目標 user space virtual address，會把複製的資料放在這裡。
- char *src
  - kernel space 的 address。
- uint64 len
  - 複製的長度。
這邊是希望把兩個開好的 file-descriptor 放到 user space 的 fdarray
假如失敗的話，就把該 process 相對應的 file-descriptor-table entry ( p->ofile[fd0]、p->ofile[fd1] ) 清空，用 fileclose 釋放資源，並 return -1
成功的話，就 return 0

熱門推薦

{{ item.channelVendor }} | {{ item.webinarstarted }} |

直播中

尚未有邦友留言

立即登入留言

參賽組數

902 組

團體組數

37 組

累計文章數

19835 篇

完賽人數

528 人

15th鐵人賽 16th鐵人賽 13th鐵人賽 14th鐵人賽 17th鐵人賽 12th鐵人賽 11th鐵人賽鐵人賽 2019鐵人賽 javascript 2018鐵人賽 python 2017鐵人賽 windows php c# linux windows server css react

IT邦幫忙