系列文章 : [6.1810] 跟著 MIT 6.1810 學習基礎作業系統觀念
// Allocate an inode on device dev.
// Scans the on-disk inode table for a free struct dinode (type == 0),
// zeroes it, marks it as allocated by giving it type type, and logs
// the modified block so the new dinode is written back to the disk.
// Returns an unlocked but allocated and referenced inode (obtained
// from iget), or NULL (0) if there is no free inode.
struct inode*
ialloc(uint dev, short type)
{
  int inum;
  struct buf *bp;
  struct dinode *dip;

  // The scan starts at inode number 1 and covers sb.ninodes entries.
  for(inum = 1; inum < sb.ninodes; inum++){
    // Read the block holding this dinode and locate the dinode within it.
    bp = bread(dev, IBLOCK(inum, sb));
    dip = (struct dinode*)bp->data + inum%IPB;
    if(dip->type == 0){  // a free inode
      memset(dip, 0, sizeof(*dip));
      dip->type = type;
      log_write(bp);   // mark it allocated on the disk
      brelse(bp);
      return iget(dev, inum);
    }
    brelse(bp);
  }
  printf("ialloc: no inodes\n");
  return 0;
}
iget:用 dev 以及 inum 去拿出一個 in-memory copy 的 struct-inode。雖然拿到了 struct-inode,但這不代表這個 inode 的資訊已經從 disk hardware 讀取出來了(需要用 ilock 讀取出來)。
// Copy a modified in-memory inode to disk.
// Must be called after every change to an ip->xxx field
// that lives on disk.
// Caller must hold ip->lock.
void
iupdate(struct inode *ip)
{
這個 function 會把 in-memory-struct-inode 寫進 disk 的 struct-dinode。
在呼叫這個 function 之前,需要先擁有該 struct-inode 的 sleep-lock。
struct buf *bp;
struct dinode *dip;
bp = bread(ip->dev, IBLOCK(ip->inum, sb));
dip = (struct dinode*)bp->data + ip->inum%IPB;
dip->type = ip->type;
dip->major = ip->major;
dip->minor = ip->minor;
dip->nlink = ip->nlink;
dip->size = ip->size;
memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
log_write(bp);
brelse(bp);
}
// Scan every inode on device dev for orphaned inodes.
// "Orphaned" means an inode that has already been deleted (no links
// remain, nlink == 0) but whose file was never completely removed
// (the on-disk dinode is still allocated, type != 0).
// Each orphan found is reported, then locked, unlocked and released
// with iput inside a begin_op()/end_op() pair.
void
ireclaim(int dev)
{
  for (int inum = 1; inum < sb.ninodes; inum++) {
    struct inode *ip = 0;
    struct buf *bp = bread(dev, IBLOCK(inum, sb));
    struct dinode *dip = (struct dinode *)bp->data + inum % IPB;
    if (dip->type != 0 && dip->nlink == 0) {  // is an orphaned inode
      printf("ireclaim: orphaned inode %d\n", inum);
      ip = iget(dev, inum);
    }
    // Release the buffer before starting the operation below.
    brelse(bp);
    if (ip) {
      // NOTE(review): presumably ilock loads the inode from disk and the
      // final iput is what actually frees it -- confirm against iput.
      begin_op();
      ilock(ip);
      iunlock(ip);
      iput(ip);
      end_op();
    }
  }
}
end_op:commit,真正去完成一系列的 disk-hardware 寫入操作。
// Inode content
//
// The content (data) associated with each inode is stored
// in blocks on the disk. The first NDIRECT block numbers
// are listed in ip->addrs[]. The next NINDIRECT blocks are
// listed in block ip->addrs[NDIRECT].
//
// Return the disk block address of the nth block in inode ip,
// i.e. the data-block number that holds the bn-th block of content.
// If there is no such block, bmap allocates one with balloc.
// Returns 0 if out of disk space.
//
// Layout notes (the file-system chapter of the xv6-riscv book has a
// diagram of how a dinode and its content are laid out on disk):
//  - addrs[] has NDIRECT+1 entries; the last one, addrs[NDIRECT],
//    is the on-disk block number of the indirect block.
//  - Per the accompanying notes, the indirect block can hold
//    BSIZE / sizeof(uint) block numbers; with the xv6-riscv default
//    BSIZE of 1024 bytes that is 1024 / 4 = 256 block numbers.
static uint
bmap(struct inode *ip, uint bn)
{
  uint addr, *a;
  struct buf *bp;

  // Direct part: bn in 0 .. NDIRECT-1 is looked up in addrs[bn];
  // if the slot is empty, try to allocate a new block with balloc.
  if(bn < NDIRECT){
    if((addr = ip->addrs[bn]) == 0){
      addr = balloc(ip->dev);
      if(addr == 0)
        return 0;
      ip->addrs[bn] = addr;
    }
    return addr;
  }
  bn -= NDIRECT;

  if(bn < NINDIRECT){
    // Load indirect block, allocating if necessary.
    if((addr = ip->addrs[NDIRECT]) == 0){
      addr = balloc(ip->dev);
      if(addr == 0)
        return 0;
      ip->addrs[NDIRECT] = addr;
    }
    // addr is now the indirect block's block number on disk; read it
    // and take the block number that corresponds to bn from it.
    bp = bread(ip->dev, addr);
    a = (uint*)bp->data;
    if((addr = a[bn]) == 0){
      addr = balloc(ip->dev);
      if(addr){
        // The indirect block itself changed, so it must be logged.
        a[bn] = addr;
        log_write(bp);
      }
    }
    brelse(bp);
    return addr;
  }

  panic("bmap: out of range");
}
從 indirect block 拿 bn 對應的 block 編號。
// Truncate inode (discard contents).
// Caller must hold ip->lock.
void
itrunc(struct inode *ip)
{
這個 function 會把一個 struct-inode 的 content 砍掉。
int i, j;
struct buf *bp;
uint *a;
for(i = 0; i < NDIRECT; i++){
if(ip->addrs[i]){
bfree(ip->dev, ip->addrs[i]);
ip->addrs[i] = 0;
}
}
遍尋 struct-inode->addrs[X] 的 0 ~ NDIRECT - 1 ( direct part ),用 bfree 釋放掉所有看到的 block 編號 !
if(ip->addrs[NDIRECT]){
bp = bread(ip->dev, ip->addrs[NDIRECT]);
a = (uint*)bp->data;
for(j = 0; j < NINDIRECT; j++){
if(a[j])
bfree(ip->dev, a[j]);
}
brelse(bp);
bfree(ip->dev, ip->addrs[NDIRECT]);
ip->addrs[NDIRECT] = 0;
}
ip->size = 0;
iupdate(ip);
}
遍尋 indirect block,用 bfree 釋放掉所有看到的 block 編號 !
// Fill *st with the stat fields taken from inode ip.
// Caller must hold ip->lock.
void
stati(struct inode *ip, struct stat *st)
{
  // Where the inode lives: its device and its inode number.
  st->ino = ip->inum;
  st->dev = ip->dev;

  // What the inode is: type, link count and size.
  st->size = ip->size;
  st->nlink = ip->nlink;
  st->type = ip->type;
}
stati:把 struct-inode 的資訊複製到 struct stat。system call 會去拿這個資料。
// Read data from inode.
// Caller must hold ip->lock (the inode's sleep-lock).
// If user_dst==1, then dst is a user virtual address;
// otherwise, dst is a kernel address.
// Reads up to n bytes of ip's file content, starting at byte offset
// off, into dst. Returns the number of bytes read, 0 if off is past
// the end of the file or off + n overflows, or -1 if copying to dst
// fails.
int
readi(struct inode *ip, int user_dst, uint64 dst, uint off, uint n)
{
  uint tot, m;
  struct buf *bp;

  // off > ip->size : the offset is larger than the file.
  // off + n < off  : the unsigned addition overflowed.
  if(off > ip->size || off + n < off)
    return 0;
  // If off + n runs past the end of the file, clamp n so the read
  // stops at the end of the file.
  if(off + n > ip->size)
    n = ip->size - off;

  for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
    uint addr = bmap(ip, off/BSIZE);
    if(addr == 0)
      break;
    bp = bread(ip->dev, addr);
    // Copy at most up to the end of the current block.
    m = min(n - tot, BSIZE - off%BSIZE);
    if(either_copyout(user_dst, dst, bp->data + (off % BSIZE), m) == -1) {
      brelse(bp);
      // Report the failure directly rather than storing -1 in the
      // unsigned byte counter; the value returned is the same.
      return -1;
    }
    brelse(bp);
  }
  return tot;
}
// Write n bytes from src into inode ip, starting at byte offset off.
// Caller must hold ip->lock.
// user_src==1 means src is a user virtual address; any other value
// means src is a kernel address.
// Returns the number of bytes successfully written, or -1 when the
// requested range is rejected up front. A return value smaller than
// the requested n means an error of some kind occurred part-way.
int
writei(struct inode *ip, int user_src, uint64 src, uint off, uint n)
{
  uint written = 0;

  // Reject a start offset beyond the end of the file and a range
  // whose end wraps around.
  if(off > ip->size || off + n < off)
    return -1;
  // Reject a range that would exceed the maximum file size.
  if(off + n > MAXFILE*BSIZE)
    return -1;

  while(written < n){
    uint blockno = bmap(ip, off/BSIZE);
    if(blockno == 0)
      break;

    struct buf *b = bread(ip->dev, blockno);
    // Write no further than the end of the current block.
    uint chunk = min(n - written, BSIZE - off%BSIZE);
    if(either_copyin(b->data + (off % BSIZE), user_src, src, chunk) == -1) {
      brelse(b);
      break;
    }
    log_write(b);
    brelse(b);

    written += chunk;
    off += chunk;
    src += chunk;
  }

  // Extend the file if the write went past its previous end.
  if(off > ip->size)
    ip->size = off;

  // write the i-node back to disk even if the size didn't change
  // because the loop above might have called bmap() and added a new
  // block to ip->addrs[].
  iupdate(ip);

  return written;
}
剩下的操作跟 readi 差不多,下面舉出幾個不同的地方