/*
* Now umount can handle mount points as well as block devices.
* This is important for filesystems which use unnamed block devices.
*
* We now support a flag for forced unmount like the other 'big iron'
* unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
*/
//The system call that unmounts a filesystem.
asmlinkage long sys_umount(char * name, int flags)
{
struct nameidata nd;
char *kname;
int retval;
lock_kernel();
kname = getname(name);
retval = PTR_ERR(kname);
if (IS_ERR(kname))
goto out;
retval = 0;
//kulv: resolve the target path into a nameidata; it carries the directory's dentry and inode.
if (path_init(kname, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &nd))
retval = path_walk(kname, &nd);
putname(kname);
if (retval)
goto out;
retval = -EINVAL;
if (nd.dentry != nd.mnt->mnt_root)//nd.mnt is the vfsmount connector for this mount point; its mnt_root points to the mounted device's root dentry
goto dput_and_out;//if the resolved dentry is not the device's root, the path is not a mount point
retval = -EPERM;
if (!capable(CAP_SYS_ADMIN) && current->uid != nd.mnt->mnt_owner)
goto dput_and_out;//reject callers that are neither privileged nor the mount's owner
dput(nd.dentry);//drop our reference on the dentry
/* puts nd.mnt */
down(&mount_sem);
retval = do_umount(nd.mnt, 0, flags);//the real work happens here
up(&mount_sem);
goto out;
dput_and_out:
path_release(&nd);
out:
unlock_kernel();
return retval;
}
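/*
 * Illustrative userspace sketch (an addition, not part of this kernel
 * source): how a program reaches sys_umount. The glibc wrapper
 * umount2(2) passes the target path and flags straight through, and
 * MNT_FORCE is what drives the sb->s_op->umount_begin() branch in
 * do_umount() below. "/mnt/cdrom" is a made-up mount point; running
 * this for real requires root.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/mount.h>

int main(void)
{
	if (umount2("/mnt/cdrom", 0) == 0)	/* try a normal unmount first */
		return 0;
	if (errno == EBUSY && umount2("/mnt/cdrom", MNT_FORCE) == 0)
		return 0;			/* force it if busy */
	fprintf(stderr, "umount failed: %s\n", strerror(errno));
	return 1;
}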
/*kulv:
mnt is the vfsmount connector for this mount point; it ties together the
mounted device's root dentry, its superblock, and the mount-tree linkage.
umount_root is 0 when called from sys_umount;
flags is passed straight down from sys_umount.
*/
static int do_umount(struct vfsmount *mnt, int umount_root, int flags)
{
struct super_block * sb = mnt->mnt_sb;//fetch the superblock
/*
* No sense to grab the lock for this test, but test itself looks
* somewhat bogus. Suggestions for better replacement?
* Ho-hum... In principle, we might treat that as umount + switch
* to rootfs. GC would eventually take care of the old vfsmount.
* The problem being: we have to implement rootfs and GC for that ;-)
* Actually it makes sense, especially if rootfs would contain a
* /reboot - static binary that would close all descriptors and
* call reboot(2). Then init(8) could umount root and exec /reboot.
*/
if (mnt == current->fs->rootmnt && !umount_root) {
int retval = 0;//special case: the target is the calling process's root mount
/*
* Special case for "unmounting" root ...
* we just try to remount it readonly.
*/
mntput(mnt);
if (!(sb->s_flags & MS_RDONLY))//a process cannot really unmount its own root; if it is not already read-only, remount it read-only instead
retval = do_remount_sb(sb, MS_RDONLY, 0);
return retval;
}
spin_lock(&dcache_lock);
//If the device is mounted in more than one place, the superblock has
//several vfsmount connectors chained on it:
if (mnt->mnt_instances.next != mnt->mnt_instances.prev) {//more than one instance on the superblock's mount list
if (atomic_read(&mnt->mnt_count) > 2) {//a count above 2 means someone else still holds this vfsmount, so...
spin_unlock(&dcache_lock);
mntput(mnt);//...just drop our reference and report busy
return -EBUSY;
}
if (sb->s_type->fs_flags & FS_SINGLE)//FS_SINGLE means every mount of this filesystem type shares one superblock
put_filesystem(sb->s_type);
/* We hold two references, so mntput() is safe */
mntput(mnt);
//The superblock is mounted elsewhere too, so the device itself stays put;
//we only detach this one instance. remove_vfsmnt() below just unlinks the
//connector from the various lists and drops the related counts, after
//which it can no longer be reached.
remove_vfsmnt(mnt);
return 0;
}
spin_unlock(&dcache_lock);
//From here on this was the device's only mount, which takes more work.
/*
* Before checking whether the filesystem is still busy,
* make sure the kernel doesn't hold any quota files open
* on the device. If the umount fails, too bad -- there
* are no quotas running any more. Just turn them on again.
*/
DQUOT_OFF(sb);//turn off disk quotas for this filesystem
acct_auto_close(sb->s_dev);
/*
* If we may have to abort operations to get out of this
* mount, and they will themselves hold resources we must
* allow the fs to do things. In the Unix tradition of
* 'Gee thats tricky lets do it in userspace' the umount_begin
* might fail to complete on the first run through as other tasks
* must return, and the like. Thats for the mount program to worry
* about for the moment.
*/
if ((flags & MNT_FORCE) && sb->s_op->umount_begin)
sb->s_op->umount_begin(sb);//let the filesystem do its forced-unmount preparation
//struct super_operations is defined in include/linux/fs.h
/*
* Shrink dcache, then fsync. This guarantees that if the
* filesystem is quiescent at this point, then (a) only the
* root entry should be in use and (b) that root entry is
* clean.
*/
shrink_dcache_sb(sb);//free this device's dentries cached on the dentry_unused list
fsync_dev(sb->s_dev);//flush any data cached in memory but not yet written back to the device
if (sb->s_root->d_inode->i_state) {
mntput(mnt);
return -EBUSY;
}
/* Something might grab it again - redo checks */
spin_lock(&dcache_lock);
if (atomic_read(&mnt->mnt_count) > 2) {
spin_unlock(&dcache_lock);
mntput(mnt);
return -EBUSY;
}
/* OK, that's the point of no return */
mntput(mnt);
remove_vfsmnt(mnt);
//The device's caches, inodes, dirty pages, locked pages, and sleeping
//waiters have all been dealt with, and remove_vfsmnt() has unlinked the
//connector from every list and dropped the related counts, so the mount
//can no longer be reached.
//kill_super() now settles the superblock's affairs: it calls the
//super_operations hooks to notify the filesystem, frees the associated
//caches, and tears the superblock down safely.
kill_super(sb, umount_root);
return 0;
}
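/*
 * Illustrative userspace sketch (an addition, not kernel code): the
 * mnt->mnt_instances.next != mnt->mnt_instances.prev test in do_umount()
 * above is a standard trick on the kernel's circular doubly linked
 * lists. Seen from any member node, next == prev exactly when the ring
 * holds only that node and the list head, i.e. this vfsmount is the
 * superblock's sole instance. The minimal list below reproduces that
 * invariant.
 */
#include <assert.h>

struct list_head {
	struct list_head *next, *prev;
};

static void list_init(struct list_head *h)
{
	h->next = h->prev = h;
}

static void list_add_head(struct list_head *n, struct list_head *h)
{
	n->next = h->next;
	n->prev = h;
	h->next->prev = n;
	h->next = n;
}

int main(void)
{
	struct list_head s_mounts, m1, m2;	/* stand-ins for sb->s_mounts and two vfsmounts */

	list_init(&s_mounts);
	list_add_head(&m1, &s_mounts);
	assert(m1.next == m1.prev);	/* sole instance: next and prev both point at the head */

	list_add_head(&m2, &s_mounts);
	assert(m1.next != m1.prev);	/* second instance: the ring now has three nodes */
	return 0;
}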
/*
* Called with spinlock held, releases it.
*/
static void remove_vfsmnt(struct vfsmount *mnt)
{//this only detaches one of the device's several mount instances
/* First of all, remove it from all lists */
list_del(&mnt->mnt_instances);//unlink from the superblock's list of mounts; this member hangs on sb->s_mounts
list_del(&mnt->mnt_clash);//several filesystems can be mounted on one point; mnt_clash hangs on the mount-point dentry's list of mounted filesystems
list_del(&mnt->mnt_list);//unlink from the global vfsmntlist that records every mount connector in the system
list_del(&mnt->mnt_child);//unlink from the parent mount; symmetrically, our own mnt_mounts list holds the mounts below us
spin_unlock(&dcache_lock);
/* Now we can work safely */
if (mnt->mnt_parent != mnt)//a mount whose parent is itself is the root of the mount tree, with no parent reference to drop
mntput(mnt->mnt_parent);
dput(mnt->mnt_mountpoint);//the dentry of the mount point
dput(mnt->mnt_root);//the dentry of the mounted device's root
if (mnt->mnt_devname)
kfree(mnt->mnt_devname);
kfree(mnt);
}
/*
* Shrink the dcache for the specified super block.
* This allows us to unmount a device without disturbing
* the dcache for the other devices.
*
* This implementation makes just two traversals of the
* unused list. On the first pass we move the selected
* dentries to the most recent end, and on the second
* pass we free them. The second pass must restart after
* each dput(), but since the target dentries are all at
* the end, it's really just a single traversal.
*/
/**
* shrink_dcache_sb - shrink dcache for a superblock
* @sb: superblock
*
* Shrink the dcache for the specified super block. This
* is used to free the dcache before unmounting a file
* system
*/
//Free every cached dentry that belongs to the given superblock's device.
void shrink_dcache_sb(struct super_block * sb)
{
struct list_head *tmp, *next;
struct dentry *dentry;
/*
* Pass one ... move the dentries for the specified
* superblock to the most recent end of the unused list.
*/
spin_lock(&dcache_lock);
next = dentry_unused.next;//dentry_unused is the global LRU list caching dentries that are currently unused
while (next != &dentry_unused) {
tmp = next;
next = tmp->next;
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)//skip dentries that belong to other superblocks
continue;
list_del(tmp);//take it off its place in dentry_unused
list_add(tmp, &dentry_unused);//and reinsert it at the front (the "most recent" end)
}//after this pass, every matching dentry sits, in reverse order, at the front of dentry_unused
/*
* Pass two ... free the dentries for this superblock.
*/
repeat:
next = dentry_unused.next;
while (next != &dentry_unused) {//second traversal
tmp = next;
next = tmp->next;
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;//arguably this could break instead, since pass one moved every match to the front
if (atomic_read(&dentry->d_count))
continue;
dentry_stat.nr_unused--;//one fewer unused dentry
list_del(tmp);//now actually remove it from the list
INIT_LIST_HEAD(tmp);//re-initialise the unlinked node (arguably redundant)
prune_one_dentry(dentry);//tear the dentry down: unhash it, release its inode, free it
goto repeat;
}
spin_unlock(&dcache_lock);
}
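/*
 * Illustrative userspace sketch (an addition, not kernel code): the
 * two-pass scheme above on a minimal singly linked LRU. Pass one moves
 * every entry carrying the wanted tag to the head of the list; pass two
 * then frees from the head only, which mirrors why the kernel's
 * restart-after-each-dput loop degenerates into a single traversal.
 */
#include <stdlib.h>

struct entry {
	int tag;			/* stands in for dentry->d_sb */
	struct entry *next;
};

/* Pass one: move every entry with the given tag to the list head. */
static void move_tag_to_front(struct entry **head, int tag)
{
	struct entry **pp = head, *e;

	while ((e = *pp) != NULL) {
		if (e->tag == tag && e != *head) {
			*pp = e->next;		/* unlink from its position */
			e->next = *head;	/* and push it at the head */
			*head = e;
		} else {
			pp = &e->next;
		}
	}
}

/* Pass two: free the now-contiguous run of matching entries at the head. */
static void free_tag(struct entry **head, int tag)
{
	while (*head && (*head)->tag == tag) {
		struct entry *e = *head;
		*head = e->next;
		free(e);
	}
}

int main(void)
{
	struct entry *head = NULL, *e;
	int tags[] = { 2, 1, 2, 1 };

	for (int i = 0; i < 4; i++) {
		e = malloc(sizeof(*e));
		e->tag = tags[i];
		e->next = head;
		head = e;
	}
	move_tag_to_front(&head, 1);
	free_tag(&head, 1);		/* only tag-2 entries remain */
	while (head) {
		e = head;
		head = head->next;
		free(e);
	}
	return 0;
}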
static inline void prune_one_dentry(struct dentry * dentry)
{
struct dentry * parent;
list_del_init(&dentry->d_hash);
list_del(&dentry->d_child);
dentry_iput(dentry);
parent = dentry->d_parent;
d_free(dentry);
if (parent != dentry)//if parent == dentry this was a root dentry, so there is no parent reference to drop
dput(parent);
spin_lock(&dcache_lock);
}
//KULV:
//dev: the device number to sync; 0 means every device
int fsync_dev(kdev_t dev)
{
//first sync_buffers() pass, with wait == 0:
sync_buffers(dev, 0);//write out what we can, skipping over locked buffers instead of waiting
lock_kernel();
sync_supers(dev);//write back the superblocks: walk the global super_blocks list for matching devices
//and, where the filesystem provides sb->s_op->write_super(sb), call it
sync_inodes(dev);
DQUOT_SYNC(dev);
unlock_kernel();
return sync_buffers(dev, 1);//final pass: if a buffer is locked this time,
//we sleep on it, which may trigger a reschedule
}
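/*
 * Illustrative userspace sketch (an addition, not kernel code): the
 * closest userspace analogues of fsync_dev(). fsync(2) gives the
 * "do not return until the writes have completed" guarantee described
 * for the final sync_buffers() pass, and syncfs(2) (added long after
 * this 2.4-era code, in Linux 2.6.39) flushes the whole filesystem a
 * descriptor lives on. "/mnt/data/file" is a made-up path.
 */
#define _GNU_SOURCE		/* for syncfs() */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/data/file", O_RDONLY);

	if (fd < 0)
		return 1;
	fsync(fd);		/* wait for this file's dirty data and metadata */
	syncfs(fd);		/* flush the whole containing filesystem */
	close(fd);
	return 0;
}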
/* Call sync_buffers with wait!=0 to ensure that the call does not
* return until all buffer writes have completed. Sync() may return
* before the writes have finished; fsync() may not.
*/
/* Godamity-damn. Some buffers (bitmaps for filesystems)
* spontaneously dirty themselves without ever brelse being called.
* We will ultimately want to put these in a separate list, but for
* now we search all of the lists for dirty buffers.
*/
static int sync_buffers(kdev_t dev, int wait)
{//called first with wait == 0 (don't block), then with wait == 1 (block)
int i, retry, pass = 0, err = 0;
struct buffer_head * bh, *next;
/* One pass for no-wait, three for wait:
* 0) write out all dirty, unlocked buffers;
* 1) write out all dirty buffers, waiting if locked;
* 2) wait for completion by waiting for all buffers to unlock.
*/
do {
retry = 0;
/* We search all lists as a failsafe mechanism, not because we expect
* there to be dirty buffers on any of the other lists.
*/
repeat:
spin_lock(&lru_list_lock);
bh = lru_list[BUF_DIRTY];//head of the dirty list; buffers migrate to other lists (e.g. BUF_LOCKED) as they are written
if (!bh)
goto repeat2;
//nr_buffers_type[] holds the number of buffers on each of the LRU lists
for (i = nr_buffers_type[BUF_DIRTY]*2 ; i-- > 0 ; bh = next) {
next = bh->b_next_free;
if (!lru_list[BUF_DIRTY])//can the list really drain here when bh was just checked above? purely defensive
break;
if (dev && bh->b_dev != dev)//skip other devices' buffers; the leading "dev &&" makes dev == 0 mean "all devices"
continue;
if (buffer_locked(bh)) {//the buffer is locked: decide whether to wait on it
/* Buffer is locked; skip it unless wait is
* requested AND pass > 0.
*/
if (!wait || !pass) {
retry = 1;//wait is 0 on the first pass, so don't block: just note it and move on
continue;
}
atomic_inc(&bh->b_count);
spin_unlock(&lru_list_lock);
wait_on_buffer (bh);
atomic_dec(&bh->b_count);
goto repeat;
}
//from here on the buffer is unlocked
/* If an unlocked buffer is not uptodate, there has
* been an IO error. Skip it.
*/
if (wait && buffer_req(bh) && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_uptodate(bh)) {
err = -EIO;
continue;
}
/* Don't write clean buffers. Don't write ANY buffers
* on the third pass.
*/
if (!buffer_dirty(bh) || pass >= 2)
continue;
atomic_inc(&bh->b_count);
spin_unlock(&lru_list_lock);
ll_rw_block(WRITE, 1, &bh);//submit the write to disk
atomic_dec(&bh->b_count);
retry = 1;
goto repeat;//restart after each write: lru_list_lock was dropped for the I/O, so the list may have changed under us
}
repeat2:
bh = lru_list[BUF_LOCKED];
if (!bh) {
spin_unlock(&lru_list_lock);
break;
}
for (i = nr_buffers_type[BUF_LOCKED]*2 ; i-- > 0 ; bh = next) {
next = bh->b_next_free;
if (!lru_list[BUF_LOCKED])
break;
if (dev && bh->b_dev != dev)
continue;
if (buffer_locked(bh)) {
/* Buffer is locked; skip it unless wait is
* requested AND pass > 0.
*/
if (!wait || !pass) {
retry = 1;
continue;
}
atomic_inc(&bh->b_count);
spin_unlock(&lru_list_lock);
wait_on_buffer (bh); //sleep until the buffer is unlocked; may reschedule
spin_lock(&lru_list_lock);
atomic_dec(&bh->b_count);
goto repeat2;//restart because the list may have changed while we slept
}
}
spin_unlock(&lru_list_lock);
/* If we are waiting for the sync to succeed, and if any dirty
* blocks were written, then repeat; on the second pass, only
* wait for buffers being written (do not pass to write any
* more buffers on the second pass).
*/
	} while (wait && retry && ++pass <= 2);
	return err;
}

void sync_supers(kdev_t dev)
{
	struct super_block * sb;

	for (sb = sb_entry(super_blocks.next);
	     sb != sb_entry(&super_blocks);
	     sb = sb_entry(sb->s_list.next)) {
if (!sb->s_dev)
continue;
if (dev && sb->s_dev != dev)//not our device, skip it
continue;
if (!sb->s_dirt)
continue;
lock_super(sb);
if (sb->s_dev && sb->s_dirt && (!dev || dev == sb->s_dev))
if (sb->s_op && sb->s_op->write_super)
sb->s_op->write_super(sb);
//If the filesystem provides a write_super method, call it.
//For ext2 this is ext2_write_super(), registered as shown below.
/*ext2 does little here beyond refreshing the superblock's write time and the like:
static struct super_operations ext2_sops = {
read_inode: ext2_read_inode,
write_inode: ext2_write_inode,
put_inode: ext2_put_inode,
delete_inode: ext2_delete_inode,
put_super: ext2_put_super,
write_super: ext2_write_super,
statfs: ext2_statfs,
remount_fs: ext2_remount,
};*/
unlock_super(sb);
}
}
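/*
 * Illustrative sketch (an addition, not kernel code): the guarded
 * sb->s_op->write_super() call in sync_supers() above is the VFS's
 * usual ops-table dispatch. A filesystem registers a struct of
 * function pointers (as ext2 does with ext2_sops, quoted above) and
 * the VFS calls through it, testing for NULL because the hooks are
 * optional. A minimal reproduction with made-up "toyfs" names:
 */
#include <stdio.h>

struct sb;				/* stand-in for struct super_block */

struct super_ops {
	void (*write_super)(struct sb *);	/* optional hook */
};

struct sb {
	const struct super_ops *s_op;
};

static void toyfs_write_super(struct sb *sb)
{
	printf("toyfs: superblock written back\n");
}

static const struct super_ops toyfs_sops = {
	.write_super = toyfs_write_super,
};

int main(void)
{
	struct sb sb = { .s_op = &toyfs_sops };

	/* same guarded dispatch as sync_supers() above */
	if (sb.s_op && sb.s_op->write_super)
		sb.s_op->write_super(&sb);
	return 0;
}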
/**
* sync_inodes
* @dev: device to sync the inodes from.
*
* sync_inodes goes through the super block's dirty list,
* writes them out, and puts them back on the normal list.
*/
void sync_inodes(kdev_t dev)
{
struct super_block * sb = sb_entry(super_blocks.next);
/*
* Search the super_blocks array for the device(s) to sync.
*/
spin_lock(&inode_lock);
for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
if (!sb->s_dev)
continue;
if (dev && sb->s_dev != dev)
continue;
//found a superblock belonging to our device
sync_list(&sb->s_dirty);//s_dirty holds this superblock's dirty inodes; write them all back
if (dev)
break;
}
spin_unlock(&inode_lock);
}
static inline void sync_list(struct list_head *head)
{
struct list_head * tmp;
while ((tmp = head->prev) != head)//simply take each inode in turn and flush it
sync_one(list_entry(tmp, struct inode, i_list), 0);
/*sync_one() flushes the given inode's dirty and locked pages out to disk,
then wakes every process waiting on the inode.
*/
}
static inline void sync_one(struct inode *inode, int sync)
{
if (inode->i_state & I_LOCK) {//I_LOCK means another task is already writing this inode back; all we can do is wait
__iget(inode);//take a reference first so the inode cannot vanish while we sleep
spin_unlock(&inode_lock);
__wait_on_inode(inode);//nothing else for it: we must wait
iput(inode);//drop our reference
spin_lock(&inode_lock);
} else {
unsigned dirty;
list_del(&inode->i_list);//take it off the dirty list
list_add(&inode->i_list, atomic_read(&inode->i_count)
? &inode_in_use
: &inode_unused);//requeue on inode_in_use or inode_unused depending on the reference count; either way it stays cached
/* Set I_LOCK, reset I_DIRTY */
dirty = inode->i_state & I_DIRTY;//snapshot the dirty bits
inode->i_state |= I_LOCK;//mark it locked so nobody else touches it
inode->i_state &= ~I_DIRTY;//and clear the dirty bits
spin_unlock(&inode_lock);
filemap_fdatasync(inode->i_mapping);//i_mapping is the inode's page cache;
//this writes mapping->dirty_pages, i.e. all of the inode's dirty cached pages, to disk.
//With the dirty pages under way, write the inode itself if its metadata was dirty:
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
write_inode(inode, sync);
//write_inode() simply calls the filesystem's
//inode->i_sb->s_op->write_inode(inode, sync) to write the inode out.
//filemap_fdatawait() below also catches the locked pages (locked_pages),
//blocking in ___wait_on_page(page) until each write completes.
filemap_fdatawait(inode->i_mapping);
spin_lock(&inode_lock);
inode->i_state &= ~I_LOCK;
wake_up(&inode->i_wait);//whoever fell asleep on this inode, wake them all
}
}
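/*
 * Illustrative sketch (an addition, not kernel code): the flag sequence
 * in sync_one() above - snapshot the dirty bits, set I_LOCK, clear
 * I_DIRTY, all under inode_lock - is what lets a concurrent re-dirtying
 * of the inode be caught on the next sync instead of being lost.
 * Reproduced with plain bit flags; the values mirror the 2.4
 * definitions in include/linux/fs.h, to the best of my reading.
 */
#include <assert.h>

#define I_DIRTY_SYNC		1
#define I_DIRTY_DATASYNC	2
#define I_DIRTY_PAGES		4
#define I_LOCK			8
#define I_DIRTY			(I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)

int main(void)
{
	unsigned state = I_DIRTY_PAGES;	/* only the data pages are dirty */
	unsigned dirty;

	dirty = state & I_DIRTY;	/* snapshot before clearing */
	state |= I_LOCK;
	state &= ~I_DIRTY;

	/* matches the "don't write the inode itself" test in sync_one() */
	assert(!(dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)));
	assert(state & I_LOCK);
	return 0;
}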