Linux 0.11源码阅读笔记-文件IO流程

2023年5月27日上午4:07 • Linux • 阅读 93

文件IO流程

用户进程read、write在高速缓冲块上读写数据，高速缓冲块和块设备交换数据。

何时将磁盘块数据读取到缓冲块？

[En]

When is disk block data read into a buffer block?
何时将缓冲区块数据刷新到磁盘块？
[En]
When is buffer block data flushed to the disk block?

函数调用关系

read/write（c库函数，通过int 80调用sys_read/sys_write）
sys_read/sys_write
- block_read/block_write
- breada
  - getblk
  - sync_dev
  - ll_rw_block

sys_read与sys_write

代码文件：linux-0.11/fs/read_write.c

系统调用sys_read与sys_write是内核提供给用户程序调用的IO接口。若IO设备是块设备，底层分别调用block_read与block_write进行块设备的读写。

sys_read

int sys_read(unsigned int fd,char * buf,int count)
{
    struct file * file;
    struct m_inode * inode;

    // 通过文件描述符，在file表中找到file结构地址
    if (fd>=NR_OPEN || countfilp[fd]))
        return -EINVAL;
    if (!count)
        return 0;

    verify_area(buf,count);
    inode = file->f_inode;  // 通过file的f_inode访问inode节点

    //判断是什么设备：管道、字符设备、块设备
    //如果是块设备，调用block_read读块设备
    if (inode->i_pipe)
        return (file->f_mode&1)?read_pipe(inode,buf,count):-EIO;
    if (S_ISCHR(inode->i_mode))
        return rw_char(READ,inode->i_zone[0],buf,count,&file->f_pos);
    if (S_ISBLK(inode->i_mode))
        return block_read(inode->i_zone[0],&file->f_pos,buf,count);

    if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode)) {
        if (count+file->f_pos > inode->i_size)
            count = inode->i_size - file->f_pos;
        if (counti_mode=%06o\n\r",inode->i_mode);
    return -EINVAL;
}

sys_write

int sys_write(unsigned int fd,char * buf,int count)
{
    struct file * file;
    struct m_inode * inode;

    if (fd>=NR_OPEN || count filp[fd]))
        return -EINVAL;
    if (!count)
        return 0;

    //判断是什么设备：管道、字符设备、块设备
    //如果是块设备，调用block_write读块设备
    inode=file->f_inode;
    if (inode->i_pipe)
        return (file->f_mode&2)?write_pipe(inode,buf,count):-EIO;
    if (S_ISCHR(inode->i_mode))
        return rw_char(WRITE,inode->i_zone[0],buf,count,&file->f_pos);
    if (S_ISBLK(inode->i_mode))
        return block_write(inode->i_zone[0],&file->f_pos,buf,count);
    if (S_ISREG(inode->i_mode))
        return file_write(inode,file,buf,count);

    printk("(Write)inode->i_mode=%06o\n\r",inode->i_mode);
    return -EINVAL;
}

block_read与block_write

block_read与block_write负责块设备的读写。它们底层调用breada函数（block_write在整块写入时直接调用getblk）获取缓冲块，然后在缓冲块上读写数据。

block_write

代码文件：linux-0.11/fs/block_dev.c

/*
 * block_write - copy `count` bytes from the user buffer `buf` into the
 * buffer-cache blocks of device `dev`, starting at byte offset *pos.
 *
 * *pos is advanced by the number of bytes consumed. Returns the number of
 * bytes written, or -EIO if no buffer could be obtained before anything
 * was written.
 */
int block_write(int dev, long * pos, char * buf, int count)
{
    int blk = *pos >> BLOCK_SIZE_BITS;      /* block containing *pos */
    int in_blk = *pos & (BLOCK_SIZE-1);     /* byte offset inside that block */
    int total = 0;

    while (count > 0) {
        struct buffer_head * bh;
        char * dst;
        int n = BLOCK_SIZE - in_blk;        /* bytes that fit in this block */
        int i;

        if (n > count)
            n = count;

        /* A whole-block write takes the buffer straight from getblk();
           a partial-block write goes through breada(), which is also handed
           the next two block numbers (presumably so the untouched bytes of
           the block are valid and following blocks are pre-read). */
        bh = (n == BLOCK_SIZE) ? getblk(dev,blk)
                               : breada(dev,blk,blk+1,blk+2,-1);
        blk++;
        if (!bh)
            return total ? total : -EIO;

        dst = bh->b_data + in_blk;
        in_blk = 0;                         /* later blocks start at offset 0 */
        *pos += n;
        total += n;
        count -= n;
        for (i = 0; i < n; i++)
            dst[i] = get_fs_byte(buf++);

        /* Mark the buffer modified, then drop our reference to it. */
        bh->b_dirt = 1;
        brelse(bh);
    }
    return total;
}

block_read

代码文件：linux-0.11/fs/block_dev.c

/*
 * block_read - copy `count` bytes from the buffer-cache blocks of device
 * `dev`, starting at byte offset *pos, into the user buffer `buf`.
 *
 * *pos is advanced by the number of bytes delivered. Returns the number of
 * bytes read, or -EIO if no buffer could be obtained before anything was
 * read.
 */
int block_read(int dev, unsigned long * pos, char * buf, int count)
{
    int blk = *pos >> BLOCK_SIZE_BITS;      /* block containing *pos */
    int in_blk = *pos & (BLOCK_SIZE-1);     /* byte offset inside that block */
    int total = 0;

    while (count > 0) {
        struct buffer_head * bh;
        char * src;
        int n = BLOCK_SIZE - in_blk;        /* bytes available in this block */
        int i;

        if (n > count)
            n = count;

        /* Fetch the block; the next two block numbers are passed along too. */
        bh = breada(dev,blk,blk+1,blk+2,-1);
        if (!bh)
            return total ? total : -EIO;
        blk++;

        src = bh->b_data + in_blk;
        in_blk = 0;                         /* later blocks start at offset 0 */
        *pos += n;
        total += n;
        count -= n;
        for (i = 0; i < n; i++)
            put_fs_byte(src[i],buf++);

        /* Done with this buffer: drop our reference to it. */
        brelse(bh);
    }
    return total;
}

bread

代码文件：linux-0.11/fs/buffer.c

bread：块读取函数
breada：块提前预读函数
bread_page：页块读取函数，一个内存页通常为4k大小、磁盘块通常为1k大小

bread、breada、bread_page三者功能相似，用法不同。三者均会调用getblk获取缓冲块，并调用ll_rw_block读数据到缓冲块。

/*
 * bread - return a buffer holding block `block` of device `dev`.
 *
 * If the buffer obtained from getblk() is not already up to date, a READ is
 * issued via ll_rw_block() and the caller waits for it. Returns the buffer
 * on success; if it is still not up to date after the wait, the buffer is
 * released and NULL is returned.
 */
struct buffer_head * bread(int dev,int block)
{
    struct buffer_head * buf = getblk(dev,block);

    if (!buf)
        panic("bread: getblk returned NULL\n");

    if (!buf->b_uptodate) {
        /* Contents are stale: read the disk block into the buffer. */
        ll_rw_block(READ,buf);
        wait_on_buffer(buf);
        if (!buf->b_uptodate) {
            brelse(buf);
            return NULL;
        }
    }
    return buf;
}

getblk

代码文件：linux-0.11/fs/buffer.c

bread系列函数通过getblk获取缓冲块，在必要的时候，会调用sync_dev函数将脏缓冲块数据写入磁盘。

getblk代码逻辑复杂，需要对资源可用性进行复杂的检查。资源不可用时，需要睡眠，被唤醒之后又要进行一些检查判断资源是否可用。复杂逻辑可以暂时不考虑，避免陷入代码细节。

仅考虑getblk获取空闲块之后的代码逻辑。getblk获取可用缓冲块后，若缓冲块dirt位为1，表示缓冲块有数据未同步到磁盘，getblk将调用sync_dev将数据同步到磁盘，然后占用该缓冲块。

/*
 * getblk - obtain the buffer head for block `block` of device `dev`.
 *
 * If the block is already hashed, that buffer is returned. Otherwise the
 * free list is scanned for the unused buffer with the lowest BADNESS, the
 * function waits until it is unlocked and clean (calling sync_dev for dirty
 * buffers), and the buffer is re-keyed to (dev, block). Every wait may let
 * the state change, so the checks restart from `repeat` whenever the chosen
 * buffer has been taken in the meantime.
 */
struct buffer_head * getblk(int dev,int block)
{
    struct buffer_head * tmp, * bh;

repeat:
    /* Already in the cache? Then hand back the existing buffer. */
    if ((bh = get_hash_table(dev,block)))
        return bh;
    /* Scan the circular free list for a candidate buffer. */
    tmp = free_list;
    do {
        /* Skip buffers that are still referenced. */
        if (tmp->b_count)
            continue;

        /* Keep the candidate with the lowest BADNESS; stop early on a
           buffer whose BADNESS is 0. */
        if (!bh || BADNESS(tmp)<BADNESS(bh)) {
            bh = tmp;
            if (!BADNESS(tmp))
                break;
        }
/* and repeat until we find something good */
    } while ((tmp = tmp->b_next_free) != free_list);

    /* No unused buffer at all: sleep on buffer_wait, then start over. */
    if (!bh) {
        sleep_on(&buffer_wait);
        goto repeat;
    }

    /* Wait for the candidate to be unlocked; if somebody grabbed it while
       we waited, start over. */
    wait_on_buffer(bh);
    if (bh->b_count)
        goto repeat;

    /* The candidate still holds unwritten data: sync its device, wait on
       the buffer again, and re-check that it is still unused. */
    while (bh->b_dirt) {
        sync_dev(bh->b_dev);
        wait_on_buffer(bh);
        if (bh->b_count)
            goto repeat;
    }
/* NOTE!! While we slept waiting for this block, somebody else might */
/* already have added "this" block to the cache. check it */
    if (find_buffer(dev,block))
        goto repeat;
/* OK, FINALLY we know that this buffer is the only one of it's kind, */
/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */

    /* Claim the buffer: one reference, not dirty, contents not valid. */
    bh->b_count=1;
    bh->b_dirt=0;
    bh->b_uptodate=0;
    /* Take it off its old queues, re-key it to the requested device and
       block, and insert it again under the new key. */
    remove_from_queues(bh);
    bh->b_dev=dev;
    bh->b_blocknr=block;
    insert_into_queues(bh);
    return bh;
}

sync_dev

代码文件：linux-0.11/fs/buffer.c

调用ll_rw_block将缓冲块内数据写入磁盘。getblk管理缓冲块时，若其它进程需要某缓冲块，且缓冲块具有脏（dirt位为1）数据，调用sync_dev将数据写入磁盘。

int sync_dev(int dev)
{
    int i;
    struct buffer_head * bh;

    bh = start_buffer;
    for (i=0 ; ib_dev != dev)
            continue;
        wait_on_buffer(bh);
        if (bh->b_dev == dev && bh->b_dirt)
            // 调用ll_rw_block写缓冲区数据到磁盘块
            ll_rw_block(WRITE,bh);
    }

    bh = start_buffer;
    for (i=0 ; ib_dev != dev)
            continue;
        wait_on_buffer(bh);
        if (bh->b_dev == dev && bh->b_dirt)
            ll_rw_block(WRITE,bh);
    }
    return 0;
}

ll_rw_block

代码文件：linux-0.11/kernel/blk_drv/ll_rw_blk.c

将缓冲块的数据写入磁盘块，将磁盘块数据读入缓冲块，底层通过设备请求队列完成读写。

[En]

The data of the buffer block is written to the disk block, and the disk block data is read into the buffer block, and the bottom layer completes the reading and writing through the device request queue.

/*
 * ll_rw_block - hand a buffer read/write to the block device's request queue.
 *
 * `rw` is the operation (READ or WRITE) and `bh` the buffer to transfer.
 * If the buffer's major number is out of range or the device has no
 * request function, a message is printed and nothing is queued.
 */
void ll_rw_block(int rw, struct buffer_head * bh)
{
    unsigned int major = MAJOR(bh->b_dev);

    if (major < NR_BLK_DEV && blk_dev[major].request_fn) {
        /* Valid device: add the request to its queue. */
        make_request(major,rw,bh);
        return;
    }

    printk("Trying to read nonexistent block-device\n\r");
}

设备中断处理程序

代码文件：linux-0.11/kernel/blk_drv/hd.c

读完成中断处理程序

设备完成读扇区数据后，发出读中断，读中断处理程序read_intr执行。若当前读请求还有数据要读，则继续完成当前请求的数据读。因为，一次读请求可能读若干连续扇区数据，而每次中断只传输一个扇区数据。完成一次读请求的所有数据读之后，将调用do_hd_request处理下一个请求。

/*
 * read_intr - hard-disk interrupt handler for a read in progress.
 *
 * On error the request is handed to bad_rw_intr() and the queue is
 * processed again. Otherwise one chunk is copied from the data port into
 * the current request's buffer; while sectors remain the handler re-arms
 * itself, and once the count reaches zero the request is completed and the
 * next one is started.
 */
static void read_intr(void)
{
    /* Non-zero win_result() is treated as a failed transfer. */
    if (win_result()) {
        bad_rw_intr();
        do_hd_request();
        return;
    }
    /* Transfer 256 units from the data port; the buffer pointer advances
       by 512 bytes below, so these are presumably 16-bit words (one sector). */
    port_read(HD_DATA,CURRENT->buffer,256);
    CURRENT->errors = 0;
    CURRENT->buffer += 512;
    CURRENT->sector++;
    /* More sectors pending in this request: stay installed as the handler. */
    if (--CURRENT->nr_sectors) {
        do_hd = &read_intr;
        return;
    }
    /* Whole request done: complete it and start the next queued request. */
    end_request(1);
    do_hd_request();
}

写完成中断处理程序

它与读完成中断处理程序的处理过程类似。

[En]

It is similar to the flow of the read-completion interrupt handler.

/*
 * write_intr - hard-disk interrupt handler for a write in progress.
 *
 * On error the request is handed to bad_rw_intr() and the queue is
 * processed again. While sectors remain, the handler advances to the next
 * sector, re-arms itself and pushes the next chunk to the data port; once
 * the count reaches zero the request is completed and the next one is
 * started.
 */
static void write_intr(void)
{
    /* Non-zero win_result() is treated as a failed transfer. */
    if (win_result()) {
        bad_rw_intr();
        do_hd_request(); /* process the next request */
        return;
    }
    /* More sectors pending: advance, re-install the handler, then send the
       next 256 units (the buffer moved by 512 bytes, so presumably 16-bit
       words, i.e. one sector). */
    if (--CURRENT->nr_sectors) {
        CURRENT->sector++;
        CURRENT->buffer += 512;
        do_hd = &write_intr;
        port_write(HD_DATA,CURRENT->buffer,256);
        return;
    }
    /* Whole request done: complete it and start the next queued request. */
    end_request(1);
    do_hd_request();
}

处理读写队列请求

处理设备请求队列的读写请求。设备中断处理程序不断调用do_hd_request处理请求队列，直到请求队列为空。

void do_hd_request(void)
{
    int i,r = 0;
    unsigned int block,dev;
    unsigned int sec,head,cyl;
    unsigned int nsect;

    INIT_REQUEST;
    dev = MINOR(CURRENT->dev);
    block = CURRENT->sector;
    if (dev >= 5*NR_HD || block+2 > hd[dev].nr_sects) {
        end_request(0);
        goto repeat;
    }
    block += hd[dev].start_sect;
    dev /= 5;
    __asm__("divl %4":"=a" (block),"=d" (sec):"0" (block),"1" (0),
        "r" (hd_info[dev].sect));
    __asm__("divl %4":"=a" (cyl),"=d" (head):"0" (block),"1" (0),
        "r" (hd_info[dev].head));
    sec++;
    nsect = CURRENT->nr_sectors;
    if (reset) {
        reset = 0;
        recalibrate = 1;
        reset_hd(CURRENT_DEV);
        return;
    }
    if (recalibrate) {
        recalibrate = 0;
        hd_out(dev,hd_info[CURRENT_DEV].sect,0,0,0,
            WIN_RESTORE,&recal_intr);
        return;
    }
    if (CURRENT->cmd == WRITE) {
        hd_out(dev,nsect,sec,head,cyl,WIN_WRITE,&write_intr);
        for(i=0 ; ibuffer,256);
    } else if (CURRENT->cmd == READ) {
        hd_out(dev,nsect,sec,head,cyl,WIN_READ,&read_intr);
    } else
        panic("unknown hd-command");
}

Original: https://www.cnblogs.com/lazyfiish/p/16081812.html
Author: LazyFish
Title: Linux 0.11源码阅读笔记-文件IO流程

原创文章受到原创版权保护。转载请注明出处：https://www.johngo689.com/523912/

转载文章受原作者版权保护。转载请注明原作者出处！

Linux

【自取】最近整理的，有需要可以领取学习：

Linux核心资料大放送~

全栈面试题汇总（持续更新&可下载）

一个提高学习100%效率的工具！

【超详细】深度学习面试题目！

LeetCode Python刷题答案下载！

LeetCode Java版刷题答案下载！

LeetCode C++ 版本，抓紧保存！

LeetCode GO语言刷题答案下载！

【已解决】wordpress 修改固定链接伪静态URL出现nginx 404错误

一、站点设置打开站点设置，选择伪静态，选择wordpress 二、wordpress设置打开wordpress后台，选择设置 —》固定链接选择一个你喜欢的格式点…

Linux 2023年6月14日
00114
最新超详细VMware下CentOS系统安装

一、了解CentOS系统 CentOS是免费的、开源的、可以重新分发的开源操作系统，CentOS（Community Enterprise Operating System，中文意…

Linux 2023年6月15日
00144
go将青龙面板里面的脚本文件都下载到本地

纯粹练手用的，大家轻喷青龙面板的脚本文件可以下载到本地，这样的话自己可以研究一下对应的脚本文件，能学到更多的知识，原理其实很简单，F12一下就知道了，青龙面板使用Request H…

Linux 2023年6月7日
00154
Web前端基础精品入门（HTML+CSS+JavaScript+JS）[爱前端]听课笔记（1）：网页中的logo的制作

前边课程没有仔细记录 css 学习笔记 index.html如下：网页显示但是我们不需要在网页中显示文字，用如下式样文字不见，但是搜索引擎还能找到文字目前高度没有居中，如果…

Linux 2023年6月14日
00108
powershell配置自动补全

powershell配置自动补全一、需求：看到老师上课用mac命令行有自动补全功能，发现真的爽。但是自己的windows powershell不能使用自动补全功能。有了需求，就…

Linux 2023年6月13日
00142
Linux虚拟机上按安装jdk1.8.0

Linux虚拟机上按安装jdk1.8.0 1.准备工作 jdk1.8.0下载地址： http://www.oracle.com/technetwork/java/javase/do…

Linux 2023年6月11日
0092
搭建docker镜像仓库(二)：使用harbor搭建本地镜像仓库

一.系统环境二.前言三.Harbor 四.使用harbor搭建私有镜像仓库 4.1 环境介绍 4.2 k8smaster节点安装配置harbor 4.2.1 安装harbor离…

Linux 2023年6月7日
00147
线段树扫描线（一）矩形面积以hdu 1542为例

还是老规矩，传送门 hdu 1542 不做过多解释了，就是给出n个矩形，求出这些矩形所覆盖的面积和。由于n很小，因而这道题不是必须用线段树先想想怎么办，先来一个例图（稍微有点复杂…

Linux 2023年6月6日
00115
电脑中图标变白色教你怎么修复

复制一下代码到文本文档中另存为 .bat 然后点击好的配置文件右键以管理员身份运行就会解决桌面变白的问题 @echo off taskkill /f /im explorer….

Linux 2023年6月7日
00102
QML 信号与响应方法的总结

如果面试过程中，面试官想了解你对 Qt 的理解有多少，少不了会涉及到信号槽这一块，毕竟这是 Qt 最经典的一项技术。刚开笔，我可能有点狂妄了。信号槽，分为两部分，信号和对信号响…

Linux 2023年6月6日
00112
玩转SpringBoot之捣鼓 Redis

我们都知道，把首页数据放到Redis里，能够加快首页数据的访问速度。但是我们要如何准确又快速的将 Redis 整合到自己的 SpringBoot2.x 项目中呢？今天阿淼就带大家爬…

Linux 2023年5月28日
00127
[python] arch linux install mysql and use with python

1. 概述 2. 安装 MySQL / MariaDB 3. 运行 MySQL / MariaDB 4. 配置 MySQL / MariaDB 5. 使用 MySQL / Mari…

Linux 2023年6月8日
0098
redis高级

1 redis高可用主从复制存在的问题： 1 主从复制，主节点发生故障，需要做故障转移，可以手动转移：让其中一个slave变成master—>哨兵 2 主从复制，只能主…

Linux 2023年6月14日
0092
Java 内功修炼之数据结构与算法（二）

一、二叉树补充、多叉树 1、二叉树（非递归实现遍历）（1）前提前面一篇介绍了二叉树、顺序二叉树、线索二叉树、哈夫曼树等树结构。可参考：https://www.cnblogs.c…

Linux 2023年6月11日
0091
位运算（一）

位运算的一般应用功能例子运算去掉最后一位 1110101->111010 x>>1 在最后加0 1110101->11101010 x< 通过…

Linux 2023年6月8日
00146
null和空字符串对于查询where条件语句的影响

在数据库中我们进行数据处理的过程中，对于null值或者空字符串的情况对于这种数据我们进行计算平均值以及查询过程中如何进行对于这类数据的处理呢？ step1:建表：create ta…

Linux 2023年6月14日
00104

2024 年 5 月
一	二	三	四	五	六	日
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30	31

Linux 0.11源码阅读笔记-文件IO流程

函数调用关系

sys_read与sys_write

sys_read

sys_write

block_read与block_write

block_write

block_read

bread

getblk

sync_dev

ll_rw_block

设备中断处理程序

大家都在看