Linux 0.11源码阅读笔记-文件IO流程

文件IO流程

用户进程read、write在高速缓冲块上读写数据,高速缓冲块和块设备交换数据。

  • 何时将磁盘块数据读取到缓冲块?
    [En]

    when will the disk block data be read to the buffer block?*

  • 何时将缓冲区块数据刷新到磁盘块?
    [En]

    when will buffer block data be brushed to disk block?*

Linux 0.11源码阅读笔记-文件IO流程

函数调用关系

  • read/write(c库函数,通过int 80调用sys_read/sys_write)
  • sys_read/sys_write
    • block_read/block_write
    • breada
      • getblk
      • sync_dev
      • ll_rw_block

sys_read与sys_write

代码文件:linux-0.11/fs/read_write.c

系统调用sys_read与sys_write是内核提供给用户程序调用的IO接口。若IO设备是块设备,底层分别调用block_read与block_write进行块设备的读写。

sys_read

int sys_read(unsigned int fd,char * buf,int count)
{
    struct file * file;
    struct m_inode * inode;

    // 通过文件描述符,在file表中找到file结构地址
    if (fd>=NR_OPEN || countfilp[fd]))
        return -EINVAL;
    if (!count)
        return 0;

    verify_area(buf,count);
    inode = file->f_inode;  // 通过file的f_inode访问inode节点

    //判断是什么设备:管道、字符设备、块设备
    //如果是块设备,调用block_read读块设备
    if (inode->i_pipe)
        return (file->f_mode&1)?read_pipe(inode,buf,count):-EIO;
    if (S_ISCHR(inode->i_mode))
        return rw_char(READ,inode->i_zone[0],buf,count,&file->f_pos);
    if (S_ISBLK(inode->i_mode))
        return block_read(inode->i_zone[0],&file->f_pos,buf,count);

    if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode)) {
        if (count+file->f_pos > inode->i_size)
            count = inode->i_size - file->f_pos;
        if (counti_mode=%06o\n\r",inode->i_mode);
    return -EINVAL;
}

sys_write

int sys_write(unsigned int fd,char * buf,int count)
{
    struct file * file;
    struct m_inode * inode;

    if (fd>=NR_OPEN || count filp[fd]))
        return -EINVAL;
    if (!count)
        return 0;

    //判断是什么设备:管道、字符设备、块设备
    //如果是块设备,调用block_write读块设备
    inode=file->f_inode;
    if (inode->i_pipe)
        return (file->f_mode&2)?write_pipe(inode,buf,count):-EIO;
    if (S_ISCHR(inode->i_mode))
        return rw_char(WRITE,inode->i_zone[0],buf,count,&file->f_pos);
    if (S_ISBLK(inode->i_mode))
        return block_write(inode->i_zone[0],&file->f_pos,buf,count);
    if (S_ISREG(inode->i_mode))
        return file_write(inode,file,buf,count);

    printk("(Write)inode->i_mode=%06o\n\r",inode->i_mode);
    return -EINVAL;
}

block_read与block_write

block_read与block_write负责块设备的读写。他们底层调用breada函数获取缓冲块,然后在缓冲块上读写数据。

block_write

代码文件:linux-0.11/fs/block_dev.c

int block_write(int dev, long * pos, char * buf, int count)
{
    int block = *pos >> BLOCK_SIZE_BITS;// pos所在文件数据块号
    int offset = *pos & (BLOCK_SIZE-1); // pos在数据块中偏移值
    int chars;
    int written = 0;
    struct buffer_head * bh;            //指向当前写缓冲块
    register char * p;

    // 向缓冲块中写数据,通过getblk获取缓冲块,获取缓冲块的同时会读取磁盘块数据到缓冲块
    // 数据量较多时,通过bread一次性缓存3个磁盘块数据到缓冲块,减小磁盘IO次数
    while (count>0) {
        chars = BLOCK_SIZE - offset;
        if (chars > count)
            chars=count;
        if (chars == BLOCK_SIZE)
            //获取高速缓冲块,并建立其与磁盘块的映射关系
            bh = getblk(dev,block);
        else
            // 读取的数据超过一个磁盘块,调用breada读多个块
            // breada底层调用getblk缓存3个连续磁盘块的数据
            bh = breada(dev,block,block+1,block+2,-1);
        block++;
        if (!bh)
            return written?written:-EIO;

        p = offset + bh->b_data;
        offset = 0;
        *pos += chars;
        written += chars;
        count -= chars;
        while (chars-->0)
            *(p++) = get_fs_byte(buf++);

        //完成对缓冲块的数据写入后,设置缓冲块的修改位dirt,然后释放缓冲块(引用计数减一)
        bh->b_dirt = 1;
        brelse(bh);
    }
    return written;
}

block_read

代码文件:linux-0.11/fs/block_dev.c

int block_read(int dev, unsigned long * pos, char * buf, int count)
{
    int block = *pos >> BLOCK_SIZE_BITS;
    int offset = *pos & (BLOCK_SIZE-1);
    int chars;
    int read = 0;
    struct buffer_head * bh;
    register char * p;

    while (count>0) {
        chars = BLOCK_SIZE-offset;
        if (chars > count)
            chars = count;
        if (!(bh = breada(dev,block,block+1,block+2,-1)))
            return read?read:-EIO;
        block++;

        p = offset + bh->b_data;
        offset = 0;
        *pos += chars;
        read += chars;
        count -= chars;
        while (chars-->0)
            put_fs_byte(*(p++),buf++);

        //完成对缓冲块的数据读取之后,释放缓冲块(引用计数减一)
        brelse(bh);
    }
    return read;
}

bread

代码文件:linux-0.11/fs/buffer.c

  • bread:块读取函数
  • breada:块提前预读函数
  • bread_page:页块读取函数,一个内存页通常为4k大小、磁盘块通常为1k大小

bread、breada、bread_page三者功能相似,用法不同。三者均会调用getblk获取缓冲块,并调用ll_rw_block读数据到缓冲块。

struct buffer_head * bread(int dev,int block)
{
    struct buffer_head * bh;

    if (!(bh=getblk(dev,block)))
        panic("bread: getblk returned NULL\n");
    if (bh->b_uptodate)
        return bh;

    // 调用ll_rw_block读磁盘块数据到缓冲区
    ll_rw_block(READ,bh);
    wait_on_buffer(bh);
    if (bh->b_uptodate)
        return bh;
    brelse(bh);
    return NULL;
}

getblk

代码文件:linux-0.11/fs/buffer.c

bread系列函数通过getblk获取缓冲块,在必要的时候,会调用sync_dev函数将脏缓冲块数据写入磁盘。

getblk代码逻辑复杂,需要对资源可用性进行复杂的检查。资源不可用时,需要睡眠,被唤醒之后又要进行一些检查判断资源是否可用。复杂逻辑可以暂时不考虑,避免陷入代码细节。

仅考虑getblk获取空闲块之后的代码逻辑。getblk获取可用缓冲块后,若缓冲块dirt位为1,表示缓冲块有数据未同步到磁盘,getblk将调用sync_dev将数据同步到磁盘,然后占用该缓冲块。

struct buffer_head * getblk(int dev,int block)
{
    struct buffer_head * tmp, * bh;

repeat:
    // 搜索hash表,如果指定块已经在高速缓冲中,则返回对应缓冲区头指针,退出。
    if ((bh = get_hash_table(dev,block)))
        return bh;
    // 扫描空闲数据块链表,寻找空闲缓冲区。
    tmp = free_list;
    do {
        // 如果该缓冲区正被使用(引用计数不等于0)
        if (tmp->b_count)
            continue;

        // 找到可用缓冲块,且满足一些条件
        if (!bh || BADNESS(tmp)b_next_free) != free_list);

    // 没有可用缓冲块,则睡眠等待有空闲缓冲块可用。
    // 当有空闲缓冲块可用时本进程会被的唤醒。
    if (!bh) {
        sleep_on(&buffer_wait); //睡眠在缓冲区上
        goto repeat;
    }

    //等待缓冲区解锁?
    wait_on_buffer(bh);
    if (bh->b_count)
        goto repeat;

    // 分配到的缓冲块dirt位为1(表示有数据未同步到磁盘)
    // 调用sync_dev将数据同步到磁盘,并睡眠在该缓冲块上
    while (bh->b_dirt) {
        sync_dev(bh->b_dev);
        wait_on_buffer(bh);
        if (bh->b_count)
            goto repeat;
    }
/* NOTE!! While we slept waiting for this block, somebody else might */
/* already have added "this" block to the cache. check it */
    if (find_buffer(dev,block))
        goto repeat;
/* OK, FINALLY we know that this buffer is the only one of it's kind, */
/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */

    // 对空闲缓冲块的处理
    // 占用空闲缓冲块。置引用计数为1,复位修改标志和有效(更新)标志。
    bh->b_count=1;
    bh->b_dirt=0;
    bh->b_uptodate=0;
    // 从原hash队列和空闲队列块链表中移出该缓冲区头。根据此新的设备号和块号重新插入空闲链表和hash队列
    // 让该缓冲区用于指定设备和其上的指定块。
    // 根据此新的设备号和块号重新哈希,并插入响应的hash队列
    remove_from_queues(bh);
    bh->b_dev=dev;
    bh->b_blocknr=block; //加锁
    insert_into_queues(bh);
    return bh;
}

sync_dev

代码文件:linux-0.11/fs/buffer.c

调用ll_rw_block将缓冲块内数据写入磁盘。getblk管理缓冲块时,若其它进程需要某缓冲块,且缓冲块具有脏(dirt位为1)数据,调用sync_dev将数据写入磁盘。

int sync_dev(int dev)
{
    int i;
    struct buffer_head * bh;

    bh = start_buffer;
    for (i=0 ; ib_dev != dev)
            continue;
        wait_on_buffer(bh);
        if (bh->b_dev == dev && bh->b_dirt)
            // 调用ll_rw_block写缓冲区数据到磁盘块
            ll_rw_block(WRITE,bh);
    }

    bh = start_buffer;
    for (i=0 ; ib_dev != dev)
            continue;
        wait_on_buffer(bh);
        if (bh->b_dev == dev && bh->b_dirt)
            ll_rw_block(WRITE,bh);
    }
    return 0;
}

ll_rw_block

代码文件:linux-0.11/kernel/blk_drv/ll_rw_blk.c

将缓冲块的数据写入磁盘块,将磁盘块数据读入缓冲块,底层通过设备请求队列完成读写。

[En]

The data of the buffer block is written to the disk block, and the disk block data is read into the buffer block, and the bottom layer completes the reading and writing through the device request queue.

void ll_rw_block(int rw, struct buffer_head * bh)
{
    unsigned int major;

    if ((major=MAJOR(bh->b_dev)) >= NR_BLK_DEV ||
    !(blk_dev[major].request_fn)) {
        printk("Trying to read nonexistent block-device\n\r");
        return;
    }

    // 将读写请求加入设备请求队列
    make_request(major,rw,bh);
}

设备中断处理程序

代码文件:linux-0.11/kernel/blk_drv/hd.c

  • 读完成中断处理程序

设备完成读扇区数据后,发出读中断,读中断处理程序read_intr执行。若当前读请求还有数据要读,则继续完成当前请求的数据读。因为,一次读请求可能读若干连续扇区数据,磁盘每次只能写读一个扇区数据。完成一次读请求的所有数据读之后,将调用do_hd_request处理下一个写请求。

static void read_intr(void)
{
    if (win_result()) {
        bad_rw_intr();
        do_hd_request();
        return;
    }
    port_read(HD_DATA,CURRENT->buffer,256);
    CURRENT->errors = 0;
    CURRENT->buffer += 512;
    CURRENT->sector++;
    if (--CURRENT->nr_sectors) {
        do_hd = &read_intr;
        return;
    }
    end_request(1);
    do_hd_request();
}
  • 写完成中断处理程序

它类似于编写中断处理程序的过程。

[En]

It is similar to the process of writing an interrupt handler.

static void write_intr(void)
{
    if (win_result()) {
        bad_rw_intr();
        do_hd_request(); //处理下一个请求
        return;
    }
    if (--CURRENT->nr_sectors) {
        CURRENT->sector++;
        CURRENT->buffer += 512;
        do_hd = &write_intr;
        port_write(HD_DATA,CURRENT->buffer,256);
        return;
    }
    end_request(1);
    do_hd_request();
}
  • 处理读写队列请求

处理设备请求队列的读写请求。设备中断处理程序不断调用do_hd_request处理请求队列,直到请求队列为空。

void do_hd_request(void)
{
    int i,r = 0;
    unsigned int block,dev;
    unsigned int sec,head,cyl;
    unsigned int nsect;

    INIT_REQUEST;
    dev = MINOR(CURRENT->dev);
    block = CURRENT->sector;
    if (dev >= 5*NR_HD || block+2 > hd[dev].nr_sects) {
        end_request(0);
        goto repeat;
    }
    block += hd[dev].start_sect;
    dev /= 5;
    __asm__("divl %4":"=a" (block),"=d" (sec):"0" (block),"1" (0),
        "r" (hd_info[dev].sect));
    __asm__("divl %4":"=a" (cyl),"=d" (head):"0" (block),"1" (0),
        "r" (hd_info[dev].head));
    sec++;
    nsect = CURRENT->nr_sectors;
    if (reset) {
        reset = 0;
        recalibrate = 1;
        reset_hd(CURRENT_DEV);
        return;
    }
    if (recalibrate) {
        recalibrate = 0;
        hd_out(dev,hd_info[CURRENT_DEV].sect,0,0,0,
            WIN_RESTORE,&recal_intr);
        return;
    }
    if (CURRENT->cmd == WRITE) {
        hd_out(dev,nsect,sec,head,cyl,WIN_WRITE,&write_intr);
        for(i=0 ; ibuffer,256);
    } else if (CURRENT->cmd == READ) {
        hd_out(dev,nsect,sec,head,cyl,WIN_READ,&read_intr);
    } else
        panic("unknown hd-command");
}

Original: https://www.cnblogs.com/lazyfiish/p/16081812.html
Author: LazyFish
Title: Linux 0.11源码阅读笔记-文件IO流程

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/523912/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球