内核内存管理

作者:朱涵俊
链接:https://zhuanlan.zhihu.com/p/140274586
来源:知乎
著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。

内存管理分为虚拟地址空间管理,物理内存管理,内核堆栈管理。

内核在启动的过程中需要不断申请内存,这些内存申请之后一般就不需要释放。频繁的申请释放会导致内存碎片,性能下降。因此内核堆kheap管理的原则是申请一次,终身使用。堆内存分为2类,一是小内存,二是页对齐的内存。因为有的内核对象需要页对齐,比如页表。

kernel/kheap.c

// Memory is only allocated and never released, so there will not be many
// memory blocks (holes). They are mainly caused by the BIOS, kernel data, etc.
// MAX_HEAP_HOLE sizes the descriptor pool of the 4K heap (heap_holes) and
// MAX_SMALL_HOLE sizes the descriptor pool of the small heap (heap_small_holes).
#define MAX_HEAP_HOLE   10
#define MAX_SMALL_HOLE  100
// Debug output switch: with "#if 1" DEBUG_PRINT expands to printk; flipping
// the condition selects inline_printk instead.
#if 1
#define DEBUG_PRINT printk
#else
#define DEBUG_PRINT inline_printk
#endif

// Descriptor for one contiguous memory region ("hole") tracked by the kernel
// heap. Descriptors live in fixed-size static pools and are linked either
// into a list of unused descriptors or into a list of regions with memory.
struct kheap_t;
struct kheap_t {
    ulong addr;                 // current start of the region; advanced each time memory is carved off
    ulong size;                 // bytes still available in the region
    ulong orgin_size;           // size recorded when the region was handed to the heap
    struct kheap_t *pnext;      // next descriptor in the list that currently holds this one
};
// State of the 4K-aligned heap: pfree4k heads the list of unused descriptors,
// phead4k heads the list of regions that still hold memory (ascending address).
static struct kheap_t *pfree4k;
static struct kheap_t *phead4k;
// Backing storage for the descriptors of the 4K heap and of the small heap.
static struct kheap_t heap_holes[MAX_HEAP_HOLE];
static struct kheap_t heap_small_holes[MAX_SMALL_HOLE];
// The locks protect lists that are shared by every execution context, so they
// must be plain globals. A thread-local lock would give each context its own
// copy and provide no mutual exclusion; "__thread static" is also rejected by
// GCC, which requires __thread to follow the storage-class specifier.
static spinlock_t spin_4k;
static spinlock_t spin_small;
// State of the small-object heap, same layout as the 4K heap (unsorted list).
static struct kheap_t *pfree_small;
static struct kheap_t *phead_small;
void init_kheap()
{
    for (int i = 0; i < MAX_HEAP_HOLE - 1; i++) {
        heap_holes[i].pnext = &heap_holes[i + 1];
    }
    heap_holes[MAX_HEAP_HOLE - 1].pnext = 0;
    pfree4k = heap_holes;
    phead4k = 0;
    init_spinlock(&spin_4k);

    for (int i = 0; i < MAX_SMALL_HOLE - 1; i++) {
        heap_small_holes[i].pnext = &heap_small_holes[i + 1];
    }
    heap_small_holes[MAX_SMALL_HOLE - 1].pnext = 0;
    pfree_small = heap_small_holes;
    phead_small = 0;
    init_spinlock(&spin_small);

}
/*
 * Hand the region [addr, addr + size) to the page-granular kernel heap.
 *
 * addr and size must both be multiples of 4K (asserted). A descriptor is
 * taken from the free-descriptor list and inserted into the region list,
 * which is kept sorted by ascending address. Panics when no descriptor is
 * left (MAX_HEAP_HOLE exhausted). Adjacent regions are not merged.
 */
void free_kheap_4k(ulong addr, ulong size)
{
    struct kheap_t *p;
    struct kheap_t *ph;

    ASSERT((addr & 0xfff) == 0 && (size & 0xfff) == 0); //4k aligned

    /*
     * Take the lock before looking at pfree4k: reading and NULL-checking it
     * outside the critical section lets two callers pick the same descriptor.
     */
    spin_lock(&spin_4k);
    p = pfree4k;
    if (!p) {
        panic("MAX_HEAP_HOLE too small!\n");
    }
    pfree4k = p->pnext;
    p->addr = addr;
    p->orgin_size = size;
    p->size = size;

    /* sorted by ascending address: early init can only use the low part of the heap */
    if (!phead4k || phead4k->addr > p->addr) {
        /* empty list, or the new region lies below the current head */
        p->pnext = phead4k;
        phead4k = p;
    } else {
        /* insert after the last region whose address is below the new one */
        ph = phead4k;
        while (ph->pnext && ph->pnext->addr < p->addr) {
            ph = ph->pnext;
        }
        p->pnext = ph->pnext;
        ph->pnext = p;
    }
    spin_unlock(&spin_4k);

    /* logged on every path, including insertion at the head of the list */
    DEBUG_PRINT("KHeap: Free:%lx,addr,size:%lx\n", addr, size);
}

/*
 * Allocate `size` bytes, rounded up to a multiple of 4K, from the
 * page-granular kernel heap.
 *
 * The first region (lowest address) that is large enough is used and the
 * memory is carved off its front. When no region fits, a physical page is
 * obtained with alloc_phy_page(), handed to the heap as a PAGE_SIZE region,
 * and the search is repeated.
 *
 * Returns the start address of the allocation, or a null pointer when no
 * memory is available. Memory returned here is never merged back.
 */
void *alloc_kheap_4k(ulong size)
{
    struct kheap_t *p;
    struct kheap_t **link;
    void *pret;

    /* size is a ulong, so it must be printed with %lx */
    DEBUG_PRINT("alloc 4k:%lx\n", size);
    if (size & 0xfff)
        size = (size & ~0xfff) + 0x1000;        //align 4k

    spin_lock(&spin_4k);
    for (;;) {
        ulong newaddr;

        /* first fit over the address-sorted region list */
        p = phead4k;
        while (p && p->size < size)
            p = p->pnext;
        if (p)
            break;

        /* the lock is dropped because free_kheap_4k() takes it itself */
        spin_unlock(&spin_4k);

        /*
         * Each refill adds one separate PAGE_SIZE region and regions are
         * never merged, so a larger request can never be satisfied by
         * refilling; fail instead of consuming pages until a panic.
         */
        if (size > PAGE_SIZE)
            return (void *)0;

        newaddr = alloc_phy_page();
        if (!newaddr)
            return (void *)0;
        free_kheap_4k(newaddr, PAGE_SIZE);
        spin_lock(&spin_4k);
    }

    /* carve the allocation off the front of the region */
    pret = (void *)p->addr;
    p->addr += size;
    p->size -= size;

    if (p->size == 0) {
        /* region exhausted: unlink it and recycle the descriptor */
        link = &phead4k;
        while (*link && *link != p)
            link = &(*link)->pnext;
        if (!*link)
            panic("corrupted link,FILE:%s,LINE:%d", __FILE__, __LINE__);
        *link = p->pnext;
        p->pnext = pfree4k;
        pfree4k = p;
    }
    spin_unlock(&spin_4k);
    return pret;
}

/*
 * Hand the region [addr, addr + size) to the small-object heap.
 *
 * addr and size must both be multiples of 4K (asserted). A descriptor is
 * taken from the small free-descriptor list and pushed onto the front of the
 * small region list (this list is not sorted). Panics when no descriptor is
 * left (MAX_SMALL_HOLE exhausted).
 */
void free_kheap_small(ulong addr, ulong size)
{
    struct kheap_t *p;

    ASSERT((addr & 0xfff) == 0 && (size & 0xfff) == 0); //4k aligned
    spin_lock(&spin_small);

    p = pfree_small;
    if (!p) {
        /* this pool is sized by MAX_SMALL_HOLE, not MAX_HEAP_HOLE */
        panic("MAX_SMALL_HOLE too small!\n");
    }
    pfree_small = p->pnext;
    p->addr = addr;
    p->orgin_size = size;
    p->size = size;
    p->pnext = phead_small;
    phead_small = p;
    spin_unlock(&spin_small);
    DEBUG_PRINT("KHeap: Small Free:%lx,addr,size:%lx\n", addr, size);
}

/*
 * Allocate `size` bytes (must be below 0x1000, asserted) from the
 * small-object heap.
 *
 * The first region in the small list that is large enough is used and the
 * memory is carved off its front. When no region fits, a fresh 4K block is
 * taken from alloc_kheap_4k(), handed to the small heap, and the search is
 * repeated. A region whose remainder drops to 16 bytes or less is retired;
 * its leftover bytes are not reused.
 *
 * Returns the start address of the allocation, or a null pointer when
 * alloc_kheap_4k() cannot supply more memory.
 */
void *alloc_kheap_small(ulong size)
{
    struct kheap_t *p;
    struct kheap_t **link;
    void *pret;

    DEBUG_PRINT("alloc small:%lx\n", size);
    ASSERT(size < 0x1000);      //small

    spin_lock(&spin_small);
    for (;;) {
        ulong newaddr;

        /* first fit over the small region list */
        p = phead_small;
        while (p && p->size < size)
            p = p->pnext;
        if (p)
            break;

        /* the lock is dropped because free_kheap_small() takes it itself */
        spin_unlock(&spin_small);
        newaddr = (ulong) alloc_kheap_4k(0x1000);
        if (!newaddr)
            return (void *)0;
        free_kheap_small(newaddr, 0x1000);
        spin_lock(&spin_small);
    }

    /* carve the allocation off the front of the region */
    pret = (void *)p->addr;
    p->addr += size;
    p->size -= size;

    if (p->size <= 16) {
        /* remainder too small to be useful: unlink the region and recycle its descriptor */
        link = &phead_small;
        while (*link && *link != p)
            link = &(*link)->pnext;
        if (!*link)
            panic("corrupted link,FILE:%s,LINE:%d", __FILE__, __LINE__);
        *link = p->pnext;
        p->pnext = pfree_small;
        pfree_small = p;
    }
    spin_unlock(&spin_small);
    return pret;
}
/*
 * Print every region currently held by the page-granular heap (address and
 * remaining size), in list order. The list is walked without taking spin_4k.
 */
void kheap_debug()
{
    struct kheap_t *ptr = phead4k;

    printk("kheap 4k:\n");
    while (ptr) {
        printk("addr:%lx,size:%lx\n", ptr->addr, ptr->size);
        ptr = ptr->pnext;
    }
}

初始化之后kheap是没有内存可以申请的,需要进行释放。

内核加载的位置是0x100000,即1M,那0-1M位置去掉bios部分,都可以释放。1M以上去掉内核使用的部分可以释放。

multiboot协议在加载内核之前,保存了内存相关信息:寄存器eax存放magic,ebx存放multiboot信息结构的地址。如果自己写加载器,也可以调用bios中断来获取:ax=0xe820,int 0x15。

arch/x86_64/multiboot.c对内存信息进行读取,然后释放kheap内存块,释放虚拟地址空间块,后续才能申请这些资源。

在entry64.S那里,在跳转到arch/x86_64/main.c之前,

pop %esi #restore magic

pop %edi #restore multiboot_info_type

根据传参约定,edi就是第一个参数,esi就是第二个参数

因此bp_main的原型应该是:void bp_main(u32 info_addr, u32 magic)

今天遇到一个坑:

 unsigned int *p=(unsigned int *)0LL;
 printk("%x",*p);

上面代码会导致不停重启,编译选项-O0就不会,-O2就会。后来发现是GCC优化的时候看到引用地址0的内容,直接插入了一个异常指令。

学习地址: Dpdk/网络协议栈/vpp/OvS/DDos/NFV/虚拟化/高性能专家-学习视频教程-腾讯课堂
更多DPDK相关学习资料有需要的可以自行报名学习,免费订阅,久学习,或点击这里加qun免费
领取,关注我持续更新哦! !

汇编结果:

bp_main:
.LFB84:
        .loc 1 42 0
        .cfi_startproc
.LVL2:
        subq    $8, %rsp
.LCFI0:
        .cfi_def_cfa_offset 16
        .loc 1 45 0
        xorl    %eax, %eax
.LBB12:
.LBB13:
        .loc 1 37 0
        movb    $0, g_yaos(%rip)
.LVL3:
.LBE13:
.LBE12:
        .loc 1 45 0
        call    uart_early_init
.LVL4:
        .loc 1 46 0
        movl    0, %eax
        ud2   #直接插入了一个异常指令就结束函数了。
        .cfi_endproc
.LFE84:
        .size   bp_main, .-bp_main
        .section        .text.unlikely

后来在Makefile里面CFLAGS添加-fno-delete-null-pointer-checks解决。

long mode分页大小有4K,2M,1G可以选择,虽然是64位的,但是地址空间只用了48位,或者52位。如果按4K一页来算,每级页表512项(4K/8字节),即每级索引占9位;48位-4K对齐的12位=36位,36/9=4,需要4级页表。如果用2M一页,(48-21)/9=3,需要3级页表;如果是1G大小,(48-30)/9=2,2级就行。

pgtable.c

struct pml4_t {
    u64 pml4e[PML4_PER_PDP];// first-level page table
};
struct pdp_t {
    u64 pdpte[PDP_PER_PD];// second-level page table
};
struct pd_t {

    u64 pde[PD_PER_PTE];// third-level page table; entries point directly at 2M pages (with 4K pages a fourth-level table would also be needed)
};
// Statically allocated page tables covering the 0-4G range:
// 4 page directories * 512 entries * 2M pages = 4G.
// Every table is 4096-byte aligned so its address can be stored in a
// parent entry with the low 12 bits free for flags.
static struct pml4_t pml4 __attribute__ ((aligned(4096)));
static struct pdp_t first_pdp __attribute__ ((aligned(4096)));
static struct pd_t first_pd __attribute__ ((aligned(4096)));
static struct pd_t second_pd __attribute__ ((aligned(4096)));
static struct pd_t third_pd __attribute__ ((aligned(4096)));
static struct pd_t fourth_pd __attribute__ ((aligned(4096)));
static void init_pml4()
{
    extern ulong __max_phy_mem_addr;
    int map_page_p2v(ulong paddr, ulong vaddr, ulong flag);

    memset(&pml4, 0, sizeof(pml4));
    memset(&first_pdp, 0, sizeof(first_pdp));

    ASSERT((((ulong) & pml4) & 0xfff) == 0);
    ASSERT((((ulong) & first_pdp) & 0xfff) == 0);
    ASSERT((((ulong) & first_pd) & 0xfff) == 0);

    //init first 4G 4*512*2M
    pml4.pml4e[0] = ((ulong) & first_pdp) | PTE_P | PTE_W;
    first_pdp.pdpte[0] = ((ulong) & first_pd) | PTE_P | PTE_W;
    first_pdp.pdpte[1] = ((ulong) & second_pd) | PTE_P | PTE_W;

    first_pdp.pdpte[2] = ((ulong) & third_pd) | PTE_P | PTE_W;

    first_pdp.pdpte[3] = ((ulong) & fourth_pd) | PTE_P | PTE_W;

    for (int i = 0; i < PD_PER_PTE; i++) {
        first_pd.pde[i] = i * PAGE_SIZE | PTE_PS | PTE_W | PTE_P;
        second_pd.pde[i] =
            (i + PD_PER_PTE) * PAGE_SIZE | PTE_PS | PTE_W | PTE_P;
        third_pd.pde[i] =
            (i + 2 * PD_PER_PTE) * PAGE_SIZE | PTE_PS | PTE_W | PTE_P;
        fourth_pd.pde[i] =
            (i + 3 * PD_PER_PTE) * PAGE_SIZE | PTE_PS | PTE_W | PTE_P;
    }
    DEBUG_PRINT("__max_phy_mem_addr:%lx\n", __max_phy_mem_addr);
    if (__max_phy_mem_addr > NUMBER_4G) {
        ulong addr = NUMBER_4G;

        for (; addr + PAGE_SIZE < __max_phy_mem_addr; addr += PAGE_SIZE) {
            if (OK != map_page_p2v(addr, addr, PTE_PS | PTE_P | PTE_W)) {
                panic("Not enough init heap memory!");
            }
        }
    }
    write_cr3((ulong) & pml4);
}

/*
 * Walk the page tables currently loaded in CR3 and return the address of the
 * page-directory entry that covers `addr`, printing the table addresses and
 * the three indices along the way.
 *
 * Returns 0 when the PML4 or PDP entry on the path holds no table address.
 * Table addresses taken from the entries are dereferenced directly.
 * NOTE(review): the PDP index is masked with PML4_PER_PDP - 1 although it
 * indexes pdpte[] - presumably both constants are equal; confirm.
 */
u64 get_pte_with_addr(u64 addr)
{
    ulong i, j, k, pml4base;
    struct pml4_t *p_pml4;
    struct pdp_t *p_pdp;
    struct pd_t *p_pd;

    pml4base = read_cr3();
    pml4base &= ~0xfff;         /* strip the low 12 bits of CR3 */
    printf(" %lx ", pml4base);
    i = (addr >> PML4_SHIFT) & (PML4_NR - 1);
    j = (addr >> PDP_SHIFT) & (PML4_PER_PDP - 1);
    k = (addr >> PD_SHIFT) & (PD_PER_PTE - 1);
    p_pml4 = (struct pml4_t *)pml4base;
    p_pdp = (struct pdp_t *)(p_pml4->pml4e[i] & ~0xfff);
    /* pointers are cast to ulong so the argument matches %lx */
    printf(" %lx ", (ulong) p_pdp);
    if (!p_pdp)
        return 0;
    p_pd = (struct pd_t *)(p_pdp->pdpte[j] & ~0xfff);
    printf(" %lx ", (ulong) p_pd);
    /* i, j and k are ulong, so they need %lu rather than %d */
    printf("%lu %lu %lu ", i, j, k);
    if (!p_pd)
        return 0;
    return (u64) & p_pd->pde[k];
}

/*
 * Map the physical page at `paddr` to the virtual address `vaddr` in the
 * static pml4 by writing `paddr | flag` into the matching page-directory
 * entry.
 *
 * paddr must be 4K aligned and flag may only use the low 12 bits (both
 * asserted). Missing PDP / PD tables are allocated from alloc_kheap_4k(),
 * zeroed, and linked in with PTE_P | PTE_W | PTE_U. Table addresses taken
 * from the entries are dereferenced directly.
 *
 * Returns OK on success and E_NOMEM when a table cannot be allocated.
 */
int map_page_p2v(ulong paddr, ulong vaddr, ulong flag)
{
    ulong i, j, k;
    struct pdp_t *p_pdp;
    struct pd_t *pd;

    ASSERT((paddr & 0xfff) == 0);       //align 4k
    ASSERT((flag & ~0xfff) == 0);
    i = (vaddr >> PML4_SHIFT) & (PML4_NR - 1);
    j = (vaddr >> PDP_SHIFT) & (PML4_PER_PDP - 1);
    k = (vaddr >> PD_SHIFT) & (PD_PER_PTE - 1);

    p_pdp = (struct pdp_t *)(pml4.pml4e[i] & ~0xfff);
    if (!p_pdp) {
        /* no second-level table yet for this PML4 slot */
        p_pdp = (struct pdp_t *)alloc_kheap_4k(4096);
        DEBUG_PRINT("new pdp:%lx\n", p_pdp);
        ASSERT(((ulong) p_pdp & 0xfff) == 0);
        if (!p_pdp)
            return E_NOMEM;
        memset(p_pdp, 0, 4096);
        pml4.pml4e[i] = (ulong) p_pdp | PTE_P | PTE_W | PTE_U;
    }

    pd = (struct pd_t *)(~0xfff & (p_pdp->pdpte[j]));
    if (!pd) {
        /* no page directory yet for this PDP slot */
        pd = (struct pd_t *)alloc_kheap_4k(4096);
        ASSERT(((ulong) pd & 0xfff) == 0);
        if (!pd)
            return E_NOMEM;
        memset(pd, 0, 4096);
        p_pdp->pdpte[j] = (ulong) pd | PTE_P | PTE_W | PTE_U;
        DEBUG_PRINT("pd:%lx\n", pd);
    }

    pd->pde[k] = paddr | flag;

    /*
     * Callers compare the result with OK; falling off the end of a non-void
     * function would hand them an indeterminate value.
     */
    return OK;
}
//io&#x7A7A;&#x95F4;&#x6620;&#x5C04;&#xFF0C;&#x8BBE;&#x7F6E;PTE_PCD&#xFF0C;&#x7981;&#x7528;cache&#xFF0C;&#x8D77;&#x59CB;&#x5730;&#x5740;&#x4E3A;IO_MEM_BASE,0xfffffff800000000
//&#x9875;&#x8868;&#x5730;&#x5740;&#x662F;48&#x4F4D;&#x7684;&#xFF0C;&#x4F46;&#x53D6;&#x503C;&#x8303;&#x56F4;&#x662F;64&#x4F4D;&#x7684;&#xFF0C;&#x7A7A;&#x7684;&#x662F;&#x4E2D;&#x95F4;&#x90E8;&#x5206;&#xFF0C;
//48&#x4F4D;&#x5730;&#x5740;&#x7EBF;&#x7684;&#x60C5;&#x51B5;&#x4E0B;&#x662F;&#x4F4E;&#x4F4D;0-7FFF FFFFFFFF&#xFF0C;&#x9AD8;&#x4F4D;&#x662F;FFF8000 00000000-FFFFFFFF FFFFFFFF
void *ioremap_nocache(ulong addr,ulong size)
{
    ulong paddr=addr&~(PAGE_SIZE-1);
    size+=addr-paddr;
    for(ulong added=0;added<=size;added+=page_size){ map_page_p2v(paddr, paddr + io_mem_base, pte_p | pte_w pte_pwt pte_pcd pte_ps); paddr+="PAGE_SIZE;" } return (void *)(addr+io_mem_base); void init_pgtable() { init_pml4(); init_pgtable_ap() write_cr3((ulong) & pml4); < code></=size;added+=page_size){>

运行本例:

git clone https://github.com/saneee/x86_64_kernel.git
cd 0005
make qemu

运行结果:

内核内存管理

原文链接:https://zhuanlan.zhihu.com/p/140274586

Original: https://blog.csdn.net/lingshengxiyou/article/details/127820693
Author: lingshengxiyou
Title: 内核内存管理

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/660117/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球