作者:朱涵俊
链接:https://zhuanlan.zhihu.com/p/140274586
来源:知乎
著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。
内核在启动的过程中需要不断申请内存,这些内存申请之后一般就不需要释放。频繁的申请释放会导致内存碎片,性能下降。因此内核堆kheap管理的原则是申请一次,终身使用。堆内存分为2类,一是小内存,二是页对齐的内存,因为有的内核对象需要页对齐,比如页表。
kernel/kheap.c
// Because memory is only allocated and never released, there will not be many
// memory blocks. They mainly come from the BIOS, kernel data and so on.
// MAX_HEAP_HOLE sizes the descriptor pool of the 4K heap (heap_holes) and
// MAX_SMALL_HOLE sizes the pool of the small-object heap (heap_small_holes).
#define MAX_HEAP_HOLE 10
#define MAX_SMALL_HOLE 100
// Debug trace switch: with "#if 1" DEBUG_PRINT expands to printk, otherwise
// it expands to inline_printk.
#if 1
#define DEBUG_PRINT printk
#else
#define DEBUG_PRINT inline_printk
#endif
/*
 * Descriptor of one contiguous memory region managed by the kernel heap.
 * Descriptors are chained through pnext, either on a list of usable regions
 * or on a pool of unused descriptors.
 */
struct kheap_t {
	ulong addr;		/* start of the part that has not been handed out yet */
	ulong size;		/* bytes still available starting at addr */
	ulong orgin_size;	/* size recorded when the region was given to the heap */
	struct kheap_t *pnext;	/* next descriptor in the list, 0 at the end */
};
/*
 * State of the two kernel heaps.
 *
 * pfree4k / pfree_small   : pools of unused descriptors.
 * phead4k / phead_small   : lists of regions that still have free space
 *                           (phead4k is kept sorted by ascending address).
 * heap_holes / heap_small_holes : backing storage for the descriptors.
 *
 * The spinlocks protect lists that are shared by every caller, so they must
 * be ordinary shared objects. They were previously declared
 * "__thread static": GCC rejects that specifier order, and a thread-local
 * lock would give each thread its own copy and therefore no mutual exclusion.
 */
static struct kheap_t *pfree4k;
static struct kheap_t *phead4k;
static struct kheap_t heap_holes[MAX_HEAP_HOLE];
static struct kheap_t heap_small_holes[MAX_SMALL_HOLE];
static spinlock_t spin_4k;
static spinlock_t spin_small;
static struct kheap_t *pfree_small;
static struct kheap_t *phead_small;
void init_kheap()
{
for (int i = 0; i < MAX_HEAP_HOLE - 1; i++) {
heap_holes[i].pnext = &heap_holes[i + 1];
}
heap_holes[MAX_HEAP_HOLE - 1].pnext = 0;
pfree4k = heap_holes;
phead4k = 0;
init_spinlock(&spin_4k);
for (int i = 0; i < MAX_SMALL_HOLE - 1; i++) {
heap_small_holes[i].pnext = &heap_small_holes[i + 1];
}
heap_small_holes[MAX_SMALL_HOLE - 1].pnext = 0;
pfree_small = heap_small_holes;
phead_small = 0;
init_spinlock(&spin_small);
}
/*
 * Hand a 4K-aligned region over to the 4K heap.
 *
 * addr: start address of the region, must be 4K aligned.
 * size: length of the region in bytes, must be a multiple of 4K.
 *
 * A descriptor is taken from the pool of unused descriptors and inserted
 * into the region list, which is kept sorted by ascending address. Panics
 * when the descriptor pool is exhausted. Regions are not merged with their
 * neighbours.
 */
void free_kheap_4k(ulong addr, ulong size)
{
	struct kheap_t *p;
	struct kheap_t *ph;

	ASSERT((addr & 0xfff) == 0 && (size & 0xfff) == 0);	//4k aligned

	/*
	 * The descriptor pool is shared state: take the lock before looking at
	 * pfree4k (it used to be read before the lock was acquired).
	 */
	spin_lock(&spin_4k);
	p = pfree4k;
	if (!p) {
		panic("MAX_HEAP_HOLE too small!\n");
	}
	pfree4k = p->pnext;
	p->addr = addr;
	p->orgin_size = size;
	p->size = size;

	//sort by addr asc, early init can only use low kheap
	if (!phead4k || phead4k->addr > p->addr) {
		/* empty list or new lowest address: becomes the head */
		p->pnext = phead4k;
		phead4k = p;
	} else {
		/* find the last region that starts below the new one */
		ph = phead4k;
		while (ph->pnext && ph->pnext->addr < p->addr) {
			ph = ph->pnext;
		}
		p->pnext = ph->pnext;
		ph->pnext = p;
	}
	spin_unlock(&spin_4k);

	/* traced on both insertion paths (the head path used to return silently) */
	DEBUG_PRINT("KHeap: Free:%lx,addr,size:%lx\n", addr, size);
}
/*
 * Allocate 4K-aligned memory from the 4K heap.
 *
 * size: requested size in bytes; rounded up to a multiple of 4K.
 *
 * Returns the start address of the allocation, or 0 when alloc_phy_page()
 * cannot supply more memory. The memory is carved off the front of the first
 * region (lowest address first) that is large enough.
 *
 * NOTE(review): when no region fits, one PAGE_SIZE region is added per loop
 * iteration and regions are never merged, so a request larger than PAGE_SIZE
 * looks like it can never be satisfied by the refill loop - confirm.
 */
void *alloc_kheap_4k(ulong size)
{
	struct kheap_t *p;
	void *pret;

	/* size is a ulong, so it must be printed with %lx (was %x) */
	DEBUG_PRINT("alloc 4k:%lx\n", size);
	if (size & 0xfff)
		size = (size & ~0xfff) + 0x1000;	//align 4k

	spin_lock(&spin_4k);
	p = phead4k;
	while (p && p->size < size)
		p = p->pnext;

	while (!p) {
		ulong newaddr;

		/* free_kheap_4k() takes the lock itself, so drop it while refilling */
		spin_unlock(&spin_4k);
		newaddr = alloc_phy_page();
		if (!newaddr)
			return (void *)0;
		free_kheap_4k(newaddr, PAGE_SIZE);
		spin_lock(&spin_4k);
		p = phead4k;
		while (p && p->size < size)
			p = p->pnext;
	}

	/* carve the allocation off the front of the region */
	pret = (void *)p->addr;
	p->addr += size;
	p->size -= size;

	if (p->size == 0) {
		/* region used up: unlink it and return the descriptor to the pool */
		if (phead4k == p) {
			phead4k = p->pnext;
		} else {
			struct kheap_t *ptr = phead4k;

			while (ptr->pnext && ptr->pnext != p)
				ptr = ptr->pnext;
			if (!ptr->pnext)
				panic("corrupted link,FILE:%s,LINE:%d", __FILE__, __LINE__);
			ptr->pnext = p->pnext;
		}
		p->pnext = pfree4k;
		pfree4k = p;
	}
	spin_unlock(&spin_4k);
	return pret;
}
/*
 * Hand a region over to the small-object heap.
 *
 * addr: start address of the region, must be 4K aligned.
 * size: length of the region in bytes, must be a multiple of 4K.
 *
 * A descriptor is taken from the pool of unused small-heap descriptors and
 * pushed onto the front of the region list (the list is not sorted). Panics
 * when the descriptor pool, sized by MAX_SMALL_HOLE, is exhausted.
 */
void free_kheap_small(ulong addr, ulong size)
{
	struct kheap_t *p;

	ASSERT((addr & 0xfff) == 0 && (size & 0xfff) == 0);	//4k aligned
	spin_lock(&spin_small);
	p = pfree_small;
	if (!p) {
		/* this pool is heap_small_holes[MAX_SMALL_HOLE], not MAX_HEAP_HOLE */
		panic("MAX_SMALL_HOLE too small!\n");
	}
	pfree_small = p->pnext;
	p->addr = addr;
	p->orgin_size = size;
	p->size = size;
	p->pnext = phead_small;
	phead_small = p;
	spin_unlock(&spin_small);
	DEBUG_PRINT("KHeap: Small Free:%lx,addr,size:%lx\n", addr, size);
}
/*
 * Allocate a small object (less than 4K) from the small-object heap.
 *
 * size: requested size in bytes, must be below 0x1000. The size is used as
 *       given; no rounding or alignment is applied here.
 *
 * Returns the start address of the allocation, or 0 when alloc_kheap_4k()
 * cannot supply a new 4K block. When no region is large enough, a fresh 4K
 * block is taken from the 4K heap and added to the small heap.
 *
 * The statement that drops nearly exhausted regions was restored from a
 * line damaged by HTML extraction; it mirrors the unlink code of
 * alloc_kheap_4k().
 */
void *alloc_kheap_small(ulong size)
{
	struct kheap_t *p;
	void *pret;

	DEBUG_PRINT("alloc small:%lx\n", size);
	ASSERT(size < 0x1000);	//small
	spin_lock(&spin_small);
	p = phead_small;
	while (p && p->size < size)
		p = p->pnext;

	while (!p) {
		ulong newaddr;

		/* both callees take their own locks, so drop ours while refilling */
		spin_unlock(&spin_small);
		newaddr = (ulong) alloc_kheap_4k(0x1000);
		if (!newaddr)
			return (void *)0;
		free_kheap_small(newaddr, 0x1000);
		spin_lock(&spin_small);
		p = phead_small;
		while (p && p->size < size)
			p = p->pnext;
	}

	p->size -= size;
	if (p->size <= 16) {
		/*
		 * A remainder of 16 bytes or less is dropped: unlink the region
		 * and return its descriptor to the pool.
		 */
		struct kheap_t *ptr = phead_small;

		if (ptr == p) {
			phead_small = phead_small->pnext;
		}
		else if (ptr->pnext == p) {
			ptr->pnext = p->pnext;
		}
		else {
			while (ptr->pnext && ptr->pnext != p)
				ptr = ptr->pnext;
			if (!ptr->pnext)
				panic("corrupted link,FILE:%s,LINE:%d", __FILE__, __LINE__);
			ptr->pnext = p->pnext;
		}
		p->pnext = pfree_small;
		pfree_small = p;
	}
	/* the descriptor is still valid here because the lock is held */
	pret = (void *)p->addr;
	p->addr += size;
	spin_unlock(&spin_small);
	return pret;
}
/*
 * Print every region of the 4K heap (start address and remaining size).
 * The list is walked without taking spin_4k.
 */
void kheap_debug()
{
	struct kheap_t *ptr = phead4k;

	printk("kheap 4k:\n");
	while (ptr) {
		printk("addr:%lx,size:%lx\n", ptr->addr, ptr->size);
		ptr = ptr->pnext;
	}
}
初始化之后kheap是没有内存可以申请的,需要进行释放。
内核加载的位置是0x100000,即1M,那0-1M位置去掉bios部分,都可以释放。1M以上去掉内核使用的部分可以释放。
按照multiboot协议,加载器在跳转到内核之前已经保存了内存相关信息,并通过寄存器传给内核:eax存放magic,ebx存放multiboot信息结构的地址。如果自己写加载器,也可以调用bios中断来获取:ax=0xe820,int 0x15。
arch/x86_64/multiboot.c对内存信息进行读取,然后释放kheap内存块,释放虚拟地址空间块,后续才能申请这些资源。
在entry64.S那里,在跳转到arch/x86_64/main.c之前,
pop %esi #restore magic
pop %edi #restore multiboot_info_type
根据传参约定,edi就是第一个参数,esi就是第二个参数
因此bp_main的原型应该是:void bp_main(u32 info_addr, u32 magic)
今天遇到一个坑:
unsigned int *p=(unsigned int *)0LL;
printk("%x",*p);
上面代码会导致不停重启,编译选项-O0就不会,-O2就会。后来发现是GCC优化的时候看到引用地址0的内容,直接插入了一个异常指令。
学习地址: Dpdk/网络协议栈/vpp/OvS/DDos/NFV/虚拟化/高性能专家-学习视频教程-腾讯课堂
更多DPDK相关学习资料有需要的可以自行报名学习,免费订阅,久学习,或点击这里加qun免费
领取,关注我持续更新哦! !
汇编结果:
bp_main:
.LFB84:
.loc 1 42 0
.cfi_startproc
.LVL2:
subq $8, %rsp
.LCFI0:
.cfi_def_cfa_offset 16
.loc 1 45 0
xorl %eax, %eax
.LBB12:
.LBB13:
.loc 1 37 0
movb $0, g_yaos(%rip)
.LVL3:
.LBE13:
.LBE12:
.loc 1 45 0
call uart_early_init
.LVL4:
.loc 1 46 0
movl 0, %eax
ud2 #直接插入了一个异常指令就结束函数了。
.cfi_endproc
.LFE84:
.size bp_main, .-bp_main
.section .text.unlikely
后来在Makefile里面CFLAGS添加-fno-delete-null-pointer-checks解决。
long mode分页大小有4K,2M,1G可以选择,虽然是64位的,但是地址空间只用了48位,或者52位。如果按4K一页来算,每级页表512项(4K/8字节),48位-4K对齐的12位=36位,需要4级页表。如果用2M一页,则需要3级页表,如果是1G大小,则是2级就行。
pgtable.c
// Top-level table of the long-mode page-table hierarchy.
struct pml4_t {
u64 pml4e[PML4_PER_PDP];// first-level page table
};
// Page-directory-pointer table, referenced from a pml4_t entry.
struct pdp_t {
u64 pdpte[PDP_PER_PD];// second-level page table
};
// Page directory, referenced from a pdp_t entry.
struct pd_t {
u64 pde[PD_PER_PTE];// third-level page table; points directly at 2M pages. With 4K pages a fourth level would be needed
};
/*
 * Statically allocated page tables for the 0-4G address range:
 * 4 page directories * 512 entries * 2M = 4G.
 * Every table is aligned to 4096 bytes.
 */
static struct pml4_t pml4 __attribute__ ((aligned(4096)));
static struct pdp_t first_pdp __attribute__ ((aligned(4096)));
static struct pd_t first_pd __attribute__ ((aligned(4096)));
static struct pd_t second_pd __attribute__ ((aligned(4096)));
static struct pd_t third_pd __attribute__ ((aligned(4096)));
static struct pd_t fourth_pd __attribute__ ((aligned(4096)));
static void init_pml4()
{
extern ulong __max_phy_mem_addr;
int map_page_p2v(ulong paddr, ulong vaddr, ulong flag);
memset(&pml4, 0, sizeof(pml4));
memset(&first_pdp, 0, sizeof(first_pdp));
ASSERT((((ulong) & pml4) & 0xfff) == 0);
ASSERT((((ulong) & first_pdp) & 0xfff) == 0);
ASSERT((((ulong) & first_pd) & 0xfff) == 0);
//init first 4G 4*512*2M
pml4.pml4e[0] = ((ulong) & first_pdp) | PTE_P | PTE_W;
first_pdp.pdpte[0] = ((ulong) & first_pd) | PTE_P | PTE_W;
first_pdp.pdpte[1] = ((ulong) & second_pd) | PTE_P | PTE_W;
first_pdp.pdpte[2] = ((ulong) & third_pd) | PTE_P | PTE_W;
first_pdp.pdpte[3] = ((ulong) & fourth_pd) | PTE_P | PTE_W;
for (int i = 0; i < PD_PER_PTE; i++) {
first_pd.pde[i] = i * PAGE_SIZE | PTE_PS | PTE_W | PTE_P;
second_pd.pde[i] =
(i + PD_PER_PTE) * PAGE_SIZE | PTE_PS | PTE_W | PTE_P;
third_pd.pde[i] =
(i + 2 * PD_PER_PTE) * PAGE_SIZE | PTE_PS | PTE_W | PTE_P;
fourth_pd.pde[i] =
(i + 3 * PD_PER_PTE) * PAGE_SIZE | PTE_PS | PTE_W | PTE_P;
}
DEBUG_PRINT("__max_phy_mem_addr:%lx\n", __max_phy_mem_addr);
if (__max_phy_mem_addr > NUMBER_4G) {
ulong addr = NUMBER_4G;
for (; addr + PAGE_SIZE < __max_phy_mem_addr; addr += PAGE_SIZE) {
if (OK != map_page_p2v(addr, addr, PTE_PS | PTE_P | PTE_W)) {
panic("Not enough init heap memory!");
}
}
}
write_cr3((ulong) & pml4);
}
/*
 * Walk the page tables currently loaded in CR3 and locate the
 * page-directory entry that covers addr.
 *
 * addr: address to look up.
 *
 * Returns the address of the entry slot (&pd->pde[k]), or 0 when the PML4
 * or PDP entry on the path holds no table address. The table addresses and
 * indices found on the way are printed. Only the address bits of each entry
 * are inspected; the present flag is not checked.
 */
u64 get_pte_with_addr(u64 addr)
{
	ulong i, j, k, pml4base;
	struct pml4_t *p_pml4;
	struct pdp_t *p_pdp;
	struct pd_t *p_pd;

	pml4base = read_cr3();
	pml4base &= ~0xfff;	/* strip the low 12 bits of CR3 */
	printf(" %lx ", pml4base);

	i = (addr >> PML4_SHIFT) & (PML4_NR - 1);
	j = (addr >> PDP_SHIFT) & (PML4_PER_PDP - 1);
	k = (addr >> PD_SHIFT) & (PD_PER_PTE - 1);

	p_pml4 = (struct pml4_t *)pml4base;
	p_pdp = (struct pdp_t *)(p_pml4->pml4e[i] & ~0xfff);
	/* %lx needs an integer argument, so pointers are cast to ulong */
	printf(" %lx ", (ulong) p_pdp);
	if (!p_pdp)
		return 0;

	p_pd = (struct pd_t *)(p_pdp->pdpte[j] & ~0xfff);
	printf(" %lx ", (ulong) p_pd);
	/* the indices are ulong, so print them with %lu (was %d) */
	printf("%lu %lu %lu ", i, j, k);
	if (!p_pd)
		return 0;
	return (u64) & p_pd->pde[k];
}
/*
 * Map physical address paddr at virtual address vaddr with a
 * page-directory-level entry.
 *
 * paddr: physical address, must be 4K aligned.
 * vaddr: virtual address to map.
 * flag:  entry flags; only the low 12 bits may be set.
 *
 * Missing PDP and PD tables are allocated from the 4K heap and zeroed.
 * Returns OK on success and E_NOMEM when a table cannot be allocated.
 * An existing entry for vaddr is overwritten.
 */
int map_page_p2v(ulong paddr, ulong vaddr, ulong flag)
{
	ulong i, j, k;
	struct pdp_t *p_pdp;
	struct pd_t *pd;

	ASSERT((paddr & 0xfff) == 0);	//align 4k
	ASSERT((flag & ~0xfff) == 0);
	i = (vaddr >> PML4_SHIFT) & (PML4_NR - 1);
	j = (vaddr >> PDP_SHIFT) & (PML4_PER_PDP - 1);
	k = (vaddr >> PD_SHIFT) & (PD_PER_PTE - 1);

	p_pdp = (struct pdp_t *)(pml4.pml4e[i] & ~0xfff);
	if (!p_pdp) {
		p_pdp = (struct pdp_t *)alloc_kheap_4k(4096);
		if (!p_pdp)
			return E_NOMEM;
		DEBUG_PRINT("new pdp:%lx\n", (ulong) p_pdp);
		ASSERT(((ulong) p_pdp & 0xfff) == 0);
		memset(p_pdp, 0, 4096);
		pml4.pml4e[i] = (ulong) p_pdp | PTE_P | PTE_W | PTE_U;
	}

	pd = (struct pd_t *)(~0xfff & (p_pdp->pdpte[j]));
	if (!pd) {
		pd = (struct pd_t *)alloc_kheap_4k(4096);
		if (!pd)
			return E_NOMEM;
		ASSERT(((ulong) pd & 0xfff) == 0);
		memset(pd, 0, 4096);
		p_pdp->pdpte[j] = (ulong) pd | PTE_P | PTE_W | PTE_U;
		DEBUG_PRINT("pd:%lx\n", (ulong) pd);
	}

	pd->pde[k] = paddr | flag;
	/*
	 * Report success explicitly: the function used to fall off the end,
	 * leaving the value that init_pml4() compares against OK undefined.
	 */
	return OK;
}
//io空间映射,设置PTE_PCD,禁用cache,起始地址为IO_MEM_BASE,0xfffffff800000000
//页表地址是48位的,但取值范围是64位的,空的是中间部分,
//48位地址线的情况下是低位0-7FFF FFFFFFFF,高位是FFF8000 00000000-FFFFFFFF FFFFFFFF
void *ioremap_nocache(ulong addr,ulong size)
{
ulong paddr=addr&~(PAGE_SIZE-1);
size+=addr-paddr;
for(ulong added=0;added<=size;added+=page_size){ map_page_p2v(paddr, paddr + io_mem_base, pte_p | pte_w pte_pwt pte_pcd pte_ps); paddr+="PAGE_SIZE;" } return (void *)(addr+io_mem_base); void init_pgtable() { init_pml4(); init_pgtable_ap() write_cr3((ulong) & pml4); < code></=size;added+=page_size){>
运行本例:
git clone https://github.com/saneee/x86_64_kernel.git
cd x86_64_kernel/0005
make qemu
运行结果:
原文链接:https://zhuanlan.zhihu.com/p/140274586
Original: https://blog.csdn.net/lingshengxiyou/article/details/127820693
Author: lingshengxiyou
Title: 内核内存管理
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/660117/
转载文章受原作者版权保护。转载请注明原作者出处!