diff --git a/arch/x64/arch-setup.cc b/arch/x64/arch-setup.cc
index a2fdb49a53de0da9c86ad6e0a2f13f9db15dc3cc..30662df333689ee89fd80efea824055826b9c4a7 100644
--- a/arch/x64/arch-setup.cc
+++ b/arch/x64/arch-setup.cc
@@ -1,6 +1,7 @@
 #include "arch-setup.hh"
 #include "mempool.hh"
 #include "mmu.hh"
+#include "processor.hh"
 #include "types.hh"
 #include <alloca.h>
 #include <string.h>
@@ -38,34 +39,98 @@ struct e820ent {
 
 multiboot_info_type* multiboot_info;
 
-void arch_setup_free_memory()
+void setup_temporary_phys_map()
 {
-    // copy to stack so we don't free it now
-    auto mb = *multiboot_info;
-    auto tmp = alloca(mb.mmap_length);
-    memcpy(tmp, reinterpret_cast<void*>(mb.mmap_addr), mb.mmap_length);
-    auto p = tmp;
-    ulong edata;
-    asm ("lea .edata, %0" : "=rm"(edata));
-    while (p < tmp + mb.mmap_length) {
+    // duplicate 1:1 mapping into phys_mem
+    u64 cr3 = processor::read_cr3();
+    auto pt = reinterpret_cast<u64*>(cr3);
+    // assumes phys_mem = 0xffff800000000000
+    pt[256] = pt[0];
+}
+
+void for_each_e820_entry(void* e820_buffer, unsigned size, void (*f)(e820ent e))
+{
+    auto p = e820_buffer;
+    while (p < e820_buffer + size) {
         auto ent = static_cast<e820ent*>(p);
         if (ent->type == 1) {
-            memory::phys_mem_size += ent->size;
-            if (ent->addr < edata) {
-                u64 adjust = std::min(edata - ent->addr, ent->size);
-                ent->addr += adjust;
-                ent->size -= adjust;
-            }
-            // FIXME: limit to mapped 1GB for now
-            // later map all of memory and free it too
-            u64 memtop = 1 << 30;
-            if (ent->addr + ent->size >= memtop) {
-                auto excess = ent->addr + ent->size - memtop;
-                excess = std::min(ent->size, excess);
-                ent->size -= excess;
-            }
-            mmu::free_initial_memory_range(ent->addr, ent->size);
+            f(*ent);
         }
         p += ent->ent_size + 4;
     }
 }
+
+bool intersects(const e820ent& ent, u64 a)
+{
+    return a > ent.addr && a < ent.addr + ent.size;
+}
+
+e820ent truncate_below(e820ent ent, u64 a)
+{
+    u64 delta = a - ent.addr;
+    ent.addr += delta;
+    ent.size -= delta;
+    return ent;
+}
+
+e820ent truncate_above(e820ent ent, u64 a)
+{
+    u64 delta = ent.addr + ent.size - a;
+    ent.size -= delta;
+    return ent;
+}
+
+void arch_setup_free_memory()
+{
+    static constexpr u64 phys_mem = 0xffff800000000000;
+    static ulong edata;
+    asm ("movl $.edata, %0" : "=rm"(edata));
+    // copy to stack so we don't free it now
+    auto mb = *multiboot_info;
+    auto e820_buffer = alloca(mb.mmap_length);
+    auto e820_size = mb.mmap_length;
+    memcpy(e820_buffer, reinterpret_cast<void*>(mb.mmap_addr), e820_size);
+    for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) {
+        memory::phys_mem_size += ent.size;
+    });
+    constexpr u64 initial_map = 1 << 30; // 1GB mapped by startup code
+    setup_temporary_phys_map();
+
+    // Free all memory up to 1GB. We can't free any more, because no
+    // page tables have been set up, so we can't reference the memory being
+    // freed.
+    for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) {
+        // can't free anything below edata, it's core code.
+        // FIXME: can free below 2MB.
+        if (ent.addr + ent.size <= edata) {
+            return;
+        }
+        if (intersects(ent, edata)) {
+            ent = truncate_below(ent, edata);
+        }
+        // ignore anything above 1GB, we haven't mapped it yet
+        if (ent.addr >= initial_map) {
+            return;
+        }
+        if (intersects(ent, initial_map)) {
+            ent = truncate_above(ent, initial_map);
+        }
+        mmu::free_initial_memory_range(ent.addr, ent.size);
+    });
+    mmu::linear_map(phys_mem, 0, initial_map, initial_map);
+    // map the core
+    mmu::linear_map(0, 0, edata, 0x200000);
+    // now that we have some free memory, we can start mapping the rest
+    mmu::switch_to_runtime_page_table();
+    for_each_e820_entry(e820_buffer, e820_size, [] (e820ent ent) {
+        // Ignore memory already freed above
+        if (ent.addr + ent.size <= initial_map) {
+            return;
+        }
+        if (intersects(ent, initial_map)) {
+            ent = truncate_below(ent, initial_map);
+        }
+        mmu::linear_map(phys_mem + ent.addr, ent.addr, ent.size, ~0);
+        mmu::free_initial_memory_range(ent.addr, ent.size);
+    });
+}
diff --git a/arch/x64/loader.ld b/arch/x64/loader.ld
index 0475a9f63759fd7b78db7c9147429af6b7dc1167..92c84f365fd065ca54e6b8682972c40cc794c53f 100644
--- a/arch/x64/loader.ld
+++ b/arch/x64/loader.ld
@@ -40,6 +40,7 @@ SECTIONS
     .debug_weaknames 0 : { *(.debug_weaknames) }
     .gdb_index 0 : { *(.gdb_index) }
    .comment : { *(.comment) }
+    phys_mem = 0xffff800000000000;
 }
 PHDRS {
     text PT_LOAD FILEHDR PHDRS;
diff --git a/mmu.cc b/mmu.cc
index 93a15bd954217703f7a36667b5a2e8dea39f4689..b033eb913ff03526e3d61fe8afe2e1d960a6bba5 100644
--- a/mmu.cc
+++ b/mmu.cc
@@ -48,12 +48,12 @@ namespace mmu {
     template <typename T>
     T* phys_cast(phys pa)
     {
-        return reinterpret_cast<T*>(pa);
+        return reinterpret_cast<T*>(pa + 0xffff800000000000ull);
     }
 
     phys virt_to_phys(void *virt)
     {
-        return reinterpret_cast<phys>(virt);
+        return reinterpret_cast<phys>(virt) - 0xffff800000000000ull;
     }
 
     unsigned pt_index(void *virt, unsigned level)
@@ -287,10 +287,72 @@ namespace mmu {
         vma_list.insert(*n);
     }
 
+    unsigned nr_page_sizes = 2; // FIXME: detect 1GB pages
+
+    pt_element page_table_root;
+
+    void clamp(uintptr_t& vstart1, uintptr_t& vend1,
+               uintptr_t min, size_t max, size_t slop)
+    {
+        vstart1 &= ~(slop - 1);
+        vend1 |= (slop - 1);
+        vstart1 = std::max(vstart1, min);
+        vend1 = std::min(vend1, max);
+    }
+
+    unsigned pt_index(uintptr_t virt, unsigned level)
+    {
+        return pt_index(reinterpret_cast<void*>(virt), level);
+    }
+
+    void linear_map_level(pt_element& parent, uintptr_t vstart, uintptr_t vend,
+                          phys delta, uintptr_t base_virt, size_t slop, unsigned level)
+    {
+        --level;
+        if (!(parent & 1)) {
+            allocate_intermediate_level(&parent);
+        }
+        pt_element* pt = phys_cast<pt_element>(pte_phys(parent));
+        pt_element step = pt_element(1) << (12 + level * 9);
+        auto idx = pt_index(vstart, level);
+        auto eidx = pt_index(vend, level);
+        base_virt += idx * step;
+        base_virt = (s64(base_virt) << 16) >> 16; // extend 47th bit
+        while (idx <= eidx) {
+            uintptr_t vstart1 = vstart, vend1 = vend;
+            clamp(vstart1, vend1, base_virt, base_virt + step - 1, slop);
+            if (level < nr_page_sizes && vstart1 == base_virt && vend1 == base_virt + step - 1) {
+                pt[idx] = (vstart1 + delta) | 0x67 | (level == 0 ? 0 : 0x80);
+            } else {
+                linear_map_level(pt[idx], vstart1, vend1, delta, base_virt, slop, level);
+            }
+            base_virt += step;
+            ++idx;
+        }
+    }
+
+    size_t page_size_level(unsigned level)
+    {
+        return size_t(1) << (12 + 9 * level);
+    }
+
+    void linear_map(uintptr_t virt, phys addr, size_t size, size_t slop)
+    {
+        slop = std::min(slop, page_size_level(nr_page_sizes - 1));
+        assert((virt & (slop - 1)) == (addr & (slop - 1)));
+        linear_map_level(page_table_root, virt, virt + size - 1,
+                         addr - virt, 0, slop, 4);
+    }
+
     void free_initial_memory_range(uintptr_t addr, size_t size)
     {
         memory::free_initial_memory_range(phys_cast<void>(addr), size);
     }
+
+    void switch_to_runtime_page_table()
+    {
+        processor::write_cr3(pte_phys(page_table_root));
+    }
 }
 
 void page_fault(exception_frame *ef)
diff --git a/mmu.hh b/mmu.hh
index b3017ff7e28ba9257b1db2926e56a62f9888c992..e4b766c2370a3d1a26bfd643019cb7fe9f4d0c87 100644
--- a/mmu.hh
+++ b/mmu.hh
@@ -42,7 +42,9 @@ namespace mmu {
     typedef uint64_t phys;
     phys virt_to_phys(void *virt);
 
+    void linear_map(uintptr_t virt, phys addr, size_t size, size_t slop);
     void free_initial_memory_range(uintptr_t addr, size_t size);
+    void switch_to_runtime_page_table();
 }
 
 #endif
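
Note on the e820 passes: all three loops in arch_setup_free_memory() lean on
intersects()/truncate_below()/truncate_above() to clip usable RAM against
edata and initial_map. The sketch below is illustrative only, not part of the
patch: e820ent is reduced to the two fields the helpers touch, and the edata
value is invented. It shows what the second pass leaves freeable from an
entry that straddles both boundaries:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    typedef uint64_t u64;

    // reduced e820 entry: just the fields the helpers use
    struct e820ent {
        u64 addr;
        u64 size;
    };

    // helpers copied verbatim from the patch
    bool intersects(const e820ent& ent, u64 a)
    {
        return a > ent.addr && a < ent.addr + ent.size;
    }

    e820ent truncate_below(e820ent ent, u64 a)
    {
        u64 delta = a - ent.addr;
        ent.addr += delta;
        ent.size -= delta;
        return ent;
    }

    e820ent truncate_above(e820ent ent, u64 a)
    {
        u64 delta = ent.addr + ent.size - a;
        ent.size -= delta;
        return ent;
    }

    int main()
    {
        u64 edata = 0x400000;         // pretend the kernel image ends at 4MB
        u64 initial_map = 1ull << 30; // 1GB mapped by the startup code

        // an entry straddling both boundaries: 1MB..2GB
        e820ent ent = { 0x100000, 0x80000000 - 0x100000 };
        if (intersects(ent, edata)) {
            ent = truncate_below(ent, edata);       // clip off the kernel image
        }
        if (intersects(ent, initial_map)) {
            ent = truncate_above(ent, initial_map); // clip off unmapped memory
        }
        // what survives is exactly [edata, initial_map)
        assert(ent.addr == edata && ent.addr + ent.size == initial_map);
        printf("freeable now: %#lx-%#lx\n", ent.addr, ent.addr + ent.size);
    }

The third pass then picks up everything at or above initial_map, mapping it
through phys_mem before freeing it.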
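
Note on the address arithmetic in mmu.cc: phys_mem (0xffff800000000000)
falls in PML4 slot 256, which is why setup_temporary_phys_map() copies
pt[0] into pt[256], and why linear_map_level() must sign-extend bit 47 of
base_virt to keep it canonical once the walk enters the upper half of the
address space. A standalone check (again not part of the patch; pt_index is
re-derived here for plain u64 addresses):

    #include <cassert>
    #include <cstdint>

    typedef uint64_t u64;
    typedef int64_t s64;

    // index into the page table at the given level (0 = 4KB PTEs,
    // 3 = PML4): 9 bits per level above the 12-bit page offset
    unsigned pt_index(u64 virt, unsigned level)
    {
        return (virt >> (12 + level * 9)) & 0x1ff;
    }

    int main()
    {
        constexpr u64 phys_mem = 0xffff800000000000;

        // the PML4 slot covering phys_mem -- the one duplicated from slot 0
        assert(pt_index(phys_mem, 3) == 256);

        // stepping into slot 256 with plain 64-bit arithmetic yields a
        // non-canonical address; extending bit 47 recovers the real one
        u64 base_virt = u64(256) << (12 + 3 * 9); // 0x0000800000000000
        base_virt = (s64(base_virt) << 16) >> 16; // extend 47th bit
        assert(base_virt == phys_mem);
    }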