diff --git a/core/mempool.cc b/core/mempool.cc
index eebb3ed805ccff3b41de75904c3de6daafa14704..1774bb0e8f02dcdc65dc31383044e1c7db66b618 100644
--- a/core/mempool.cc
+++ b/core/mempool.cc
@@ -654,9 +654,9 @@ namespace dbg {
 
 bool enabled;
 
+using mmu::debug_base;
 // FIXME: we assume the debug memory space is infinite (which it nearly is)
 // and don't reuse space
-static const auto debug_base = reinterpret_cast<char*>(0xffffe00000000000);
 std::atomic<char*> free_area{debug_base};
 struct header {
     explicit header(size_t sz) : size(sz), size2(sz) {
@@ -670,7 +670,7 @@ struct header {
     char fence[16];
     size_t size2;
 };
-static const size_t pad_before = mmu::page_size;
+static const size_t pad_before = 2 * mmu::page_size;
 static const size_t pad_after = mmu::page_size;
 
 void* malloc(size_t size)
@@ -679,15 +679,15 @@ void* malloc(size_t size)
         return std_malloc(size);
     }
 
-    auto hsize = size + sizeof(header);
-    auto asize = align_up(hsize, mmu::page_size);
+    auto asize = align_up(size, mmu::page_size);
     auto padded_size = pad_before + asize + pad_after;
     void* v = free_area.fetch_add(padded_size, std::memory_order_relaxed);
+    mmu::vpopulate(v, mmu::page_size);
+    new (v) header(size);
     v += pad_before;
     mmu::vpopulate(v, asize);
-    auto h = new (v) header(size);
-    memset(v + hsize, '$', asize - hsize);
-    return h + 1;
+    memset(v + size, '$', asize - size);
+    return v;
 }
 
 void free(void* v)
@@ -695,14 +695,14 @@ void free(void* v)
     if (v < debug_base) {
         return std_free(v);
     }
-    auto h = static_cast<header*>(v) - 1;
+    auto h = static_cast<header*>(v - pad_before);
     auto size = h->size;
-    auto hsize = size + sizeof(header);
-    auto asize = align_up(hsize, mmu::page_size);
-    char* vv = reinterpret_cast<char*>(h);
-    assert(std::all_of(vv + hsize, vv  + asize, [=](char c) { return c == '$'; }));
+    auto asize = align_up(size, mmu::page_size);
+    char* vv = reinterpret_cast<char*>(v);
+    assert(std::all_of(vv + size, vv + asize, [=](char c) { return c == '$'; }));
     h->~header();
-    mmu::vdepopulate(h, asize);
+    mmu::vdepopulate(h, mmu::page_size);
+    mmu::vdepopulate(v, asize);
 }
 
 void* realloc(void* v, size_t size)
@@ -760,4 +760,19 @@ void enable_debug_allocator()
 #endif
 }
 
+void* alloc_phys_contiguous_aligned(size_t size, size_t align)
+{
+    // Only power-of-two alignments up to one page are supported for now.
+    assert(align <= page_size);
+    assert(is_power_of_two(align));
+    // The standard allocator is relied on to return page-aligned, physically
+    // contiguous memory, so a request is simply grown to at least one page.
+    return std_malloc(size < page_size ? page_size : size);
+}
+
+void free_phys_contiguous_aligned(void* p)
+{
+    std_free(p); // releases memory that alloc_phys_contiguous_aligned() got from std_malloc()
+}
+
 }
diff --git a/core/mmu.cc b/core/mmu.cc
index b22f5306dda4d1408d3276528173f49ac5523b3d..81874e4819d09235d91f2c50fdcf7bc5d98883ca 100644
--- a/core/mmu.cc
+++ b/core/mmu.cc
@@ -9,6 +9,7 @@
 #include "libc/signal.hh"
 #include "align.hh"
 #include "interrupt.hh"
+#include "ilog2.hh"
 
 extern void* elf_start;
 extern size_t elf_size;
@@ -164,6 +165,15 @@ hw_ptep follow(pt_element pte)
     return hw_ptep::force(phys_cast<pt_element>(pte.next_pt_addr()));
 }
 
+// Address mask for a given page-table level: set bits are supplied by the
+// pte at that level, cleared (low) bits are supplied by the virtual address.
+phys pte_level_mask(unsigned level)
+{
+    const auto page_bits = ilog2_roundup_constexpr(page_size);
+    const auto index_bits = ilog2_roundup_constexpr(pte_per_page);
+    return ~((phys(1) << (level * index_bits + page_bits)) - 1);
+}
+
 const unsigned nlevels = 4;
 
 void* phys_to_virt(phys pa)
@@ -177,6 +187,22 @@ void* phys_to_virt(phys pa)
     return phys_mem + pa;
 }
 
+phys virt_to_phys_pt(void* virt)
+{
+    // Start from the entry built out of CR3 and descend one level per pass.
+    auto entry = pt_element::force(processor::read_cr3());
+    unsigned lvl = nlevels;
+    while (lvl > 0 && !entry.large()) {
+        assert(entry.present() || lvl == nlevels);
+        auto table = follow(entry);
+        entry = table.at(pt_index(virt, --lvl)).read();
+    }
+    assert(!entry.empty());
+    auto pte_bits = pte_level_mask(lvl);
+    auto va_bits = reinterpret_cast<uintptr_t>(virt) & ~pte_bits;
+    return (entry.addr(lvl != 0) & pte_bits) | va_bits;
+}
+
 phys virt_to_phys(void *virt)
 {
     // The ELF is mapped 1:1
@@ -184,6 +210,12 @@ phys virt_to_phys(void *virt)
         return reinterpret_cast<phys>(virt);
     }
 
+#if CONF_debug_memory
+    if (virt > debug_base) {
+        return virt_to_phys_pt(virt);
+    }
+#endif
+
     // For now, only allow non-mmaped areas.  Later, we can either
     // bounce such addresses, or lock them in memory and translate
     assert(virt >= phys_mem);
@@ -588,23 +620,6 @@ struct fill_anon_page : fill_page {
     }
 };
 
-struct fill_file_page : fill_page {
-    fill_file_page(fileref _f, uint64_t _off, uint64_t _len)
-        : f(_f), off(_off), len(_len) {}
-    virtual void fill(void* addr, uint64_t offset) {
-        offset += off;
-        unsigned toread = 0;
-        if (offset < len) {
-            toread = std::min(len - offset, page_size);
-            read(f, addr, offset, toread);
-        }
-        memset(addr + toread, 0, page_size - toread);
-    }
-    fileref f;
-    uint64_t off;
-    uint64_t len;
-};
-
 uintptr_t allocate(uintptr_t start, size_t size, bool search,
                     fill_page& fill, unsigned perm)
 {
@@ -660,10 +675,20 @@ void* map_anon(void* addr, size_t size, bool search, unsigned perm)
 void* map_file(void* addr, size_t size, bool search, unsigned perm,
               fileref f, f_offset offset)
 {
-    size = align_up(size, mmu::page_size);
+    auto asize = align_up(size, mmu::page_size); // page-rounded length; size keeps the caller's byte count
     auto start = reinterpret_cast<uintptr_t>(addr);
-    fill_file_page ffill(f, offset, ::size(f));
-    return (void*) allocate(start, size, search, ffill, perm);
+    fill_anon_page zfill;
+    auto v = (void*) allocate(start, asize, search, zfill, perm | perm_write); // mapped writable for now so read() below can fill it
+    auto fsize = ::size(f);
+    // FIXME: we pre-zeroed this, and now we're overwriting the zeroes
+    if (offset < fsize) {
+        read(f, v, offset, std::min(size, fsize - offset)); // never read more than the file holds past offset
+    }
+    // FIXME: do this more cleverly, avoiding a second pass
+    if (!(perm & perm_write)) {
+        protect(v, asize, perm); // caller did not request write access: re-apply the requested perm
+    }
+    return v;
 }
 
 // Efficiently find the vma in vma_list which contains the given address.
diff --git a/drivers/virtio-vring.cc b/drivers/virtio-vring.cc
index e0354bbaf10a265df2ca5262315b695795dfb5d3..2e762d5152a7f00e42be1e4d687fdcf2239977a9 100644
--- a/drivers/virtio-vring.cc
+++ b/drivers/virtio-vring.cc
@@ -25,7 +25,7 @@ namespace virtio {
         _q_index = q_index;
         // Alloc enough pages for the vring...
         unsigned sz = VIRTIO_ALIGN(vring::get_size(num, VIRTIO_PCI_VRING_ALIGN));
-        _vring_ptr = malloc(sz);
+        _vring_ptr = memory::alloc_phys_contiguous_aligned(sz, 4096);
         memset(_vring_ptr, 0, sz);
         
         // Set up pointers        
@@ -55,7 +55,7 @@ namespace virtio {
 
     vring::~vring()
     {
-        free(_vring_ptr);
+        memory::free_phys_contiguous_aligned(_vring_ptr);
         delete [] _cookie;
     }
 
diff --git a/include/ilog2.hh b/include/ilog2.hh
index 30ceec6261d9a88e15ed7ce001116c1de286c884..f2ef5d92bdae8625880390b1f8fb7fc4f1e5b21f 100644
--- a/include/ilog2.hh
+++ b/include/ilog2.hh
@@ -33,4 +33,11 @@ unsigned ilog2_roundup(T n)
     return sizeof(T)*8 - count_leading_zeros(n - 1);
 }
 
+template <typename T>
+inline constexpr
+bool is_power_of_two(T n) // true iff exactly one bit of n is set
+{
+    return n > 0 && (n & (n - 1)) == 0; // n > 0: zero has no bit set and is not a power of two
+}
+
 #endif
diff --git a/include/mempool.hh b/include/mempool.hh
index 4196963d77cf0eedf0e255f196259ae1269b7466..7a03e2f753b559737cdc0c78070b138523e0f523 100644
--- a/include/mempool.hh
+++ b/include/mempool.hh
@@ -13,6 +13,9 @@ const size_t page_size = 4096;
 
 extern size_t phys_mem_size;
 
+void* alloc_phys_contiguous_aligned(size_t sz, size_t align);
+void free_phys_contiguous_aligned(void* p);
+
 void* alloc_page();
 void free_page(void* page);
 void* alloc_huge_page(size_t bytes);
diff --git a/include/mmu.hh b/include/mmu.hh
index 7f577ed65fe1d529e1ad4c9138483949c81896ec..666bcde9f47ee574639e3501781e9845da52d1a0 100644
--- a/include/mmu.hh
+++ b/include/mmu.hh
@@ -14,6 +14,8 @@ constexpr uintptr_t page_size = 4096;
 typedef uint64_t f_offset;
 
 static constexpr char* phys_mem = reinterpret_cast<char*>(0xffffc00000000000);
+// area for debug allocations:
+static constexpr char* debug_base = reinterpret_cast<char*>(0xffffe00000000000);
 
 inline unsigned pt_index(void *virt, unsigned level)
 {