diff --git a/core/mmu.cc b/core/mmu.cc
index bac298826cacff17f5763a550c163c07a3fec833..17a8182dce6388f9a94cd40559ea70d72946113a 100644
--- a/core/mmu.cc
+++ b/core/mmu.cc
@@ -7,6 +7,7 @@
 #include <string.h>
 #include <iterator>
 #include "libc/signal.hh"
+#include "align.hh"
 
 namespace {
 
@@ -163,39 +164,89 @@ void debug_count_ptes(pt_element pte, int level, size_t &nsmall, size_t &nhuge)
     }
 }
 
-void populate_page(void* addr, fill_page& fill, uint64_t offset, unsigned perm)
+/*
+ * a page_range_operation implementation operates (via the operate() method)
+ * on a page-aligned byte range of virtual memory. The range is divided into a
+ * bulk of aligned huge pages (2MB pages), and if the beginning and end
+ * addresses aren't 2MB aligned, there are additional small pages (4KB pages).
+ * The appropriate method (small_page() or huge_page()) is called for
+ * each of these pages, to implement the operation.
+ * By supporting operations directly on whole huge pages, we allow for smaller
+ * page tables and better TLB efficiency.
+ *
+ * TODO: Instead of walking the page table from its root for each page (small
+ * or huge), we can more efficiently walk the page table once, calling
+ * small_page()/huge_page() for the relevant page table entries (as well as
+ * avoid repeating the necessary allocation and split code, now repeated in
+ * all our small_page()/huge_page() implementations). See linear_map for
+ * an example of how to do this walk.
+ */
+class page_range_operation {
+public:
+    void operate(void *start, size_t size);
+    void operate(vma &vma){ operate((void*)vma.start(), vma.size()); }
+protected:
+    // offset is the offset of this page in the entire address range
+    // (in case the operation needs to know this).
+    virtual void small_page(pt_element *ptep, uintptr_t offset) = 0;
+    virtual void huge_page(pt_element *ptep, uintptr_t offset) = 0;
+    virtual bool should_allocate_intermediate() = 0;
+private:
+    void operate_page(bool huge, void *addr, uintptr_t offset);
+};
+
+void page_range_operation::operate(void *start, size_t size)
 {
-    pt_element pte = processor::read_cr3();
-    auto pt = phys_cast<pt_element>(pte_phys(pte));
-    auto ptep = &pt[pt_index(addr, nlevels - 1)];
-    unsigned level = nlevels - 1;
-    while (level > 0) {
-        if (!pte_present(*ptep)) {
-            allocate_intermediate_level(ptep);
-        } else if (pte_large(*ptep)) {
-            split_large_page(ptep, level);
-        }
-        pte = *ptep;
-        --level;
-        pt = phys_cast<pt_element>(pte_phys(pte));
-        ptep = &pt[pt_index(addr, level)];
+    start = align_down(start, page_size);
+    size = align_up(size, page_size);
+    void *end = start + size; // one byte after the end
+
+    // Find the largest 2MB-aligned range inside the given byte (or actually,
+    // 4K-aligned) range:
+    auto hp_start = align_up(start, huge_page_size);
+    auto hp_end = align_down(end, huge_page_size);
+
+    // Fix hp_start/hp_end in degenerate cases so the following
+    // loops do the right thing.
+    if (hp_start > end) {
+        hp_start = end;
+    }
+    if (hp_end < start) {
+        hp_end = end;
+    }
+
+    for (void *addr = start; addr < hp_start; addr += page_size) {
+        operate_page(false, addr, (uintptr_t)addr-(uintptr_t)start);
+    }
+    for (void *addr = hp_start; addr < hp_end; addr += huge_page_size) {
+        operate_page(true, addr, (uintptr_t)addr-(uintptr_t)start);
+    }
+    for (void *addr = hp_end; addr < end; addr += page_size) {
+        operate_page(false, addr, (uintptr_t)addr-(uintptr_t)start);
     }
-    phys page = alloc_page();
-    fill.fill(phys_to_virt(page), offset);
-    assert(pte_phys(*ptep)==0); // don't populate an already populated page!
-    *ptep = make_pte(page, perm);
 }
 
-void populate_huge_page(void* addr, fill_page& fill, uint64_t offset, unsigned perm)
+void page_range_operation::operate_page(bool huge, void *addr, uintptr_t offset)
 {
     pt_element pte = processor::read_cr3();
     auto pt = phys_cast<pt_element>(pte_phys(pte));
     auto ptep = &pt[pt_index(addr, nlevels - 1)];
     unsigned level = nlevels - 1;
-    while (level > 1) {
+    unsigned stopat = huge ? 1 : 0;
+    while (level > stopat) {
         if (!pte_present(*ptep)) {
-            allocate_intermediate_level(ptep);
+            if (should_allocate_intermediate()) {
+                allocate_intermediate_level(ptep);
+            } else {
+                return;
+            }
         } else if (pte_large(*ptep)) {
+            // We're trying to operate on a small page that is part of a
+            // huge page (or, in the future, on a 2MB page that is part of
+            // a 1GB page), so we first need to split the large page into
+            // smaller pages. Our implementation ensures that it is ok to
+            // free pieces of an alloc_huge_page() allocation with
+            // free_page(), so such a split is safe.
             split_large_page(ptep, level);
         }
         pte = *ptep;
@@ -203,18 +254,11 @@ void populate_huge_page(void* addr, fill_page& fill, uint64_t offset, unsigned p
         pt = phys_cast<pt_element>(pte_phys(pte));
         ptep = &pt[pt_index(addr, level)];
     }
-    phys page = virt_to_phys(memory::alloc_huge_page(huge_page_size));
-    uint64_t o=0;
-    for (int i=0; i<pte_per_page; i++){
-        fill.fill(phys_to_virt(page+o), offset+o);
-        o += page_size;
-    }
-    if (pte_phys(*ptep)) {
-        assert(!pte_large(*ptep)); // don't populate an already populated page!
-        // held smallpages (already evacuated), now will be used for huge page
-        free_intermediate_level(ptep);
+    if(huge) {
+        huge_page(ptep, offset);
+    } else {
+        small_page(ptep, offset);
     }
-    *ptep = make_pte(page, perm) | (1<<7);
 }
 
 /*
@@ -228,126 +272,146 @@ void populate_huge_page(void* addr, fill_page& fill, uint64_t offset, unsigned p
  * is not 2MB aligned, we will need to apply the fill and perm only to a part
  * of a large page, in which case we must break the entire large page into its
  * constitutive small (4K) pages.
- *
- * FIXME: It would be nicer to, instead of iterating on all levels per page as
- * we do in populate_page/populate_huge_page, we walk once on the whole
- * hiearchy, as in linear_map.
  */
-void populate(vma& vma, fill_page& fill, unsigned perm)
-{
-    // Find the largest 2MB-aligned range inside the given byte (or actually,
-    // 4K-aligned) range:
-    uintptr_t hp_start = ((vma.start()-1) & ~(huge_page_size-1)) + huge_page_size;
-    uintptr_t hp_end = (vma.end()) & ~(huge_page_size-1);
-
-    if (hp_start > vma.end())
-        hp_start = vma.end();
-    if (hp_end < vma.start())
-        hp_end = vma.end();
-
-    /* Step 1: Break up the partial huge page (if any) in the beginning of the
-     * address range, and populate the small pages.
-     * TODO: it would be more efficient not to walk all the levels all the time */
-    for (auto addr = vma.start(); addr < hp_start; addr += page_size)
-        populate_page(reinterpret_cast<void*>(addr), fill, addr-vma.start(), perm);
-    /* Step 2: Populate the huge pages (if any) in the middle of the range */
-    for (auto addr = hp_start; addr < hp_end; addr += huge_page_size)
-        populate_huge_page(reinterpret_cast<void*>(addr), fill, addr-vma.start(), perm);
-    /* Step 3: Break up the partial huge page (if any) at the end of the range */
-    for (auto addr = hp_end; addr < vma.end(); addr += page_size)
-        populate_page(reinterpret_cast<void*>(addr), fill, addr-vma.start(), perm);
-    //size_t nsmall=0, nhuge=0;
-    //debug_count_ptes(processor::read_cr3(), 4, nsmall, nhuge);
-    //debug(fmt("after population, page table contains %ld small pages, %ld huge") % nsmall % nhuge);
-
-}
-
-void unpopulate_page(void* addr)
-{
-    pt_element pte = processor::read_cr3();
-    auto pt = phys_cast<pt_element>(pte_phys(pte));
-    auto ptep = &pt[pt_index(addr, nlevels - 1)];
-    unsigned level = nlevels - 1;
-    while (level > 0) {
-        if (!pte_present(*ptep))
-            return;
-        else if (pte_large(*ptep)) {
-            // This case means that part of a larger mmap was mmapped over,
-            // previously a huge page was mapped, and now we need to free some
-            // of the small pages composing it. Luckily, in our implementation
-            // it is ok to free pieces of a alloc_huge_page() with free_page()
-            split_large_page(ptep, level);
+class populate : public page_range_operation {
+private:
+    fill_page *fill;
+    unsigned int perm;
+public:
+    populate(fill_page *fill, unsigned int perm) : fill(fill), perm(perm) { }
+protected:
+    virtual void small_page(pt_element *ptep, uintptr_t offset){
+        phys page = alloc_page();
+        fill->fill(phys_to_virt(page), offset);
+        assert(pte_phys(*ptep)==0); // don't populate an already populated page!
+        *ptep = make_pte(page, perm);
+    }
+    virtual void huge_page(pt_element *ptep, uintptr_t offset){
+        phys page = virt_to_phys(memory::alloc_huge_page(huge_page_size));
+        uint64_t o=0;
+        // Unfortunately, fill() only works on small-page-size chunks, so we
+        // need to call it once per small page:
+        for (int i=0; i<pte_per_page; i++){
+            fill->fill(phys_to_virt(page+o), offset+o);
+            o += page_size;
         }
-        pte = *ptep;
-        --level;
-        pt = phys_cast<pt_element>(pte_phys(pte));
-        ptep = &pt[pt_index(addr, level)];
+        if (pte_phys(*ptep)) {
+            assert(!pte_large(*ptep)); // don't populate an already populated page!
+            // held small pages (already evacuated), now will be used for a huge page
+            free_intermediate_level(ptep);
+        }
+        *ptep = make_pte(page, perm) | (1<<7);
     }
-    // Note: we free the page even if it is already marked "not present".
-    // evacuate() makes sure we are only called for allocated pages, and
-    // not-present may only mean mprotect(PROT_NONE).
-    phys page=pte_phys(*ptep);
-    assert(page); // evacuate() shouldn't call us twice for the same page.
-    memory::free_page(phys_to_virt(page));
-    *ptep = 0;
-}
-
-void unpopulate_huge_page(void* addr)
-{
-    pt_element pte = processor::read_cr3();
-    auto pt = phys_cast<pt_element>(pte_phys(pte));
-    auto ptep = &pt[pt_index(addr, nlevels - 1)];
-    unsigned level = nlevels - 1;
-    while (level > 1) {
-        if (!pte_present(*ptep))
-            return;
-        else if (pte_large(*ptep))
-            split_large_page(ptep, level);
-        pte = *ptep;
-        --level;
-        pt = phys_cast<pt_element>(pte_phys(pte));
-        ptep = &pt[pt_index(addr, level)];
+    virtual bool should_allocate_intermediate(){
+        return true;
     }
-    if (!pte_present(*ptep)){
+};
+
+/*
+ * Undo the operation of populate(), freeing memory allocated by populate()
+ * and marking the pages non-present.
+ */
+class unpopulate : public page_range_operation {
+protected:
+    virtual void small_page(pt_element *ptep, uintptr_t offset){
         // Note: we free the page even if it is already marked "not present".
         // evacuate() makes sure we are only called for allocated pages, and
        // not-present may only mean mprotect(PROT_NONE).
         phys page=pte_phys(*ptep);
         assert(page); // evacuate() shouldn't call us twice for the same page.
-        memory::free_huge_page(phys_to_virt(page), huge_page_size);
-    } else if (pte_large(*ptep)){
-        memory::free_huge_page(phys_to_virt(pte_phys(*ptep)), huge_page_size);
-    } else {
-        // We've previously allocated small pages here, not a huge pages.
-        // We need to free them one by one - as they are not necessarily part
-        // of one huge page.
-        pt_element* pt = phys_cast<pt_element>(pte_phys(*ptep));
-        for(int i=0; i<pte_per_page; ++i)
-            if (pte_present(pt[i]))
-                memory::free_page(phys_to_virt(pte_phys(pt[i])));
+        memory::free_page(phys_to_virt(page));
+        *ptep = 0;
     }
-    *ptep = 0;
-}
+    virtual void huge_page(pt_element *ptep, uintptr_t offset){
+        if (!pte_present(*ptep)) {
+            // Note: we free the page even if it is already marked "not present".
+            // evacuate() makes sure we are only called for allocated pages, and
+            // not-present may only mean mprotect(PROT_NONE).
+            phys page=pte_phys(*ptep);
+            assert(page); // evacuate() shouldn't call us twice for the same page.
+            memory::free_huge_page(phys_to_virt(page), huge_page_size);
+        } else if (pte_large(*ptep)) {
+            memory::free_huge_page(phys_to_virt(pte_phys(*ptep)), huge_page_size);
+        } else {
+            // We've previously allocated small pages here, not a huge page.
+            // We need to free them one by one - as they are not necessarily part
+            // of one huge page.
+            pt_element* pt = phys_cast<pt_element>(pte_phys(*ptep));
+            for(int i=0; i<pte_per_page; ++i)
+                if (pte_present(pt[i]))
+                    memory::free_page(phys_to_virt(pte_phys(pt[i])));
+        }
+        *ptep = 0;
+    }
+    virtual bool should_allocate_intermediate(){
+        return false;
+    }
+};
 
-/*
- * Undo the operation of populate(), freeing memory allocated by populate()
- * and marking the pages non-present.
- */
-void unpopulate(vma& vma)
+void change_perm(pt_element *ptep, unsigned int perm)
 {
-    uintptr_t hp_start = ((vma.start()-1) & ~(huge_page_size-1)) + huge_page_size;
-    uintptr_t hp_end = (vma.end()) & ~(huge_page_size-1);
-    if (hp_start > vma.end())
-        hp_start = vma.end();
-    if (hp_end < vma.start())
-        hp_end = vma.end();
+    // Note: in x86, if the present bit (0x1) is off, not only read is
+    // disallowed, but also write and exec. So in mprotect, if any
+    // permission is requested, we must also grant read permission.
+    // Linux does this too.
+    if (perm)
+        *ptep |= 0x1;
+    else
+        *ptep &= ~0x1;
+
+    if (perm & perm_write)
+        *ptep |= 0x2;
+    else
+        *ptep &= ~0x2;
 
-    for (auto addr = vma.start(); addr < hp_start; addr += page_size)
-        unpopulate_page(reinterpret_cast<void*>(addr));
-    for (auto addr = hp_start; addr < hp_end; addr += huge_page_size)
-        unpopulate_huge_page(reinterpret_cast<void*>(addr));
-    for (auto addr = hp_end; addr < vma.end(); addr += page_size)
-        unpopulate_page(reinterpret_cast<void*>(addr));
+    if (!(perm & perm_exec))
+        *ptep |= pt_element(0x8000000000000000);
+    else
+        *ptep &= ~pt_element(0x8000000000000000);
+}
+
+class protection : public page_range_operation {
+private:
+    unsigned int perm;
+    bool success;
+public:
+    protection(unsigned int perm) : perm(perm), success(true) { }
+    bool getsuccess(){ return success; }
+protected:
+    virtual void small_page(pt_element *ptep, uintptr_t offset){
+        if (!pte_phys(*ptep)) {
+            success = false;
+            return;
+        }
+        change_perm(ptep, perm);
+    }
+    virtual void huge_page(pt_element *ptep, uintptr_t offset){
+        if (!pte_phys(*ptep)) {
+            success = false;
+        } else if (pte_large(*ptep)) {
+            change_perm(ptep, perm);
+        } else {
+            pt_element* pt = phys_cast<pt_element>(pte_phys(*ptep));
+            for (int i=0; i<pte_per_page; ++i) {
+                if (pte_phys(pt[i])) {
+                    change_perm(&pt[i], perm);
+                } else {
+                    success = false;
+                }
+            }
+        }
+    }
+    virtual bool should_allocate_intermediate(){
+        success = false;
+        return false;
+    }
+};
+
+int protect(void *addr, size_t size, unsigned int perm)
+{
+    protection p(perm);
+    p.operate(addr, size);
+    return p.getsuccess();
 }
 
 uintptr_t find_hole(uintptr_t start, uintptr_t size)
@@ -383,7 +447,7 @@ void evacuate(vma* v)
         i->split(v->start());
         if (contains(*v, *i)) {
             auto& dead = *i--;
-            unpopulate(dead);
+            unpopulate().operate(dead);
             vma_list.erase(dead);
         }
     }
@@ -440,7 +504,7 @@ vma* allocate(uintptr_t start, uintptr_t end, fill_page& fill,
 
     evacuate(ret);
     vma_list.insert(*ret);
-    populate(*ret, fill, perm);
+    populate(&fill, perm).operate((void*)start, end-start);
 
     return ret;
 }
@@ -461,110 +525,6 @@ vma* map_file(void* addr, size_t size, unsigned perm,
 
     return ret;
 }
 
-void change_perm(pt_element *ptep, unsigned int perm)
-{
-    // Note: in x86, if the present bit (0x1) is off, not only read is
-    // disallowed, but also write and exec. So in mprotect, if any
-    // permission is requested, we must also grant read permission.
-    // Linux does this too.
-    if (perm)
-        *ptep |= 0x1;
-    else
-        *ptep &= ~0x1;
-
-    if (perm & perm_write)
-        *ptep |= 0x2;
-    else
-        *ptep &= ~0x2;
-
-    if (!(perm & perm_exec))
-        *ptep |= pt_element(0x8000000000000000);
-    else
-        *ptep &= ~pt_element(0x8000000000000000);
-}
-
-int protect_page(void *addr, unsigned int perm)
-{
-    pt_element pte = processor::read_cr3();
-    auto pt = phys_cast<pt_element>(pte_phys(pte));
-    auto ptep = &pt[pt_index(addr, nlevels - 1)];
-    unsigned level = nlevels - 1;
-    while (level > 0) {
-        if (!pte_phys(*ptep))
-            return 0;
-        else if (pte_large(*ptep)) {
-            // We're trying to change the protection of part of a huge page, so
-            // we need to split the huge page into small pages. This is fine
-            // because in in our implementation it is ok to free pieces of a
-            // alloc_huge_page() with free_page()
-            split_large_page(ptep, level);
-        }
-        pte = *ptep;
-        --level;
-        pt = phys_cast<pt_element>(pte_phys(pte));
-        ptep = &pt[pt_index(addr, level)];
-    }
-    if (!pte_phys(*ptep))
-        return 0;
-    change_perm(ptep, perm);
-    return 1;
-}
-
-int protect_huge_page(void *addr, unsigned int perm)
-{
-    pt_element pte = processor::read_cr3();
-    auto pt = phys_cast<pt_element>(pte_phys(pte));
-    auto ptep = &pt[pt_index(addr, nlevels - 1)];
-    unsigned level = nlevels - 1;
-    while (level > 1) {
-        if (!pte_present(*ptep))
-            return 0;
-        else if (pte_large(*ptep))
-            split_large_page(ptep, level);
-        pte = *ptep;
-        --level;
-        pt = phys_cast<pt_element>(pte_phys(pte));
-        ptep = &pt[pt_index(addr, level)];
-    }
-    if (!pte_phys(*ptep))
-        return 0;
-
-    if (pte_large(*ptep)){
-        change_perm(ptep, perm);
-        return 1;
-    } else {
-        int ret = 1;
-        pt_element* pt = phys_cast<pt_element>(pte_phys(*ptep));
-        for(int i=0; i<pte_per_page; ++i)
-            if(pte_phys(pt[i]))
-                change_perm(&pt[i], perm);
-            else
-                ret = 0;
-        return ret;
-    }
-}
-
-int protect(void *start, size_t size, unsigned int perm)
-{
-    void *end = start+size; // one byte after the end
-    void *hp_start = (void*) ((((uintptr_t)start-1) & ~(huge_page_size-1)) +
-            huge_page_size);
-    void *hp_end = (void*) ((uintptr_t)end & ~(huge_page_size-1));
-    if (hp_start > end)
-        hp_start = end;
-    if (hp_end < start)
-        hp_end = end;
-
-    int ret=1;
-    for (auto addr = start; addr < hp_start; addr += page_size)
-        ret &= protect_page(addr, perm);
-    for (auto addr = hp_start; addr < hp_end; addr += huge_page_size)
-        ret &= protect_huge_page(addr, perm);
-    for (auto addr = hp_end; addr < end; addr += page_size)
-        ret &= protect_page(addr, perm);
-    return ret;
-}
-
 namespace {