diff --git a/arch/x64/arch-mmu.hh b/arch/x64/arch-mmu.hh
index 5aaa6236bfb98ff9eb6e36b298b15ad89ec45da9..dd4c60c8cbbebc8d8d806131a9d87b97c7d41893 100644
--- a/arch/x64/arch-mmu.hh
+++ b/arch/x64/arch-mmu.hh
@@ -9,7 +9,8 @@
 #define ARCH_MMU_HH_
 
 #include <osv/ilog2.hh>
-#include <osv/mmu.hh>
+#include <osv/types.h>
+#include <osv/mmu-defs.hh>
 
 namespace mmu {
 
@@ -68,19 +69,20 @@ class hw_ptep {
 public:
     hw_ptep(const hw_ptep& a) : p(a.p) {}
     pt_element read() const { return *p; }
-    void write(pt_element pte) { *const_cast<volatile u64*>(&p->x) = pte.x; }
-    bool compare_exchange(pt_element oldval, pt_element newval) {
+    void write(pt_element pte) const { *const_cast<volatile u64*>(&p->x) = pte.x; }
+    bool compare_exchange(pt_element oldval, pt_element newval) const {
         std::atomic<u64> *x = reinterpret_cast<std::atomic<u64>*>(&p->x);
         return x->compare_exchange_strong(oldval.x, newval.x, std::memory_order_relaxed);
     }
-    pt_element exchange(pt_element newval) {
+    pt_element exchange(pt_element newval) const {
         std::atomic<u64> *x = reinterpret_cast<std::atomic<u64>*>(&p->x);
         return pt_element(x->exchange(newval.x));
     }
-    hw_ptep at(unsigned idx) { return hw_ptep(p + idx); }
+    hw_ptep at(unsigned idx) const { return hw_ptep(p + idx); }
     static hw_ptep force(pt_element* ptep) { return hw_ptep(ptep); }
     // no longer using this as a page table
-    pt_element* release() { return p; }
+    pt_element* release() const { return p; }
+    bool operator==(const hw_ptep& a) const noexcept { return p == a.p; }
 private:
     hw_ptep(pt_element* ptep) : p(ptep) {}
     pt_element* p;
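
The const qualifiers above follow the shallow-const convention for handle types: a hw_ptep never mutates its own state (the pointer p), so writing through a const handle is fine, and the new operator== plus the std::hash specialization added in core/pagecache.cc below is what lets hw_ptep values live in a std::unordered_set, whose iterators only yield const references. A minimal standalone sketch of the same pattern; every name here is illustrative, not from the patch:

```cpp
#include <atomic>
#include <cstdint>
#include <unordered_set>

// A handle to a word we do not own; copying the handle is cheap.
class word_handle {
public:
    explicit word_handle(uint64_t* p) : _p(p) {}
    // const: the handle itself is unchanged, only the pointee is written
    void write(uint64_t v) const {
        reinterpret_cast<std::atomic<uint64_t>*>(_p)->store(v, std::memory_order_relaxed);
    }
    bool operator==(const word_handle& o) const noexcept { return _p == o._p; }
    const uint64_t* raw() const { return _p; }
private:
    uint64_t* _p;
};

namespace std {
template<> struct hash<word_handle> {
    size_t operator()(const word_handle& h) const noexcept {
        return hash<const uint64_t*>()(h.raw());
    }
};
}

int main()
{
    uint64_t word = 0;
    std::unordered_set<word_handle> handles;
    handles.emplace(&word);
    for (const word_handle& h : handles) {
        h.write(42); // allowed: write() is const, the set's hashing is untouched
    }
    return word == 42 ? 0 : 1;
}
```
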
diff --git a/bsd/porting/mmu.cc b/bsd/porting/mmu.cc
index 7be239eefa1b3b6bdb8e8827efd5d4c187b73e8d..ec3f18196b510e4f25aae6183d4c0043cffe484b 100644
--- a/bsd/porting/mmu.cc
+++ b/bsd/porting/mmu.cc
@@ -42,5 +42,5 @@ int vm_throttling_needed(void)
 
 void mmu_unmap(void *addr, size_t size)
 {
-    mmu::unmap_address(addr, size);
+    mmu::unmap_address(addr, addr, size);
 }
diff --git a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
index b7b3bbb6d933003e82978d2f02dd89ef24ec8225..f0bb64e9c61f47417331cc1aafb43d1c372371a9 100644
--- a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
+++ b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
@@ -980,14 +980,11 @@ xuio_stat_wbuf_nocopy()
 #ifdef _KERNEL
 
 int
-dmu_map_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, bool map)
+dmu_map_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, unsigned action)
 {
 	dmu_buf_t **dbp;
 	int err;
-	struct uio_mapper *uio_map = (struct uio_mapper *)uio;
 	struct iovec *iov;
-	int tocpy;
-	int bufoff;
 	int numbufs = 0;
 
 	// This will acquire a reference both in the dbuf, and in the ARC buffer.
@@ -1003,33 +1000,19 @@ dmu_map_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, bool map)
 	dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
 	arc_buf_t *dbuf_abuf = dbi->db_buf;
 
-	if (map) {
-		arc_share_buf(dbi->db_buf);
-
-		bufoff = uio->uio_loffset - db->db_offset;
-		tocpy = (int)MIN(db->db_size - bufoff, size);
+	iov = uio->uio_iov;
+	iov->iov_base = dbuf_abuf->b_data;
+	iov->iov_len = db->db_size;
+	uio->uio_loffset = uio->uio_loffset - db->db_offset;
 
-		uio_map->buffer = dbuf_abuf->b_data;
-		// FIXME: Should be the ARC size, but that is private. They should be the same.
-		uio_map->buf_size = db->db_size;
-		uio_map->buf_off = bufoff;
-		iov = uio->uio_iov;
-		iov->iov_base = (char *)dbuf_abuf->b_data;
-		iov->iov_len = tocpy;
-	} else {
-		iov = uio->uio_iov;
-		// empty iov is a query operation.
-		if (iov->iov_base) {
-			assert(iov->iov_base == (char *)dbuf_abuf->b_data);
-			arc_unshare_buf(dbi->db_buf);
-		} else {
-			iov->iov_base = (char *)dbuf_abuf->b_data;
-			iov->iov_len = db->db_size;
-		}
-	}
+	if (action == ARC_ACTION_HOLD)
+		arc_share_buf(dbi->db_buf);
+	else if (action == ARC_ACTION_RELEASE)
+		arc_unshare_buf(dbi->db_buf);
 
 	dmu_buf_rele_array(dbp, numbufs, FTAG);
-	return 0;
+
+	return (0);
 }
 
 int
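
The bool map flag becomes a three-way action: dmu_map_uio() now always publishes the ARC buffer through the single iovec (base and length) and rebases uio_loffset to the offset within that buffer, while ARC_ACTION_HOLD and ARC_ACTION_RELEASE additionally share or unshare the buffer, and ARC_ACTION_QUERY leaves refcounting alone. A user-space model of just the action dispatch; the refcount table is a hypothetical stand-in for the ARC's shared-buffer state, not how the ARC tracks it:

```cpp
#include <cassert>
#include <unordered_map>

// The three actions, as defined in include/osv/vnode.h below.
enum { ARC_ACTION_QUERY = 0, ARC_ACTION_HOLD = 1, ARC_ACTION_RELEASE = 2 };

// Hypothetical stand-in for the ARC's shared-buffer state.
static std::unordered_map<const void*, unsigned> shares;

void arc_action(const void* buf, unsigned action)
{
    if (action == ARC_ACTION_HOLD) {
        ++shares[buf];                  // models arc_share_buf()
    } else if (action == ARC_ACTION_RELEASE) {
        auto it = shares.find(buf);
        assert(it != shares.end());
        if (--it->second == 0) {
            shares.erase(it);           // models arc_unshare_buf()
        }
    }
    // ARC_ACTION_QUERY: report the buffer only, touch no refcounts
}

int main()
{
    int buf;
    arc_action(&buf, ARC_ACTION_QUERY);    // lookup only
    arc_action(&buf, ARC_ACTION_HOLD);     // page handed out to a mapping
    arc_action(&buf, ARC_ACTION_RELEASE);  // last mapping dropped
    return shares.empty() ? 0 : 1;
}
```
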
diff --git a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
index 7a1afa5e628f0ab70c8bd2203438c85932c4f857..81c51f440ec4b46112c82c4af3e13efebb348389 100644
--- a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
+++ b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
@@ -602,7 +602,7 @@ void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
 	const void *buf, dmu_tx_t *tx);
 void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
 	dmu_tx_t *tx);
-int dmu_map_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size, bool map);
+int dmu_map_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size, unsigned action);
 int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
 int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
     dmu_tx_t *tx);
diff --git a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
index 42cad0f8f569cb0c8846fe5be65edfcb3b2c76dc..7cdf5fe462547b7bdfe65907a8e2353cbb3a36f0 100644
--- a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -636,7 +636,7 @@ out:
 static int zfs_truncate(struct vnode *vp, off_t new_size);
 
 static int
-zfs_manage_mapping(vnode_t *vp, struct file* fp, uio_t *uio, bool map)
+zfs_arc(vnode_t *vp, struct file* fp, uio_t *uio, unsigned action)
 {
 	znode_t		*zp = VTOZ(vp);
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
@@ -682,7 +682,7 @@ zfs_manage_mapping(vnode_t *vp, struct file* fp, uio_t *uio, bool map)
 	nbytes = MIN(nbytes, zfs_read_chunk_size -
 		P2PHASE(uio->uio_loffset, zfs_read_chunk_size));
 
-	error = dmu_map_uio(os, zp->z_id, uio, nbytes, map);
+	error = dmu_map_uio(os, zp->z_id, uio, nbytes, action);
 	if (error) {
 		/* convert checksum errors into IO errors */
 		if (error == ECKSUM)
@@ -696,19 +696,6 @@ zfs_manage_mapping(vnode_t *vp, struct file* fp, uio_t *uio, bool map)
 	return (error);
 }
 
-static int
-zfs_map(vnode_t *vp, struct file* fp, uio_t *uio)
-{
-	return zfs_manage_mapping(vp, fp, uio, true);
-}
-
-
-static int
-zfs_unmap(vnode_t *vp, struct file* fp, uio_t *uio)
-{
-	return zfs_manage_mapping(vp, fp, uio, false);
-}
-
 /*
  * Write the bytes to a file.
  *
@@ -4961,6 +4948,5 @@ struct vnops zfs_vnops = {
 	zfs_inactive,			/* inactive */
 	zfs_truncate,			/* truncate */
 	zfs_link,			/* link */
-	zfs_map,			/* map */
-	zfs_unmap,			/* unmap */
+	zfs_arc,			/* arc */
 };
diff --git a/build.mk b/build.mk
index e2fe2ed487f6d6b766823c3e28fb90fb37c8a0ad..84534bf109dd748c46c9e8caa80a607062762793 100644
--- a/build.mk
+++ b/build.mk
@@ -622,6 +622,7 @@ objects += core/semaphore.o
 objects += core/condvar.o
 objects += core/debug.o
 objects += core/rcu.o
+objects += core/pagecache.o
 objects += drivers/pci.o
 objects += core/mempool.o
 objects += core/alloctracker.o
diff --git a/core/mmu.cc b/core/mmu.cc
index be0ae941a2ac04117160c6c816cfe2d628aedd43..f4826dfbd9098739fb657963152aabee5d149994 100644
--- a/core/mmu.cc
+++ b/core/mmu.cc
@@ -121,6 +121,24 @@ phys virt_to_phys(void *virt)
     return static_cast<char*>(virt) - phys_mem;
 }
 
+void* mmupage::vaddr() const
+{
+    return _page;
+}
+
+phys mmupage::paddr() const
+{
+    if (!_page) {
+        throw std::exception();
+    }
+    return virt_to_phys(_page);
+}
+
+bool mmupage::cow() const
+{
+    return _cow;
+}
+
 phys allocate_intermediate_level()
 {
     phys pt_page = virt_to_phys(memory::alloc_page());
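
The mmupage contract is worth spelling out: vaddr() may legitimately be null when the provider could not produce a backing page, paddr() turns that into an exception, and populate::huge_page() below catches it to fall back to small pages, while cow() tells the mapper to drop perm_write from the mapping. A standalone model of those semantics; the identity phys mapping is a simplification standing in for virt_to_phys():

```cpp
#include <cassert>
#include <exception>

// Model of mmupage; identity mapping stands in for virt_to_phys().
class mmupage {
    void* _page;
    bool _cow;
public:
    mmupage(void* page, bool cow = false) : _page(page), _cow(cow) {}
    void* vaddr() const { return _page; }
    unsigned long paddr() const {
        if (!_page) {
            throw std::exception();     // no backing page to map
        }
        return reinterpret_cast<unsigned long>(_page);
    }
    bool cow() const { return _cow; }
};

int main()
{
    bool fell_back = false;
    try {
        mmupage(nullptr).paddr();       // provider produced no page
    } catch (std::exception&) {
        fell_back = true;               // huge_page() returns false here and
    }                                   // the fault is retried with small pages
    assert(fell_back);
}
```
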
@@ -156,16 +174,6 @@ bool change_perm(hw_ptep ptep, unsigned int perm)
     return old & ~perm;
 }
 
-// This is supposed to be lockless, so we need to rely heavily on atomics.  The
-// reason for this is that this will be called from an invalidation handler,
-// which can in turn be called by the filesystem mapper (for instance, if the
-// filesystem needs to evict some memory before reading more)
-bool clear_present(hw_ptep ptep)
-{
-    pt_element old = ptep.exchange(make_empty_pte());
-    return old.present();
-}
-
 void split_large_page(hw_ptep ptep, unsigned level)
 {
     pt_element pte_orig = ptep.read();
@@ -183,11 +191,10 @@ void split_large_page(hw_ptep ptep, unsigned level)
 }
 
 struct page_allocator {
-    virtual void* alloc(uintptr_t offset) = 0;
-    virtual void* alloc(size_t size, uintptr_t offset) = 0;
-    virtual void free(void *addr, uintptr_t offset) = 0;
-    virtual void free(void *addr, size_t size, uintptr_t offset) = 0;
-    virtual void set_addr(uintptr_t addr) {}
+    virtual mmupage alloc(uintptr_t offset, hw_ptep ptep, bool write) = 0;
+    virtual mmupage alloc(size_t size, uintptr_t offset, hw_ptep ptep, bool write) = 0;
+    virtual void free(void *addr, uintptr_t offset, hw_ptep ptep) = 0;
+    virtual void free(void *addr, size_t size, uintptr_t offset, hw_ptep ptep) = 0;
     virtual void finalize() = 0;
     virtual ~page_allocator() {}
 };
@@ -475,41 +482,57 @@ template <account_opt T = account_opt::no>
 class populate : public vma_operation<allocate_intermediate_opt::yes, skip_empty_opt::no, T> {
 private:
     page_allocator* _page_provider;
-    unsigned int perm;
+    unsigned int _perm;
+    bool _write;
     bool _map_dirty;
     pt_element dirty(pt_element pte) {
         pte.set_dirty(_map_dirty);
         return pte;
     }
+    bool skip(pt_element pte) {
+        if (pte.empty()) {
+            return false;
+        }
+        return !_write || pte.writable();
+    }
+    unsigned int perm(bool cow) {
+        unsigned int p = _perm;
+        if (cow) {
+            p &= ~perm_write;
+        }
+        return p;
+    }
 public:
-    populate(page_allocator* pops, unsigned int perm, bool map_dirty = true) :
-        _page_provider(pops), perm(perm), _map_dirty(map_dirty) { }
+    populate(page_allocator* pops, unsigned int perm, bool write = false, bool map_dirty = true) :
+        _page_provider(pops), _perm(perm), _write(write), _map_dirty(map_dirty) { }
     void small_page(hw_ptep ptep, uintptr_t offset){
-        if (!ptep.read().empty()) {
+        pt_element pte = ptep.read();
+        if (skip(pte)) {
             return;
         }
-        phys page = virt_to_phys(_page_provider->alloc(offset));
-        if (!ptep.compare_exchange(make_empty_pte(), dirty(make_normal_pte(page, perm)))) {
-            _page_provider->free(phys_to_virt(page), offset);
+        mmupage page = _page_provider->alloc(offset, ptep, _write);
+        if (!ptep.compare_exchange(pte, dirty(make_normal_pte(page.paddr(), perm(page.cow()))))) {
+            _page_provider->free(page.vaddr(), offset, ptep);
         } else {
             this->account(mmu::page_size);
         }
     }
     bool huge_page(hw_ptep ptep, uintptr_t offset){
-        auto pte = ptep.read();
-        if (!pte.empty()) {
+        pt_element pte = ptep.read();
+        if (skip(pte)) {
             return true;
         }
-        void *vpage = _page_provider->alloc(huge_page_size, offset);
-        if (!vpage) {
-            return false;
-        }
 
-        phys page = virt_to_phys(vpage);
-        if (!ptep.compare_exchange(make_empty_pte(), dirty(make_large_pte(page, perm)))) {
-            _page_provider->free(phys_to_virt(page), huge_page_size, offset);
-        } else {
-            this->account(mmu::huge_page_size);
+        try {
+            mmupage page = _page_provider->alloc(huge_page_size, offset, ptep, _write);
+
+            if (!ptep.compare_exchange(pte, dirty(make_large_pte(page.paddr(), perm(page.cow()))))) {
+                _page_provider->free(page.vaddr(), huge_page_size, offset, ptep);
+            } else {
+                this->account(mmu::huge_page_size);
+            }
+        } catch(std::exception&) {
+            return false;
         }
         return true;
     }
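
The skip()/perm() pair above encodes the whole copy-on-write story of this patch: a populated PTE is only revisited when a write faults on a non-writable mapping, and a page the provider marked cow is mapped with perm_write removed so that the next write fault can break the share. A compact model of those two decisions; pte_model is illustrative, and perm_write matches the value in include/osv/mmu-defs.hh below:

```cpp
#include <cassert>

// Model of the fault-skip decision: repopulate only when the PTE is
// empty, or when a write faults on a read-only (CoW) mapping.
struct pte_model { bool empty_, writable_; };

bool skip(pte_model pte, bool write)
{
    if (pte.empty_) {
        return false;                    // nothing mapped yet: populate
    }
    return !write || pte.writable_;      // mapping already sufficient: skip
}

unsigned perm(unsigned p, bool cow)
{
    constexpr unsigned perm_write = 2;   // as in mmu-defs.hh
    return cow ? p & ~perm_write : p;
}

int main()
{
    assert(!skip({true,  false}, false)); // empty: populate
    assert( skip({false, false}, false)); // read fault on mapped page: skip
    assert(!skip({false, false}, true));  // write fault on CoW page: break the share
    assert( skip({false, true},  true));  // already writable: skip
    assert(perm(7, true) == 5);           // CoW mapping loses perm_write
}
```
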
@@ -518,7 +541,8 @@ public:
 template <account_opt Account = account_opt::no>
 class populate_small : public populate<Account> {
 public:
-    populate_small(page_allocator* pops, unsigned int perm, bool map_dirty = true) : populate<Account>(pops, perm, map_dirty) { }
+    populate_small(page_allocator* pops, unsigned int perm, bool write = false, bool map_dirty = true) :
+        populate<Account>(pops, perm, write, map_dirty) { }
     bool huge_page(hw_ptep ptep, uintptr_t offset) {
         assert(0);
         return false;
@@ -534,15 +558,16 @@ struct tlb_gather {
         void* addr;
         size_t size;
         off_t offset; // FIXME: unneeded?
+        pt_element* ptep;
     };
     page_allocator* page_provider;
     size_t nr_pages = 0;
     tlb_page pages[max_pages];
-    void push(void* addr, size_t size, off_t offset) {
+    void push(void* addr, size_t size, off_t offset, hw_ptep ptep) {
         if (nr_pages == max_pages) {
             flush();
         }
-        pages[nr_pages++] = { addr, size, offset };
+        pages[nr_pages++] = { addr, size, offset, ptep.release() };
     }
     void flush() {
         if (!nr_pages) {
@@ -552,9 +577,9 @@ struct tlb_gather {
         for (auto i = 0u; i < nr_pages; ++i) {
             auto&& tp = pages[i];
             if (tp.size == page_size) {
-                page_provider->free(tp.addr, tp.offset);
+                page_provider->free(tp.addr, tp.offset, hw_ptep::force(tp.ptep));
             } else {
-                page_provider->free(tp.addr, tp.size, tp.offset);
+                page_provider->free(tp.addr, tp.size, tp.offset, hw_ptep::force(tp.ptep));
             }
         }
         nr_pages = 0;
@@ -577,13 +602,13 @@ public:
         // not-present may only mean mprotect(PROT_NONE).
         pt_element pte = ptep.read();
         ptep.write(make_empty_pte());
-        _tlb_gather.push(phys_to_virt(pte.addr(false)), page_size, offset);
+        _tlb_gather.push(phys_to_virt(pte.addr(false)), page_size, offset, ptep);
         this->account(mmu::page_size);
     }
     bool huge_page(hw_ptep ptep, uintptr_t offset) {
         pt_element pte = ptep.read();
         ptep.write(make_empty_pte());
-        _tlb_gather.push(phys_to_virt(pte.addr(true)), huge_page_size, offset);
+        _tlb_gather.push(phys_to_virt(pte.addr(true)), huge_page_size, offset, ptep);
         this->account(mmu::huge_page_size);
         return true;
     }
@@ -612,20 +637,6 @@ public:
     bool tlb_flush_needed(void) {return do_flush;}
 };
 
-class page_out: public vma_operation<allocate_intermediate_opt::no, skip_empty_opt::yes> {
-private:
-    bool do_flush = false;
-public:
-    page_out() { }
-    void small_page(hw_ptep ptep, uintptr_t offset) {
-        do_flush |= clear_present(ptep);
-    }
-    bool huge_page(hw_ptep ptep, uintptr_t offset) {
-        abort();
-    }
-    bool tlb_flush_needed(void) {return do_flush;}
-};
-
 class count_maps:
     public vma_operation<allocate_intermediate_opt::no,
                          skip_empty_opt::yes, account_opt::yes> {
@@ -885,16 +896,16 @@ private:
         return addr;
     }
 public:
-    virtual void* alloc(uintptr_t offset) override {
+    virtual mmupage alloc(uintptr_t offset, hw_ptep ptep, bool write) override {
         return fill(memory::alloc_page(), offset, page_size);
     }
-    virtual void* alloc(size_t size, uintptr_t offset) override {
+    virtual mmupage alloc(size_t size, uintptr_t offset, hw_ptep ptep, bool write) override {
         return fill(memory::alloc_huge_page(size), offset, size);
     }
-    virtual void free(void *addr, uintptr_t offset) override {
+    virtual void free(void *addr, uintptr_t offset, hw_ptep ptep) override {
         return memory::free_page(addr);
     }
-    virtual void free(void *addr, size_t size, uintptr_t offset) override {
+    virtual void free(void *addr, size_t size, uintptr_t offset, hw_ptep ptep) override {
         return memory::free_huge_page(addr, size);
     }
     virtual void finalize() override {
@@ -941,28 +952,24 @@ public:
 class map_file_page_mmap : public page_allocator {
 private:
     file* _file;
-    off_t _map_offset;
-    uintptr_t _start = 0;
+    off_t _foffset;
+    bool _shared;
 
 public:
-    map_file_page_mmap(file *file, off_t off) : _file(file), _map_offset(off) {}
+    map_file_page_mmap(file *file, off_t off, bool shared) : _file(file), _foffset(off), _shared(shared) {}
     virtual ~map_file_page_mmap() {};
 
-    virtual void* alloc(uintptr_t offset) override {
-        return _file->get_page(_start + offset, offset + _map_offset, page_size);
+    virtual mmupage alloc(uintptr_t offset, hw_ptep ptep, bool write) override {
+        return alloc(page_size, offset, ptep, write);
     }
-    virtual void* alloc(size_t size, uintptr_t offset) override {
-        return _file->get_page(_start + offset, offset + _map_offset, size);
+    virtual mmupage alloc(size_t size, uintptr_t offset, hw_ptep ptep, bool write) override {
+        return _file->get_page(offset + _foffset, size, ptep, write, _shared);
     }
-    virtual void free(void *addr, uintptr_t offset) override {
-        _file->put_page(addr, _start + offset, offset + _map_offset, page_size);
+    virtual void free(void *addr, uintptr_t offset, hw_ptep ptep) override {
+        free(addr, page_size, offset, ptep);
     }
-    virtual void free(void *addr, size_t size, uintptr_t offset) override {
-        _file->put_page(addr, _start + offset, offset + _map_offset, size);
-    }
-
-    virtual void set_addr(uintptr_t addr) override {
-        _start = addr;
+    virtual void free(void *addr, size_t size, uintptr_t offset, hw_ptep ptep) override {
+        _file->put_page(addr, offset + _foffset, size, ptep);
     }
 
     void finalize() {
@@ -970,11 +977,10 @@ public:
 };
 
 // In the general case, we expect only one element in the list.
-static std::unordered_multimap<void *, uintptr_t> shared_fs_maps;
+static std::unordered_multimap<void *, hw_ptep> shared_fs_maps;
 // We need to reference count the buffer, but first we need to store the
 // buffer somewhere we can find
 static std::unordered_map<void *, unsigned int> shared_fs_buf_refcnt;
-
 // Can't use the vma_list_mutex, because if we do, we can have a deadlock where
 // we call into the filesystem to read data with the vma_list_mutex held - because
 // we do that for complex operate operations, and if the filesystem decides to evict
@@ -985,45 +991,60 @@ static void fs_buf_get(void *buf_addr)
 {
     auto b = shared_fs_buf_refcnt.find(buf_addr);
     if (b == shared_fs_buf_refcnt.end()) {
-        shared_fs_buf_refcnt.insert(std::make_pair(buf_addr, 1));
+        shared_fs_buf_refcnt.emplace(buf_addr, 1);
         return;
     }
     b->second++;
 }
 
-static bool fs_buf_put(void *buf_addr)
+static bool fs_buf_put(void *buf_addr, unsigned dec = 1)
 {
     auto b = shared_fs_buf_refcnt.find(buf_addr);
     assert(b != shared_fs_buf_refcnt.end());
-    auto old = --b->second;
-    if (old == 0) {
+    assert(b->second >= dec);
+    b->second -= dec;
+    if (b->second == 0) {
         shared_fs_buf_refcnt.erase(buf_addr);
         return true;
     }
     return false;
 }
 
-void add_mapping(void *buf_addr, uintptr_t off, uintptr_t vaddr)
+void add_mapping(void *buf_addr, void *page, hw_ptep ptep)
 {
     WITH_LOCK(shared_fs_mutex) {
-        shared_fs_maps.insert(std::make_pair(buf_addr + off, vaddr));
+        shared_fs_maps.emplace(page, ptep);
         fs_buf_get(buf_addr);
     }
 }
 
-bool remove_mapping(void *buf_addr, void *paddr, uintptr_t addr)
+bool remove_mapping(void *buf_addr, void *paddr, hw_ptep ptep)
 {
     WITH_LOCK(shared_fs_mutex) {
         auto buf = shared_fs_maps.equal_range(paddr);
         for (auto it = buf.first; it != buf.second; it++) {
-            auto v = (*it).second;
-            if (v == addr) {
+            auto stored = (*it).second;
+            if (stored == ptep) {
                 shared_fs_maps.erase(it);
-                break;
+                return fs_buf_put(buf_addr);
             }
         }
-        return fs_buf_put(buf_addr);
     }
+    return false;
+}
+
+bool lookup_mapping(void *paddr, hw_ptep ptep)
+{
+    WITH_LOCK(shared_fs_mutex) {
+        auto buf = shared_fs_maps.equal_range(paddr);
+        for (auto it = buf.first; it != buf.second; it++) {
+            auto stored = (*it).second;
+            if (stored == ptep) {
+                return true;
+            }
+        }
+    }
+    return false;
 }
 
 uintptr_t allocate(vma *v, uintptr_t start, size_t size, bool search)
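
The bookkeeping rules above are easiest to see end to end: add_mapping() bumps one refcount per PTE that maps a page of the buffer, remove_mapping() returns true only when it drops the last reference, and that return value is what tells the caller (pagecache::release() below) to issue ARC_ACTION_RELEASE. A user-space model of that contract, with the multimap of PTEs elided:

```cpp
#include <cassert>
#include <unordered_map>

// One refcount per ARC buffer, incremented per mapping; put() reports
// "this was the last mapping", the cue to drop the ARC hold.
static std::unordered_map<void*, unsigned> refcnt;

void get(void* buf) { ++refcnt[buf]; }

bool put(void* buf, unsigned dec = 1)
{
    auto it = refcnt.find(buf);
    assert(it != refcnt.end() && it->second >= dec);
    it->second -= dec;
    if (it->second == 0) {
        refcnt.erase(it);
        return true;   // caller must release the ARC hold now
    }
    return false;
}

int main()
{
    int buf;
    get(&buf); get(&buf);      // two PTEs map pages of this buffer
    assert(!put(&buf));        // one unmapped: buffer still held
    assert(put(&buf));         // last one gone: release to the ARC
}
```
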
@@ -1071,34 +1092,43 @@ void vcleanup(void* addr, size_t size)
 }
 
 template<account_opt Account = account_opt::no>
-ulong populate_vma(vma *vma, void *v, size_t size)
+ulong populate_vma(vma *vma, void *v, size_t size, bool write = false)
 {
     page_allocator *map = vma->page_ops();
     auto total = vma->has_flags(mmap_small) ?
-        vma->operate_range(populate_small<Account>(map, vma->perm(), vma->map_dirty()), v, size) :
-        vma->operate_range(populate<Account>(map, vma->perm(), vma->map_dirty()), v, size);
+        vma->operate_range(populate_small<Account>(map, vma->perm(), write, vma->map_dirty()), v, size) :
+        vma->operate_range(populate<Account>(map, vma->perm(), write, vma->map_dirty()), v, size);
     map->finalize();
 
     return total;
 }
 
-TRACEPOINT(trace_mmu_invalidate, "addr=%p, vaddr=%p", void *, uintptr_t);
-void unmap_address(void *addr, size_t size)
+void clear_pte(hw_ptep ptep)
 {
+    ptep.write(make_empty_pte());
+}
+
+void clear_pte(std::pair<void* const, hw_ptep>& pair)
+{
+    clear_pte(pair.second);
+}
+
+bool unmap_address(void *buf_addr, void *addr, size_t size)
+{
+    bool last;
+    unsigned refs = 0;
     size = align_up(size, page_size);
     WITH_LOCK(shared_fs_mutex) {
-        shared_fs_buf_refcnt.erase(addr);
         for (uintptr_t a = reinterpret_cast<uintptr_t>(addr); size; a += page_size, size -= page_size) {
             addr = reinterpret_cast<void*>(a);
             auto buf = shared_fs_maps.equal_range(addr);
-            for (auto it = buf.first; it != buf.second; it++) {
-                auto vaddr = (*it).second;
-                trace_mmu_invalidate(addr, vaddr);
-                operate_range(page_out(), (void *)vaddr, page_size);
-            }
+            refs += clear_ptes(buf.first, buf.second);
             shared_fs_maps.erase(addr);
         }
+        last = refs ? fs_buf_put(buf_addr, refs) : false;
     }
+    tlb_flush();
+    return last;
 }
 
 void* map_anon(const void* addr, size_t size, unsigned flags, unsigned perm)
@@ -1117,12 +1147,12 @@ void* map_anon(const void* addr, size_t size, unsigned flags, unsigned perm)
 
 std::unique_ptr<file_vma> default_file_mmap(file* file, addr_range range, unsigned flags, unsigned perm, off_t offset)
 {
-    return std::unique_ptr<file_vma>(new file_vma(range, perm, file, offset, flags & mmu::mmap_shared, new map_file_page_read(file, offset)));
+    return std::unique_ptr<file_vma>(new file_vma(range, perm, flags, file, offset, new map_file_page_read(file, offset)));
 }
 
 std::unique_ptr<file_vma> map_file_mmap(file* file, addr_range range, unsigned flags, unsigned perm, off_t offset)
 {
-    return std::unique_ptr<file_vma>(new file_vma(range, perm, file, offset, flags & mmu::mmap_shared, new map_file_page_mmap(file, offset)));
+    return std::unique_ptr<file_vma>(new file_vma(range, perm, flags, file, offset, new map_file_page_mmap(file, offset, flags & mmap_shared)));
 }
 
 void* map_file(const void* addr, size_t size, unsigned flags, unsigned perm,
@@ -1251,7 +1281,6 @@ vma::~vma()
 void vma::set(uintptr_t start, uintptr_t end)
 {
     _range = addr_range(align_down(start), align_up(end));
-    _page_ops->set_addr(start);
 }
 
 void vma::protect(unsigned perm)
@@ -1328,7 +1357,7 @@ void vma::fault(uintptr_t addr, exception_frame *ef)
         size = page_size;
     }
 
-    auto total = populate_vma<account_opt::yes>(this, (void*)addr, size);
+    auto total = populate_vma<account_opt::yes>(this, (void*)addr, size, ef->error_code & page_fault_write);
 
     if (_flags & mmap_jvm_heap) {
         memory::stats::on_jvm_heap_alloc(total);
@@ -1561,11 +1590,10 @@ ulong map_jvm(unsigned char* jvm_addr, size_t size, size_t align, balloon_ptr b)
     return 0;
 }
 
-file_vma::file_vma(addr_range range, unsigned perm, fileref file, f_offset offset, bool shared, page_allocator* page_ops)
-    : vma(range, perm, shared ? mmap_small : 0, !shared, page_ops)
+file_vma::file_vma(addr_range range, unsigned perm, unsigned flags, fileref file, f_offset offset, page_allocator* page_ops)
+    : vma(range, perm, flags | mmap_small, !(flags & mmap_shared), page_ops)
     , _file(file)
     , _offset(offset)
-    , _shared(shared)
 {
     int err = validate_perm(perm);
 
@@ -1623,7 +1651,7 @@ private:
 
 error file_vma::sync(uintptr_t start, uintptr_t end)
 {
-    if (!_shared)
+    if (!has_flags(mmap_shared))
         return make_error(ENOMEM);
     start = std::max(start, _range.start());
     end = std::min(end, _range.end());
@@ -1644,7 +1672,7 @@ int file_vma::validate_perm(unsigned perm)
         return EACCES;
     }
     if (perm & perm_write) {
-        if (_shared && !(_file->f_flags & FWRITE)) {
+        if (has_flags(mmap_shared) && !(_file->f_flags & FWRITE)) {
             return EACCES;
         }
     }
@@ -1666,7 +1694,7 @@ std::unique_ptr<file_vma> shm_file::mmap(addr_range range, unsigned flags, unsig
     return map_file_mmap(this, range, flags, perm, offset);
 }
 
-void* shm_file::get_page(uintptr_t start, uintptr_t offset, size_t size)
+mmupage shm_file::get_page(uintptr_t offset, size_t size, hw_ptep ptep, bool write, bool shared)
 {
     uintptr_t hp_off = ::align_down(offset, huge_page_size);
     void *addr;
@@ -1684,7 +1712,7 @@ void* shm_file::get_page(uintptr_t start, uintptr_t offset, size_t size)
     return static_cast<char*>(addr) + offset - hp_off;
 }
 
-void shm_file::put_page(void *addr, uintptr_t start, uintptr_t offset, size_t size) {}
+void shm_file::put_page(void *addr, uintptr_t offset, size_t size, hw_ptep ptep) {}
 
 shm_file::shm_file(size_t size, int flags) : special_file(flags, DTYPE_UNSPEC), _size(size) {}
 
diff --git a/core/pagecache.cc b/core/pagecache.cc
new file mode 100644
index 0000000000000000000000000000000000000000..662b98d68097329feaf51a380a7db241d0e59151
--- /dev/null
+++ b/core/pagecache.cc
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2014 Cloudius Systems, Ltd.
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+
+#include <unordered_map>
+#include <unordered_set>
+#include <deque>
+#include <osv/pagecache.hh>
+#include <osv/mempool.hh>
+#include <fs/vfs/vfs.h>
+
+namespace pagecache {
+struct hashkey {
+    dev_t dev;
+    ino_t ino;
+    off_t offset;
+    bool operator==(const hashkey& a) const noexcept {
+        return (dev == a.dev) && (ino == a.ino) && (offset == a.offset);
+    }
+};
+}
+
+namespace std {
+template<>
+struct hash<pagecache::hashkey> {
+    size_t operator()(const pagecache::hashkey& key) const noexcept {
+        hash<uint64_t> h;
+        return h(key.dev) ^ h(key.ino) ^ h(key.offset);
+    }
+};
+
+template<> struct hash<mmu::hw_ptep> {
+    size_t operator()(const mmu::hw_ptep& ptep) const noexcept {
+        hash<const mmu::pt_element*> h;
+        return h(ptep.release());
+    }
+};
+}
+
+namespace pagecache {
+
+class cached_page {
+private:
+    const hashkey _key;
+    struct dentry* _dp;
+    void* _page;
+    std::unordered_set<mmu::hw_ptep> _ptes; // set of pointers to ptes that map the page
+public:
+    cached_page(hashkey key, vfs_file* fp) : _key(key) {
+        _dp = fp->f_dentry;
+        dref(_dp);
+        _page = memory::alloc_page();
+    }
+    ~cached_page() {
+        if (_page) {
+            writeback();
+            memory::free_page(_page);
+            drele(_dp);
+        }
+    }
+
+    int writeback()
+    {
+        struct vnode *vp = _dp->d_vnode;
+        int error;
+        struct iovec iov {_page, mmu::page_size};
+        struct uio uio {&iov, 1, _key.offset, mmu::page_size, UIO_WRITE};
+
+        vn_lock(vp);
+        error = VOP_WRITE(vp, &uio, 0);
+        vn_unlock(vp);
+
+        return error;
+    }
+
+    void map(mmu::hw_ptep ptep) {
+        _ptes.emplace(ptep);
+    }
+    void unmap(mmu::hw_ptep ptep) {
+        _ptes.erase(ptep);
+    }
+    void* addr() {
+        return _page;
+    }
+    void flush() {
+        mmu::clear_ptes(_ptes.begin(), _ptes.end());
+    }
+    const hashkey& key() {
+        return _key;
+    }
+    void* release() { // called to demote a page from a cache page to an anonymous one
+        assert(_ptes.size() == 0);
+        void *p = _page;
+        _page = nullptr;
+        drele(_dp);
+        return p;
+    }
+};
+
+constexpr unsigned lru_max_length = 100;
+constexpr unsigned lru_free_count = 20;
+
+static mutex lock;
+static std::unordered_map<hashkey, cached_page*> cache;
+static std::deque<cached_page*> lru;
+
+static std::unique_ptr<cached_page> create_write_cached_page(vfs_file* fp, hashkey& key)
+{
+    size_t bytes;
+    cached_page* cp = new cached_page(key, fp);
+    struct iovec iov {cp->addr(), mmu::page_size};
+
+    sys_read(fp, &iov, 1, key.offset, &bytes);
+    return std::unique_ptr<cached_page>(cp);
+}
+
+static void insert(cached_page* cp) {
+    static cached_page* tofree[lru_free_count];
+    cache.emplace(cp->key(), cp);
+    lru.push_front(cp);
+
+    if (lru.size() > lru_max_length) {
+        for (unsigned i = 0; i < lru_free_count; i++) {
+            cached_page *p = lru.back();
+            lru.pop_back();
+            cache.erase(p->key());
+            p->flush();
+            tofree[i] = p;
+        }
+        mmu::tlb_flush();
+        for (auto p: tofree) {
+            delete p;
+        }
+    }
+}
+
+static cached_page *find_in_write_cache(hashkey& key)
+{
+    auto cpi = cache.find(key);
+
+    if (cpi == cache.end()) {
+        return nullptr;
+    } else {
+        return cpi->second;
+    }
+}
+
+mmu::mmupage get(vfs_file* fp, off_t offset, mmu::hw_ptep ptep, bool write, bool shared)
+{
+    void *start, *page;
+    size_t len;
+    struct stat st;
+    fp->stat(&st);
+    hashkey key {st.st_dev, st.st_ino, offset};
+    SCOPE_LOCK(lock);
+    cached_page* cp = find_in_write_cache(key);
+
+    if (write) {
+        if (!cp) {
+            auto newcp = create_write_cached_page(fp, key);
+            // FIXME: if the page is not in the ARC it will be read here,
+            // FIXME: we need a function that returns NULL if the page is not in the ARC
+            fp->get_arcbuf(offset, ARC_ACTION_QUERY, &start, &len, &page);
+            if (shared) {
+                // write fault into a shared mapping; the page is not in the write cache yet, so add it
+                cp = newcp.release();
+                insert(cp);
+                // the page is moving from the ARC to the write cache,
+                // so remove any mappings to the ARC page
+                // FIXME: if the pte we are changing is the only one, there is no need to unmap
+                if (mmu::unmap_address(start, page, mmu::page_size)) {
+                    fp->get_arcbuf(offset, ARC_ACTION_RELEASE, &start, &len, &page);
+                }
+            } else {
+                // remove the mapping to the ARC page, if one exists
+                if (mmu::remove_mapping(start, page, ptep)) {
+                    fp->get_arcbuf(offset, ARC_ACTION_RELEASE, &start, &len, &page);
+                }
+                // CoW for a private mapping: hand out a private copy of the ARC page
+                return newcp->release();
+            }
+        } else if (!shared) {
+            // CoW for a private mapping: copy the page out of the write cache
+            page = memory::alloc_page();
+            memcpy(page, cp->addr(), mmu::page_size);
+            return page;
+        }
+    } else if (!cp) {
+        // read fault and the page is not in the write cache yet; return one from the ARC, marked cow
+        fp->get_arcbuf(offset, ARC_ACTION_HOLD, &start, &len, &page);
+        mmu::add_mapping(start, page, ptep);
+        return mmu::mmupage(page, true);
+    }
+
+    cp->map(ptep);
+    return cp->addr();
+}
+
+void release(vfs_file* fp, void *addr, off_t offset, mmu::hw_ptep ptep)
+{
+    struct stat st;
+    fp->stat(&st);
+    hashkey key {st.st_dev, st.st_ino, offset};
+    SCOPE_LOCK(lock);
+    cached_page *cp = find_in_write_cache(key);
+
+    // the page is either in the ARC cache, in the write cache, or a private page
+    if (cp && cp->addr() == addr) {
+        // page is in write cache
+        cp->unmap(ptep);
+    } else if (mmu::lookup_mapping(addr, ptep)) {
+        // page is in ARC
+        void *start, *page;
+        size_t len;
+        fp->get_arcbuf(offset, ARC_ACTION_QUERY, &start, &len, &page);
+        assert(addr == page);
+        if (mmu::remove_mapping(start, page, ptep)) {
+            fp->get_arcbuf(offset, ARC_ACTION_RELEASE, &start, &len, &page);
+        }
+    } else {
+        // private page
+        memory::free_page(addr);
+    }
+}
+}
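
One subtlety in insert() above deserves a note: evicted pages are first removed from the index and unmapped, then a single mmu::tlb_flush() covers the whole batch, and only after that are the pages freed, so no CPU can still reach a freed page through a stale TLB entry. A user-space model of that batching order; page, cache, and tlb_flush are stand-ins for the real types:

```cpp
#include <cassert>
#include <deque>
#include <unordered_map>

constexpr unsigned lru_max_length = 100;   // values match the patch
constexpr unsigned lru_free_count = 20;

struct page { long key; };                 // stand-in for cached_page
static std::unordered_map<long, page*> cache;
static std::deque<page*> lru;

static void tlb_flush() {}                 // stands in for mmu::tlb_flush()

void insert(page* p)
{
    cache.emplace(p->key, p);
    lru.push_front(p);
    if (lru.size() > lru_max_length) {
        page* victims[lru_free_count];
        for (unsigned i = 0; i < lru_free_count; i++) {
            victims[i] = lru.back();
            lru.pop_back();
            cache.erase(victims[i]->key); // real code also calls p->flush()
        }                                 // to clear every mapping PTE
        tlb_flush();                      // one flush covers the whole batch
        for (page* v : victims) {
            delete v;                     // safe: no stale TLB entry remains
        }
    }
}

int main()
{
    for (long k = 0; k < 150; k++) {
        insert(new page{k});
    }
    assert(lru.size() <= lru_max_length);
}
```
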
diff --git a/fs/vfs/vfs_fops.cc b/fs/vfs/vfs_fops.cc
index 82dbf5b017598eec26de84b8e0e0d70728890455..4878fdd326f04073d58d8221cc0d6769a472ee89 100644
--- a/fs/vfs/vfs_fops.cc
+++ b/fs/vfs/vfs_fops.cc
@@ -13,6 +13,8 @@
 #include <fs/vfs/vfs.h>
 #include <osv/vfs_file.hh>
 #include <osv/mmu.hh>
+#include "arch-mmu.hh"
+#include <osv/pagecache.hh>
 
 vfs_file::vfs_file(unsigned flags)
 	: file(flags, DTYPE_VNODE)
@@ -137,82 +139,49 @@ int vfs_file::chmod(mode_t mode)
 	abort();
 }
 
-// Locking: vn_lock will call into the filesystem, and that can trigger an
-// eviction that will hold the mmu-side lock that protects the mappings
-// Always follow that order. We however can't just get rid of the mmu-side lock,
-// because not all invalidations will be synchronous.
-void* vfs_file::get_page(uintptr_t start, uintptr_t off, size_t size)
+mmu::mmupage vfs_file::get_page(uintptr_t off, size_t size, mmu::hw_ptep ptep, bool write, bool shared)
 {
-	assert(size == mmu::page_size);
-
-	auto fp = this;
-	struct vnode *vp = fp->f_dentry->d_vnode;
-
-	iovec io;
-	io.iov_base = nullptr;
-	io.iov_len = 0;
-
-	uio_mapper map_data;
-	uio *data = &map_data.uio;
-
-	data->uio_iov = &io;
-	data->uio_iovcnt = 1;
-	data->uio_offset = off_t(off);
-	// FIXME: If the buffer can hold, remap other pages as well, up to the
-	// buffer size.  However, this would require heavy changes in the fill
-	// and map code. Let's try it later.
-	data->uio_resid = mmu::page_size;
-	data->uio_rw = UIO_READ;
-	map_data.buffer = nullptr;
-
-	vn_lock(vp);
-	assert(VOP_MAP(vp, fp, data) == 0);
-	vn_unlock(vp);
-
-	mmu::add_mapping(io.iov_base, map_data.buf_off, start);
-	assert((reinterpret_cast<uintptr_t>(io.iov_base) & (mmu::page_size - 1)) == 0);
-	return io.iov_base + map_data.buf_off;
+    return pagecache::get(this, off, ptep, write, shared);
 }
 
-void vfs_file::put_page(void *addr, uintptr_t start, uintptr_t off, size_t size)
+void vfs_file::put_page(void *addr, uintptr_t off, size_t size, mmu::hw_ptep ptep)
 {
-	assert(size == mmu::page_size);
-
-	auto fp = this;
-	struct vnode *vp = fp->f_dentry->d_vnode;
-
-	iovec io;
-	io.iov_base = nullptr;
-	io.iov_len = 0;
-
-	uio data;
-	data.uio_iov = &io;
-	data.uio_iovcnt = 0;
-	data.uio_offset = off_t(off);
-	data.uio_resid = mmu::page_size;
-	data.uio_rw = UIO_READ;
-
-	vn_lock(vp);
-	// This first call will only query the buffer address. The result will be
-	// in uio_iov.iov_base. If this is the last reference to the buffer, then
-	// we call it again, with the iov update. (automatically done after this
-	// call) Usually it won't be, so we'll do only one call.
-	assert(VOP_UNMAP(vp, fp, &data) == 0);
-	if (mmu::remove_mapping(io.iov_base, addr, start)) {
-		assert(VOP_UNMAP(vp, fp, &data) == 0);
-	}
-	vn_unlock(vp);
+    pagecache::release(this, addr, off, ptep);
+}
 
+// Locking: vn_lock will call into the filesystem, and that can trigger an
+// eviction that will hold the mmu-side lock that protects the mappings
+// Always follow that order. We however can't just get rid of the mmu-side lock,
+// because not all invalidations will be synchronous.
+void vfs_file::get_arcbuf(uintptr_t offset, unsigned action, void** start, size_t* len, void** page)
+{
+    struct vnode *vp = f_dentry->d_vnode;
+
+    iovec io;
+    io.iov_base = nullptr;
+    io.iov_len = 0;
+
+    uio data;
+    data.uio_iov = &io;
+    data.uio_iovcnt = 1;
+    data.uio_offset = off_t(offset);
+    data.uio_resid = mmu::page_size;
+    data.uio_rw = UIO_READ;
+
+    vn_lock(vp);
+    assert(VOP_CACHE(vp, this, &data, action) == 0);
+    vn_unlock(vp);
+    *start = io.iov_base;
+    *len = io.iov_len;
+    *page = static_cast<char*>(io.iov_base) + data.uio_offset;
 }
 
 std::unique_ptr<mmu::file_vma> vfs_file::mmap(addr_range range, unsigned flags, unsigned perm, off_t offset)
 {
 	auto fp = this;
 	struct vnode *vp = fp->f_dentry->d_vnode;
-	if ((perm & mmu::perm_write) || (!vp->v_op->vop_map) || (vp->v_size < (off_t)mmu::page_size)) {
+	if (!vp->v_op->vop_cache || (vp->v_size < (off_t)mmu::page_size)) {
 		return mmu::default_file_mmap(this, range, flags, perm, offset);
 	}
-	// Don't know what to do if we have one but not the other
-	assert(vp->v_op->vop_unmap);
 	return mmu::map_file_mmap(this, range, flags, perm, offset);
 }
diff --git a/include/osv/file.h b/include/osv/file.h
index 9d3bcee563d53323594c6310d2b429127ae82a7d..76ddc4a43d4e97a93d965bf6968eb8630cfafeab 100755
--- a/include/osv/file.h
+++ b/include/osv/file.h
@@ -49,6 +49,7 @@
 #include <osv/addr_range.hh>
 #include <osv/rcu.hh>
 #include <osv/error.h>
+#include "arch-mmu.hh"
 
 #endif
 
@@ -96,8 +97,8 @@ struct file {
 	virtual std::unique_ptr<mmu::file_vma> mmap(addr_range range, unsigned flags, unsigned perm, off_t offset) {
 	    throw make_error(ENODEV);
 	}
-	virtual void* get_page(uintptr_t start, uintptr_t offset, size_t size) { throw make_error(ENOSYS);}
-	virtual void put_page(void *addr, uintptr_t start, uintptr_t offset, size_t size) { throw make_error(ENOSYS);}
+	virtual mmu::mmupage get_page(uintptr_t offset, size_t size, mmu::hw_ptep ptep, bool write, bool shared) { throw make_error(ENOSYS); }
+	virtual void put_page(void *addr, uintptr_t offset, size_t size, mmu::hw_ptep ptep) { throw make_error(ENOSYS); }
 
 	int		f_flags;	/* open flags */
 	int		f_count;	/* reference count, see below */
diff --git a/include/osv/mmu-defs.hh b/include/osv/mmu-defs.hh
new file mode 100644
index 0000000000000000000000000000000000000000..6d14b42dd52119c89a6afe875d6247dfd096790a
--- /dev/null
+++ b/include/osv/mmu-defs.hh
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2014 Cloudius Systems, Ltd.
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#ifndef MMU_DEFS_HH
+#define MMU_DEFS_HH
+
+#include <stdint.h>
+
+namespace mmu {
+
+constexpr uintptr_t page_size = 4096;
+constexpr int page_size_shift = 12; // log2(page_size)
+
+constexpr int pte_per_page = 512;
+constexpr int pte_per_page_shift = 9; // log2(pte_per_page)
+
+constexpr uintptr_t huge_page_size = mmu::page_size*pte_per_page; // 2 MB
+
+typedef uint64_t f_offset;
+typedef uint64_t phys;
+
+static char* const phys_mem = reinterpret_cast<char*>(0xffffc00000000000);
+// area for debug allocations:
+static char* const debug_base = reinterpret_cast<char*>(0xffffe00000000000);
+
+enum {
+    perm_read = 1,
+    perm_write = 2,
+    perm_exec = 4,
+    perm_rx = perm_read | perm_exec,
+    perm_rw = perm_read | perm_write,
+    perm_rwx = perm_read | perm_write | perm_exec,
+};
+
+enum {
+    page_fault_prot  = 1ul << 0,
+    page_fault_write = 1ul << 1,
+    page_fault_user  = 1ul << 2,
+    page_fault_rsvd  = 1ul << 3,
+    page_fault_insn  = 1ul << 4,
+};
+
+enum {
+    mmap_fixed       = 1ul << 0,
+    mmap_populate    = 1ul << 1,
+    mmap_shared      = 1ul << 2,
+    mmap_uninitialized = 1ul << 3,
+    mmap_jvm_heap    = 1ul << 4,
+    mmap_small       = 1ul << 5,
+    mmap_jvm_balloon = 1ul << 6,
+};
+
+class mmupage {
+    void* _page;
+    bool _cow;
+public:
+    mmupage(void *page, bool cow = false) : _page(page), _cow(cow) {}
+    void* vaddr() const;
+    phys paddr() const;
+    bool cow() const;
+};
+
+}
+#endif
diff --git a/include/osv/mmu.hh b/include/osv/mmu.hh
index 598240fe7dc5cd2e6fa113f7f2152e8c6bade520..a4b19a27b1774aa060d2c2bfce5191bcf16c6c2b 100644
--- a/include/osv/mmu.hh
+++ b/include/osv/mmu.hh
@@ -17,6 +17,8 @@
 #include <osv/addr_range.hh>
 #include <unordered_map>
 #include <memory>
+#include <osv/mmu-defs.hh>
+#include "arch-mmu.hh"
 
 struct exception_frame;
 class balloon;
@@ -27,52 +29,11 @@ typedef std::shared_ptr<balloon> balloon_ptr;
  */
 namespace mmu {
 
-constexpr uintptr_t page_size = 4096;
-constexpr int page_size_shift = 12; // log2(page_size)
-
-constexpr int pte_per_page = 512;
-constexpr int pte_per_page_shift = 9; // log2(pte_per_page)
-
-constexpr uintptr_t huge_page_size = mmu::page_size*pte_per_page; // 2 MB
-
-typedef uint64_t f_offset;
-
-static char* const phys_mem = reinterpret_cast<char*>(0xffffc00000000000);
-// area for debug allocations:
-static char* const debug_base = reinterpret_cast<char*>(0xffffe00000000000);
-
 constexpr inline unsigned pt_index(void *virt, unsigned level)
 {
     return (reinterpret_cast<ulong>(virt) >> (page_size_shift + level * pte_per_page_shift)) & (pte_per_page - 1);
 }
 
-enum {
-    perm_read = 1,
-    perm_write = 2,
-    perm_exec = 4,
-    perm_rx = perm_read | perm_exec,
-    perm_rw = perm_read | perm_write,
-    perm_rwx = perm_read | perm_write | perm_exec,
-};
-
-enum {
-    page_fault_prot  = 1ul << 0,
-    page_fault_write = 1ul << 1,
-    page_fault_user  = 1ul << 2,
-    page_fault_rsvd  = 1ul << 3,
-    page_fault_insn  = 1ul << 4,
-};
-
-enum {
-    mmap_fixed       = 1ul << 0,
-    mmap_populate    = 1ul << 1,
-    mmap_shared      = 1ul << 2,
-    mmap_uninitialized = 1ul << 3,
-    mmap_jvm_heap    = 1ul << 4,
-    mmap_small       = 1ul << 5,
-    mmap_jvm_balloon = 1ul << 6,
-};
-
 struct page_allocator;
 
 class vma {
@@ -129,7 +90,7 @@ public:
 
 class file_vma : public vma {
 public:
-    file_vma(addr_range range, unsigned perm, fileref file, f_offset offset, bool shared, page_allocator *page_ops);
+    file_vma(addr_range range, unsigned perm, unsigned flags, fileref file, f_offset offset, page_allocator *page_ops);
     ~file_vma();
     virtual void split(uintptr_t edge) override;
     virtual error sync(uintptr_t start, uintptr_t end) override;
@@ -138,7 +99,6 @@ private:
     f_offset offset(uintptr_t addr);
     fileref _file;
     f_offset _offset;
-    bool _shared;
 };
 
 ulong map_jvm(unsigned char* addr, size_t size, size_t align, balloon_ptr b);
@@ -181,8 +141,8 @@ public:
     virtual int stat(struct stat* buf) override;
     virtual int close() override;
     virtual std::unique_ptr<file_vma> mmap(addr_range range, unsigned flags, unsigned perm, off_t offset) override;
-    virtual void* get_page(uintptr_t start, uintptr_t offset, size_t size) override;
-    virtual void put_page(void *addr, uintptr_t start, uintptr_t offset, size_t size) override;
+    virtual mmupage get_page(uintptr_t offset, size_t size, hw_ptep ptep, bool write, bool shared) override;
+    virtual void put_page(void *addr, uintptr_t offset, size_t size, hw_ptep ptep) override;
 };
 
 void* map_file(const void* addr, size_t size, unsigned flags, unsigned perm,
@@ -200,11 +160,14 @@ bool isreadable(void *addr, size_t size);
 std::unique_ptr<file_vma> default_file_mmap(file* file, addr_range range, unsigned flags, unsigned perm, off_t offset);
 std::unique_ptr<file_vma> map_file_mmap(file* file, addr_range range, unsigned flags, unsigned perm, off_t offset);
 
-void unmap_address(void *addr, size_t size);
-void add_mapping(void *buf_addr, uintptr_t offset, uintptr_t vaddr);
-bool remove_mapping(void *buf_addr, void *paddr, uintptr_t addr);
+bool unmap_address(void* buf, void *addr, size_t size);
+void add_mapping(void *buf_addr, void* addr, hw_ptep ptep);
+bool remove_mapping(void *buf_addr, void *paddr, hw_ptep ptep);
+bool lookup_mapping(void *paddr, hw_ptep ptep);
+void tlb_flush();
+void clear_pte(hw_ptep ptep);
+void clear_pte(std::pair<void* const, hw_ptep>& pair);
 
-typedef uint64_t phys;
 phys virt_to_phys(void *virt);
 void* phys_to_virt(phys pa);
 
@@ -240,6 +203,17 @@ void vm_fault(uintptr_t addr, exception_frame* ef);
 
 std::string procfs_maps();
 
+template<typename I>
+unsigned clear_ptes(I start,  I end)
+{
+    unsigned i = 0;
+    for (auto it = start; it != end; it++) {
+        clear_pte(*it);
+        i++;
+    }
+    return i;
+}
+
 }
 
 #endif
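
clear_ptes() leans on overloading: the same template walks a plain container of hw_ptep handles (the cached_page::_ptes set) or the shared_fs_maps multimap, because clear_pte() has one overload for the bare handle and one for the multimap's key/handle pair. A standalone model showing why both overloads are needed:

```cpp
#include <cassert>
#include <unordered_map>
#include <utility>

struct ptep { int* slot; };   // stand-in for hw_ptep

static void clear_pte(ptep p) { *p.slot = 0; }
static void clear_pte(std::pair<void* const, ptep>& pair) { clear_pte(pair.second); }

template<typename I>
unsigned clear_ptes(I start, I end)
{
    unsigned i = 0;
    for (auto it = start; it != end; it++) {
        clear_pte(*it);       // overload resolution picks the right form
        i++;
    }
    return i;
}

int main()
{
    int a = 1, b = 2;
    std::unordered_multimap<void*, ptep> maps;
    maps.emplace(nullptr, ptep{&a});
    maps.emplace(nullptr, ptep{&b});
    assert(clear_ptes(maps.begin(), maps.end()) == 2);
    assert(a == 0 && b == 0);
}
```
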
diff --git a/include/osv/pagecache.hh b/include/osv/pagecache.hh
new file mode 100644
index 0000000000000000000000000000000000000000..df8d2e7761febfb4bfc12ddb75526bde4f377e0c
--- /dev/null
+++ b/include/osv/pagecache.hh
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2014 Cloudius Systems, Ltd.
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#include <osv/file.h>
+#include <osv/vfs_file.hh>
+#include <osv/mmu.hh>
+#include "arch-mmu.hh"
+
+namespace pagecache {
+
+mmu::mmupage get(vfs_file* fp, off_t offset, mmu::hw_ptep ptep, bool write, bool shared);
+void release(vfs_file* fp, void *addr, off_t offset, mmu::hw_ptep ptep);
+
+}
diff --git a/include/osv/uio.h b/include/osv/uio.h
index bd9ff17b0223a6c3ded482b257746a733f7b05ef..b57b81e5139a467c9ed96d95c0bf0f27bc75e190 100644
--- a/include/osv/uio.h
+++ b/include/osv/uio.h
@@ -60,16 +60,6 @@ struct uio {
 	enum	uio_rw uio_rw;		/* operation */
 };
 
-// This layout support only one buffer per uio, which means that it will
-// only work for a iovcnt of 1. We can extend later if needed, but because we
-// are reading it page by page, it should be fine for now.
-struct uio_mapper {
-    struct uio uio;
-    size_t buf_size;
-    size_t buf_off;
-    void *buffer;
-};
-
 int	copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop);
 int	uiomove(void *cp, int n, struct uio *uio);
 
diff --git a/include/osv/vfs_file.hh b/include/osv/vfs_file.hh
index 0f107d799faf84d0683bc04311ea7d5747dd3b8b..2f4fe007bc82eacee99e695282b4abd61854bfc4 100644
--- a/include/osv/vfs_file.hh
+++ b/include/osv/vfs_file.hh
@@ -22,8 +22,9 @@ public:
     virtual int close() override;
     virtual int chmod(mode_t mode) override;
     virtual std::unique_ptr<mmu::file_vma> mmap(addr_range range, unsigned flags, unsigned perm, off_t offset) override;
-    virtual void* get_page(uintptr_t start, uintptr_t offset, size_t size);
-    virtual void put_page(void *addr, uintptr_t start, uintptr_t offset, size_t size);
+    virtual mmu::mmupage get_page(uintptr_t offset, size_t size, mmu::hw_ptep ptep, bool write, bool shared);
+    virtual void put_page(void *addr, uintptr_t offset, size_t size, mmu::hw_ptep ptep);
+    void get_arcbuf(uintptr_t offset, unsigned action, void** start, size_t* len, void** page);
 };
 
 #endif /* VFS_FILE_HH_ */
diff --git a/include/osv/vnode.h b/include/osv/vnode.h
index dc01d5803fd935850a2c5f4b07a1030ec734a0ec..c5ed0449403be76c1d2185f86bf1b352750fa39f 100755
--- a/include/osv/vnode.h
+++ b/include/osv/vnode.h
@@ -118,6 +118,12 @@ struct vattr {
 #define IO_APPEND	0x0001
 #define IO_SYNC		0x0002
 
+/*
+ * ARC actions
+ */
+#define ARC_ACTION_QUERY    0
+#define ARC_ACTION_HOLD     1
+#define ARC_ACTION_RELEASE  2
 
 typedef	int (*vnop_open_t)	(struct file *);
 typedef	int (*vnop_close_t)	(struct vnode *, struct file *);
@@ -139,8 +145,7 @@ typedef	int (*vnop_setattr_t)	(struct vnode *, struct vattr *);
 typedef	int (*vnop_inactive_t)	(struct vnode *);
 typedef	int (*vnop_truncate_t)	(struct vnode *, off_t);
 typedef	int (*vnop_link_t)      (struct vnode *, struct vnode *, char *);
-typedef	int (*vnop_map_t)	(struct vnode *, struct file *, struct uio *);
-typedef	int (*vnop_unmap_t)	(struct vnode *, struct file *, struct uio *);
+typedef	int (*vnop_cache_t)	(struct vnode *, struct file *, struct uio *, unsigned action);
 
 /*
  * vnode operations
@@ -165,8 +170,7 @@ struct vnops {
 	vnop_inactive_t		vop_inactive;
 	vnop_truncate_t		vop_truncate;
 	vnop_link_t		vop_link;
-	vnop_map_t		vop_map;
-	vnop_unmap_t		vop_unmap;
+	vnop_cache_t		vop_cache;
 };
 
 /*
@@ -175,8 +179,7 @@ struct vnops {
 #define VOP_OPEN(VP, FP)	   ((VP)->v_op->vop_open)(FP)
 #define VOP_CLOSE(VP, FP)	   ((VP)->v_op->vop_close)(VP, FP)
 #define VOP_READ(VP, FP, U, F)	   ((VP)->v_op->vop_read)(VP, FP, U, F)
-#define VOP_MAP(VP, FP, U)	   ((VP)->v_op->vop_map)(VP, FP, U)
-#define VOP_UNMAP(VP, FP, U)	   ((VP)->v_op->vop_unmap)(VP, FP, U)
+#define VOP_CACHE(VP, FP, U, A)	   ((VP)->v_op->vop_cache)(VP, FP, U, A)
 #define VOP_WRITE(VP, U, F)	   ((VP)->v_op->vop_write)(VP, U, F)
 #define VOP_SEEK(VP, FP, OLD, NEW) ((VP)->v_op->vop_seek)(VP, FP, OLD, NEW)
 #define VOP_IOCTL(VP, FP, C, A)	   ((VP)->v_op->vop_ioctl)(VP, FP, C, A)