Commit 4658e563 authored by Gleb Natapov, committed by Pekka Enberg

pagecache: map zero page instead of ARC page for hole in a file


An attempt to get a read ARC buffer for a hole in a file results in a
temporary ARC buffer which is destroyed immediately after use. This means
that mapping such a buffer is impossible: it is unmapped before the page
fault handler returns to the application. This patch solves the problem by
detecting that a hole in the file is being accessed and mapping a special
zero page instead. The page is mapped as COW, so on a write attempt a new
private page is allocated.
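
Mapping a single pre-zeroed page copy-on-write is the same mechanism POSIX
systems use for untouched anonymous memory, so the behavior the commit relies
on is easy to observe from userspace. A minimal sketch (standard mmap, not
OSv code; a 4096-byte page size is assumed):

    #include <sys/mman.h>
    #include <cassert>
    #include <cstdio>

    int main()
    {
        const size_t page = 4096;
        // Private anonymous pages start out backed by a shared zero page.
        char *p = static_cast<char *>(mmap(nullptr, 2 * page,
                                           PROT_READ | PROT_WRITE,
                                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
        assert(p != MAP_FAILED);
        assert(p[0] == 0 && p[page] == 0); // reads are satisfied by the zero page
        p[0] = 1;                          // write fault: a private copy is allocated
        assert(p[page] == 0);              // the untouched page still reads as zero
        printf("copy-on-write zero page behaves as expected\n");
        return 0;
    }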

Signed-off-by: Gleb Natapov <gleb@cloudius-systems.com>
Signed-off-by: Pekka Enberg <penberg@cloudius-systems.com>
parent 9f8f7068
@@ -987,6 +987,13 @@ dmu_map_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
 	dmu_buf_t **dbp;
 	int err;
 	int numbufs = 0;
+
+	uint64_t noff = uio->uio_loffset;
+	err = dmu_offset_next(os, object, FALSE, &noff);
+
+	if ((err == ESRCH) || noff != uio->uio_loffset) {
+		return (0);
+	}
 
 	// This will acquire a reference both in the dbuf, and in the ARC buffer.
 	// The ARC buffer reference will also update the access statistics
@@ -1002,6 +1009,7 @@ dmu_map_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
 	arc_buf_t *dbuf_abuf = dbi->db_buf;
 	mmu_map(uio->uio_iov->iov_base, dbuf_abuf, dbuf_abuf->b_data + (uio->uio_loffset - db->db_offset));
+	uio->uio_resid = 0;
 	dmu_buf_rele_array(dbp, numbufs, FTAG);
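
For context on the hunk above: dmu_offset_next() advances noff to the start of
the next data region at or after the supplied offset, so either an ESRCH
return (no more data) or noff landing past uio_loffset means the faulting
offset sits in a hole, and dmu_map_uio() bails out without consuming the uio.
lseek() with SEEK_DATA has matching semantics, so the same test can be
sketched in userspace (offset_is_hole is a hypothetical helper; it assumes the
filesystem supports SEEK_DATA):

    #include <fcntl.h>
    #include <unistd.h>
    #include <cerrno>

    // Returns true when 'off' falls inside a hole of the file behind 'fd'.
    bool offset_is_hole(int fd, off_t off)
    {
        off_t next_data = lseek(fd, off, SEEK_DATA);
        if (next_data == (off_t)-1 && errno == ENXIO)
            return true;            // no data at or after off: trailing hole
        return next_data != off;    // next data starts later: off is in a hole
    }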
@@ -14,6 +14,7 @@
 #include <osv/mempool.hh>
 #include <fs/vfs/vfs.h>
 #include <osv/trace.hh>
+#include <osv/prio.hh>
 
 extern "C" {
     void arc_unshare_buf(arc_buf_t*);
@@ -39,6 +40,14 @@ template<> struct hash<mmu::hw_ptep> {
 
 namespace pagecache {
 
+static void* zero_page;
+
+void __attribute__((constructor(init_prio::pagecache))) setup()
+{
+    zero_page = memory::alloc_page();
+    memset(zero_page, 0, mmu::page_size);
+}
+
 class cached_page {
 protected:
     const hashkey _key;
@@ -321,9 +330,9 @@ void map_arc_buf(hashkey *key, arc_buf_t* ab, void *page)
     arc_share_buf(ab);
 }
 
-static void create_read_cached_page(vfs_file* fp, hashkey& key)
+static int create_read_cached_page(vfs_file* fp, hashkey& key)
 {
-    fp->get_arcbuf(&key, key.offset);
+    return fp->get_arcbuf(&key, key.offset);
 }
 
 static std::unique_ptr<cached_page_write> create_write_cached_page(vfs_file* fp, hashkey& key)
@@ -396,8 +405,10 @@ bool get(vfs_file* fp, off_t offset, mmu::hw_ptep ptep, mmu::pt_element pte, bool write, bool shared)
             }
         }
         // page is not in cache yet, create and try again
-        create_read_cached_page(fp, key);
-    } while (true);
+    } while (create_read_cached_page(fp, key) != -1);
+    // tried to access a hole in the file, map the zero page instead
+    return mmu::write_pte(zero_page, ptep, mmu::pte_mark_cow(pte, true));
 }
 wcp->map(ptep);
@@ -412,7 +423,7 @@ bool release(vfs_file* fp, void *addr, off_t offset, mmu::hw_ptep ptep)
     hashkey key {st.st_dev, st.st_ino, offset};
     cached_page_write* wcp = find_in_cache(write_cache, key);
 
-    // page is either in ARC cache or write cache or private page
+    // page is either in ARC cache or write cache or zero page or private page
     if (wcp && wcp->addr() == addr) {
         // page is in write cache
......@@ -429,7 +440,7 @@ bool release(vfs_file* fp, void *addr, off_t offset, mmu::hw_ptep ptep)
}
}
// private page, a caller will free it
return true;
// if a private page, caller will free it
return addr != zero_page;
}
}
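
The subtle part of the get() change above is that the loop's exit condition
now doubles as the hole signal: the do/while keeps retrying until the page
appears in the read cache, and falls through only when
create_read_cached_page() returns -1. A self-contained toy model of that
loop-with-sentinel shape (all names hypothetical, not OSv code):

    #include <cassert>
    #include <unordered_map>

    static std::unordered_map<long, long> cache;

    // Stands in for create_read_cached_page(): populates the cache on
    // success, returns -1 when the offset turns out to be a hole.
    static int populate(long key, bool hole)
    {
        if (hole)
            return -1;
        cache[key] = key;
        return 0;
    }

    // Mirrors the patched get(): retry the lookup until populate() either
    // succeeds (loop re-runs and finds the entry) or reports a hole.
    static bool lookup(long key, bool hole)
    {
        do {
            if (cache.count(key))
                return true;        // found: map the cached page
        } while (populate(key, hole) != -1);
        return false;               // hole: caller maps the zero page instead
    }

    int main()
    {
        assert(lookup(1, false));   // populated on the second pass
        assert(!lookup(2, true));   // hole detected, falls through
    }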
@@ -154,7 +154,7 @@ bool vfs_file::put_page(void *addr, uintptr_t off, size_t size, mmu::hw_ptep ptep)
 // eviction that will hold the mmu-side lock that protects the mappings
 // Always follow that order. We however can't just get rid of the mmu-side lock,
 // because not all invalidations will be synchronous.
-void vfs_file::get_arcbuf(void* key, off_t offset)
+int vfs_file::get_arcbuf(void* key, off_t offset)
 {
     struct vnode *vp = f_dentry->d_vnode;
@@ -166,11 +166,13 @@ void vfs_file::get_arcbuf(void* key, off_t offset)
     data.uio_iovcnt = 1;
     data.uio_offset = offset;
     data.uio_resid = mmu::page_size;
-    data.uio_rw = UIO_READ;
+    data.uio_rw = UIO_WRITE;
 
     vn_lock(vp);
     assert(VOP_CACHE(vp, this, &data) == 0);
     vn_unlock(vp);
+
+    return (data.uio_resid != 0) ? -1 : 0;
 }
 
 std::unique_ptr<mmu::file_vma> vfs_file::mmap(addr_range range, unsigned flags, unsigned perm, off_t offset)
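
The new return value of get_arcbuf() piggybacks on uio accounting: VOP_CACHE()
consumes uio_resid only when it actually maps data, so a residual left at the
full page size means the offset was a hole, and that is translated to -1 for
the page cache. A toy model of the convention (hypothetical names and types):

    #include <cassert>
    #include <cstddef>

    struct toy_uio { size_t resid; };

    // Stands in for VOP_CACHE(): consumes the residual only on success.
    static void cache_op(toy_uio &u, bool has_data)
    {
        if (has_data)
            u.resid = 0;
    }

    // Mirrors the patched get_arcbuf(): a nonzero residual means nothing
    // was mapped, i.e. the offset is a hole.
    static int get_arcbuf_model(bool has_data)
    {
        toy_uio u{4096};
        cache_op(u, has_data);
        return (u.resid != 0) ? -1 : 0;
    }

    int main()
    {
        assert(get_arcbuf_model(true) == 0);    // data present: mapped
        assert(get_arcbuf_model(false) == -1);  // hole: nothing consumed
    }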
@@ -13,6 +13,7 @@ enum class init_prio : int {
     cpus,
     fpranges,
     mempool,
+    pagecache,
     threadlist,
     pthread,
     notifiers,
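
The new pagecache entry in init_prio sits right after mempool, so the setup()
constructor added in pagecache.cc runs once memory::alloc_page() is usable.
GCC and Clang run constructor(N) functions in ascending priority order before
main(); a standalone illustration (priorities 0-100 are reserved for the
implementation):

    #include <cstdio>

    // Lower priority numbers run earlier.
    static void __attribute__((constructor(101))) early() { std::puts("early"); }
    static void __attribute__((constructor(102))) late()  { std::puts("late");  }

    int main()
    {
        std::puts("main");  // output: early, late, main
        return 0;
    }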
@@ -25,7 +25,7 @@ public:
     virtual bool map_page(uintptr_t offset, size_t size, mmu::hw_ptep ptep, mmu::pt_element pte, bool write, bool shared);
     virtual bool put_page(void *addr, uintptr_t offset, size_t size, mmu::hw_ptep ptep);
 
-    void get_arcbuf(void *key, off_t offset);
+    int get_arcbuf(void *key, off_t offset);
 };
 
 #endif /* VFS_FILE_HH_ */