From b6a1aa2e24c0e06099146134974caad159c0b4a0 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@cloudius-systems.com>
Date: Mon, 3 Feb 2014 15:15:44 +0200
Subject: [PATCH] mmu: write back only dirty pages during file sync

Walk the page table and write out only dirty pages during file sync,
instead of writing back the entire mapping.

Reviewed-by: Glauber Costa <glommer@cloudius-systems.com>
Signed-off-by: Gleb Natapov <gleb@cloudius-systems.com>
Signed-off-by: Pekka Enberg <penberg@cloudius-systems.com>
---
 core/mmu.cc | 77 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 72 insertions(+), 5 deletions(-)

diff --git a/core/mmu.cc b/core/mmu.cc
index bea973340..3a4b7d0c0 100644
--- a/core/mmu.cc
+++ b/core/mmu.cc
@@ -570,6 +570,40 @@ public:
     }
 };
 
+template <typename T, account_opt Account = account_opt::no>
+class dirty_cleaner : public vma_operation<allocate_intermediate_opt::no, skip_empty_opt::yes, Account> {
+private:
+    bool do_flush;
+    T handler;
+public:
+    dirty_cleaner(T handler) : do_flush(false), handler(handler) {}
+    void small_page(hw_ptep ptep, uintptr_t offset) {
+        pt_element pte = ptep.read();
+        if (!pte.dirty()) {
+            return;
+        }
+        do_flush |= true;
+        pte.set_dirty(false);
+        ptep.write(pte);
+        handler(ptep.read().addr(false), offset);
+    }
+    bool huge_page(hw_ptep ptep, uintptr_t offset) {
+        pt_element pte = ptep.read();
+        if (!pte.dirty()) {
+            return true;
+        }
+        do_flush |= true;
+        pte.set_dirty(false);
+        ptep.write(pte);
+        handler(ptep.read().addr(true), offset, huge_page_size);
+        return true;
+    }
+    bool tlb_flush_needed(void) {return do_flush;}
+    void finalize() {
+        handler.finalize();
+    }
+};
+
 class virt_to_phys_map :
         public page_table_operation<allocate_intermediate_opt::no, skip_empty_opt::yes, descend_opt::yes,
         once_opt::yes, split_opt::no> {
@@ -1228,18 +1262,51 @@ void file_vma::split(uintptr_t edge)
     vma_list.insert(*n);
 }
 
+class dirty_page_sync {
+    friend dirty_cleaner<dirty_page_sync, account_opt::yes>;
+    friend file_vma;
+private:
+    file *_file;
+    f_offset _offset;
+    uint64_t _size;
+    struct elm {
+        iovec iov;
+        off_t offset;
+    };
+    std::stack<elm> queue;
+    dirty_page_sync(file *file, f_offset offset, uint64_t size) : _file(file), _offset(offset), _size(size) {}
+    void operator()(phys addr, uintptr_t offset, size_t size) {
+        off_t off = _offset + offset;
+        size_t len = std::min(size, _size - off);
+        queue.push(elm{{phys_to_virt(addr), len}, off});
+    }
+    void operator()(phys addr, uintptr_t offset) {
+        (*this)(addr, offset, page_size);
+    }
+    void finalize() {
+        while(!queue.empty()) {
+            elm w = queue.top();
+            uio data{&w.iov, 1, w.offset, ssize_t(w.iov.iov_len), UIO_WRITE};
+            _file->write(&data, FOF_OFFSET);
+            queue.pop();
+        }
+    }
+};
+
 error file_vma::sync(uintptr_t start, uintptr_t end)
 {
     if (!_shared)
         return make_error(ENOMEM);
     start = std::max(start, _range.start());
     end = std::min(end, _range.end());
-    auto fsize = ::size(_file);
     uintptr_t size = end - start;
-    auto off = offset(start);
-    write(_file, addr(), off, std::min(size, fsize - off));
-    auto err = sys_fsync(_file.get());
-    return make_error(err);
+
+    dirty_page_sync sync(_file.get(), _offset, ::size(_file));
+    error err = no_error();
+    if (operate_range(dirty_cleaner<dirty_page_sync, account_opt::yes>(sync), (void*)start, size) != 0) {
+        err = make_error(sys_fsync(_file.get()));
+    }
+    return err;
 }
 
 int file_vma::validate_perm(unsigned perm)
--
GitLab
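
To make the mechanism easier to follow, here is a minimal, self-contained sketch of the technique the patch applies: walk the page table entries, and for each dirty one clear the dirty bit and hand the page to a write-back handler. Everything in it (SimplePte, sync_dirty_pages, the flat std::vector standing in for a page table) is a hypothetical illustration, not OSv's hw_ptep/pt_element API; a real walker must also flush the TLB whenever any dirty bit was cleared, which is what tlb_flush_needed() reports above.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

constexpr size_t page_size = 4096;

// Hypothetical, simplified PTE; real entries live in hardware page tables.
struct SimplePte {
    uintptr_t phys;   // physical address of the mapped page
    bool present;
    bool dirty;       // set by hardware on write, cleared by this walk
};

// Walk all present PTEs; for each dirty one, clear the dirty bit and pass
// the page to the handler. Returns true if any bit was cleared, i.e. a
// real implementation would now need a TLB flush.
template <typename Handler>
bool sync_dirty_pages(std::vector<SimplePte>& table, Handler handler)
{
    bool do_flush = false;
    for (size_t i = 0; i < table.size(); ++i) {
        SimplePte& pte = table[i];
        if (!pte.present || !pte.dirty) {
            continue;                      // skip clean pages: the whole win
        }
        pte.dirty = false;                 // mark clean before queueing the write
        do_flush = true;
        handler(pte.phys, i * page_size);  // offset of the page in the mapping
    }
    return do_flush;
}

int main()
{
    std::vector<SimplePte> table = {
        {0x1000, true, false},
        {0x2000, true, true},   // only these two dirty pages get written back
        {0x3000, true, true},
        {0x4000, false, false},
    };
    bool need_flush = sync_dirty_pages(table, [](uintptr_t phys, size_t off) {
        std::printf("write back page at phys %#zx, mapping offset %zu\n",
                    (size_t)phys, off);
    });
    std::printf("TLB flush needed: %s\n", need_flush ? "yes" : "no");
    return 0;
}
```

This mirrors small_page() in the patch; huge_page() does the same thing at huge_page_size granularity, passing an explicit length to the handler.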
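
The dirty_page_sync helper follows a second pattern worth noting: the page-table walk itself does no I/O, it only queues (buffer, length, file offset) records, and finalize() drains the queue once the walk is done. Below is a sketch of that pattern using plain POSIX pwrite() rather than OSv's file/uio interface; DirtyPageSync, WriteElm, and the fd parameter are illustrative names, not the patch's actual types.

```cpp
#include <algorithm>
#include <cstdint>
#include <stack>
#include <unistd.h>   // pwrite

struct WriteElm {
    const void* buf;  // page contents (phys_to_virt(addr) in the patch)
    size_t len;
    off_t offset;     // absolute file offset to write at
};

class DirtyPageSync {
    int _fd;
    off_t _base;      // file offset the mapping starts at (_offset in the patch)
    uint64_t _size;   // current file size; never write past end of file
    std::stack<WriteElm> _queue;
public:
    DirtyPageSync(int fd, off_t base, uint64_t size)
        : _fd(fd), _base(base), _size(size) {}

    // Called once per dirty page during the walk: cheap, no I/O yet.
    void operator()(const void* page, uintptr_t offset, size_t size)
    {
        off_t off = _base + offset;
        // Clamp the last page so a mapping rounded up to page_size does not
        // write garbage past EOF (mirrors std::min(size, _size - off)).
        size_t len = std::min<uint64_t>(size, _size - off);
        _queue.push(WriteElm{page, len, off});
    }

    // Drain the queue after the walk: one positional write per dirty page.
    void finalize()
    {
        while (!_queue.empty()) {
            WriteElm w = _queue.top();
            pwrite(_fd, w.buf, w.len, w.offset);
            _queue.pop();
        }
    }
};
```

One plausible reason for deferring the writes this way (an assumption on my part, the patch does not state it) is ordering: copying the data out only after the dirty bits are cleared and the TLB is flushed avoids losing stores from another CPU that still held a stale writable entry during the walk.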