From 8c417e23033882674e9623a57c36a7ec391bcf2e Mon Sep 17 00:00:00 2001 From: Nadav Har'El <nyh@cloudius-systems.com> Date: Sun, 13 Apr 2014 18:58:17 +0300 Subject: [PATCH] memory: support for larger-than-page alignment Our existing implementation of posix_memalign() and the C11 aligned_alloc() used our regular malloc(), so it only work worked up to alignment of 4096 bytes - and crashed when it failed to achieve a higher desired alignment. Some applications do ask for higher alignment - for example MongoDB allocates a large buffer at 8192 byte alignment, and accordingly crashes half of the times, when the desired alignment is not achieved (half of the time, it is achieved by chance). This patch makes our support for alignment better organized, and fixes the alignment > 4096 case: The alignment is no longer available only to the outer function like posix_memalign(). Rather, it is passed down to lower-level allocation functions like malloc_large() which allocates whole pages - and this function now knows how to pick pages which start at a properly aligned boundary. This patch does not improve the wastefulness of our malloc_large(), so an overhaul of it would still be welcome. Case in point, malloc_large() always adds a full page to any allocation larger than half a page. Multiple allocations with posix_memalign(8192, 8192), rather than being tightly packed, each take 3 pages and are separated by a free page. This page is not wasted, but causes fragmentation of the heap. Note that after this patch, we still have one other bug in posix_memalign(size, align) - for small sizes and large alignments. For small sizes, we use a pool allocator with "size" alignment, and may not achieve the desired alignment (so causing an assertion failure). This bug can also be fixed, but is unrelated to this patch. This patch also adds a test for posix_memalign(), checking all alignments including large alignments which are the topic of this patch. The tests for small *sizes*, which as explained above are still buggy, are commented out, because they fail. Fixes #266. Signed-off-by: Nadav Har'El <nyh@cloudius-systems.com> Signed-off-by: Avi Kivity <avi@avi.cloudius> --- build.mk | 1 + core/mempool.cc | 68 ++++++++++++++++++++++++++++----------- tests/tst-align.cc | 79 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 19 deletions(-) create mode 100644 tests/tst-align.cc diff --git a/build.mk b/build.mk index 805a2dbc0..aecc95734 100644 --- a/build.mk +++ b/build.mk @@ -270,6 +270,7 @@ tests += tests/tst-hello.so tests += tests/tst-concurrent-init.so tests += tests/tst-ring-spsc-wraparound.so tests += tests/tst-shm.so +tests += tests/tst-align.so tests/hello/Hello.class: javabase=tests/hello diff --git a/core/mempool.cc b/core/mempool.cc index 6fa514536..31ae8ae94 100644 --- a/core/mempool.cc +++ b/core/mempool.cc @@ -44,6 +44,12 @@ TRACEPOINT(trace_memory_wait, "allocation size=%d", size_t); bool smp_allocator = false; unsigned char *osv_reclaimer_thread; +// malloc(3) specifies that malloc(), calloc() and realloc() need to return +// a pointer which "is suitably aligned for any kind of variable.". By "any" +// variable they actually refer only to standard types (int, long, double) +// and the longest of these is 8 bytes. +static const int MALLOC_ALIGNMENT = 8; + namespace memory { size_t phys_mem_size; @@ -549,7 +555,7 @@ void reclaimer::wait_for_memory(size_t mem) _oom_blocked.wait(mem); } -static void* malloc_large(size_t size) +static void* malloc_large(size_t size, size_t alignment) { size = (size + page_size - 1) & ~(page_size - 1); size += page_size; @@ -560,13 +566,25 @@ static void* malloc_large(size_t size) for (auto i = free_page_ranges.begin(); i != free_page_ranges.end(); ++i) { auto header = &*i; - page_range* ret_header; - if (header->size >= size) { + + char *v = reinterpret_cast<char*>(header); + auto expected_ret = v + header->size - size + page_size; + auto alignment_shift = expected_ret - + align_down(expected_ret, alignment); + + if (header->size >= size + alignment_shift) { + if (alignment_shift) { + // Leave "alignment_shift" bytes at the end of the + // range free, so our allocation below is aligned. + free_page_ranges.insert(*new(v + header->size - + alignment_shift) page_range(alignment_shift)); + header->size -= alignment_shift; + } + page_range* ret_header; if (header->size == size) { free_page_ranges.erase(i); ret_header = header; } else { - void *v = header; header->size -= size; ret_header = new (v + header->size) page_range(size); } @@ -1088,12 +1106,14 @@ extern "C" { // allocated from a pool. // FIXME: be less wasteful -static inline void* std_malloc(size_t size) +static inline void* std_malloc(size_t size, size_t alignment) { if ((ssize_t)size < 0) return libc_error_ptr<void *>(ENOMEM); void *ret; if (size <= memory::pool::max_object_size) { + // FIXME: handle alignment requirement even when alignment < size + // (can happen in posix_memalign(), but not with aligned_alloc(). if (!smp_allocator) { return memory::alloc_page() + memory::non_mempool_obj_offset; } @@ -1101,7 +1121,7 @@ static inline void* std_malloc(size_t size) unsigned n = ilog2_roundup(size); ret = memory::malloc_pools[n].alloc(); } else { - ret = memory::malloc_large(size); + ret = memory::malloc_large(size, alignment); } memory::tracker_remember(ret, size); return ret; @@ -1191,18 +1211,24 @@ struct header { static const size_t pad_before = 2 * mmu::page_size; static const size_t pad_after = mmu::page_size; -void* malloc(size_t size) +void* malloc(size_t size, size_t alignment) { #ifdef AARCH64_PORT_STUB abort(); #else /* !AARCH64_PORT_STUB */ if (!enabled) { - return std_malloc(size); + return std_malloc(size, alignment); } auto asize = align_up(size, mmu::page_size); auto padded_size = pad_before + asize + pad_after; - void* v = free_area.fetch_add(padded_size, std::memory_order_relaxed); + if (alignment > mmu::page_size) { + // Our allocations are page-aligned - might need more + padded_size += alignment - mmu::page_size; + } + char* v = free_area.fetch_add(padded_size, std::memory_order_relaxed); + // change v so that (v + pad_before) is aligned. + v += align_up(v + pad_before, alignment) - (v + pad_before); mmu::vpopulate(v, mmu::page_size); new (v) header(size); v += pad_before; @@ -1210,7 +1236,7 @@ void* malloc(size_t size) memset(v + size, '$', asize - size); // fill the memory with garbage, to catch use-before-init uint8_t garbage = 3; - std::generate_n(static_cast<uint8_t*>(v), size, [&] { return garbage++; }); + std::generate_n(v, size, [&] { return garbage++; }); return v; #endif /* !AARCH64_PORT_STUB */ } @@ -1238,7 +1264,7 @@ void free(void* v) void* realloc(void* v, size_t size) { if (!v) - return malloc(size); + return malloc(size, MALLOC_ALIGNMENT); if (!size) { free(v); return nullptr; @@ -1246,7 +1272,7 @@ void* realloc(void* v, size_t size) auto h = static_cast<header*>(v - pad_before); if (h->size >= size) return v; - void* n = malloc(size); + void* n = malloc(size, MALLOC_ALIGNMENT); if (!n) return nullptr; memcpy(n, v, h->size); @@ -1259,9 +1285,9 @@ void* realloc(void* v, size_t size) void* malloc(size_t size) { #if CONF_debug_memory == 0 - void* buf = std_malloc(size); + void* buf = std_malloc(size, MALLOC_ALIGNMENT); #else - void* buf = dbg::malloc(size); + void* buf = dbg::malloc(size, MALLOC_ALIGNMENT); #endif trace_memory_malloc(buf, size); @@ -1307,7 +1333,11 @@ int posix_memalign(void **memptr, size_t alignment, size_t size) if (!is_power_of_two(alignment)) { return EINVAL; } - void *ret = malloc(size); +#if CONF_debug_memory == 0 + void* ret = std_malloc(size, alignment); +#else + void* ret = dbg::malloc(size, alignment); +#endif if (!ret) { return ENOMEM; } @@ -1341,12 +1371,12 @@ void enable_debug_allocator() void* alloc_phys_contiguous_aligned(size_t size, size_t align) { - assert(align <= page_size); // implementation limitation assert(is_power_of_two(align)); - // make use of the standard allocator returning page-aligned + // make use of the standard allocator returning properly aligned // physically contiguous memory: - size = std::max(page_size, size); - return std_malloc(size); + auto ret = std_malloc(size, align); + assert (!(reinterpret_cast<uintptr_t>(ret) & (align - 1))); + return ret; } void free_phys_contiguous_aligned(void* p) diff --git a/tests/tst-align.cc b/tests/tst-align.cc new file mode 100644 index 000000000..c6d37cf4a --- /dev/null +++ b/tests/tst-align.cc @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2014 Cloudius Systems, Ltd. + * + * This work is open source software, licensed under the terms of the + * BSD license as described in the LICENSE file in the top-level directory. + */ + +// Tests for posix_memalign and aligned_alloc. Both should be able to handle +// various alignments and sizes, and free() should be able to free the +// memory allocated by them. +// +// To compile on Linux, use: g++ -g -pthread -std=c++11 tests/tst-align.cc + + +#include <string> +#include <iostream> +#include <vector> +#include <limits> +#include <algorithm> + +#include <stdlib.h> +#include <malloc.h> + +static int tests = 0, fails = 0; + +static void report(bool ok, std::string msg) +{ + ++tests; + fails += !ok; + std::cout << (ok ? "PASS" : "FAIL") << ": " << msg << "\n"; +} + +void test_posix_memalign(size_t alignment, size_t size) +{ + constexpr int tries = 100; + std::vector<void*> ptrs; + std::cout << "Testing posix_memalign alignment=" << alignment << ", size=" + << size << "\n"; + bool fail = false; + size_t min_achieved = std::numeric_limits<size_t>::max(); + for (int i = 0; i < tries; i++) { + void *ptr; + fail = posix_memalign(&ptr, alignment, size) != 0; + if (fail) { + break; + } + ptrs.push_back(ptr); + size_t achieved = 1 << __builtin_ctz((intptr_t)ptr); + min_achieved = std::min(achieved, min_achieved); + } + for (auto ptr : ptrs) { + free(ptr); + } + report(!fail, "posix_memalign " + std::to_string(alignment) + " " + std::to_string(size)); + report(min_achieved >= alignment, "achieved desired alignment"); + if (min_achieved > alignment) { + // This case may be a waste, but it's actually fine according + // to the standard, so just warn. + std::cout << "WARNING: exceeded desired alignment - got " << min_achieved << "\n"; + } +} + +int main(int ac, char** av) +{ + // posix_memalign expects its alignment argument to be a multiple of + // sizeof(void*) (on 64 bit, that's 8 bytes) and a power of two. + for (int shift = 3; shift <= 14; shift++) { + size_t s = 1 << shift; + test_posix_memalign(s, s); + test_posix_memalign(s, s*2); + test_posix_memalign(s, s*10); +// test_posix_memalign(s, s/2); // doesn't work yet +// test_posix_memalign(s, 8); // doesn't work yet + } + + std::cout << "SUMMARY: " << tests << " tests, " << fails << " failures\n"; +} + + -- GitLab