From 8ef7f248887d2f39eb84f82f52ee860f78cb4643 Mon Sep 17 00:00:00 2001 From: Nadav Har'El <nyh@cloudius-systems.com> Date: Wed, 12 Feb 2014 13:42:34 +0200 Subject: [PATCH] Fix concurrent static initialization When multiple threads concurrently use a function which has a static variable with a constructor, gcc guarantees that only one thread will run the constructor, and the other ones waits. It uses __cxa_guard_acquire()/ release()/abort() for implementing this guarantee. Unfortunately these functions, implemented in the C++ standard library, use the Linux futex syscall which we don't implement. The futex system call is only used in the rare case of contention - in the normal case, the __cxa_guard_* functions uses just atomic memory operations. This patch implements the bare minimum we need from futex to make the __cxa_guard_* functions work: All we need is FUTEX_WAIT and FUTEX_WAKE, with 0 timeout in FUTEX_WAIT and wake all in FUTEX_WAKE. It's nice how the new waitqueues fit this implementation like glove to a hand: We could use condvars too, but we anyway need to do the wake() with the mutex locked (to get the futex's atomic test-and-wait), and in that case we can use waitqueues without their additional internal mutex. This patch also adds a test for this bug. Catching this bug in a real application is very rare, but the test exposes it by defining an function- static object with a very slow constructor (it sleeps for a second), and calls the function from several threads concurrently. Before this patch the test fails with: constructing syscall(): unimplemented system call 202. Aborting. Fixes #199. Signed-off-by: Nadav Har'El <nyh@cloudius-systems.com> Signed-off-by: Pekka Enberg <penberg@cloudius-systems.com> --- build.mk | 1 + linux.cc | 71 ++++++++++++++++++++++++++- tests/tst-concurrent-init.cc | 95 ++++++++++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+), 1 deletion(-) create mode 100644 tests/tst-concurrent-init.cc diff --git a/build.mk b/build.mk index 89c8422bf..baae31c7a 100644 --- a/build.mk +++ b/build.mk @@ -241,6 +241,7 @@ tests += tests/tst-pthread-clock.so tests += tests/misc-procfs.so tests += tests/tst-chdir.so tests += tests/tst-hello.so +tests += tests/tst-concurrent-init.so tests/hello/Hello.class: javabase=tests/hello diff --git a/linux.cc b/linux.cc index 554bd8510..e37cc5f5a 100644 --- a/linux.cc +++ b/linux.cc @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 Cloudius Systems, Ltd. + * Copyright (C) 2013-2014 Cloudius Systems, Ltd. * * This work is open source software, licensed under the terms of the * BSD license as described in the LICENSE file in the top-level directory. @@ -10,16 +10,67 @@ #include <osv/debug.hh> #include <boost/format.hpp> #include <osv/sched.hh> +#include <osv/mutex.h> +#include <osv/waitqueue.hh> #include <syscall.h> #include <stdarg.h> #include <time.h> +#include <unordered_map> + long gettid() { return sched::thread::current()->id(); } +// We don't expect applications to use the Linux futex() system call (it is +// normally only used to implement higher-level synchronization mechanisms), +// but unfortunately gcc's C++ runtime uses a subset of futex in the +// __cxa__guard_* functions, which safeguard the concurrent initialization +// of function-scope static objects. We only implement here this subset. +// The __cxa_guard_* functions only call futex in the rare case of contention, +// in fact so rarely that OSv existed for a year before anyone noticed futex +// was missing. So the performance of this implementation is not critical. +static std::unordered_map<void*, waitqueue> queues; +static mutex queues_mutex; +enum { + FUTEX_WAIT = 0, + FUTEX_WAKE = 1, +}; + +int futex(int *uaddr, int op, int val, const struct timespec *timeout, + int *uaddr2, int val3) +{ + switch (op) { + case FUTEX_WAIT: + assert(timeout == 0); + WITH_LOCK(queues_mutex) { + if (*uaddr == val) { + waitqueue &q = queues[uaddr]; + q.wait(queues_mutex); + } + } + return 0; + case FUTEX_WAKE: + assert(val == INT_MAX); + WITH_LOCK(queues_mutex) { + auto i = queues.find(uaddr); + if (i != queues.end()) { + i->second.wake_all(queues_mutex); + queues.erase(i); + } + } + // FIXME: We are expected to return a count of woken threads, but + // wake_all doesn't have this feature, and the only user we care + // about, __cxa_guard_*, doesn't need this return value anyway. + return 0; + default: + abort("Unimplemented futex() operation %d\n", op); + } +} + + long syscall(long number, ...) { switch (number) { @@ -44,6 +95,24 @@ long syscall(long number, ...) va_end(args); return clock_getres(arg1, arg2); } + case __NR_futex: { + va_list args; + int *arg1; + int arg2; + int arg3; + const struct timespec *arg4; + int *arg5; + int arg6; + va_start(args, number); + arg1 = va_arg(args, typeof(arg1)); + arg2 = va_arg(args, typeof(arg2)); + arg3 = va_arg(args, typeof(arg3)); + arg4 = va_arg(args, typeof(arg4)); + arg5 = va_arg(args, typeof(arg5)); + arg6 = va_arg(args, typeof(arg6)); + va_end(args); + return futex(arg1, arg2, arg3, arg4, arg5, arg6); + } } abort("syscall(): unimplemented system call %d. Aborting.\n", number); diff --git a/tests/tst-concurrent-init.cc b/tests/tst-concurrent-init.cc new file mode 100644 index 000000000..7bfc359fc --- /dev/null +++ b/tests/tst-concurrent-init.cc @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2014 Cloudius Systems, Ltd. + * + * This work is open source software, licensed under the terms of the + * BSD license as described in the LICENSE file in the top-level directory. + */ + +// Test concurrent initialization of function-static varibles. +// Gcc implements this using __cxa_guard_acquire(). + +#include <iostream> +#include <chrono> +#include <thread> +#include <string> +#include <vector> +#include <atomic> +#include <cassert> + +using namespace std; + +static int tests = 0, fails = 0; + +static void report(bool ok, string msg) +{ + ++tests; + fails += !ok; + cout << (ok ? "PASS" : "FAIL") << ": " << msg << "\n"; +} + +// A type with a really slow constructor +static atomic<int> constructions {0}; +struct mycounter { + mycounter() { + // This is a really slow constructor + cout << "constructing\n"; + ++constructions; + this_thread::sleep_for(chrono::seconds(1)); + } + atomic<int> counter {}; + void incr() { + counter++; + } + int get() { + return counter.load(); + } +}; + +// A function using a static variable with a really slow constructor. +// If this function is concurrently called from two threads, only +// one thread will run the constructor, and the second will wait for +// it to complete (internally, gcc uses __cxa_guard_acquire to do that). +int f(){ + static mycounter a; + return a.counter++; +} + +int main(int ac, char** av) +{ + vector<thread> threads; + constexpr int nthreads = 5; + int results[nthreads]; + chrono::milliseconds::rep ms[nthreads]; + + for (int i = 0; i < nthreads; i++) { + threads.push_back(thread([i, &results, &ms] { + auto start = chrono::high_resolution_clock::now(); + results[i] = f(); + auto end = chrono::high_resolution_clock::now(); + ms[i] = chrono::duration_cast<chrono::milliseconds>(end - start). + count(); + })); + } + + for (auto &thread : threads) { + thread.join(); + } + + report(constructions.load() == 1, "mycounter constructed once"); + + bool seen[nthreads] {}; + for (int i =0; i < nthreads; i++) { + auto r = results[i]; + report(r >= 0 && r < nthreads && !seen[i], + string("thread ") + to_string(i)); + seen[i] = true; + // We don't expect f to finish too quickly, it will need close to + // a second to complete the initialization itself, or wait for + // another thread doing the initalization. + report(ms[i] > 500, string("thread ") + to_string(i) + " slow enough"); + } + + + cout << "SUMMARY: " << tests << " tests, " << fails << " failures\n"; + return fails == 0 ? 0 : 1; +} -- GitLab