Skip to content
Snippets Groups Projects
Commit 8ef7f248 authored by Nadav Har'El's avatar Nadav Har'El Committed by Pekka Enberg
Browse files

Fix concurrent static initialization


When multiple threads concurrently use a function which has a static
variable with a constructor, gcc guarantees that only one thread will run
the constructor, and the other ones waits.  It uses __cxa_guard_acquire()/
release()/abort() for implementing this guarantee. Unfortunately these
functions, implemented in the C++ standard library, use the Linux futex
syscall which we don't implement. The futex system call is only used in
the rare case of contention - in the normal case, the __cxa_guard_*
functions uses just atomic memory operations.

This patch implements the bare minimum we need from futex to make the
__cxa_guard_* functions work: All we need is FUTEX_WAIT and FUTEX_WAKE,
with 0 timeout in FUTEX_WAIT and wake all in FUTEX_WAKE.

It's nice how the new waitqueues fit this implementation like glove to
a hand: We could use condvars too, but we anyway need to do the wake()
with the mutex locked (to get the futex's atomic test-and-wait), and in
that case we can use waitqueues without their additional internal mutex.

This patch also adds a test for this bug. Catching this bug in a real
application is very rare, but the test exposes it by defining an function-
static object with a very slow constructor (it sleeps for a second), and
calls the function from several threads concurrently.
Before this patch the test fails with:
   constructing
   syscall(): unimplemented system call 202. Aborting.

Fixes #199.

Signed-off-by: default avatarNadav Har'El <nyh@cloudius-systems.com>
Signed-off-by: default avatarPekka Enberg <penberg@cloudius-systems.com>
parent af1547c4
No related branches found
No related tags found
No related merge requests found
......@@ -241,6 +241,7 @@ tests += tests/tst-pthread-clock.so
tests += tests/misc-procfs.so
tests += tests/tst-chdir.so
tests += tests/tst-hello.so
tests += tests/tst-concurrent-init.so
tests/hello/Hello.class: javabase=tests/hello
......
/*
* Copyright (C) 2013 Cloudius Systems, Ltd.
* Copyright (C) 2013-2014 Cloudius Systems, Ltd.
*
* This work is open source software, licensed under the terms of the
* BSD license as described in the LICENSE file in the top-level directory.
......@@ -10,16 +10,67 @@
#include <osv/debug.hh>
#include <boost/format.hpp>
#include <osv/sched.hh>
#include <osv/mutex.h>
#include <osv/waitqueue.hh>
#include <syscall.h>
#include <stdarg.h>
#include <time.h>
#include <unordered_map>
long gettid()
{
return sched::thread::current()->id();
}
// We don't expect applications to use the Linux futex() system call (it is
// normally only used to implement higher-level synchronization mechanisms),
// but unfortunately gcc's C++ runtime uses a subset of futex in the
// __cxa__guard_* functions, which safeguard the concurrent initialization
// of function-scope static objects. We only implement here this subset.
// The __cxa_guard_* functions only call futex in the rare case of contention,
// in fact so rarely that OSv existed for a year before anyone noticed futex
// was missing. So the performance of this implementation is not critical.
static std::unordered_map<void*, waitqueue> queues;
static mutex queues_mutex;
enum {
FUTEX_WAIT = 0,
FUTEX_WAKE = 1,
};
int futex(int *uaddr, int op, int val, const struct timespec *timeout,
int *uaddr2, int val3)
{
switch (op) {
case FUTEX_WAIT:
assert(timeout == 0);
WITH_LOCK(queues_mutex) {
if (*uaddr == val) {
waitqueue &q = queues[uaddr];
q.wait(queues_mutex);
}
}
return 0;
case FUTEX_WAKE:
assert(val == INT_MAX);
WITH_LOCK(queues_mutex) {
auto i = queues.find(uaddr);
if (i != queues.end()) {
i->second.wake_all(queues_mutex);
queues.erase(i);
}
}
// FIXME: We are expected to return a count of woken threads, but
// wake_all doesn't have this feature, and the only user we care
// about, __cxa_guard_*, doesn't need this return value anyway.
return 0;
default:
abort("Unimplemented futex() operation %d\n", op);
}
}
long syscall(long number, ...)
{
switch (number) {
......@@ -44,6 +95,24 @@ long syscall(long number, ...)
va_end(args);
return clock_getres(arg1, arg2);
}
case __NR_futex: {
va_list args;
int *arg1;
int arg2;
int arg3;
const struct timespec *arg4;
int *arg5;
int arg6;
va_start(args, number);
arg1 = va_arg(args, typeof(arg1));
arg2 = va_arg(args, typeof(arg2));
arg3 = va_arg(args, typeof(arg3));
arg4 = va_arg(args, typeof(arg4));
arg5 = va_arg(args, typeof(arg5));
arg6 = va_arg(args, typeof(arg6));
va_end(args);
return futex(arg1, arg2, arg3, arg4, arg5, arg6);
}
}
abort("syscall(): unimplemented system call %d. Aborting.\n", number);
......
/*
* Copyright (C) 2014 Cloudius Systems, Ltd.
*
* This work is open source software, licensed under the terms of the
* BSD license as described in the LICENSE file in the top-level directory.
*/
// Test concurrent initialization of function-static varibles.
// Gcc implements this using __cxa_guard_acquire().
#include <iostream>
#include <chrono>
#include <thread>
#include <string>
#include <vector>
#include <atomic>
#include <cassert>
using namespace std;
static int tests = 0, fails = 0;
static void report(bool ok, string msg)
{
++tests;
fails += !ok;
cout << (ok ? "PASS" : "FAIL") << ": " << msg << "\n";
}
// A type with a really slow constructor
static atomic<int> constructions {0};
struct mycounter {
mycounter() {
// This is a really slow constructor
cout << "constructing\n";
++constructions;
this_thread::sleep_for(chrono::seconds(1));
}
atomic<int> counter {};
void incr() {
counter++;
}
int get() {
return counter.load();
}
};
// A function using a static variable with a really slow constructor.
// If this function is concurrently called from two threads, only
// one thread will run the constructor, and the second will wait for
// it to complete (internally, gcc uses __cxa_guard_acquire to do that).
int f(){
static mycounter a;
return a.counter++;
}
int main(int ac, char** av)
{
vector<thread> threads;
constexpr int nthreads = 5;
int results[nthreads];
chrono::milliseconds::rep ms[nthreads];
for (int i = 0; i < nthreads; i++) {
threads.push_back(thread([i, &results, &ms] {
auto start = chrono::high_resolution_clock::now();
results[i] = f();
auto end = chrono::high_resolution_clock::now();
ms[i] = chrono::duration_cast<chrono::milliseconds>(end - start).
count();
}));
}
for (auto &thread : threads) {
thread.join();
}
report(constructions.load() == 1, "mycounter constructed once");
bool seen[nthreads] {};
for (int i =0; i < nthreads; i++) {
auto r = results[i];
report(r >= 0 && r < nthreads && !seen[i],
string("thread ") + to_string(i));
seen[i] = true;
// We don't expect f to finish too quickly, it will need close to
// a second to complete the initialization itself, or wait for
// another thread doing the initalization.
report(ms[i] > 500, string("thread ") + to_string(i) + " slow enough");
}
cout << "SUMMARY: " << tests << " tests, " << fails << " failures\n";
return fails == 0 ? 0 : 1;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment