diff --git a/bootfs.manifest b/bootfs.manifest index a95bdbc1fe54d111d76a7bcf57a0a8cf178d5c0b..5cd8270c7251229cd996ac06c1e369f1f10ff975 100644 --- a/bootfs.manifest +++ b/bootfs.manifest @@ -90,6 +90,7 @@ /&/tests/tst-solaris-taskq.so: ./& /&/tests/tst-vfs.so: ./& /&/tests/tst-yield.so: ./& +/&/tests/tst-ctxsw.so: ./& /testrunner.so: ./tests/testrunner.so /java/Hello.class: ./tests/hello/Hello.class /java.so: java/java.so diff --git a/build.mak b/build.mak index abd0d137c8df7c8b04dd13e8054d2adaae55c6a8..89e9b3b6690bccb11f7d3661093bba8dc9c266c2 100644 --- a/build.mak +++ b/build.mak @@ -19,7 +19,7 @@ COMMON = $(autodepend) -g -Wall -Wno-pointer-arith -Werror -Wformat=0 \ -D __BSD_VISIBLE=1 -U _FORTIFY_SOURCE -fno-stack-protector $(INCLUDES) \ $(do-sys-includes) \ $(arch-cflags) $(conf-opt) $(acpi-defines) $(tracing-flags) \ - $(configuration) -nostdinc + $(configuration) -nostdinc -D__OSV__ tracing-flags-0 = tracing-flags-1 = -finstrument-functions -finstrument-functions-exclude-file-list=c++,trace.cc,trace.hh,align.hh @@ -120,6 +120,7 @@ tests += tests/tst-condvar.so tests += tests/tst-queue-mpsc.so tests += tests/tst-af-local.so tests += tests/tst-yield.so +tests += tests/tst-ctxsw.so tests/hello/Hello.class: javabase=tests/hello diff --git a/drivers/kvmclock.cc b/drivers/kvmclock.cc index e175dfa067f84853e9db8be939979753ff9991ca..b3f06b9e629b95075985a832f5191bc9fe0e2e8d 100644 --- a/drivers/kvmclock.cc +++ b/drivers/kvmclock.cc @@ -4,6 +4,7 @@ #include "mmu.hh" #include "string.h" #include "cpuid.hh" +#include "barrier.hh" class kvmclock : public clock { private: @@ -56,9 +57,9 @@ u64 kvmclock::wall_clock_boot() u64 w; do { v1 = _wall->version; - __sync_synchronize(); + barrier(); w = u64(_wall->sec) * 1000000000 + _wall->nsec; - __sync_synchronize(); + barrier(); v2 = _wall->version; } while (v1 != v2); return w; @@ -70,7 +71,7 @@ u64 kvmclock::system_time() u64 time; do { v1 = _sys->version; - __sync_synchronize(); + barrier(); time = processor::rdtsc() - 
_sys->tsc_timestamp; if (_sys->tsc_shift >= 0) { time <<= _sys->tsc_shift; @@ -82,7 +83,7 @@ u64 kvmclock::system_time() : "rm"(u64(_sys->tsc_to_system_mul)) : "rdx"); time += _sys->system_time; - __sync_synchronize(); + barrier(); v2 = _sys->version; } while (v1 != v2); return time; diff --git a/include/sched.hh b/include/sched.hh index dae74d3319e8350dcbf9516357eb2233a6e89a1a..cd5e995881cb11005700266d89e6b2b252daeff2 100644 --- a/include/sched.hh +++ b/include/sched.hh @@ -40,12 +40,12 @@ const unsigned max_cpus = sizeof(unsigned long) * 8; class cpu_set { public: explicit cpu_set() : _mask() {} - cpu_set(const cpu_set& other) : _mask(other._mask.load()) {} + cpu_set(const cpu_set& other) : _mask(other._mask.load(std::memory_order_relaxed)) {} void set(unsigned c) { - _mask.fetch_or(1UL << c); + _mask.fetch_or(1UL << c, std::memory_order_release); } void clear(unsigned c) { - _mask.fetch_and(~(1UL << c)); + _mask.fetch_and(~(1UL << c), std::memory_order_release); } class iterator; iterator begin() { @@ -56,7 +56,9 @@ public: } cpu_set fetch_clear() { cpu_set ret; - ret._mask = _mask.exchange(0); + if (_mask.load(std::memory_order_relaxed)) { /* relaxed read first; the exchange below runs only when the mask reads non-zero */ + ret._mask = _mask.exchange(0, std::memory_order_acquire); + } return ret; } operator bool() const { diff --git a/tests/tst-ctxsw.cc b/tests/tst-ctxsw.cc new file mode 100644 index 0000000000000000000000000000000000000000..9c1af92f298382cd1b189f7fb282e956cdcc236d --- /dev/null +++ b/tests/tst-ctxsw.cc @@ -0,0 +1,166 @@ + +#include <functional> +#include <memory> +#include <string> +#include <pthread.h> +#include <sys/time.h> +#include <cinttypes> +#include <stdio.h> + +#ifdef __OSV__ /* this branch implements pinned_thread on top of sched::thread */ + +#include <sched.hh> + +class pinned_thread { /* runs a std::function on its own thread; pin() called before start() selects the cpu */ +public: + explicit pinned_thread(std::function<void ()> f); + void pin(unsigned cpu); + void start(); + void join(); +private: + std::function<void ()> _f; + sched::thread::attr _attr; + std::unique_ptr<sched::thread> _thread; +}; + +pinned_thread::pinned_thread(std::function<void ()> f) + : _f(f) 
+{ +} + +void pinned_thread::pin(unsigned cpu) +{ + _attr.pinned_cpu = sched::cpus[cpu]; /* stored in the attr that start() passes to sched::thread */ +} + +void pinned_thread::start() +{ + _thread.reset(new sched::thread(_f, _attr)); + _thread->start(); +} + +void pinned_thread::join() +{ + _thread->join(); +} + +#else /* this branch implements pinned_thread with std::thread and sched_setaffinity */ + +#include <thread> +#include <sched.h> + +class pinned_thread { /* same public interface as the __OSV__ variant */ +public: + explicit pinned_thread(std::function<void ()> f); + void pin(unsigned cpu); + void start(); + void join(); +private: + void do_pin(); +private: + std::function<void ()> _f; + bool _is_pinned = false; + unsigned _cpu; /* only read by do_pin() when _is_pinned is true */ + std::unique_ptr<std::thread> _thread; +}; + +pinned_thread::pinned_thread(std::function<void ()> f) + : _f(f) +{ +} + +void pinned_thread::pin(unsigned cpu) +{ + _is_pinned = true; + _cpu = cpu; +} + +void pinned_thread::start() +{ + _thread.reset(new std::thread([=] { do_pin(); _f(); })); /* affinity is applied from inside the new thread, before _f runs */ +} + +void pinned_thread::do_pin() +{ + if (_is_pinned) { + cpu_set_t cs; + CPU_ZERO(&cs); + CPU_SET(_cpu, &cs); + sched_setaffinity(0, sizeof(cs), &cs); /* pid 0 selects the calling thread (see sched_setaffinity(2)); the return value is ignored */ + } +} + +void pinned_thread::join() +{ + _thread->join(); +} + +#endif + +pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +unsigned owner; /* id of the thread whose turn it is; run() waits until owner equals its own id */ +unsigned remain; /* turns left; set by test() and decremented in run() while mtx is held */ + +void run(unsigned me) /* takes turns with the other run() thread until remain reaches zero */ +{ + bool done = false; + while (!done) { + pthread_mutex_lock(&mtx); + while (owner != me) { + pthread_cond_wait(&cond, &mtx); + } + if (remain == 0) { /* no turns left: leave the loop after this pass; ownership is still handed over below */ + done = true; + } else { + --remain; + } + owner = !me; /* test() starts this with me == 0 and me == 1, so !me is the other id */ + pthread_mutex_unlock(&mtx); + pthread_cond_signal(&cond); + } +} + + +uint64_t nstime() /* gettimeofday() result converted to nanoseconds */ +{ + timeval tv; + gettimeofday(&tv, nullptr); + return tv.tv_sec * uint64_t(1000000000) + tv.tv_usec * uint64_t(1000); +} + +void test(std::string name, + std::function<void (pinned_thread& t)> pin0, + std::function<void (pinned_thread& t)> pin1) /* times two run() threads configured by pin0 and pin1, then prints elapsed ns divided by n_iterations followed by name */ +{ + pinned_thread t0([] { run(0); }), t1([] { run(1); }); + pin0(t0); + pin1(t1); + auto n_iterations = 10000000; + remain = n_iterations; /* reset the shared turn counter for this run */ + + auto start = nstime(); + + t0.start(); + t1.start(); + + t0.join(); + 
t1.join(); + + auto end = nstime(); + + printf("%10" PRIu64 " %s\n", (end - start) / n_iterations, name.c_str()); +} + +int main(int ac, char** av) +{ + auto pin0 = [](pinned_thread& t) { t.pin(0); }; + auto pin1 = [](pinned_thread& t) { t.pin(1); }; + auto nopin = [](pinned_thread& t) {}; + test("colocated", pin0, pin0); /* both threads pinned to cpu 0 */ + test("apart", pin0, pin1); /* one thread on cpu 0, the other on cpu 1 */ + test("nopin", nopin, nopin); /* pin() is never called for either thread */ +} + + +