From 4ef65eb6bac7e8d03e1a0f2b2c14eb74c349f7ea Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@cloudius-systems.com>
Date: Tue, 8 Apr 2014 15:55:23 +0300
Subject: [PATCH] sched: fix waitqueue race causing failure to wake up

When waitqueue::wake_all() wakes up waiting threads, it calls
sched::thread::wake_lock() to enqueue those waiting threads on the mutex
protecting the waitqueue, thus avoiding needless contention on the mutex.
However, if a thread is already waking, we let it wake naturally and acquire
the mutex itself.

The problem is that the waitqueue code (wait_object<waitqueue>::poll())
examines the wait_record it sleeps on and see if it has woken, and if not,
goes back to sleep.  Since nothing in that thread-already-awake path clears
the wait_record, that is what happens, and the thread stalls, until a timeout
occurs.

Fix by clearing the wait record.  As it is protected by the mutex, no
extra synchronization is needed.

Observed with iperf -P 64 against the guest.  Likely triggered by net channels
waking up the thread, and then before it has a chance to wake up, a FIN
packet arrives that is processed in the driver thread; so when the packets
are consumed the thread is in the waking state.

Reviewed-by: Nadav Har'El <nyh@cloudius-systems.com>
Signed-off-by: Avi Kivity <avi@cloudius-systems.com>
Signed-off-by: Pekka Enberg <penberg@cloudius-systems.com>
---
 core/sched.cc              | 4 ++++
 include/osv/wait_record.hh | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/core/sched.cc b/core/sched.cc
index fdb156b17..2eb13207a 100644
--- a/core/sched.cc
+++ b/core/sched.cc
@@ -24,6 +24,7 @@
 #include <osv/elf.hh>
 #include <stdlib.h>
 #include <unordered_map>
+#include <osv/wait_record.hh>
 
 __thread char* percpu_base;
 
@@ -761,6 +762,9 @@ void thread::wake_lock(mutex* mtx, wait_record* wr)
         // ones doing it, and that it doesn't wake up while we do
         auto expected = status::waiting;
         if (!st->st.compare_exchange_strong(expected, status::sending_lock, std::memory_order_relaxed)) {
+            // make sure the thread can see wr->woken() == true.  We're still protected by
+            // the mutex, so so need for extra protection
+            wr->clear();
             // let the thread acquire the lock itself
             return;
         }
diff --git a/include/osv/wait_record.hh b/include/osv/wait_record.hh
index d7e0f1fe8..d3201b780 100644
--- a/include/osv/wait_record.hh
+++ b/include/osv/wait_record.hh
@@ -61,6 +61,12 @@ public:
         return !t;
     }
 
+    // Signal the wait record as woken without actually waking it up.  Use
+    // only with external synchronization.
+    void clear() noexcept {
+        t = nullptr;
+    }
+
     // A waiter object cannot be copied or moved, as wake() on the copy will
     // simply zero its copy of the content - not the original content on which
     // wait() is waiting on.
-- 
GitLab