From a449b889df79d83bc03bcde2bd2c74dd0337066d Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cloudius-systems.com>
Date: Tue, 1 Oct 2013 10:53:18 +0300
Subject: [PATCH] x64: Enable sleeping in fault context

In preparation for enabling demand paging, enable sleeping in fault
context by using a per-thread exception stack for normal faults and a
per-CPU exception stack for nested faults.

Avi Kivity explains:

  Before [demand paging] can even hope to work, we need to enable
  sleeping in fault context.  Right now each cpu has its own exception
  stack, which leads immediately to stack corruption:

  thread 1 faults
  enters exception stack
  tries to take mutex
  scheduler switches to thread 2
  thread 2 faults
  enters same exception stack

  So we need to switch stacks.  This can be done in the same way as for
  interrupt stacks (see thread::switch_to()).

Signed-off-by: Pekka Enberg <penberg@cloudius-systems.com>
Signed-off-by: Avi Kivity <avi@cloudius-systems.com>
---
 arch/x64/arch-cpu.cc    |  5 ++---
 arch/x64/arch-cpu.hh    | 14 ++++++++++++--
 arch/x64/arch-switch.hh |  2 ++
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/arch/x64/arch-cpu.cc b/arch/x64/arch-cpu.cc
index fb0e6cf04..1c82cea3e 100644
--- a/arch/x64/arch-cpu.cc
+++ b/arch/x64/arch-cpu.cc
@@ -17,14 +17,13 @@ inline void arch_cpu::enter_exception()
         abort("nested exception");
     }
     in_exception = true;
-    auto& s = initstack.stack;
+    auto& s = percpu_exception_stack;
     set_exception_stack(s, sizeof(s));
 }
 
 inline void arch_cpu::exit_exception()
 {
-    auto& s = exception_stack;
-    set_exception_stack(s, sizeof(s));
+    set_exception_stack(&thread::current()->_arch);
     in_exception = false;
 }
 
diff --git a/arch/x64/arch-cpu.hh b/arch/x64/arch-cpu.hh
index 6c73c1dac..9c7531b05 100644
--- a/arch/x64/arch-cpu.hh
+++ b/arch/x64/arch-cpu.hh
@@ -40,7 +40,8 @@ struct arch_cpu {
     arch_cpu();
     processor::aligned_task_state_segment atss;
     init_stack initstack;
-    char exception_stack[4096] __attribute__((aligned(16)));
+    // The per-CPU exception stack is used for nested exceptions.
+    char percpu_exception_stack[4096] __attribute__((aligned(16)));
     u32 apic_id;
     u32 acpi_id;
     u64 gdt[nr_gdt];
@@ -48,6 +49,7 @@ struct arch_cpu {
     void init_on_cpu();
     void set_ist_entry(unsigned ist, char* base, size_t size);
     void set_exception_stack(char* base, size_t size);
+    void set_exception_stack(arch_thread* t);
     void set_interrupt_stack(arch_thread* t);
     void enter_exception();
     void exit_exception();
@@ -55,6 +57,7 @@ struct arch_cpu {
 
 struct arch_thread {
     char interrupt_stack[4096] __attribute__((aligned(16)));
+    char exception_stack[4096] __attribute__((aligned(16)));
 };
 
 
@@ -91,7 +94,8 @@ inline arch_cpu::arch_cpu()
     gdt[gdt_tss] |= (tss_addr & 0x00ffffff) << 16;
     gdt[gdt_tss] |= (tss_addr & 0xff000000) << 32;
     gdt[gdt_tssx] = tss_addr >> 32;
-    set_exception_stack(exception_stack, sizeof(exception_stack));
+    // Use the per-CPU stack for early boot faults.
+    set_exception_stack(percpu_exception_stack, sizeof(percpu_exception_stack));
 }
 
 inline void arch_cpu::set_ist_entry(unsigned ist, char* base, size_t size)
@@ -104,6 +108,12 @@ inline void arch_cpu::set_exception_stack(char* base, size_t size)
     set_ist_entry(1, base, size);
 }
 
+inline void arch_cpu::set_exception_stack(arch_thread* t)
+{
+    auto& s = t->exception_stack;
+    set_ist_entry(1, s, sizeof(s));
+}
+
 inline void arch_cpu::set_interrupt_stack(arch_thread* t)
 {
     auto& s = t->interrupt_stack;
diff --git a/arch/x64/arch-switch.hh b/arch/x64/arch-switch.hh
index 5341b14eb..dc47850b7 100644
--- a/arch/x64/arch-switch.hh
+++ b/arch/x64/arch-switch.hh
@@ -54,6 +54,7 @@ void thread::switch_to()
     set_fsbase(reinterpret_cast<u64>(_tcb));
     barrier();
     _cpu->arch.set_interrupt_stack(&_arch);
+    _cpu->arch.set_exception_stack(&_arch);
     asm volatile
         ("mov %%rbp, %c[rbp](%0) \n\t"
          "movq $1f, %c[rip](%0) \n\t"
@@ -80,6 +81,7 @@ void thread::switch_to_first()
     current_cpu = _cpu;
     remote_thread_local_var(percpu_base) = _cpu->percpu_base;
     _cpu->arch.set_interrupt_stack(&_arch);
+    _cpu->arch.set_exception_stack(&_arch);
     asm volatile
         ("mov %c[rsp](%0), %%rsp \n\t"
          "mov %c[rbp](%0), %%rbp \n\t"
-- 
GitLab