From db71c36657d1fbb5b226cad473b52d56804a8283 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani@danielbertalan.dev>
Date: Sun, 24 Oct 2021 17:34:59 +0200
Subject: Kernel: Properly align stack for signal handlers

The System V ABI requires that the stack is 16-byte aligned on function
call. Confusingly, however, they mean that the stack must be aligned
this way **before** the `CALL` instruction is executed. That instruction
pushes the return value onto the stack, so the callee will actually see
the stack pointer as a value `sizeof(FlatPtr)` smaller.

The signal trampoline was written with this in mind, but `setup_stack`
aligned the entire stack, *including the return address* to a 16-byte
boundary. Because of this, the trampoline subtracted too much from the
stack pointer, thus misaligning it.

This was not a problem on i686 because we didn't execute any
instructions from signal handlers that would require memory operands to
be aligned to more than 4 bytes. This is not the case, however, on
x86_64, where SSE instructions are enabled by default and they require
16-byte aligned operands. Running such instructions raised a GP fault,
immediately killing the offending program with a SIGSEGV signal.

This issue caused TestKernelAlarm to fail in LibC when ran locally, and
at one point, the zsh port was affected too.

Fixes #9291
---
 Kernel/Thread.cpp | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'Kernel/Thread.cpp')

diff --git a/Kernel/Thread.cpp b/Kernel/Thread.cpp
index 059914338a..fa58eae54a 100644
--- a/Kernel/Thread.cpp
+++ b/Kernel/Thread.cpp
@@ -932,11 +932,11 @@ DispatchSignalResult Thread::dispatch_signal(u8 signal)
 
 #if ARCH(I386)
         // Align the stack to 16 bytes.
-        // Note that we push 56 bytes (4 * 14) on to the stack,
-        // so we need to account for this here.
-        // 56 % 16 = 8, so we only need to take 8 bytes into consideration for
+        // Note that we push 52 bytes (4 * 13) on to the stack
+        // before the return address, so we need to account for this here.
+        // 56 % 16 = 4, so we only need to take 4 bytes into consideration for
         // the stack alignment.
-        FlatPtr stack_alignment = (stack - 8) % 16;
+        FlatPtr stack_alignment = (stack - 4) % 16;
         stack -= stack_alignment;
 
         push_value_on_user_stack(stack, ret_flags);
@@ -952,12 +952,12 @@ DispatchSignalResult Thread::dispatch_signal(u8 signal)
         push_value_on_user_stack(stack, state.edi);
 #else
         // Align the stack to 16 bytes.
-        // Note that we push 176 bytes (8 * 22) on to the stack,
-        // so we need to account for this here.
-        // 22 % 2 = 0, so we dont need to take anything into consideration
-        // for the alignment.
+        // Note that we push 168 bytes (8 * 21) on to the stack
+        // before the return address, so we need to account for this here.
+        // 168 % 16 = 8, so we only need to take 8 bytes into consideration for
+        // the stack alignment.
         // We also are not allowed to touch the thread's red-zone of 128 bytes
-        FlatPtr stack_alignment = stack % 16;
+        FlatPtr stack_alignment = (stack - 8) % 16;
         stack -= 128 + stack_alignment;
 
         push_value_on_user_stack(stack, ret_flags);
@@ -986,13 +986,14 @@ DispatchSignalResult Thread::dispatch_signal(u8 signal)
 
         push_value_on_user_stack(stack, signal);
         push_value_on_user_stack(stack, handler_vaddr.get());
+
+        VERIFY((stack % 16) == 0);
+
         push_value_on_user_stack(stack, 0); // push fake return address
 
         // We write back the adjusted stack value into the register state.
         // We have to do this because we can't just pass around a reference to a packed field, as it's UB.
         state.set_userspace_sp(stack);
-
-        VERIFY((stack % 16) == 0);
     };
 
     // We now place the thread state on the userspace stack.
-- 
cgit v1.2.3