Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-add Stack Overflow handling in NativeAOT with larger alternate stack #95808

Closed
wants to merge 10 commits into from
Closed
1 change: 1 addition & 0 deletions src/coreclr/nativeaot/Runtime/inc/CommonTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#ifndef __COMMON_TYPES_H__
#define __COMMON_TYPES_H__

#include <assert.h>
#include <cstddef>
#include <cstdint>
#include <stdlib.h>
Expand Down
102 changes: 98 additions & 4 deletions src/coreclr/nativeaot/Runtime/thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "CommonTypes.h"
#include "CommonMacros.h"
#include "daccess.h"
#include "CommonMacros.inl"
#include "PalRedhawkCommon.h"
#include "PalRedhawk.h"
#include "rhassert.h"
Expand All @@ -28,6 +29,11 @@
#include "RhConfig.h"
#include "GcEnum.h"

#if defined(TARGET_UNIX) && !HAVE_MACH_EXCEPTIONS && !defined(HOST_TVOS)
#include <signal.h>
#include <sys/mman.h>
#endif // defined(TARGET_UNIX) && !HAVE_MACH_EXCEPTIONS && !defined(HOST_TVOS)

#ifndef DACCESS_COMPILE

EXTERN_C NATIVEAOT_API void* REDHAWK_CALLCONV RhpHandleAlloc(void* pObject, int type);
Expand Down Expand Up @@ -295,6 +301,9 @@ void Thread::Construct()
if (StressLog::StressLogOn(~0u, 0))
m_pThreadStressLog = StressLog::CreateThreadStressLog(this);
#endif // STRESS_LOG
#if defined(TARGET_UNIX) && !HAVE_MACH_EXCEPTIONS && !defined(HOST_TVOS)
EnsureSignalAlternateStack();
#endif // defined(TARGET_UNIX) && !HAVE_MACH_EXCEPTIONS && !defined(HOST_TVOS)

// Everything else should be initialized to 0 via the static initialization of tls_CurrentThread.

Expand All @@ -311,6 +320,87 @@ void Thread::Construct()
ASSERT(m_interruptedContext == NULL);
}

#if defined(TARGET_UNIX) && !HAVE_MACH_EXCEPTIONS && !defined(HOST_TVOS)
// Releases the alternate signal stack recorded in m_alternateStack, if any.
// The recorded pointer is cleared first; the kernel-registered alternate stack
// is then disabled, and the mapping (guard page + stack) is unmapped only when
// the stack that was registered matches the one this Thread recorded.
void Thread::FreeSignalAlternateStack()
{
    void* const recordedStack = m_alternateStack;
    m_alternateStack = nullptr;

    if (recordedStack == nullptr)
    {
        // Nothing was recorded for this thread.
        return;
    }

    // The man page for sigaltstack says that when ss_flags is SS_DISABLE all
    // other fields are ignored. However, the MUSL implementation checks that
    // ss_size is >= MINSIGSTKSZ even in this case, so provide a valid size.
    stack_t disableRequest;
    stack_t previousStack;
    disableRequest.ss_size = MINSIGSTKSZ;
    disableRequest.ss_flags = SS_DISABLE;
    disableRequest.ss_sp = NULL;

    if (sigaltstack(&disableRequest, &previousStack) != 0)
    {
        return;
    }

    if (previousStack.ss_flags == SS_DISABLE)
    {
        // No alternate stack was registered at the time of the call.
        return;
    }

    // The recorded pointer is the base of the mapping (the guard page); the
    // registered stack begins one page above it. Only unmap when they agree.
    void* const previousMappingBase = (void*)((uint8_t*)previousStack.ss_sp - PalOsPageSize());
    if (previousMappingBase == recordedStack)
    {
        int unmapStatus = munmap(recordedStack, previousStack.ss_size + PalOsPageSize());
        _ASSERTE(unmapStatus == 0);
    }
}

// Makes sure the calling thread has an alternate signal stack registered.
//
// If an alternate stack is already registered it is left untouched. Otherwise
// a new region is mapped consisting of one PROT_NONE guard page followed by
// the stack itself, the stack is registered via sigaltstack, and the base of
// the mapping is recorded in m_alternateStack.
//
// Returns true when an alternate stack was already registered or a new one
// was installed; false when querying, mapping, protecting or registering
// failed. On failure nothing stays mapped and m_alternateStack is not modified.
bool Thread::EnsureSignalAlternateStack()
{
    stack_t oss;

    // Query the current alternate signal stack
    int st = sigaltstack(NULL, &oss);
    if (st != 0)
    {
        return false;
    }

    if (oss.ss_flags != SS_DISABLE)
    {
        // An alternate stack is already registered; keep it.
        return true;
    }

    // There is no alternate stack for SIGSEGV handling installed yet so allocate one
    int altStackSize = SIGSTKSZ;
#ifdef HAS_ADDRESS_SANITIZER
    // ASan also runs on the alternate stack, so grow it by SIGSTKSZ * 4, which is enough for ASan
    // (see kAltStackSize in compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cc)
    altStackSize += SIGSTKSZ * 4;
#endif
    altStackSize = ALIGN_UP(altStackSize, PalOsPageSize());

    // Total mapping = guard page + stack. Keep it in one place so that the
    // cleanup path unmaps exactly what was mapped.
    const size_t mappingSize = (size_t)altStackSize + PalOsPageSize();

    int flags = MAP_ANONYMOUS | MAP_PRIVATE;
#ifdef MAP_STACK
    flags |= MAP_STACK;
#endif

    // Allocate stack+guard page
    void* altStack = mmap(NULL, mappingSize, PROT_READ | PROT_WRITE, flags, -1, 0);
    if (altStack == MAP_FAILED)
    {
        // Previously this path reported success because the status of the
        // initial query was still 0; report the allocation failure instead.
        return false;
    }

    // Create a guard page for the alternate stack (the lowest page of the mapping)
    st = mprotect(altStack, PalOsPageSize(), PROT_NONE);
    if (st == 0)
    {
        stack_t ss;
        ss.ss_sp = (uint8_t*)altStack + PalOsPageSize();
        ss.ss_size = altStackSize;
        ss.ss_flags = 0;
        st = sigaltstack(&ss, NULL);
    }

    if (st != 0)
    {
        // Release the whole mapping, including the guard page.
        int st2 = munmap(altStack, mappingSize);
        _ASSERTE(st2 == 0);
        return false;
    }

    m_alternateStack = altStack;
    return true;
}
#endif // defined(TARGET_UNIX) && !HAVE_MACH_EXCEPTIONS && !defined(HOST_TVOS)

bool Thread::IsInitialized()
{
return (m_ThreadStateFlags != TSF_Unknown);
Expand Down Expand Up @@ -388,6 +478,10 @@ void Thread::Destroy()
}
#endif //FEATURE_SUSPEND_REDIRECTION

#if defined(TARGET_UNIX) && !HAVE_MACH_EXCEPTIONS && !defined(HOST_TVOS)
FreeSignalAlternateStack();
#endif // defined(TARGET_UNIX) && !HAVE_MACH_EXCEPTIONS && !defined(HOST_TVOS)

ASSERT(m_pGCFrameRegistrations == NULL);
}

Expand Down Expand Up @@ -893,19 +987,19 @@ void Thread::Unhijack()
}

// This unhijack routine is called to undo a hijack, that is potentially on a different thread.
//
//
// Although there are many code sequences (here and in asm) to
// perform an unhijack operation, they will never execute concurrently:
//
//
// - A thread may unhijack itself at any time so long as it does that from unmanaged code while in coop mode.
// This ensures that coop thread can access its stack synchronously.
// Unhijacking from unmanaged code ensures that another thread will not attempt to hijack it,
// since we only hijack threads that are executing managed code.
//
//
// - A GC thread may access a thread asynchronously, including unhijacking it.
// Asynchronously accessed thread must be in preemptive mode and should not
// access the managed portion of its stack.
//
//
// - A thread that owns the suspension can access another thread as long as the other thread is
// in preemptive mode or suspended in managed code.
// Either way the other thread cannot be accessing its hijack.
Expand Down
10 changes: 9 additions & 1 deletion src/coreclr/nativeaot/Runtime/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ struct ThreadBuffer
#ifdef FEATURE_GC_STRESS
uint32_t m_uRand; // current per-thread random number
#endif // FEATURE_GC_STRESS
#if defined(TARGET_UNIX) && !HAVE_MACH_EXCEPTIONS && !defined(HOST_TVOS)
void * m_alternateStack; // ptr to alternate signal stack
#endif // defined(TARGET_UNIX) && !HAVE_MACH_EXCEPTIONS && !defined(HOST_TVOS)
};

struct ReversePInvokeFrame
Expand Down Expand Up @@ -152,7 +155,7 @@ class Thread : private ThreadBuffer
// For suspension APCs it is mostly harmless, but wasteful and in extreme
// cases may force the target thread into stack overflow.
// We use this flag to avoid sending another APC when one is still going through.
//
//
// On Unix this is an optimization to not queue up more signals when one is
// still being processed.
};
Expand Down Expand Up @@ -317,6 +320,11 @@ class Thread : private ThreadBuffer

bool IsActivationPending();
void SetActivationPending(bool isPending);

#if defined(TARGET_UNIX) && !HAVE_MACH_EXCEPTIONS && !defined(HOST_TVOS)
bool EnsureSignalAlternateStack();
void FreeSignalAlternateStack();
#endif // defined(TARGET_UNIX) && !HAVE_MACH_EXCEPTIONS && !defined(HOST_TVOS)
};

#ifndef __GCENV_BASE_INCLUDED__
Expand Down
43 changes: 29 additions & 14 deletions src/coreclr/nativeaot/Runtime/unix/HardwareExceptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#include "HardwareExceptions.h"
#include "UnixSignals.h"
#include "PalCreateDump.h"
#include "thread.h"
#include "threadstore.h"
#include <sys/mman.h>

#if defined(HOST_APPLE)
#include <mach/mach.h>
Expand Down Expand Up @@ -545,6 +548,17 @@ bool HardwareExceptionHandler(int code, siginfo_t *siginfo, void *context, void*
// Handler for the SIGSEGV signal
void SIGSEGVHandler(int code, siginfo_t *siginfo, void *context)
{
// First check if we have a stack overflow
size_t sp = ((UNIX_CONTEXT *)context)->GetSp();
size_t failureAddress = (size_t)siginfo->si_addr;

// If the failure address is at most one page above or below the stack pointer,
// we have a stack overflow.
if ((failureAddress - (sp - PalOsPageSize())) < (size_t)PalOsPageSize() * 2)
{
PalPrintFatalError("\nProcess is terminating due to StackOverflowException.\n");
RhFailFast();
}
bool isHandled = HardwareExceptionHandler(code, siginfo, context, siginfo->si_addr);
if (isHandled)
{
Expand Down Expand Up @@ -589,7 +603,8 @@ void SIGFPEHandler(int code, siginfo_t *siginfo, void *context)
// Initialize hardware exception handling
bool InitializeHardwareExceptionHandling()
{
if (!AddSignalHandler(SIGSEGV, SIGSEGVHandler, &g_previousSIGSEGV))
// Run SIGSEGV handler on separate stack so we can handle stack overflow. Otherwise, the current (invalid) stack is used and another segfault is raised.
if (!AddSignalHandler(SIGSEGV, SIGSEGVHandler, &g_previousSIGSEGV, SA_ONSTACK))
{
return false;
}
Expand All @@ -601,23 +616,23 @@ bool InitializeHardwareExceptionHandling()

#if defined(HOST_APPLE)
#ifndef HOST_TVOS // task_set_exception_ports is not supported on tvOS
// LLDB installs task-wide Mach exception handlers. XNU dispatches Mach
// exceptions first to any registered "activation" handler and then to
// any registered task handler before dispatching the exception to a
// host-wide Mach exception handler that does translation to POSIX
// signals. This makes it impossible to use LLDB with implicit null
// LLDB installs task-wide Mach exception handlers. XNU dispatches Mach
// exceptions first to any registered "activation" handler and then to
// any registered task handler before dispatching the exception to a
// host-wide Mach exception handler that does translation to POSIX
// signals. This makes it impossible to use LLDB with implicit null
// checks in NativeAOT; continuing execution after LLDB traps an
// EXC_BAD_ACCESS will result in LLDB's EXC_BAD_ACCESS handler being
// invoked again. This also interferes with the translation of SIGFPEs
// to .NET-level ArithmeticExceptions. Work around this here by
// installing a no-op task-wide Mach exception handler for
// EXC_BAD_ACCESS and EXC_ARITHMETIC.
kern_return_t kr = task_set_exception_ports(
mach_task_self(),
EXC_MASK_BAD_ACCESS | EXC_MASK_ARITHMETIC, /* SIGSEGV, SIGFPE */
MACH_PORT_NULL,
EXCEPTION_STATE_IDENTITY,
MACHINE_THREAD_STATE);
// installing a no-op task-wide Mach exception handler for
// EXC_BAD_ACCESS and EXC_ARITHMETIC.
kern_return_t kr = task_set_exception_ports(
mach_task_self(),
EXC_MASK_BAD_ACCESS | EXC_MASK_ARITHMETIC, /* SIGSEGV, SIGFPE */
MACH_PORT_NULL,
EXCEPTION_STATE_IDENTITY,
MACHINE_THREAD_STATE);
ASSERT(kr == KERN_SUCCESS);
#endif
#endif
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/nativeaot/Runtime/unix/UnixSignals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
#include "UnixSignals.h"

// Add handler for hardware exception signal
bool AddSignalHandler(int signal, SignalHandler handler, struct sigaction* previousAction)
bool AddSignalHandler(int signal, SignalHandler handler, struct sigaction* previousAction, int additionalFlags)
{
struct sigaction newAction;

newAction.sa_flags = SA_RESTART;
newAction.sa_flags = SA_RESTART | additionalFlags;
newAction.sa_handler = NULL;
newAction.sa_sigaction = handler;
newAction.sa_flags |= SA_SIGINFO;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/nativeaot/Runtime/unix/UnixSignals.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

typedef void (*SignalHandler)(int code, siginfo_t* siginfo, void* context);

bool AddSignalHandler(int signal, SignalHandler handler, struct sigaction* previousAction);
bool AddSignalHandler(int signal, SignalHandler handler, struct sigaction* previousAction, int additionalFlags = 0);
void RestoreSignalHandler(int signal_id, struct sigaction* previousAction);

#endif // __UNIX_SIGNALS_H__
1 change: 1 addition & 0 deletions src/tests/Common/CoreCLRTestLibrary/Utilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ public static bool IsWindowsIoTCore

public static bool IsMonoRuntime => Type.GetType("Mono.RuntimeStructs") != null;
public static bool IsNotMonoRuntime => !IsMonoRuntime;

public static bool IsNativeAot => IsNotMonoRuntime && !IsReflectionEmitSupported;
public static bool IsNotNativeAot => !IsNativeAot;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ struct LargeStruct65536
LargeStruct4096 se;
LargeStruct4096 sf;
}
class Program

internal class StackOverflow
{
[MethodImpl(MethodImplOptions.NoInlining)]
static void InfiniteRecursionA()
Expand Down Expand Up @@ -134,7 +135,7 @@ static void SecondaryThreadsTest(bool smallframe)
}
}

static void Main(string[] args)
internal static void Run(string[] args)
{
bool smallframe = (args[0] == "smallframe");
if (args[1] == "secondary")
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
// The .NET Foundation licenses this file to you under the MIT license.
using System;

namespace TestStackOverflow3
namespace TestStackOverflow
{
class Program
internal class StackOverflow3
{
private const int MAX_RECURSIVE_CALLS = 1000000;
static int ctr = 0;

public static void Main()
public static void Run()
{
Program ex = new Program();
StackOverflow3 ex = new StackOverflow3();
ex.Execute();
}

Expand Down

This file was deleted.

Loading