[Git][ghc/ghc][master] rts: forward clone-stack messages after TSO migration
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC

Commits:

5b550754 by Matthew Pickering at 2026-03-26T03:54:51-04:00
rts: forward clone-stack messages after TSO migration

MSG_CLONE_STACK assumed that the target TSO was still owned by the
capability that received the message. This is not always true: the TSO
can migrate before the inbox entry is handled.

When that happened, handleCloneStackMessage could clone a live stack from
the wrong capability and use the wrong capability for allocation and
performTryPutMVar, leading to stack sanity failures such as
checkStackFrame: weird activation record found on stack.

Fix this by passing the current capability into handleCloneStackMessage,
rechecking msg->tso->cap at handling time, and forwarding the message if
the TSO has migrated. Once ownership matches, use the executing capability
consistently for cloneStack, rts_apply, and performTryPutMVar.

Fixes #27008

- - - - -

5 changed files:

- rts/CloneStack.c
- rts/CloneStack.h
- rts/Messages.c
- testsuite/tests/rts/all.T
- + testsuite/tests/rts/cloneThreadStackMigrating.hs

Changes:

=====================================
rts/CloneStack.c
=====================================
@@ -84,15 +84,31 @@ void sendCloneStackMessage(StgTSO *tso, HsStablePtr mvar) {
   sendMessage(srcCapability, tso->cap, (Message *)msg);
 }

-void handleCloneStackMessage(MessageCloneStack *msg){
-  StgStack* newStackClosure = cloneStack(msg->tso->cap, msg->tso->stackobj);
+// The cap argument is the capability which is handling the CloneStack message
+void handleCloneStackMessage(Capability *cap, MessageCloneStack *msg){
+  // We must check that the current owner of the thread we want to clone the stack for
+  // is still this capability.
+  Capability *owner = RELAXED_LOAD(&msg->tso->cap);
+  if (owner != cap) {
+    // The target TSO may have migrated after the message was queued on the old
+    // capability. In that case we must forward the request to the current
+    // owner; otherwise we would race with another capability mutating the
+    // stack while we clone it.
+    sendMessage(cap, owner, (Message *)msg);
+    return;
+  }
+
+  // At this point the executing capability owns the TSO, so it is the only
+  // capability that may safely inspect the live stack and the one whose
+  // allocator we must use for the cloned StgStack closure.
+  StgStack* newStackClosure = cloneStack(cap, msg->tso->stackobj);

   // Lift StackSnapshot# to StackSnapshot by applying it's constructor.
   // This is necessary because performTryPutMVar() puts the closure onto the
   // stack for evaluation and stacks can not be evaluated (entered).
-  HaskellObj result = rts_apply(msg->tso->cap, StackSnapshot_constructor_closure, (HaskellObj) newStackClosure);
+  HaskellObj result = rts_apply(cap, StackSnapshot_constructor_closure, (HaskellObj) newStackClosure);

-  bool putMVarWasSuccessful = performTryPutMVar(msg->tso->cap, msg->result, result);
+  bool putMVarWasSuccessful = performTryPutMVar(cap, msg->result, result);

   if(!putMVarWasSuccessful) {
     barf("Can't put stack cloning result into MVar.");

=====================================
rts/CloneStack.h
=====================================
@@ -17,7 +17,7 @@ void sendCloneStackMessage(StgTSO *tso, HsStablePtr mvar);
 #include "BeginPrivate.h"

 #if defined(THREADED_RTS)
-void handleCloneStackMessage(MessageCloneStack *msg);
+void handleCloneStackMessage(Capability *cap, MessageCloneStack *msg);
 #endif

 #include "EndPrivate.h"

=====================================
rts/Messages.c
=====================================
@@ -135,7 +135,7 @@ loop:
     }
     else if(i == &stg_MSG_CLONE_STACK_info){
         MessageCloneStack *cloneStackMessage = (MessageCloneStack*) m;
-        handleCloneStackMessage(cloneStackMessage);
+        handleCloneStackMessage(cap, cloneStackMessage);
     }
     else
     {

=====================================
testsuite/tests/rts/all.T
=====================================
@@ -587,6 +587,15 @@ test('cloneMyStack_retBigStackFrame', [req_c, extra_files(['cloneStackLib.c']),
 test('cloneThreadStack', [req_c, only_ways(['threaded1']), extra_ways(['threaded1']), extra_files(['cloneStackLib.c']), req_ghc_with_threaded_rts], compile_and_run, ['cloneStackLib.c -threaded'])

+test('cloneThreadStackMigrating',
+     [ ignore_stdout
+     , only_ways(['threaded1'])
+     , extra_ways(['threaded1'])
+     , extra_run_opts('+RTS -N -DS -RTS')
+     , req_ghc_with_threaded_rts
+     , req_target_smp
+     ], compile_and_run, ['-threaded -debug -rtsopts'])
+
 test('decodeMyStack',
      [ omit_ghci, js_broken(22261) # cloneMyStack# not yet implemented
      , when(ghc_with_ipe(), skip) # IPE builds can change decoded stack output.

=====================================
testsuite/tests/rts/cloneThreadStackMigrating.hs
=====================================
@@ -0,0 +1,37 @@
+module Main where
+
+import Control.Concurrent
+import Control.Monad
+import GHC.Exts.Stack
+import GHC.Stack.CloneStack
+
+numWorkers :: Int
+numWorkers = 100
+
+startN :: Int
+startN = 10
+
+runForMicros :: Int
+runForMicros = 1000000
+
+fib :: Int -> Int
+fib 0 = 1
+fib 1 = 1
+fib n = fib (n - 1) + fib (n - 2)
+
+workerThread :: Int -> IO ()
+workerThread n = do
+  fib n `seq` pure ()
+  workerThread (n + 1)
+
+cloneThread :: ThreadId -> IO ()
+cloneThread tid = forever $ do
+  snapshot <- cloneThreadStack tid
+  stack <- decodeStack snapshot
+  stack `seq` pure ()
+
+main :: IO ()
+main = do
+  tids <- replicateM numWorkers (forkIO $ workerThread startN)
+  mapM_ (forkIO . cloneThread) tids
+  threadDelay runForMicros

View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/5b550754ca0153a705ec607407074fe5...

--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/5b550754ca0153a705ec607407074fe5...
You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
- Marge Bot (@marge-bot)