Duncan Coutts pushed to branch wip/io-manager-deadlock-detection at Glasgow Haskell Compiler / GHC Commits: a37c97bc by Duncan Coutts at 2025-11-15T22:01:39+00:00 FIXUP: Add an FdWakup module for posix I/O managers cpp warnings - - - - - 0ad3b03d by Duncan Coutts at 2025-11-15T22:10:18+00:00 FIXUP: Rationalise some scheduler run queue utilities win32 conditional compilation issues - - - - - 31bdf200 by Duncan Coutts at 2025-11-15T23:00:51+00:00 FIXUP: Clean up signal handling internal API ifdefs - - - - - 050e03a4 by Duncan Coutts at 2025-11-16T22:58:03+00:00 FIXUP: event and timer manager init synchronisation I'm not terribly satisfied by this. It might work but it's not principled. - - - - - 4 changed files: - libraries/ghc-internal/src/GHC/Internal/Event/Thread.hs - rts/RtsStartup.c - rts/Schedule.c - rts/posix/FdWakeup.c Changes: ===================================== libraries/ghc-internal/src/GHC/Internal/Event/Thread.hs ===================================== @@ -223,38 +223,48 @@ threadWaitWriteSTM = threadWaitSTM evtWrite {-# INLINE threadWaitWriteSTM #-} --- | Retrieve the system event manager for the capability on which the --- calling thread is running. --- --- This function always returns 'Just' the current thread's event manager --- when using the threaded RTS and 'Nothing' otherwise. -getSystemEventManager :: IO (Maybe EventManager) -getSystemEventManager = do +getSystemEventManager_ :: IO EventManager +getSystemEventManager_ = do t <- myThreadId eventManagerArray <- readIORef eventManager let r = boundsIOArray eventManagerArray (cap, _) <- threadCapability t - -- It is possible that we've just increased the number of capabilities and the - -- new EventManager has not yet been constructed by - -- 'ioManagerCapabilitiesChanged'. We expect this to happen very rarely. - -- T21561 exercises this. - -- Two options to proceed: - -- 1) return the EventManager for capability 0. This is guaranteed to exist, - -- and "shouldn't" cause any correctness issues. - -- 2) Busy wait, with or without a call to 'yield'. This can't deadlock, - -- because we must be on a brand capability and there must be a call to - -- 'ioManagerCapabilitiesChanged' pending. + -- It is possible that either: + -- 1) we've just started the RTS or done forkProcess and the EventManager + -- is still being started concurrently by 'ensureIOManagerIsRunning'. + -- This usually happens once at startup. + -- 2) we've just increased the number of capabilities and the new + -- EventManager is being started concurrently by + -- 'ioManagerCapabilitiesChanged'. We expect this to happen very rarely. + -- T21561 exercises this. + -- + -- For both situations we follow the strategy to busy wait, with a call to + -- 'yield'. This can't deadlock, because there must be a call to either + -- 'ensureIOManagerIsRunning' or 'ioManagerCapabilitiesChanged' running. -- - -- We take the second option, with the yield, judging it the most robust. if not (inRange r cap) - then yield >> getSystemEventManager - else fmap snd `fmap` readIOArray eventManagerArray cap + then yield >> getSystemEventManager_ -- for num caps changed + else do + mem <- readIOArray eventManagerArray cap + case mem of + Just (_, em) -> return em + Nothing + | threaded -> yield >> getSystemEventManager_ -- for startup + | otherwise -> err + where + err = error "GHC.Internal.Event.Thread.getSystemEventManager: the EventManager requires linking against the threaded runtime" -getSystemEventManager_ :: IO EventManager -getSystemEventManager_ = do - Just mgr <- getSystemEventManager - return mgr -{-# INLINE getSystemEventManager_ #-} + +-- | Retrieve the system event manager for the capability on which the +-- calling thread is running. +-- +-- This function always returns 'Just' the current thread's event +-- manager when using the threaded RTS and 'Nothing' otherwise. +getSystemEventManager :: IO (Maybe EventManager) +getSystemEventManager + | threaded = Just `fmap` getSystemEventManager_ + | otherwise = return Nothing +{-# INLINE getSystemEventManager #-} foreign import ccall unsafe "getOrSetSystemEventThreadEventManagerStore" getOrSetSystemEventThreadEventManagerStore :: Ptr a -> IO (Ptr a) @@ -299,8 +309,18 @@ ioManagerLock = unsafePerformIO $ do sharedCAF m getOrSetSystemEventThreadIOManagerThreadStore getSystemTimerManager :: IO TM.TimerManager -getSystemTimerManager = - fromMaybe err `fmap` readIORef timerManager +getSystemTimerManager = do + mtm <- readIORef timerManager + case mtm of + Just tm -> return tm + Nothing + -- Same logic as in getSystemEventManager: yield and try again. + -- This can't deadlock, because we must be on a brand new + -- capability (either the main cap during startup or a new cap + -- after forkProcess) and there must be a call to + -- 'ensureIOManagerIsRunning' pending. + | threaded -> yield >> getSystemTimerManager + | otherwise -> err where err = error "GHC.Internal.Event.Thread.getSystemTimerManager: the TimerManager requires linking against the threaded runtime" ===================================== rts/RtsStartup.c ===================================== @@ -625,7 +625,9 @@ hs_exit_(bool wait_foreign) hs_restoreConsoleCP(); #endif - finiUserSignals(); +#if defined(RTS_USER_SIGNALS) + finiUserSignals(); +#endif /* tear down statistics subsystem */ stat_exit(); ===================================== rts/Schedule.c ===================================== @@ -173,8 +173,9 @@ static void deleteAllThreads (void); static void deleteThread_(StgTSO *tso); #endif -static void removeFromRunQueue (Capability *cap, StgTSO *tso); +#if defined(FORKPROCESS_PRIMOP_SUPPORTED) static void truncateRunQueue(Capability *cap); +#endif static StgTSO *popRunQueue (Capability *cap); /* --------------------------------------------------------------------------- @@ -3025,6 +3026,7 @@ static StgTSO *popRunQueue (Capability *cap) return t; } +#if defined(FORKPROCESS_PRIMOP_SUPPORTED) static void truncateRunQueue(Capability *cap) { // Can only be called by the task owning the capability. @@ -3035,6 +3037,7 @@ static void truncateRunQueue(Capability *cap) cap->run_queue_tl = END_TSO_QUEUE; cap->n_run_queue = 0; } +#endif static void removeFromRunQueue (Capability *cap, StgTSO *tso) { ===================================== rts/posix/FdWakeup.c ===================================== @@ -47,7 +47,7 @@ static void fcntl_CLOEXEC_NONBLOCK(int fd) void newFdWakeup(int *wakeup_fd_r, int *wakeup_fd_w) { -#if HAVE_EVENTFD +#if defined(HAVE_EVENTFD) int wakeup_fd; #if defined(EFD_CLOEXEC) && defined(EFD_NONBLOCK) wakeup_fd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); @@ -79,7 +79,7 @@ void newFdWakeup(int *wakeup_fd_r, int *wakeup_fd_w) void closeFdWakeup(int wakeup_fd_r, int wakeup_fd_w) { -#if HAVE_EVENTFD +#if defined(HAVE_EVENTFD) ASSERT(wakeup_fd_r == wakeup_fd_w); close(wakeup_fd_r); #else @@ -92,7 +92,7 @@ void closeFdWakeup(int wakeup_fd_r, int wakeup_fd_w) void sendFdWakeup(int wakeup_fd_w) { int res; -#if HAVE_EVENTFD +#if defined(HAVE_EVENTFD) uint64_t val = 1; res = write(wakeup_fd_w, &val, 8); #else @@ -110,7 +110,7 @@ void sendFdWakeup(int wakeup_fd_w) void collectFdWakeup(int wakeup_fd_r) { int res; -#if HAVE_EVENTFD +#if defined(HAVE_EVENTFD) uint64_t buf; /* eventfd combines events into one counter, so a single read is enough */ res = read(wakeup_fd_r, &buf, 8); View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/eb13b6d4e399b073746fd79abd736e3... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/eb13b6d4e399b073746fd79abd736e3... You're receiving this email because of your account on gitlab.haskell.org.