Zubin pushed to branch wip/semaphore-v2 at Glasgow Haskell Compiler / GHC Commits: a70aac8e by Zubin Duggal at 2026-05-19T15:48:59+05:30 Update to semaphore-compat 2.0.0 using v2 of the protocol On Linux and other POSIX platforms, GHC's -jsem jobserver client now speaks v2 of the semaphore-compat protocol, which uses Unix domain sockets in place of POSIX named semaphores. This avoids the libc-ABI issues that affected the old implementation. Windows is unaffected and continues to use the v1 protocol (Win32 named semaphores); its reported protocol version remains v1. When GHC receives a -jsem name whose protocol version it does not support, it emits a -Wsemaphore-version-mismatch warning and falls back to -j<N> rather than crashing. ghc --info exposes the supported version in a new "Semaphore version" entry so cabal-install can detect a mismatch before invoking GHC. Users on a cabal-install that predates the v2 update will continue to build successfully on Linux/POSIX, but will lose the cross-process -jsem coordination and fall back to -j<N> per GHC invocation. Users must upgrade to a cabal-install that supports protocol v2 to recover full parallelism. Also fix a leak in cleanupSem (#27253): cleanupSem used to snapshot heldTokens and release them before killing the loop, while the loop's in-flight acquire/release children could still be mutating it. Cleanup now runs inside the loop's own exit handler, after draining the active child via a new activeChild TVar, so the snapshot has no concurrent mutator. See also: - GHC proposal amendment: https://github.com/ghc-proposals/ghc-proposals/pull/673 - cabal-install patch: https://github.com/haskell/cabal/pull/11628 - semaphore-compat MR: https://gitlab.haskell.org/ghc/semaphore-compat/-/merge_requests/8 Bump semaphore-compat submodule to 2.0.0 Fixes #25087 and #27253 - - - - - 21 changed files: - + changelog.d/jobserver-leak-fix - + changelog.d/semaphore-v2 - compiler/GHC/Driver/Errors/Ppr.hs - compiler/GHC/Driver/Errors/Types.hs - compiler/GHC/Driver/Flags.hs - compiler/GHC/Driver/MakeAction.hs - compiler/GHC/Driver/MakeSem.hs - compiler/GHC/Driver/Session.hs - compiler/GHC/Types/Error/Codes.hs - compiler/GHC/Types/Hint.hs - compiler/GHC/Types/Hint/Ppr.hs - docs/users_guide/using-warnings.rst - docs/users_guide/using.rst - hadrian/cabal.project - hadrian/hadrian.cabal - hadrian/src/Flavour.hs - hadrian/src/Rules/Generate.hs - hadrian/src/Settings/Packages.hs - hadrian/stack.yaml - libraries/semaphore-compat - testsuite/tests/diagnostic-codes/codes.stdout Changes: ===================================== changelog.d/jobserver-leak-fix ===================================== @@ -0,0 +1,8 @@ +section: compiler +issues: #27253 +mrs: !15729 +synopsis: + Fix a token leak in the ``-jsem`` jobserver shutdown path +description: + A build interrupted by Ctrl-C while a ``-jsem`` token transfer was in + flight could leak that token. ===================================== changelog.d/semaphore-v2 ===================================== @@ -0,0 +1,30 @@ +section: compiler +issues: #25087 +mrs: !15729 +synopsis: + Update to semaphore-compat 2.0.0 (``-jsem`` protocol v2) +description: + On Linux and other POSIX platforms, GHC's ``-jsem`` jobserver client + now speaks v2 of the semaphore-compat protocol, which uses Unix + domain sockets in place of POSIX named semaphores. This avoids the + libc-ABI issues that affected the old implementation. Windows is + unaffected and continues to use the v1 protocol (Win32 named + semaphores); its reported protocol version remains v1. + + When GHC receives a ``-jsem`` name whose protocol version it does not + support, it now emits a ``-Wsemaphore-version-mismatch`` warning and + falls back to ``-j1`` rather than crashing. ``ghc --info`` exposes the + supported version in a new ``"Semaphore version"`` entry so + cabal-install can detect a mismatch before invoking GHC. + + Users on a ``cabal-install`` that predates the v2 update will continue + to build successfully, but on Linux/POSIX will lose the cross-process + ``-jsem`` coordination and fall back to ``-j1`` per GHC invocation. + To recover full parallelism, upgrade to a ``cabal-install`` that + supports protocol v2. + + See also: + + - the `GHC proposal amendment https://github.com/ghc-proposals/ghc-proposals/pull/673`_ + - the `cabal-install patch https://github.com/haskell/cabal/pull/11628`_ + - the `semaphore-compat library MR https://gitlab.haskell.org/ghc/semaphore-compat/-/merge_requests/8`_ ===================================== compiler/GHC/Driver/Errors/Ppr.hs ===================================== @@ -24,6 +24,8 @@ import GHC.Types.Hint import GHC.Types.SrcLoc import Data.Version +import System.Semaphore + ( SemaphoreError(..), getSemaphoreProtocolVersion ) import Language.Haskell.Syntax.Decls (RuleDecl(..)) import GHC.Tc.Errors.Types (TcRnMessage) import GHC.HsToCore.Errors.Types (DsMessage) @@ -90,6 +92,20 @@ instance Diagnostic GhcMessage where instance HasDefaultDiagnosticOpts DriverMessageOpts where defaultOpts = DriverMessageOpts (defaultDiagnosticOpts @PsMessage) (defaultDiagnosticOpts @IfaceMessage) +pprSemaphoreError :: SemaphoreError -> SDoc +pprSemaphoreError = \case + SemaphoreAlreadyExists nm -> + text "a semaphore named" <+> quotes (text nm) <+> text "already exists" + SemaphoreDoesNotExist nm -> + text "no semaphore named" <+> quotes (text nm) + SemaphoreIncompatibleVersion got want -> + text "protocol version mismatch (got v" + <> int (getSemaphoreProtocolVersion got) + <> text ", supported v" + <> int (getSemaphoreProtocolVersion want) <> text ")" + SemaphoreOtherError ioe -> + text (show ioe) + instance Diagnostic DriverMessage where type DiagnosticOpts DriverMessage = DriverMessageOpts diagnosticMessage opts = \case @@ -282,6 +298,10 @@ instance Diagnostic DriverMessage where -> mkSimpleDecorated $ vcat [ text "The following modules are missing a linkable which is needed for creating a library:" , nest 2 $ hcat (map ppr mods) ] + DriverSemaphoreOpenFailure _ err + -> mkSimpleDecorated $ + text "Failed to open -jsem semaphore:" <+> pprSemaphoreError err <> + text "; ignoring -jsem and compiling sequentially." diagnosticReason = \case DriverUnknownMessage m @@ -355,6 +375,8 @@ instance Diagnostic DriverMessage where -> WarningWithoutFlag DriverMissingLinkableForModule {} -> ErrorWithoutFlag + DriverSemaphoreOpenFailure {} + -> WarningWithFlag Opt_WarnSemaphoreOpenFailure diagnosticHints = \case DriverUnknownMessage m @@ -430,5 +452,19 @@ instance Diagnostic DriverMessage where -> noHints DriverMissingLinkableForModule {} -> noHints + DriverSemaphoreOpenFailure buildingCabal (SemaphoreIncompatibleVersion received supported) + | received < supported + -> let required = getSemaphoreProtocolVersion supported + target = case buildingCabal of + YesBuildingCabalPackage -> UpgradeCabalInstall + NoBuildingCabalPackage -> UpgradeJobserver + in [SuggestUpgradeForSemaphoreVersionMismatch target required] + | received > supported + -> [SuggestUpgradeForSemaphoreVersionMismatch + UpgradeGHC (getSemaphoreProtocolVersion received)] + | otherwise + -> noHints + DriverSemaphoreOpenFailure {} + -> noHints diagnosticCode = constructorCode @GHC ===================================== compiler/GHC/Driver/Errors/Types.hs ===================================== @@ -37,6 +37,7 @@ import qualified GHC.LanguageExtensions as LangExt import GHC.Generics ( Generic ) +import System.Semaphore ( SemaphoreError ) import GHC.Tc.Errors.Types import GHC.Iface.Errors.Types @@ -419,6 +420,17 @@ data DriverMessage where DriverMissingLinkableForModule :: ![Module] -> DriverMessage + {-| DriverSemaphoreOpenFailure is a warning that occurs when GHC fails to + open the semaphore specified by @-jsem@, e.g. the socket does not + exist, the protocol version is incompatible, or a system error + occurred. GHC ignores @-jsem@ and compiles sequentially. + + The 'BuildingCabalPackage' flag controls whether the diagnostic + hint suggests upgrading @cabal-install@ (it only does so when GHC + is invoked by Cabal). + -} + DriverSemaphoreOpenFailure :: !BuildingCabalPackage -> !SemaphoreError -> DriverMessage + deriving instance Generic DriverMessage data DriverMessageOpts = ===================================== compiler/GHC/Driver/Flags.hs ===================================== @@ -1115,6 +1115,7 @@ data WarningFlag = | Opt_WarnUnusableUnpackPragmas -- ^ @since 9.14 | Opt_WarnPatternNamespaceSpecifier -- ^ @since 9.14 | Opt_WarnUnrecognisedModifiers -- ^ @since 10.0 + | Opt_WarnSemaphoreOpenFailure -- Since 10.0.1 deriving (Eq, Ord, Show, Enum, Bounded) -- | Return the names of a WarningFlag @@ -1237,6 +1238,7 @@ warnFlagNames wflag = case wflag of Opt_WarnUnusableUnpackPragmas -> "unusable-unpack-pragmas" :| [] Opt_WarnPatternNamespaceSpecifier -> "pattern-namespace-specifier" :| [] Opt_WarnUnrecognisedModifiers -> "unrecognised-modifiers" :| [] + Opt_WarnSemaphoreOpenFailure -> "semaphore-open-failure" :| [] -- ----------------------------------------------------------------------------- -- Standard sets of warning options @@ -1383,7 +1385,8 @@ standardWarnings -- see Note [Documenting warning flags] Opt_WarnDeprecatedPragmas, Opt_WarnRuleLhsEqualities, Opt_WarnUnusableUnpackPragmas, - Opt_WarnUnrecognisedModifiers + Opt_WarnUnrecognisedModifiers, + Opt_WarnSemaphoreOpenFailure ] -- | Things you get with @-W@. ===================================== compiler/GHC/Driver/MakeAction.hs ===================================== @@ -28,6 +28,14 @@ import GHC.Driver.Errors.Types import GHC.Driver.Messager import GHC.Driver.MakeSem +import System.Semaphore + ( SemaphoreError, SemaphoreIdentifier ) + +import GHC.Driver.Config.Diagnostic ( initDiagOpts, initPrintConfig ) +import GHC.Driver.Errors ( printOrThrowDiagnostics ) +import GHC.Types.Error ( singleMessage ) +import GHC.Types.SrcLoc ( noSrcSpan ) +import GHC.Utils.Error ( mkPlainMsgEnvelope ) import GHC.Utils.Logger import GHC.Utils.TmpFs @@ -49,7 +57,7 @@ mkWorkerLimit :: DynFlags -> IO WorkerLimit mkWorkerLimit dflags = case parMakeCount dflags of Nothing -> pure $ num_procs 1 - Just (ParMakeSemaphore h) -> pure (JSemLimit (SemaphoreName h)) + Just (ParMakeSemaphore h) -> pure (JSemLimit h) Just ParMakeNumProcessors -> num_procs <$> getNumProcessors Just (ParMakeThisMany n) -> pure $ num_procs n where @@ -65,8 +73,8 @@ isWorkerLimitSequential (JSemLimit {}) = False data WorkerLimit = NumProcessorsLimit Int | JSemLimit - SemaphoreName - -- ^ Semaphore name to use + SemaphoreIdentifier + -- ^ Semaphore identifier from @-jsem@ deriving Eq -- | Environment used when compiling a module @@ -122,17 +130,24 @@ runNjobsAbstractSem n_jobs action = do resetNumCapabilities = set_num_caps n_capabilities MC.bracket_ updNumCapabilities resetNumCapabilities $ action asem -runWorkerLimit :: WorkerLimit -> (AbstractSem -> IO a) -> IO a +runWorkerLimit :: Logger -> DynFlags -> WorkerLimit -> (AbstractSem -> IO a) -> IO a #if defined(wasm32_HOST_ARCH) -runWorkerLimit _ action = do +runWorkerLimit _logger _dflags _ action = do lock <- newMVar () action $ AbstractSem (takeMVar lock) (putMVar lock ()) #else -runWorkerLimit worker_limit action = case worker_limit of +runWorkerLimit logger dflags worker_limit action = case worker_limit of NumProcessorsLimit n_jobs -> runNjobsAbstractSem n_jobs action - JSemLimit sem -> - runJSemAbstractSem sem action + JSemLimit sem_ident -> do + result <- MC.try @_ @SemaphoreError $ runJSemAbstractSem sem_ident action + case result of + Right a -> return a + Left err -> do + let diag = DriverSemaphoreOpenFailure (checkBuildingCabalPackage dflags) err + msg = singleMessage $ mkPlainMsgEnvelope (initDiagOpts dflags) noSrcSpan diag + printOrThrowDiagnostics logger (initPrintConfig dflags) (initDiagOpts dflags) (GhcDriverMessage <$> msg) + runNjobsAbstractSem 1 action #endif -- | Build and run a pipeline @@ -159,7 +174,7 @@ runParPipelines worker_limit plugin_hsc_env diag_wrapper mHscMessager all_pipeli thread_safe_logger <- liftIO $ makeThreadSafe (hsc_logger plugin_hsc_env) let thread_safe_hsc_env = plugin_hsc_env { hsc_logger = thread_safe_logger } - runWorkerLimit worker_limit $ \abstract_sem -> do + runWorkerLimit (hsc_logger plugin_hsc_env) (hsc_dflags plugin_hsc_env) worker_limit $ \abstract_sem -> do let env = MakeEnv { hsc_env = thread_safe_hsc_env , withLogger = withParLog log_queue_queue_var , compile_sem = abstract_sem ===================================== compiler/GHC/Driver/MakeSem.hs ===================================== @@ -9,9 +9,6 @@ module GHC.Driver.MakeSem -- by a system semaphore (Posix/Windows) runJSemAbstractSem - -- * System semaphores - , Semaphore, SemaphoreName(..) - -- * Abstract semaphores , AbstractSem(..) , withAbstractSem @@ -46,11 +43,14 @@ import Debug.Trace -- available from the semaphore. data Jobserver = Jobserver - { jSemaphore :: !Semaphore + { jSemaphore :: !ClientSemaphore -- ^ The semaphore which controls available resources , jobs :: !(TVar JobResources) -- ^ The currently pending jobs, and the resources -- obtained from the semaphore + , activeChild :: !(TVar (Maybe (ThreadId, TMVar (Maybe MC.SomeException)))) + -- ^ Handle on the current acquire thread (if any). The loop's exit + -- handler reads this to drain a still-running child on shutdown. } data JobserverOptions @@ -81,6 +81,9 @@ data JobResources , jobsWaiting :: !(OrdList (TMVar ())) -- ^ Pending jobs waiting on a token, the job will be blocked on the TMVar so putting into -- the TMVar will allow the job to continue. + , heldTokens :: [SemaphoreToken] + -- ^ Actual semaphore tokens (for release/cleanup). + -- Length should equal tokensOwned - 1 (the implicit token has no SemaphoreToken). } instance Outputable JobResources where @@ -93,9 +96,9 @@ instance Outputable JobResources where ] ) -- | Add one new token. -addToken :: JobResources -> JobResources -addToken jobs@( Jobs { tokensOwned = owned, tokensFree = free }) - = jobs { tokensOwned = owned + 1, tokensFree = free + 1 } +addToken :: SemaphoreToken -> JobResources -> JobResources +addToken tok jobs@( Jobs { tokensOwned = owned, tokensFree = free, heldTokens = toks }) + = jobs { tokensOwned = owned + 1, tokensFree = free + 1, heldTokens = tok : toks } -- | Free one token. addFreeToken :: JobResources -> JobResources @@ -111,12 +114,14 @@ removeFreeToken jobs@( Jobs { tokensFree = free }) (text "removeFreeToken:" <+> ppr free) $ jobs { tokensFree = free - 1 } --- | Return one owned token. -removeOwnedToken :: JobResources -> JobResources -removeOwnedToken jobs@( Jobs { tokensOwned = owned }) +-- | Return one owned token, extracting the 'SemaphoreToken' for release. +removeOwnedToken :: JobResources -> (SemaphoreToken, JobResources) +removeOwnedToken jobs@( Jobs { tokensOwned = owned, heldTokens = toks }) = assertPpr (owned > 1) (text "removeOwnedToken:" <+> ppr owned) - $ jobs { tokensOwned = owned - 1 } + $ case toks of + (t:rest) -> (t, jobs { tokensOwned = owned - 1, heldTokens = rest }) + [] -> panic "removeOwnedToken: no held tokens" -- | Add one new job to the end of the list of pending jobs. addJob :: TMVar () -> JobResources -> JobResources @@ -143,7 +148,7 @@ data JobserverAction = Idle -- | A thread is waiting for a token on the semaphore. | Acquiring - { activeWaitId :: WaitId + { activeThreadId :: ThreadId , threadFinished :: TMVar (Maybe MC.SomeException) } -- | Retrieve the 'TMVar' that signals if the current thread has finished, @@ -189,17 +194,30 @@ releaseJob jobs_tvar = do return ((), addFreeToken jobs) --- | Release all tokens owned from the semaphore (to clean up --- the jobserver at the end). -cleanupJobserver :: Jobserver -> IO () -cleanupJobserver (Jobserver { jSemaphore = sem - , jobs = jobs_tvar }) - = do - Jobs { tokensOwned = owned } <- readTVarIO jobs_tvar - let toks_to_release = owned - 1 - -- Subtract off the implicit token: whoever spawned the ghc process - -- in the first place is responsible for that token. - releaseSemaphore sem toks_to_release +-- | Kill the current acquire thread, if any, and wait for it to exit. +-- +-- Called from the jobserver loop's exit handler, which runs masked. +-- Relies on the invariant from 'acquireThread' that a forked child +-- always fills its 'threadFinished' TMVar before it dies; this is what +-- lets the 'takeTMVar' below terminate after the 'killThread'. +drainActiveChild :: Jobserver -> IO () +drainActiveChild (Jobserver { activeChild = active_tvar }) = do + mb <- readTVarIO active_tvar + for_ mb $ \(tid, tmv) -> do + killThread tid + void $ atomically (takeTMVar tmv) + atomically $ writeTVar active_tvar Nothing + +-- | Release every token currently in 'heldTokens'. +-- +-- Called from the jobserver loop's exit handler, which runs masked, +-- after 'drainActiveChild': no other thread is mutating 'JobResources' +-- at this point. +releaseAllHeld :: Jobserver -> IO () +releaseAllHeld (Jobserver { jobs = jobs_tvar }) = do + Jobs { heldTokens = toks } <- readTVarIO jobs_tvar + forM_ toks $ \t -> + void $ MC.try @_ @MC.SomeException (releaseSemaphoreToken t) -- | Dispatch the available tokens acquired from the semaphore -- to the pending jobs in the job server. @@ -252,7 +270,7 @@ tracedAtomically origin act = do return a renderJobResources :: String -> JobResources -> String -renderJobResources origin (Jobs own free pending) = showSDocUnsafe $ renderJSON $ +renderJobResources origin (Jobs own free pending _heldToks) = showSDocUnsafe $ renderJSON $ JSObject [ ("name", JSString origin) , ("owned", JSInt own) , ("free", JSInt free) @@ -262,61 +280,68 @@ renderJobResources origin (Jobs own free pending) = showSDocUnsafe $ renderJSON -- | Spawn a new thread that waits on the semaphore in order to acquire -- an additional token. +-- +-- The child is forked masked so the only async-exception delivery point +-- is the interruptible 'waitOnSemaphore'; the STM commit afterwards then +-- always runs to completion, so 'threadFinished' is always filled. +-- +-- The (tid, threadFinished) pair is also published to 'activeChild' so +-- shutdown can drain the child even after the in-loop 'JobserverState' +-- is gone. acquireThread :: Jobserver -> IO JobserverAction -acquireThread (Jobserver { jSemaphore = sem, jobs = jobs_tvar }) = do +acquireThread (Jobserver { jSemaphore = sem, jobs = jobs_tvar, activeChild = active_tvar }) = do threadFinished_tmvar <- newEmptyTMVarIO - let - wait_result_action :: Either MC.SomeException Bool -> IO () - wait_result_action wait_res = + tid <- MC.mask_ $ do + tid <- forkIO $ do + wait_res <- MC.try @_ @MC.SomeException $ waitOnSemaphore sem tracedAtomically_ "acquire_thread" do (r, jb) <- case wait_res of Left (e :: MC.SomeException) -> do return $ (Just e, Nothing) - Right success -> do - if success - then do - modifyJobResources jobs_tvar \ jobs -> - return (Nothing, addToken jobs) - else - return (Nothing, Nothing) + Right tok -> do + modifyJobResources jobs_tvar \ jobs -> + return (Nothing, addToken tok jobs) putTMVar threadFinished_tmvar r return jb - wait_id <- forkWaitOnSemaphoreInterruptible sem wait_result_action - labelThread (waitingThreadId wait_id) "acquire_thread" - return $ Acquiring { activeWaitId = wait_id + atomically $ writeTVar active_tvar (Just (tid, threadFinished_tmvar)) + return tid + labelThread tid "acquire_thread" + return $ Acquiring { activeThreadId = tid , threadFinished = threadFinished_tmvar } -- | Spawn a thread to release ownership of one resource from the semaphore, -- provided we have spare resources and no pending jobs. releaseThread :: Jobserver -> IO JobserverAction -releaseThread (Jobserver { jSemaphore = sem, jobs = jobs_tvar }) = do +releaseThread (Jobserver { jobs = jobs_tvar }) = do threadFinished_tmvar <- newEmptyTMVarIO MC.mask_ do -- Pre-release the resource so that another thread doesn't take control of it -- just as we release the lock on the semaphore. - still_ok_to_release + mb_tok <- tracedAtomically "pre_release" $ modifyJobResources jobs_tvar \ jobs -> if guardRelease jobs - -- TODO: should this also debounce? - then return (True , removeOwnedToken $ removeFreeToken jobs) - else return (False, jobs) - if not still_ok_to_release - then return Idle - else do - tid <- forkIO $ do - x <- MC.try $ releaseSemaphore sem 1 - tracedAtomically_ "post-release" $ do - (r, jobs) <- case x of - Left (e :: MC.SomeException) -> do - modifyJobResources jobs_tvar \ jobs -> - return (Just e, addToken jobs) - Right _ -> do - return (Nothing, Nothing) - putTMVar threadFinished_tmvar r - return jobs - labelThread tid "release_thread" - return Idle + then let (tok, jobs') = removeOwnedToken $ removeFreeToken jobs + in return (Just tok, jobs') + else return (Nothing, jobs) + case mb_tok of + Nothing -> + -- Not OK to release: there are other pending jobs that could make use of the token. + return Idle + Just tok -> do + tid <- forkIO $ do + x <- MC.try @_ @MC.SomeException $ releaseSemaphoreToken tok + tracedAtomically_ "post-release" $ do + (r, jobs) <- case x of + Left (e :: MC.SomeException) -> do + modifyJobResources jobs_tvar \ jobs -> + return (Just e, addToken tok jobs) + Right _ -> do + return (Nothing, Nothing) + putTMVar threadFinished_tmvar r + return jobs + labelThread tid "release_thread" + return Idle -- | When there are pending jobs but no free tokens, -- spawn a thread to acquire a new token from the semaphore. @@ -363,13 +388,14 @@ tryRelease _ _ = retry -- | Wait for an active thread to finish. Once it finishes: -- -- - set the 'JobserverAction' to 'Idle', +-- - clear the 'activeChild' handle, -- - update the number of capabilities to reflect the number -- of owned tokens from the semaphore. tryNoticeIdle :: JobserverOptions - -> TVar JobResources + -> Jobserver -> JobserverState -> STM (IO JobserverState) -tryNoticeIdle opts jobs_tvar jobserver_state +tryNoticeIdle opts (Jobserver { jobs = jobs_tvar, activeChild = active_tvar }) jobserver_state | Just threadFinished_tmvar <- activeThread_maybe $ jobserverAction jobserver_state = sync_num_caps (canChangeNumCaps jobserver_state) threadFinished_tmvar | otherwise @@ -381,6 +407,7 @@ tryNoticeIdle opts jobs_tvar jobserver_state sync_num_caps can_change_numcaps_tvar threadFinished_tmvar = do mb_ex <- takeTMVar threadFinished_tmvar for_ mb_ex MC.throwM + writeTVar active_tvar Nothing Jobs { tokensOwned } <- readTVar jobs_tvar can_change_numcaps <- readTVar can_change_numcaps_tvar guard can_change_numcaps @@ -404,11 +431,11 @@ tryStopThread :: TVar JobResources -> STM (IO JobserverState) tryStopThread jobs_tvar jsj = do case jobserverAction jsj of - Acquiring { activeWaitId = wait_id } -> do + Acquiring { activeThreadId = tid } -> do jobs <- readTVar jobs_tvar guard $ null (jobsWaiting jobs) return do - interruptWaitOnSemaphore wait_id + killThread tid return $ jsj { jobserverAction = Idle } _ -> retry @@ -430,30 +457,38 @@ jobserverLoop opts sjs@(Jobserver { jobs = jobs_tvar }) action <- atomically $ asum $ (\x -> x s) <$> [ tryRelease sjs , tryAcquire opts sjs - , tryNoticeIdle opts jobs_tvar + , tryNoticeIdle opts sjs , tryStopThread jobs_tvar ] s <- action loop s --- | Create a new jobserver using the given semaphore handle. -makeJobserver :: SemaphoreName -> IO (AbstractSem, IO ()) -makeJobserver sem_name = do - semaphore <- openSemaphore sem_name +-- | Create a new jobserver using the given semaphore identifier. +makeJobserver :: SemaphoreIdentifier -> IO (AbstractSem, IO ()) +makeJobserver sem_ident = do + semaphore <- openSemaphore sem_ident >>= either MC.throwM pure let init_jobs = Jobs { tokensOwned = 1 , tokensFree = 1 , jobsWaiting = NilOL + , heldTokens = [] } jobs_tvar <- newTVarIO init_jobs + active_tvar <- newTVarIO Nothing let opts = defaultJobserverOptions -- TODO: allow this to be configured - sjs = Jobserver { jSemaphore = semaphore - , jobs = jobs_tvar } + sjs = Jobserver { jSemaphore = semaphore + , jobs = jobs_tvar + , activeChild = active_tvar } loop_finished_mvar <- newEmptyMVar loop_tid <- forkIOWithUnmask \ unmask -> do r <- try $ unmask $ jobserverLoop opts sjs + -- Always-run exit handler: any child the loop spawned is still alive + -- in its own thread, so drain it before touching jobs_tvar. No one + -- else can mutate the resources once both are dead. + drainActiveChild sjs + releaseAllHeld sjs putMVar loop_finished_mvar $ case r of Left e @@ -467,8 +502,8 @@ makeJobserver sem_name = do acquireSem = acquireJob jobs_tvar releaseSem = releaseJob jobs_tvar cleanupSem = do - -- this is interruptible - cleanupJobserver sjs + -- Trigger the loop's exit handler; it drains the active child and + -- releases all held tokens, then signals loop_finished_mvar. killThread loop_tid mb_ex <- takeMVar loop_finished_mvar for_ mb_ex MC.throwM @@ -477,12 +512,12 @@ makeJobserver sem_name = do -- | Implement an abstract semaphore using a semaphore 'Jobserver' -- which queries the system semaphore of the given name for resources. -runJSemAbstractSem :: SemaphoreName -- ^ the system semaphore to use +runJSemAbstractSem :: SemaphoreIdentifier -- ^ the semaphore identifier (from @-jsem@) -> (AbstractSem -> IO a) -- ^ the operation to run -- which requires a semaphore -> IO a -runJSemAbstractSem sem action = MC.mask \ unmask -> do - (abs, cleanup) <- makeJobserver sem +runJSemAbstractSem sem_ident action = MC.mask \ unmask -> do + (abs, cleanup) <- makeJobserver sem_ident r <- try $ unmask $ action abs case r of Left (e1 :: MC.SomeException) -> do @@ -517,8 +552,13 @@ increases the number of `free` jobs. If there are more pending jobs when the fre is increased, the token is immediately reused (see `modifyJobResources`). The `jobServerLoop` interacts with the system semaphore: when there are pending -jobs, `acquireThread` blocks, waiting for a token from the semaphore. Once a -token is obtained, it increases the owned count. +jobs, `acquireThread` forks a child that calls the interruptible +`waitOnSemaphore`. The child is forked in the masked state, so the only place +an async exception can be delivered is the wait itself; once the wait returns, +the child's STM commit always completes, recording either the new token in +`heldTokens` or the failure exception in `threadFinished`. The (tid, tmvar) +pair is also published in `activeChild` so the loop's exit handler can drain +the child on shutdown even after the in-loop `JobserverState` is gone. When GHC has free tokens (tokens from the semaphore that it is not using), no pending jobs, and the debounce has expired, then `releaseThread` will @@ -531,6 +571,12 @@ This second token is no longer needed, so we should cancel the wait (as it would not be used to do any work, and not be returned until the debounce). We only need to kill `acquireJob`, because `releaseJob` never blocks. +Shutdown starts with `killThread loop_tid`. The loop's exit handler then +runs `drainActiveChild` followed by `releaseAllHeld`; only then does the +loop signal `loop_finished_mvar`. This sequence makes the heldTokens +snapshot consistent because no other thread can mutate it once the loop and +its child are both dead. + Note [Eventlog Messages for jsem] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It can be tricky to verify that the work is shared adequately across different ===================================== compiler/GHC/Driver/Session.hs ===================================== @@ -2445,6 +2445,7 @@ wWarningFlagsDeps = [minBound..maxBound] >>= \x -> case x of Opt_WarnUnusableUnpackPragmas -> warnSpec x Opt_WarnPatternNamespaceSpecifier -> warnSpec x Opt_WarnUnrecognisedModifiers -> warnSpec x + Opt_WarnSemaphoreOpenFailure -> warnSpec x warningGroupsDeps :: [(Deprecation, FlagSpec WarningGroup)] warningGroupsDeps = map mk warningGroups ===================================== compiler/GHC/Types/Error/Codes.hs ===================================== @@ -403,6 +403,7 @@ type family GhcDiagnosticCode c = n | n -> c where GhcDiagnosticCode "DriverInstantiationNodeInDependencyGeneration" = 74284 GhcDiagnosticCode "DriverNoConfiguredLLVMToolchain" = 66599 GhcDiagnosticCode "DriverMissingLinkableForModule" = 47338 + GhcDiagnosticCode "DriverSemaphoreOpenFailure" = 19877 -- Constraint solver diagnostic codes GhcDiagnosticCode "BadTelescope" = 97739 ===================================== compiler/GHC/Types/Hint.hs ===================================== @@ -11,6 +11,7 @@ module GHC.Types.Hint ( , StarIsType(..) , UntickedPromotedThing(..) , AssumedDerivingStrategy(..) + , SemaphoreUpgradeTarget(..) , SigLike(..) , pprUntickedConstructor, isBareSymbol , suggestExtension @@ -538,6 +539,28 @@ data GhcHint {-| Suggest adding signature to modifier -} | SuggestModifierSignature (HsModifier GhcRn) Name + {-| Suggest upgrading either the @-jsem@ jobserver or GHC itself to + support the given semaphore protocol version. + + Triggered by 'GHC.Driver.Errors.Types.DriverSemaphoreOpenFailure' + carrying a 'System.Semaphore.SemaphoreIncompatibleVersion'. + -} + | SuggestUpgradeForSemaphoreVersionMismatch !SemaphoreUpgradeTarget !Int + -- ^ The 'Int' is the required protocol version. + +-- | What the user should upgrade to resolve an @-jsem@ semaphore +-- protocol version mismatch. +data SemaphoreUpgradeTarget + = UpgradeCabalInstall + -- ^ Jobserver is @cabal-install@ (we are building a Cabal package) + -- and speaks an older protocol than GHC. + | UpgradeJobserver + -- ^ Jobserver (not @cabal-install@) speaks an older protocol than + -- GHC. + | UpgradeGHC + -- ^ Jobserver speaks a newer protocol than GHC. + deriving (Eq, Show) + -- | The deriving strategy that was assumed when not explicitly listed in the -- source. This is used solely by the missing-deriving-strategies warning. -- There's no `Via` case because we never assume that. ===================================== compiler/GHC/Types/Hint/Ppr.hs ===================================== @@ -306,6 +306,20 @@ instance Outputable GhcHint where (text "Perhaps it should have a kind signature, like") 2 (hsep [text "%(" <> ppr ty, text "::", ppr name <> text ")"]) + SuggestUpgradeForSemaphoreVersionMismatch target required + -> case target of + UpgradeCabalInstall -> + text "The cabal-install jobserver uses an older semaphore protocol." + $$ (text "Upgrade cabal-install to a version that supports semaphore protocol v" + <> int required <> text " to resolve this.") + UpgradeJobserver -> + text "The jobserver uses an older semaphore protocol." + $$ (text "Upgrade it to a version that supports semaphore protocol v" + <> int required <> text " to resolve this.") + UpgradeGHC -> + text "The jobserver uses a newer semaphore protocol than this GHC." + $$ (text "Upgrade GHC to a version that supports semaphore protocol v" + <> int required <> text " to resolve this.") perhapsAsPat :: SDoc perhapsAsPat = text "Perhaps you meant an as-pattern, which must not be surrounded by whitespace" ===================================== docs/users_guide/using-warnings.rst ===================================== @@ -2721,6 +2721,23 @@ of ``-W(no-)*``. f :: a %True -> a g :: a %(k :: Int) -> a +.. ghc-flag:: -Wsemaphore-open-failure + :shortdesc: warn when GHC cannot open the ``-jsem`` semaphore. + :type: dynamic + :reverse: -Wno-semaphore-open-failure + :category: + + :since: 10.0.1 + + Warn when GHC is invoked with :ghc-flag:`-jsem` but the semaphore + cannot be opened (e.g. the socket does not exist, the protocol + version is incompatible, or a system error occurred). When this + occurs, GHC ignores ``-jsem`` and compiles modules sequentially. + + A common cause is ``cabal-install`` and GHC being built against + different versions of the ``semaphore-compat`` library; upgrading + both to versions that use the same protocol resolves the mismatch. + ---- If you're feeling really paranoid, the :ghc-flag:`-dcore-lint` option is a good choice. ===================================== docs/users_guide/using.rst ===================================== @@ -797,7 +797,12 @@ There are two kinds of participants in the GHC Jobserver protocol: Perform compilation in parallel when possible, coordinating with other processes through the semaphore ⟨sem⟩ (specified as a string). - Error if the semaphore doesn't exist. + + If the semaphore cannot be opened (e.g. the socket does not exist + or its protocol version is incompatible with this GHC), GHC emits + a :ghc-flag:`-Wsemaphore-open-failure` warning and compiles + sequentially, using only the implicit token inherited from the + parent process. Use of ``-jsem`` will override use of :ghc-flag:`-j[⟨n⟩]`, and vice-versa. ===================================== hadrian/cabal.project ===================================== @@ -1,6 +1,7 @@ packages: ./ ../utils/ghc-toolchain/ ../libraries/ghc-platform/ + ../libraries/semaphore-compat/ -- This essentially freezes the build plan for hadrian -- It would be wise to keep this up to date with the state set in .gitlab/ci.sh. ===================================== hadrian/hadrian.cabal ===================================== @@ -172,6 +172,7 @@ executable hadrian , base16-bytestring >= 0.1.1 && < 1.1.0.0 , ghc-platform , ghc-toolchain + , semaphore-compat ghc-options: -Wall -Wincomplete-record-updates -Wredundant-constraints ===================================== hadrian/src/Flavour.hs ===================================== @@ -149,10 +149,6 @@ werror = -- unix has many unused imports , package unix ? mconcat [arg "-Wwarn=unused-imports", arg "-Wwarn=unused-top-binds"] - -- semaphore-compat relies on sem_getvalue as provided by unix, which is - -- not implemented on Darwin and therefore throws a deprecation warning - , package semaphoreCompat - ? mconcat [arg "-Wwarn=deprecations"] ] , builder Ghc ? package rts ===================================== hadrian/src/Rules/Generate.hs ===================================== @@ -25,6 +25,7 @@ import Utilities import GHC.Toolchain as Toolchain hiding (HsCpp(HsCpp)) import GHC.Platform.ArchOS import Settings.Program (ghcWithInterpreter) +import System.Semaphore (semaphoreVersion, getSemaphoreProtocolVersion) -- | Track this file to rebuild generated files whenever it changes. trackGenerateHs :: Expr () @@ -488,6 +489,7 @@ generateSettings settingsFile = do , ("RTS ways", escapeArgs . map show . Set.toList <$> getRtsWays) , ("Relative Global Package DB", pure rel_pkg_db) , ("base unit-id", pure base_unit_id) + , ("Semaphore version", pure (show (getSemaphoreProtocolVersion semaphoreVersion))) ] let showTuple (k, v) = "(" ++ show k ++ ", " ++ show v ++ ")" pure $ case settings of ===================================== hadrian/src/Settings/Packages.hs ===================================== @@ -231,6 +231,10 @@ packageArgs = do , package hpcBin ? builder (Cabal Flags) ? arg "-build-tool-depends" + ------------------------------ semaphore-compat ---------------------------- + , package semaphoreCompat + ? builder (Cabal Flags) ? arg "-build-testing" + ] ghcInternalArgs :: Args ===================================== hadrian/stack.yaml ===================================== @@ -16,6 +16,7 @@ packages: - '.' - '../utils/ghc-toolchain' - '../libraries/ghc-platform' +- '../libraries/semaphore-compat' nix: enable: false ===================================== libraries/semaphore-compat ===================================== @@ -1 +1 @@ -Subproject commit 7929702401d49bc64d809c501ed5fe80aebc3cc1 +Subproject commit 83b8935203e8c57b189c8a2a19c4173d6a93ea2b ===================================== testsuite/tests/diagnostic-codes/codes.stdout ===================================== @@ -21,6 +21,7 @@ [GHC-29747] is untested (constructor = DriverMissingSafeHaskellMode) [GHC-74284] is untested (constructor = DriverInstantiationNodeInDependencyGeneration) [GHC-66599] is untested (constructor = DriverNoConfiguredLLVMToolchain) +[GHC-19877] is untested (constructor = DriverSemaphoreOpenFailure) [GHC-81325] is untested (constructor = ExpectingMoreArguments) [GHC-78125] is untested (constructor = AmbiguityPreventsSolvingCt) [GHC-84170] is untested (constructor = TcRnModMissingRealSrcSpan) View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/a70aac8e5a1e3621a1efc9008f72b6dd... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/a70aac8e5a1e3621a1efc9008f72b6dd... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Zubin (@wz1000)