12 Jun '26
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
8965cb76 by Marc Scholten at 2026-06-12T04:53:22-04:00
haddock: render modules concurrently
- - - - -
6 changed files:
- utils/haddock/haddock-api/haddock-api.cabal
- utils/haddock/haddock-api/src/Haddock.hs
- utils/haddock/haddock-api/src/Haddock/Backends/Hyperlinker.hs
- utils/haddock/haddock-api/src/Haddock/Backends/Xhtml.hs
- utils/haddock/haddock-api/src/Haddock/Options.hs
- utils/haddock/haddock-api/src/Haddock/Utils.hs
Changes:
=====================================
utils/haddock/haddock-api/haddock-api.cabal
=====================================
@@ -97,6 +97,7 @@ library
, filepath
, ghc-boot
, mtl
+ , semaphore-compat
, transformers
, text
=====================================
utils/haddock/haddock-api/src/Haddock.hs
=====================================
@@ -29,6 +29,7 @@ module Haddock (
withGhc
) where
+import Control.Concurrent.MVar (modifyMVar, modifyMVar_, newMVar)
import Control.DeepSeq (force)
import Control.Monad hiding (forM_)
import Control.Monad.IO.Class (MonadIO(..))
@@ -41,6 +42,7 @@ import Data.Maybe
import Data.IORef
import Data.Map.Strict (Map)
import Data.Version (makeVersion)
+import GHC.Conc (getNumProcessors)
import GHC.Parser.Lexer (ParserOpts)
import qualified GHC.Driver.Config.Parser as Parser
import qualified Data.Map.Strict as Map
@@ -84,11 +86,55 @@ import Haddock.Options
import Haddock.Utils
import Haddock.GhcUtils (modifySessionDynFlags, setOutputDir)
import Haddock.Compat (getProcessID)
+import System.Semaphore (AbstractSem(..), openSemaphore, releaseSemaphoreToken, waitOnSemaphore)
--------------------------------------------------------------------------------
-- * Exception handling
--------------------------------------------------------------------------------
+concSemChoiceFromFlags :: [Flag] -> Maybe (Either FilePath (Maybe Int))
+concSemChoiceFromFlags =
+ List.foldl' step Nothing
+ where
+ step _ (Flag_ParCount n) = Just (Right n)
+ step _ (Flag_ParSemaphore sem) = Just (Left sem)
+ step acc _ = acc
+
+-- | Build the render concurrency semaphore selected by Haddock's parallelism flags.
+-- Without an explicit flag, render sequentially; @-j@ uses the host processor
+-- count, @-jN@ uses a local bounded semaphore, and @-jsem@ joins the external
+-- semaphore used for GHC jobserver coordination.
+concSemFromChoice :: Maybe (Either FilePath (Maybe Int)) -> IO AbstractSem
+concSemFromChoice choice =
+ case choice of
+ Nothing -> newBoundedSem 1
+ Just (Right Nothing) -> newBoundedSem =<< getNumProcessors
+ Just (Right (Just n)) -> newBoundedSem n
+ Just (Left semName) -> do
+ openSemaphore semName >>= \case
+ Left err -> throwIO err
+ Right sem -> do
+ tokens <- newMVar []
+ pure
+ AbstractSem
+ { acquireSem = mask $ \restore -> do
+ token <- restore (waitOnSemaphore sem)
+ modifyMVar_ tokens $ \held -> pure (token : held)
+ , releaseSem = mask_ $ do
+ token <- modifyMVar tokens $ \case
+ [] -> pure ([], Nothing)
+ heldToken : heldTokens -> pure (heldTokens, Just heldToken)
+ forM_ token releaseSemaphoreToken
+ }
+
+injectParFlags :: Maybe (Either FilePath (Maybe Int)) -> [Flag] -> [Flag]
+injectParFlags choice flags =
+ case choice of
+ Nothing -> flags
+ Just (Right Nothing) -> Flag_OptGhc "-j" : flags
+ Just (Right (Just n)) -> Flag_OptGhc ("-j" ++ show n) : flags
+ Just (Left sem) -> Flag_OptGhc "-jsem" : Flag_OptGhc sem : flags
+
handleTopExceptions :: IO a -> IO a
handleTopExceptions =
@@ -177,11 +223,12 @@ haddockWithGhc ghc args = handleTopExceptions $ do
Just "YES" | not noCompilation -> return $ Flag_OptGhc "-dynamic-too" : flags
_ -> return flags
- -- Inject `-j` into ghc options, if given to Haddock
- flags' <- pure $ case optParCount flags'' of
- Nothing -> flags''
- Just Nothing -> Flag_OptGhc "-j" : flags''
- Just (Just n) -> Flag_OptGhc ("-j" ++ show n) : flags''
+ let parChoice = concSemChoiceFromFlags flags''
+
+ -- Inject parallelism flags into ghc options, if given to Haddock
+ flags' <- pure $ injectParFlags parChoice flags''
+
+ concSem <- concSemFromChoice parChoice
-- Whether or not to bypass the interface version check
let noChecks = Flag_BypassInterfaceVersonCheck `elem` flags
@@ -238,7 +285,7 @@ haddockWithGhc ghc args = handleTopExceptions $ do
}
-- Render the interfaces.
- liftIO $ renderStep dflags parserOpts logger unit_state flags sinceQual qual packages ifaces
+ liftIO $ renderStep dflags parserOpts logger unit_state flags sinceQual qual concSem packages ifaces
-- If we were not given any input files, error if documentation was
-- requested
@@ -251,7 +298,7 @@ haddockWithGhc ghc args = handleTopExceptions $ do
packages <- liftIO $ readInterfaceFiles name_cache (readIfaceArgs flags) noChecks
-- Render even though there are no input files (usually contents/index).
- liftIO $ renderStep dflags parserOpts logger unit_state flags sinceQual qual packages []
+ liftIO $ renderStep dflags parserOpts logger unit_state flags sinceQual qual concSem packages []
-- | Run the GHC action using a temporary output directory
withTempOutputDir :: Ghc a -> Ghc a
@@ -311,10 +358,11 @@ renderStep
-> [Flag]
-> SinceQual
-> QualOption
+ -> AbstractSem
-> [(DocPaths, Visibility, FilePath, InterfaceFile)]
-> [Interface]
-> IO ()
-renderStep dflags parserOpts logger unit_state flags sinceQual nameQual pkgs interfaces = do
+renderStep dflags parserOpts logger unit_state flags sinceQual nameQual concSem pkgs interfaces = do
updateHTMLXRefs (map (\(docPath, _ifaceFilePath, _showModules, ifaceFile) ->
( case baseUrl flags of
Nothing -> docPathsHtml docPath
@@ -330,7 +378,7 @@ renderStep dflags parserOpts logger unit_state flags sinceQual nameQual pkgs int
(DocPaths {docPathsSources=Just path}, _, _, ifile) <- pkgs
iface <- ifInstalledIfaces ifile
return (instMod iface, path)
- render dflags parserOpts logger unit_state flags sinceQual nameQual interfaces installedIfaces extSrcMap
+ render dflags parserOpts logger unit_state flags sinceQual nameQual concSem interfaces installedIfaces extSrcMap
where
-- get package name from unit-id
packageName :: Unit -> String
@@ -348,11 +396,12 @@ render
-> [Flag]
-> SinceQual
-> QualOption
+ -> AbstractSem
-> [Interface]
-> [(FilePath, PackageInterfaces)]
-> Map Module FilePath
-> IO ()
-render dflags parserOpts logger unit_state flags sinceQual qual ifaces packages extSrcMap = do
+render dflags parserOpts logger unit_state flags sinceQual qual concSem ifaces packages extSrcMap = do
let
packageInfo = PackageInfo { piPackageName = fromMaybe (PackageName mempty)
$ optPackageName flags
@@ -516,7 +565,7 @@ render dflags parserOpts logger unit_state flags sinceQual qual ifaces packages
prologue
themes opt_mathjax sourceUrls' opt_wiki_urls opt_base_url
opt_contents_url opt_index_url unicode sincePkg packageInfo
- qual pretty withQuickjump
+ qual pretty concSem withQuickjump
return ()
unless (withBaseURL || isJust (optOneShot flags)) $ do
copyHtmlBits odir libDir themes withQuickjump
@@ -555,7 +604,7 @@ render dflags parserOpts logger unit_state flags sinceQual qual ifaces packages
when (Flag_HyperlinkedSource `elem` flags && not (null ifaces)) $ do
withTiming logger "ppHyperlinkedSource" (const ()) $ do
_ <- {-# SCC ppHyperlinkedSource #-}
- ppHyperlinkedSource (verbosity flags) (isJust (optOneShot flags)) odir libDir opt_source_css pretty srcMap ifaces
+ ppHyperlinkedSource (verbosity flags) (isJust (optOneShot flags)) odir libDir opt_source_css pretty concSem srcMap ifaces
return ()
@@ -842,4 +891,3 @@ getPrologue parserOpts flags =
rightOrThrowE :: Either String b -> IO b
rightOrThrowE (Left msg) = throwE msg
rightOrThrowE (Right x) = pure x
-
=====================================
utils/haddock/haddock-api/src/Haddock/Backends/Hyperlinker.hs
=====================================
@@ -31,7 +31,8 @@ import Haddock.Backends.Hyperlinker.Utils
import Haddock.Backends.Xhtml.Utils (renderToBuilder)
import Haddock.InterfaceFile
import Haddock.Types
-import Haddock.Utils (Verbosity, out, verbose)
+import Haddock.Utils (Verbosity, out, verbose, mapConcurrentlyWith_)
+import System.Semaphore (AbstractSem)
import qualified Data.ByteString.Builder as Builder
-- | Generate hyperlinked source for given interfaces.
@@ -51,19 +52,21 @@ ppHyperlinkedSource
-- ^ Custom CSS file path
-> Bool
-- ^ Flag indicating whether to pretty-print HTML
+ -> AbstractSem
+ -- ^ Concurrency semaphore for module renders
-> M.Map Module SrcPath
-- ^ Paths to sources
-> [Interface]
-- ^ Interfaces for which we create source
-> IO ()
-ppHyperlinkedSource verbosity isOneShot outdir libdir mstyle pretty srcs' ifaces = do
+ppHyperlinkedSource verbosity isOneShot outdir libdir mstyle pretty concSem srcs' ifaces = do
createDirectoryIfMissing True srcdir
unless isOneShot $ do
let cssFile = fromMaybe (defaultCssFile libdir) mstyle
copyFile cssFile $ srcdir </> srcCssFile
copyFile (libdir </> "html" </> highlightScript) $
srcdir </> highlightScript
- mapM_ (ppHyperlinkedModuleSource verbosity srcdir pretty srcs) ifaces
+ mapConcurrentlyWith_ concSem (ppHyperlinkedModuleSource verbosity srcdir pretty srcs) ifaces
where
srcdir = outdir </> hypSrcDir
srcs = (srcs', M.mapKeys moduleName srcs')
=====================================
utils/haddock/haddock-api/src/Haddock/Backends/Xhtml.hs
=====================================
@@ -69,6 +69,7 @@ import Haddock.ModuleTree
import Haddock.Options (Visibility (..))
import Haddock.Types
import Haddock.Utils
+import System.Semaphore (AbstractSem)
import Haddock.Utils.Json
import Haddock.Version
@@ -115,6 +116,8 @@ ppHtml
-- ^ How to qualify names
-> Bool
-- ^ Output pretty html (newlines and indenting)
+ -> AbstractSem
+ -- ^ Concurrency semaphore for module renders
-> Bool
-- ^ Also write Quickjump index
-> IO ()
@@ -138,6 +141,7 @@ ppHtml
packageInfo
qual
debug
+ concSem
withQuickjump = do
let
visible_ifaces = filter visible ifaces
@@ -192,7 +196,7 @@ ppHtml
visible_ifaces
[]
- mapM_
+ mapConcurrentlyWith_ concSem
( ppHtmlModule
odir
doctitle
=====================================
utils/haddock/haddock-api/src/Haddock/Options.hs
=====================================
@@ -29,6 +29,7 @@ module Haddock.Options
, wikiUrls
, baseUrl
, optParCount
+ , optParSemaphore
, optDumpInterfaceFile
, optShowInterfaceFile
, optLaTeXStyle
@@ -48,7 +49,7 @@ module Haddock.Options
import Control.Applicative
import qualified Data.Char as Char
-import Data.List (dropWhileEnd)
+import Data.List (dropWhileEnd, isPrefixOf)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
@@ -122,6 +123,7 @@ data Flag
| Flag_SinceQualification String
| Flag_IgnoreLinkSymbol String
| Flag_ParCount (Maybe Int)
+ | Flag_ParSemaphore FilePath
| Flag_TraceArgs
| Flag_OneShot String
| Flag_NoCompilation
@@ -406,6 +408,11 @@ options backwardsCompat =
[]
(OptArg (\count -> Flag_ParCount (fmap read count)) "n")
"load modules in parallel"
+ , Option
+ []
+ ["jsem"]
+ (ReqArg Flag_ParSemaphore "SEM")
+ "use semaphore SEM to limit parallelism"
, Option
[]
["trace-args"]
@@ -423,7 +430,7 @@ getUsage = do
parseHaddockOpts :: [String] -> IO ([Flag], [String])
parseHaddockOpts params =
- case getOpt Permute (options True) params of
+ case getOpt Permute (options True) (normalizeJsemArgs params) of
(flags, args, []) -> return (flags, args)
(_, _, errors) -> do
usage <- getUsage
@@ -498,6 +505,18 @@ optMathjax flags = optLast [str | Flag_Mathjax str <- flags]
optParCount :: [Flag] -> Maybe (Maybe Int)
optParCount flags = optLast [n | Flag_ParCount n <- flags]
+optParSemaphore :: [Flag] -> Maybe FilePath
+optParSemaphore flags = optLast [s | Flag_ParSemaphore s <- flags]
+
+normalizeJsemArgs :: [String] -> [String]
+normalizeJsemArgs = map rewrite
+ where
+ rewrite arg
+ | arg == "-jsem" = "--jsem"
+ | "-jsem=" `isPrefixOf` arg = "--jsem=" ++ drop 6 arg
+ | "-jsem" `isPrefixOf` arg = "--jsem=" ++ drop 5 arg
+ | otherwise = arg
+
qualification :: [Flag] -> Either String QualOption
qualification flags =
case map (map Char.toLower) [str | Flag_Qualification str <- flags] of
=====================================
utils/haddock/haddock-api/src/Haddock/Utils.hs
=====================================
@@ -54,6 +54,10 @@ module Haddock.Utils
, replace
, spanWith
+ -- * Concurrency utilities
+ , mapConcurrentlyWith_
+ , newBoundedSem
+
-- * Logging
, parseVerbosity
, Verbosity (..)
@@ -86,6 +90,13 @@ import Haddock.Types
import Data.Text.Lazy (Text)
import qualified Data.Text.Lazy as LText
+import Control.Concurrent (forkFinally)
+import Control.Concurrent.QSem (newQSem, signalQSem, waitQSem)
+import Control.Concurrent.MVar (newEmptyMVar, putMVar, takeMVar)
+import Control.Exception (throwIO)
+import Control.Monad (void)
+import System.Semaphore (AbstractSem (..))
+
--------------------------------------------------------------------------------
-- * Logging
@@ -334,6 +345,43 @@ html_xrefs = unsafePerformIO (readIORef html_xrefs_ref)
html_xrefs' :: Map ModuleName FilePath
html_xrefs' = unsafePerformIO (readIORef html_xrefs_ref')
+-- * Concurrency utilities
+
+--------------------------------------------------------------------------------
+
+mapConcurrentlyWith_ :: AbstractSem -> (a -> IO ()) -> [a] -> IO ()
+mapConcurrentlyWith_ _ _ [] = return ()
+mapConcurrentlyWith_ concSem f xs = do
+ -- Create MVars to wait for completion and collect results
+ resultMVars <- mapM (const newEmptyMVar) xs
+
+ -- Fork a thread for each element
+ mapM_ (forkThread concSem) (zip xs resultMVars)
+
+ -- Wait for all threads and collect any errors
+ results <- mapM takeMVar resultMVars
+
+ -- Re-throw the first exception if any
+ case [err | Left err <- results] of
+ (err:_) -> throwIO err
+ [] -> return ()
+ where
+ forkThread concSem' (x, resultMVar) = do
+ acquireSem concSem'
+ void $ forkFinally (f x) $ \res -> do
+ releaseSem concSem'
+ putMVar resultMVar res
+
+newBoundedSem :: Int -> IO AbstractSem
+newBoundedSem maxThreads = do
+ sem <- newQSem (max 1 maxThreads)
+ pure
+ AbstractSem
+ { acquireSem = waitQSem sem
+ , releaseSem = signalQSem sem
+ }
+
+
-----------------------------------------------------------------------------
-- * List utils
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8965cb762bb0478f2cb7377522ef39e…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8965cb762bb0478f2cb7377522ef39e…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/supersven/cross-compiler-manual-test-jobs] ci: run full testsuite for cross-compilers when 'cross-run-testsuite' MR label is set (#27372)
by Sven Tennie (@supersven) 12 Jun '26
by Sven Tennie (@supersven) 12 Jun '26
12 Jun '26
Sven Tennie pushed to branch wip/supersven/cross-compiler-manual-test-jobs at Glasgow Haskell Compiler / GHC
Commits:
dedcc241 by Sven Tennie at 2026-06-12T08:21:24+00:00
ci: run full testsuite for cross-compilers when 'cross-run-testsuite' MR label is set (#27372)
Some tests are expected to fail under emulation, but this gives an
overview and confidence in cross-compiler correctness. The opt-in label
is required because emulated testsuite runs take a long time.
- - - - -
2 changed files:
- .gitlab/ci.sh
- + changelog.d/cross-compiler-testsuite
Changes:
=====================================
.gitlab/ci.sh
=====================================
@@ -708,6 +708,17 @@ function test_hadrian() {
# ---
# > main = putStrLn "hello world"
run diff -w expected actual
+
+ if [[ "${CI_MERGE_REQUEST_LABELS:-}" == *"cross-run-testsuite"* ]]; then
+ EXTRA_HC_OPTS="-fexternal-interpreter" run_hadrian \
+ test \
+ --summary-junit=./junit.xml \
+ --test-have-intree-files \
+ --test-compiler="${test_compiler}" \
+ "runtest.opts+=${RUNTEST_ARGS:-}" \
+ "runtest.opts+=--unexpected-output-dir=$TOP/unexpected-test-output" \
+ || fail "hadrian cross full testsuite"
+ fi
elif [[ -n "${REINSTALL_GHC:-}" ]]; then
run_hadrian \
test \
=====================================
changelog.d/cross-compiler-testsuite
=====================================
@@ -0,0 +1,14 @@
+section: packaging
+issues: #27372
+mrs: !16176
+synopsis: Run full testsuite for cross-compilers via 'cross-run-testsuite' MR label
+
+description: {
+ Adding the ``cross-run-testsuite`` label to a merge request causes the CI
+ pipeline to run the full testsuite for cross-compiler targets (riscv64,
+ aarch64-linux-gnu, loongarch64, aarch64-unknown-mingw32) in addition to
+ the existing smoke test. Results are expected to have failures under
+ emulation, but provide an overview and confidence in cross-compiler
+ correctness. The label is opt-in because emulated test runs take
+ significantly longer than native ones.
+}
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/dedcc2418ec8fb6b12c8b629b40b1c1…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/dedcc2418ec8fb6b12c8b629b40b1c1…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/supersven/cross-compiler-manual-test-jobs] ci: run full testsuite for cross-compilers when 'cross-run-testsuite' MR label is set
by Sven Tennie (@supersven) 12 Jun '26
by Sven Tennie (@supersven) 12 Jun '26
12 Jun '26
Sven Tennie pushed to branch wip/supersven/cross-compiler-manual-test-jobs at Glasgow Haskell Compiler / GHC
Commits:
0b293144 by Sven Tennie at 2026-06-12T08:09:35+00:00
ci: run full testsuite for cross-compilers when 'cross-run-testsuite' MR label is set
Some tests are expected to fail under emulation, but this gives an
overview and confidence in cross-compiler correctness. The opt-in label
is required because emulated testsuite runs take a long time.
- - - - -
1 changed file:
- .gitlab/ci.sh
Changes:
=====================================
.gitlab/ci.sh
=====================================
@@ -708,6 +708,17 @@ function test_hadrian() {
# ---
# > main = putStrLn "hello world"
run diff -w expected actual
+
+ if [[ "${CI_MERGE_REQUEST_LABELS:-}" == *"cross-run-testsuite"* ]]; then
+ EXTRA_HC_OPTS="-fexternal-interpreter" run_hadrian \
+ test \
+ --summary-junit=./junit.xml \
+ --test-have-intree-files \
+ --test-compiler="${test_compiler}" \
+ "runtest.opts+=${RUNTEST_ARGS:-}" \
+ "runtest.opts+=--unexpected-output-dir=$TOP/unexpected-test-output" \
+ || fail "hadrian cross full testsuite"
+ fi
elif [[ -n "${REINSTALL_GHC:-}" ]]; then
run_hadrian \
test \
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/0b29314462198c18076996fe1803e7d…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/0b29314462198c18076996fe1803e7d…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/mangoiv/process-1.6.30.0] libraries/process: bump submodule to v1.6.30.0
by Magnus (@MangoIV) 12 Jun '26
by Magnus (@MangoIV) 12 Jun '26
12 Jun '26
Magnus pushed to branch wip/mangoiv/process-1.6.30.0 at Glasgow Haskell Compiler / GHC
Commits:
c7f53f31 by mangoiv at 2026-06-12T09:57:32+02:00
libraries/process: bump submodule to v1.6.30.0
- bump the submodule to the appropriate tag
- suppress benign warning resulting from the change
- - - - -
2 changed files:
- libraries/process
- testsuite/driver/testlib.py
Changes:
=====================================
libraries/process
=====================================
@@ -1 +1 @@
-Subproject commit 92deb52c1781bf10ad390296dbc435abe103bfe4
+Subproject commit 11fd247ad33208da7a914acf15d4a09d64a6a4c4
=====================================
testsuite/driver/testlib.py
=====================================
@@ -3066,11 +3066,14 @@ def normalise_errmsg(s: str) -> str:
# Old emcc warns when we export HEAP8 but new one requires it (see #26290)
s = s.replace('warning: invalid item in EXPORTED_RUNTIME_METHODS: HEAP8\nwarning: invalid item in EXPORTED_RUNTIME_METHODS: HEAPU8\nemcc: warning: warnings in JS library compilation [-Wjs-compiler]\n','')
- # on newer versions of MacOS X, the shipped ranlib warns about object files with no symbols,
- # however, these are completely benign stubs.
- # See https://gitlab.haskell.org/ghc/ghc/-/issues/27116
if opsys('darwin'):
+ # on newer versions of MacOS X, the shipped ranlib warns about object files with no symbols,
+ # however, these are completely benign stubs.
+ # See https://gitlab.haskell.org/ghc/ghc/-/issues/27116
s = modify_lines(s, lambda l: re.sub(r'.*ranlib:.*has no symbols', '', l))
+ # we also want to remove linker warnings having to do with undefined dynamic_lookup in combination with
+ # making a single weak symbol undefined as this is dependent on other linker flags
+ s = drop_lines_containing(s, 'ld: warning: -U option is redundant when using -undefined dynamic_lookup')
return s
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c7f53f31d0039ba430da40bae216498…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c7f53f31d0039ba430da40bae216498…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/supersven/cross-compiler-manual-test-jobs] ci: run full testsuite for cross-compilers when 'cross-run-testsuite' MR label is set
by Sven Tennie (@supersven) 12 Jun '26
by Sven Tennie (@supersven) 12 Jun '26
12 Jun '26
Sven Tennie pushed to branch wip/supersven/cross-compiler-manual-test-jobs at Glasgow Haskell Compiler / GHC
Commits:
9614ffa2 by Sven Tennie at 2026-06-12T05:57:04+00:00
ci: run full testsuite for cross-compilers when 'cross-run-testsuite' MR label is set
- - - - -
1 changed file:
- .gitlab/ci.sh
Changes:
=====================================
.gitlab/ci.sh
=====================================
@@ -708,6 +708,17 @@ function test_hadrian() {
# ---
# > main = putStrLn "hello world"
run diff -w expected actual
+
+ if [[ "${CI_MERGE_REQUEST_LABELS:-}" == *"cross-run-testsuite"* ]]; then
+ EXTRA_HC_OPTS="-fexternal-interpreter" run_hadrian \
+ test \
+ --summary-junit=./junit.xml \
+ --test-have-intree-files \
+ --test-compiler="${test_compiler}" \
+ "runtest.opts+=${RUNTEST_ARGS:-}" \
+ "runtest.opts+=--unexpected-output-dir=$TOP/unexpected-test-output" \
+ || fail "hadrian cross full testsuite"
+ fi
elif [[ -n "${REINSTALL_GHC:-}" ]]; then
run_hadrian \
test \
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9614ffa2de36dfa0088dee14673ec43…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9614ffa2de36dfa0088dee14673ec43…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 10 commits: Fix AArch64 clobbering bug for MUL2
by Marge Bot (@marge-bot) 12 Jun '26
by Marge Bot (@marge-bot) 12 Jun '26
12 Jun '26
Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC
Commits:
c9015f09 by sheaf at 2026-06-11T12:40:28-04:00
Fix AArch64 clobbering bug for MUL2
On AArch64, the code generator could clobber one of the input operands
when computing the lower bits of a MUL2 operation. This rendered invalid
the subsequent computation of the high bits.
This commit fixes that by using a temporary register. The register
allocator can remove the redundant move in the common case when the
registers do not conflict.
Fixes #27046
- - - - -
7ab90288 by Rodrigo Mesquita at 2026-06-11T12:41:11-04:00
fix: make T27131 less flaky
It seems that T27131 fails flakily in a race where we check the flag
before the capability had the chance to process the mailbox which sets
the flag. This seemingly should only happen if the capability ends up
being the same for setting and checking the flag.
- - - - -
a6549ab1 by Marc Scholten at 2026-06-11T22:32:39-04:00
haddock: render modules concurrently
- - - - -
f18757ae by Duncan Coutts at 2026-06-11T22:32:39-04:00
Promote HAVE_PREEMPTION from Timer.c to OSThreads.h
We will want to know about HAVE_PREEMPTION in more places.
HAVE_PREEMPTION tells us that we do have OS threads available,
irrespective of whether THREADED is defined. In particular,
HAVE_PREEMPTION is defined on all proper OSs, but not on WASM (and
hyopthetically may not be true on some other platforms like
micro-controllers, RTOSs, VM hypervisors etc).
- - - - -
9660ff53 by Duncan Coutts at 2026-06-11T22:32:39-04:00
Define ACQUIRE_LOCK_ALWAYS and friends
Fix issue #27335
Like the atomic _ALWAYS variants, these lock actions are always defined,
rather than being dependent on whether we are in the THREADED case. All
the "normal" LOCK macros are defined to be no-ops when !THREADED.
The use case for the _ALWAYS variants is where we are using OS threads
even in the non-threaded RTS. This includes everything to do with the
timer/ticker thread, which is used in the non-threaded RTS too.
In particular, we will want to use this for eventlog things, because the
timer thread performs eventlogging concurrently with the main
capability, even in the non-threaded RTS.
- - - - -
863a8d25 by Duncan Coutts at 2026-06-11T22:32:39-04:00
Use ACQUIRE/RELEASE_LOCK_ALWAYS with eventBufMutex
Even in the non-threaded RTS the eventBufMutex is needed by both the
main capability and the timer/ticker thread, so always use the mutex.
This should fix #25165 which is about the main capability and the timer
thread posting events to the eventlog buffer concurrently and thereby
corrupting the buffer data.
- - - - -
92461617 by Duncan Coutts at 2026-06-11T22:32:39-04:00
Expose eventBufMutex in the EventLog interface/header
We will need it in forkProcess to ensure we don't write to the global
eventlog buffer concurrently with trying to flush eventlog buffers and
do the fork().
- - - - -
f52f601b by Duncan Coutts at 2026-06-11T22:32:39-04:00
Split flushAllCapsEventsBufs into safe and unlocked version
Following the convention that unlocked versions have a trailing _
underscore in their name. This one requires the caller to hold the
eventlog global buffer mutex. We will need this in forkProcess.
- - - - -
6f8d5318 by Duncan Coutts at 2026-06-11T22:32:40-04:00
Remove redundant use of stopTimer in setNumCapabilities
Historically, the comment here was:
We must stop the interval timer while we are changing the
capabilities array lest handle_tick may try to context switch
an old capability. See #17289.
and
We must disable the timer while we do this since the tick handler may
call contextSwitchAllCapabilities, which may see the capabilities array
as we free it.
What this refers to is that historically, when changing the number of
capabilities, the array of capabilities was reallocated to a new size,
allocating new ones and freeing the old ones, thus invalidating all
existing capbility pointers.
Strangely, for good measure the code used to call stopTimer twice (hence
the two similar comments above).
However, since commit a3eccf06292dd666b24606251a52da2b466a9612, the
capabilities array is no longer reallocated. Instead the array is
allcoated once on RTS startup to the maximum size it could ever be
allowed to be, and then capabilities get enabled/disabled at runtime. So
the capability pointers never become invalid anymore. At worst, they may
point to capabilities that are disabled.
Thus we no longer need to stop the timer (twice) while we change the
number of enabled capabilities. This also partially solves issue #27105,
which notes that stopTimer is being used as if it were synchronous, when
it is not. At least for this case, the solution is that stopTimer is not
needed at all!
- - - - -
faf06ab1 by Duncan Coutts at 2026-06-11T22:32:40-04:00
Remove redundant use of stopTimer in forkProcess
but replace it with taking the eventlog buffer lock during the fork.
Fixes issue #27105
The original reason to block the timer during a fork was that
historically the timer was implemented using a periodic timer signal,
and the signal itself would interrupt the fork system call (returning
EINTR). For large processes (where fork() takes a while) this could
permanently livelock: the timer always would go off before the fork
could complete, which got retried in a loop forever.
The timer is no longer implemented as a unix signal, but uses threads.
Thus the original problem no longer exists. The only remaining reason to
block the timer tick is to prevent actions taken by the tick from
interfering with the delicate process involved in fork (taking a load of
locks and pausing everything).
The only thing we need to do is to prevent the eventlog from being
written to or flushed while the fork is taking place. To achieve this
all we need to do is hold the mutex for the global eventlog buffer.
This removes the last use of stopTimer that expects stopTimer to work
synchronously (which it was not) and thus solves issue #27105. To be
clear, we solve issue #27105 not by making stopTimer synchronous, but by
eliminating the use sites that expected it to be synchronous.
- - - - -
19 changed files:
- + changelog.d/T27046
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- rts/Capability.c
- rts/Schedule.c
- rts/Timer.c
- rts/eventlog/EventLog.c
- rts/eventlog/EventLog.h
- rts/include/rts/OSThreads.h
- + testsuite/tests/codeGen/should_run/T27046.hs
- + testsuite/tests/codeGen/should_run/T27046_cmm.cmm
- testsuite/tests/codeGen/should_run/all.T
- testsuite/tests/rts/T27131.hs
- testsuite/tests/rts/T27131.stdout
- utils/haddock/haddock-api/haddock-api.cabal
- utils/haddock/haddock-api/src/Haddock.hs
- utils/haddock/haddock-api/src/Haddock/Backends/Hyperlinker.hs
- utils/haddock/haddock-api/src/Haddock/Backends/Xhtml.hs
- utils/haddock/haddock-api/src/Haddock/Options.hs
- utils/haddock/haddock-api/src/Haddock/Utils.hs
Changes:
=====================================
changelog.d/T27046
=====================================
@@ -0,0 +1,9 @@
+section: compiler
+issues: #27046
+mrs: !16031
+synopsis:
+ Avoid AArch64 register clobbering bug in MUL2
+description:
+ Fixes an issue in which, on AArch64, code generation for the MUL2 operation
+ could clobber one of the input operands when computing the lower bits, which
+ rendered invalid the subsequent computation of the high bits.
=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -2300,11 +2300,19 @@ genCCall target dest_regs arg_regs = do
let lo = getRegisterReg platform (CmmLocal dst_lo)
hi = getRegisterReg platform (CmmLocal dst_hi)
nd = getRegisterReg platform (CmmLocal dst_needed)
+
+ -- Generate a fresh virtual register for the low word computation.
+ -- This avoids clobbering reg_a or reg_b in the first MUL instruction,
+ -- which could for example happen if 'lo' and 'reg_a' are the same
+ -- virtual register.
+ tmp_lo <- getNewRegNat II64
+
return $
code_x `appOL`
code_y `snocOL`
- MUL II64 (OpReg W64 lo) (OpReg W64 reg_a) (OpReg W64 reg_b) `snocOL`
+ MUL II64 (OpReg W64 tmp_lo) (OpReg W64 reg_a) (OpReg W64 reg_b) `snocOL`
SMULH (OpReg W64 hi) (OpReg W64 reg_a) (OpReg W64 reg_b) `snocOL`
+ MOV (OpReg W64 lo) (OpReg W64 tmp_lo) `snocOL`
-- Are all high bits equal to the sign bit of the low word?
-- nd = (hi == ASR(lo,width-1)) ? 1 : 0
CMP (OpReg W64 hi) (OpRegShift W64 lo SASR (widthInBits w - 1)) `snocOL`
=====================================
rts/Capability.c
=====================================
@@ -443,13 +443,6 @@ void
moreCapabilities (uint32_t from USED_IF_THREADS, uint32_t to USED_IF_THREADS)
{
#if defined(THREADED_RTS)
- // We must disable the timer while we do this since the tick handler may
- // call contextSwitchAllCapabilities, which may see the capabilities array
- // as we free it. The alternative would be to protect the capabilities
- // array with a lock but this seems more expensive than necessary.
- // See #17289.
- stopTimer();
-
if (to == 1) {
// THREADED_RTS must work on builds that don't have a mutable
// BaseReg (eg. unregisterised), so in this case
@@ -470,8 +463,6 @@ moreCapabilities (uint32_t from USED_IF_THREADS, uint32_t to USED_IF_THREADS)
}
debugTrace(DEBUG_sched, "allocated %d more capabilities", to - from);
-
- startTimer();
#endif
}
=====================================
rts/Schedule.c
=====================================
@@ -37,6 +37,7 @@
#include "win32/AsyncWinIO.h"
#endif
#include "Trace.h"
+#include "eventlog/EventLog.h"
#include "RaiseAsync.h"
#include "Threads.h"
#include "Timer.h"
@@ -2100,24 +2101,31 @@ forkProcess(HsStablePtr *entry
ACQUIRE_LOCK(&all_tasks_mutex);
#endif
- stopTimer(); // See #4074
-
#if defined(TRACING)
- flushAllCapsEventsBufs(); // so that child won't inherit dirty file buffers
+#if defined(HAVE_PREEMPTION)
+ // We must hold the eventlog global mutex over the fork to prevent the
+ // timer thread from trying to post events. While holding the mutex we need
+ // to flush the eventlogs (global and per-cap) so that child won't inherit
+ // dirty eventlog buffers or file buffers.
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
+#endif
+ flushAllCapsEventsBufs_();
#endif
pid = fork();
if (pid) { // parent
- startTimer(); // #4074
-
RELEASE_LOCK(&sched_mutex);
RELEASE_LOCK(&sm_mutex);
RELEASE_LOCK(&stable_ptr_mutex);
RELEASE_LOCK(&stable_name_mutex);
RELEASE_LOCK(&task->lock);
+#if defined(TRACING) && defined(HAVE_PREEMPTION)
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
+#endif
+
#if defined(THREADED_RTS)
/* N.B. releaseCapability_ below may need to take all_tasks_mutex */
RELEASE_LOCK(&all_tasks_mutex);
@@ -2303,12 +2311,6 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
cap = rts_lock();
task = cap->running_task;
-
- // N.B. We must stop the interval timer while we are changing the
- // capabilities array lest handle_tick may try to context switch
- // an old capability. See #17289.
- stopTimer();
-
stopAllCapabilities(&cap, task);
if (new_n_capabilities < enabled_capabilities)
@@ -2364,9 +2366,7 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
tracingAddCapabilities(n_capabilities, new_n_capabilities);
#endif
- // Resize the capabilities array
- // NB. after this, capabilities points somewhere new. Any pointers
- // of type (Capability *) are now invalid.
+ // Allocate and initialise the extra capabilities
moreCapabilities(n_capabilities, new_n_capabilities);
// Resize and update storage manager data structures
@@ -2394,8 +2394,6 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
// Notify IO manager that the number of capabilities has changed.
notifyIOManagerCapabilitiesChanged(&cap);
- startTimer();
-
rts_unlock(cap);
#endif // THREADED_RTS
=====================================
rts/Timer.c
=====================================
@@ -28,11 +28,6 @@
#include "RtsSignals.h"
#include "rts/EventLogWriter.h"
-// See Note [No timer on wasm32]
-#if !defined(wasm32_HOST_ARCH)
-#define HAVE_PREEMPTION
-#endif
-
// This global counter is used to allow multiple threads to stop the
// timer temporarily with a stopTimer()/startTimer() pair. If
// timer_enabled == 0 timer is enabled
=====================================
rts/eventlog/EventLog.c
=====================================
@@ -129,8 +129,11 @@ typedef struct _EventsBuf {
static EventsBuf *capEventBuf; // one EventsBuf for each Capability
static EventsBuf eventBuf; // an EventsBuf not associated with any Capability
-#if defined(THREADED_RTS)
-static Mutex eventBufMutex; // protected by this mutex
+#if defined(HAVE_PREEMPTION)
+// Note that this mutex is used even in the non-threaded RTS, since the timer
+// thread posts events and flushes. So _all_ uses of this mutex must use
+// ACQUIRE_LOCK_ALWAYS/RELEASE_LOCK_ALWAYS.
+Mutex eventBufMutex; // protects eventBuf above
#endif
// Event type
@@ -393,8 +396,10 @@ initEventLogging(void)
moreCapEventBufs(0, get_n_capabilities());
initEventsBuf(&eventBuf, EVENT_LOG_SIZE, (EventCapNo)(-1));
-#if defined(THREADED_RTS)
+#if defined(HAVE_PREEMPTION)
initMutex(&eventBufMutex);
+#endif
+#if defined(THREADED_RTS)
initMutex(&state_change_mutex);
#endif
}
@@ -416,7 +421,7 @@ startEventLogging_(void)
{
initEventLogWriter();
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
postHeaderEvents();
/*
@@ -425,7 +430,7 @@ startEventLogging_(void)
*/
printAndClearEventBuf(&eventBuf);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
return true;
}
@@ -495,7 +500,7 @@ endEventLogging(void)
flushEventLog_(NULL);
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
// Mark end of events (data).
postEventTypeNum(&eventBuf, EVENT_DATA_END);
@@ -503,7 +508,7 @@ endEventLogging(void)
// Flush the end of data marker.
printAndClearEventBuf(&eventBuf);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
stopEventLogWriter();
event_log_writer = NULL;
@@ -666,7 +671,7 @@ void
postCapEvent (EventTypeNum tag,
EventCapNo capno)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, tag);
postEventHeader(&eventBuf, tag);
@@ -685,14 +690,14 @@ postCapEvent (EventTypeNum tag,
barf("postCapEvent: unknown event tag %d", tag);
}
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postCapsetEvent (EventTypeNum tag,
EventCapsetID capset,
StgWord info)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, tag);
postEventHeader(&eventBuf, tag);
@@ -726,7 +731,7 @@ void postCapsetEvent (EventTypeNum tag,
barf("postCapsetEvent: unknown event tag %d", tag);
}
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postCapsetStrEvent (EventTypeNum tag,
@@ -740,14 +745,14 @@ void postCapsetStrEvent (EventTypeNum tag,
return;
}
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
if (!hasRoomForVariableEvent(&eventBuf, size)){
printAndClearEventBuf(&eventBuf);
if (!hasRoomForVariableEvent(&eventBuf, size)){
errorBelch("Event size exceeds buffer size, bail out");
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
return;
}
}
@@ -758,7 +763,7 @@ void postCapsetStrEvent (EventTypeNum tag,
postBuf(&eventBuf, (StgWord8*) msg, strsize);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postCapsetVecEvent (EventTypeNum tag,
@@ -783,14 +788,14 @@ void postCapsetVecEvent (EventTypeNum tag,
}
}
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
if (!hasRoomForVariableEvent(&eventBuf, size)){
printAndClearEventBuf(&eventBuf);
if(!hasRoomForVariableEvent(&eventBuf, size)){
errorBelch("Event size exceeds buffer size, bail out");
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
return;
}
}
@@ -804,7 +809,7 @@ void postCapsetVecEvent (EventTypeNum tag,
postBuf(&eventBuf, (StgWord8*) argv[i], 1 + strlen(argv[i]));
}
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postWallClockTime (EventCapsetID capset)
@@ -813,7 +818,7 @@ void postWallClockTime (EventCapsetID capset)
StgWord64 sec;
StgWord32 nsec;
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
/* The EVENT_WALL_CLOCK_TIME event is intended to allow programs
reading the eventlog to match up the event timestamps with wall
@@ -846,7 +851,7 @@ void postWallClockTime (EventCapsetID capset)
postWord64(&eventBuf, sec);
postWord32(&eventBuf, nsec);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
/*
@@ -885,7 +890,7 @@ void postEventHeapInfo (EventCapsetID heap_capset,
W_ mblockSize,
W_ blockSize)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_HEAP_INFO_GHC);
postEventHeader(&eventBuf, EVENT_HEAP_INFO_GHC);
@@ -899,7 +904,7 @@ void postEventHeapInfo (EventCapsetID heap_capset,
postWord64(&eventBuf, mblockSize);
postWord64(&eventBuf, blockSize);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postEventGcStats (Capability *cap,
@@ -952,7 +957,7 @@ void postTaskCreateEvent (EventTaskId taskId,
EventCapNo capno,
EventKernelThreadId tid)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_TASK_CREATE);
postEventHeader(&eventBuf, EVENT_TASK_CREATE);
@@ -961,14 +966,14 @@ void postTaskCreateEvent (EventTaskId taskId,
postCapNo(&eventBuf, capno);
postKernelThreadId(&eventBuf, tid);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postTaskMigrateEvent (EventTaskId taskId,
EventCapNo capno,
EventCapNo new_capno)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_TASK_MIGRATE);
postEventHeader(&eventBuf, EVENT_TASK_MIGRATE);
@@ -977,28 +982,28 @@ void postTaskMigrateEvent (EventTaskId taskId,
postCapNo(&eventBuf, capno);
postCapNo(&eventBuf, new_capno);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postTaskDeleteEvent (EventTaskId taskId)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_TASK_DELETE);
postEventHeader(&eventBuf, EVENT_TASK_DELETE);
/* EVENT_TASK_DELETE (taskID) */
postTaskId(&eventBuf, taskId);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void
postEventNoCap (EventTypeNum tag)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, tag);
postEventHeader(&eventBuf, tag);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void
@@ -1042,9 +1047,9 @@ void postLogMsg(EventsBuf *eb, EventTypeNum type, char *msg, va_list ap)
void postMsg(char *msg, va_list ap)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
postLogMsg(&eventBuf, EVENT_LOG_MSG, msg, ap);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postCapMsg(Capability *cap, char *msg, va_list ap)
@@ -1138,32 +1143,32 @@ void postConcUpdRemSetFlush(Capability *cap)
void postConcMarkEnd(StgWord32 marked_obj_count)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_CONC_MARK_END);
postEventHeader(&eventBuf, EVENT_CONC_MARK_END);
postWord32(&eventBuf, marked_obj_count);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postNonmovingHeapCensus(uint16_t blk_size,
const struct NonmovingAllocCensus *census)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
postEventHeader(&eventBuf, EVENT_NONMOVING_HEAP_CENSUS);
postWord16(&eventBuf, blk_size);
postWord32(&eventBuf, census->n_active_segs);
postWord32(&eventBuf, census->n_filled_segs);
postWord32(&eventBuf, census->n_live_blocks);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postNonmovingPrunedSegments(uint32_t pruned_segments, uint32_t free_segments)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
postEventHeader(&eventBuf, EVENT_NONMOVING_PRUNED_SEGMENTS);
postWord32(&eventBuf, pruned_segments);
postWord32(&eventBuf, free_segments);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void closeBlockMarker (EventsBuf *ebuf)
@@ -1224,7 +1229,7 @@ static HeapProfBreakdown getHeapProfBreakdown(void)
void postHeapProfBegin(void)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
PROFILING_FLAGS *flags = &RtsFlags.ProfFlags;
StgWord modSelector_len =
flags->modSelector ? strlen(flags->modSelector) : 0;
@@ -1258,42 +1263,42 @@ void postHeapProfBegin(void)
postStringLen(&eventBuf, flags->ccsSelector, ccsSelector_len);
postStringLen(&eventBuf, flags->retainerSelector, retainerSelector_len);
postStringLen(&eventBuf, flags->bioSelector, bioSelector_len);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postHeapProfSampleBegin(StgInt era)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_HEAP_PROF_SAMPLE_BEGIN);
postEventHeader(&eventBuf, EVENT_HEAP_PROF_SAMPLE_BEGIN);
postWord64(&eventBuf, era);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postHeapBioProfSampleBegin(StgInt era, StgWord64 time)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_HEAP_BIO_PROF_SAMPLE_BEGIN);
postEventHeader(&eventBuf, EVENT_HEAP_BIO_PROF_SAMPLE_BEGIN);
postWord64(&eventBuf, era);
postWord64(&eventBuf, time);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postHeapProfSampleEnd(StgInt era)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_HEAP_PROF_SAMPLE_END);
postEventHeader(&eventBuf, EVENT_HEAP_PROF_SAMPLE_END);
postWord64(&eventBuf, era);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postHeapProfSampleString(const char *label,
StgWord64 residency)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
StgWord label_len = strlen(label);
StgWord len = 1+8+label_len+1;
CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
@@ -1303,7 +1308,7 @@ void postHeapProfSampleString(const char *label,
postWord8(&eventBuf, 0);
postWord64(&eventBuf, residency);
postStringLen(&eventBuf, label, label_len);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
#if defined(PROFILING)
@@ -1313,7 +1318,7 @@ void postHeapProfCostCentre(StgWord32 ccID,
const char *srcloc,
StgBool is_caf)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
StgWord label_len = strlen(label);
StgWord module_len = strlen(module);
StgWord srcloc_len = strlen(srcloc);
@@ -1326,13 +1331,13 @@ void postHeapProfCostCentre(StgWord32 ccID,
postStringLen(&eventBuf, module, module_len);
postStringLen(&eventBuf, srcloc, srcloc_len);
postWord8(&eventBuf, is_caf);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postHeapProfSampleCostCentre(CostCentreStack *stack,
StgWord64 residency)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
StgWord depth = 0;
CostCentreStack *ccs;
for (ccs = stack; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack)
@@ -1351,7 +1356,7 @@ void postHeapProfSampleCostCentre(CostCentreStack *stack,
depth>0 && ccs != NULL && ccs != CCS_MAIN;
ccs = ccs->prevStack, depth--)
postWord32(&eventBuf, ccs->cc->ccID);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
@@ -1359,7 +1364,7 @@ void postProfSampleCostCentre(Capability *cap,
CostCentreStack *stack,
StgWord64 tick)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
StgWord depth = 0;
CostCentreStack *ccs;
for (ccs = stack; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack)
@@ -1377,7 +1382,7 @@ void postProfSampleCostCentre(Capability *cap,
depth>0 && ccs != NULL && ccs != CCS_MAIN;
ccs = ccs->prevStack, depth--)
postWord32(&eventBuf, ccs->cc->ccID);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
// This event is output at the start of profiling so the tick interval can
@@ -1385,11 +1390,11 @@ void postProfSampleCostCentre(Capability *cap,
// can be calculated from how many samples there are.
void postProfBegin(void)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
postEventHeader(&eventBuf, EVENT_PROF_BEGIN);
// The interval that each tick was sampled, in nanoseconds
postWord64(&eventBuf, TimeToNS(RtsFlags.MiscFlags.tickInterval));
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
#endif /* PROFILING */
@@ -1415,11 +1420,11 @@ static void postTickyCounterDef(EventsBuf *eb, StgEntCounter *p)
void postTickyCounterDefs(StgEntCounter *counters)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
for (StgEntCounter *p = counters; p != NULL; p = p->link) {
postTickyCounterDef(&eventBuf, p);
}
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
static void postTickyCounterSample(EventsBuf *eb, StgEntCounter *p)
@@ -1443,13 +1448,13 @@ static void postTickyCounterSample(EventsBuf *eb, StgEntCounter *p)
void postTickyCounterSamples(StgEntCounter *counters)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_TICKY_COUNTER_SAMPLE);
postEventHeader(&eventBuf, EVENT_TICKY_COUNTER_BEGIN_SAMPLE);
for (StgEntCounter *p = counters; p != NULL; p = p->link) {
postTickyCounterSample(&eventBuf, p);
}
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
#endif /* TICKY_TICKY */
void postIPE(const InfoProvEnt *ipe)
@@ -1459,7 +1464,7 @@ void postIPE(const InfoProvEnt *ipe)
// See Note [Maximum event length].
const StgWord MAX_IPE_STRING_LEN = 65535;
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
StgWord table_name_len = MIN(strlen(ipe->prov.table_name), MAX_IPE_STRING_LEN);
StgWord closure_desc_len = MIN(strlen(closure_desc_buf), MAX_IPE_STRING_LEN);
StgWord ty_desc_len = MIN(strlen(ipe->prov.ty_desc), MAX_IPE_STRING_LEN);
@@ -1489,7 +1494,7 @@ void postIPE(const InfoProvEnt *ipe)
postBuf(&eventBuf, &colon, 1);
postStringLen(&eventBuf, ipe->prov.src_span, src_span_len);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void printAndClearEventBuf (EventsBuf *ebuf)
@@ -1601,14 +1606,21 @@ void flushLocalEventsBuf(Capability *cap)
// Flush all capabilities' event buffers when we already hold all capabilities.
// Used during forkProcess.
void flushAllCapsEventsBufs(void)
+{
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
+ flushAllCapsEventsBufs_();
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
+}
+
+// Unsafe version that does not acquire/release eventBufMutex. You must
+// hold the eventBufMutex, which you must acquire with ACQUIRE_LOCK_ALWAYS!
+void flushAllCapsEventsBufs_(void)
{
if (!event_log_writer) {
return;
}
- ACQUIRE_LOCK(&eventBufMutex);
printAndClearEventBuf(&eventBuf);
- RELEASE_LOCK(&eventBufMutex);
for (unsigned int i=0; i < getNumCapabilities(); i++) {
flushLocalEventsBuf(getCapability(i));
@@ -1641,9 +1653,9 @@ static void flushEventLog_(Capability **cap USED_IF_THREADS)
return;
}
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
printAndClearEventBuf(&eventBuf);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
#if defined(THREADED_RTS)
Task *task = newBoundTask();
=====================================
rts/eventlog/EventLog.h
=====================================
@@ -18,6 +18,13 @@
#if defined(TRACING)
extern bool eventlog_enabled;
+#if defined(HAVE_PREEMPTION)
+// Avoid using this mutex directly if at all possible. It is needed in the
+// implementation of forkProcess.
+//
+// All uses of this mutex must use ACQUIRE_LOCK_ALWAYS/RELEASE_LOCK_ALWAYS.
+extern Mutex eventBufMutex;
+#endif
void initEventLogging(void);
void restartEventLogging(void);
@@ -27,6 +34,7 @@ void abortEventLogging(void); // #4512 - after fork child needs to abort
void moreCapEventBufs (uint32_t from, uint32_t to);
void flushLocalEventsBuf(Capability *cap);
void flushAllCapsEventsBufs(void);
+void flushAllCapsEventsBufs_(void);
void flushAllEventsBufs(Capability *cap);
typedef void (*EventlogInitPost)(void);
=====================================
rts/include/rts/OSThreads.h
=====================================
@@ -14,6 +14,46 @@
#pragma once
+/* Note [Threads and preemption]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ All full-fat OSs that GHC works on have OS threads, and we use them even in
+ the non-threaded RTS for a few features:
+ * Haskell thread preemption;
+ * sample-based profiling;
+ * idle GC;
+ * periodic eventlog flushing.
+
+ We use defined(HAVE_PREEMPTION) to decide if these features are implemented
+ via OS threads.
+
+ On platforms like WASM/js we do not have OS threads in any conventional
+ sense, and the features above are either not available or are implemented
+ differently. See Note [No timer on wasm32].
+
+ In future if GHC is ported to platforms like bare-metal micro-controllers,
+ RTOSs or to run directly under hypervisors then such platforms may also not
+ have threads available and they should not define HAVE_PREEMPTION here. Or
+ for some micro-controller RTOSs like Zeypher one may have a choice about
+ whether to use threads or not (at a size cost). Here would be the right
+ place to control whether the feature list above is supported.
+ */
+#if defined(wasm32_HOST_ARCH)
+ // See Note [No timer on wasm32]
+ // To confuse matters, WASM _does_ have pthread.h but it doesnt work.
+#elif defined(HAVE_PTHREAD_H) || defined(HAVE_WINDOWS_H)
+#define HAVE_PREEMPTION
+#else
+#error Decide if this platform has threads and pre-emption or not.
+#endif
+// And JS does all of this differently, without using this bit of the RTS.
+
+// Configuration sanity check
+#if defined(THREADED_RTS) && !defined(HAVE_PREEMPTION)
+//TODO we would like to be able to assert this:
+// #error Configuration error: THREADED_RTS should imply HAVE_PREEMPTION
+// however at the moment we cannot due to issue #27346.
+#endif
+
#if defined(HAVE_PTHREAD_H) && !defined(mingw32_HOST_OS)
#if defined(CMINUSMINUS)
@@ -210,9 +250,29 @@ extern bool timedWaitCondition ( Condition* pCond, Mutex* pMut, Time timeout)
//
// Mutexes
//
+// Even in the non-threaded RTS we use threads and mutexes! In particular the
+// timer/ticker is implemented using a thread. And using threads needs locks.
+// In particular we need locks for the data shared between the timer/ticker
+// thread and the thread running the main capability.
+#if defined(HAVE_PREEMPTION)
extern void initMutex ( Mutex* pMut );
extern void closeMutex ( Mutex* pMut );
+// The "always" variants do locking in the threaded and non-threaded RTS.
+// The normal variants below are no-ops in the non-threaded RTS.
+#define ACQUIRE_LOCK_ALWAYS(l) OS_ACQUIRE_LOCK(l)
+#define TRY_ACQUIRE_LOCK_ALWAYS(l) OS_TRY_ACQUIRE_LOCK(l)
+#define RELEASE_LOCK_ALWAYS(l) OS_RELEASE_LOCK(l)
+#define ASSERT_LOCK_HELD_ALWAYS(l) OS_ASSERT_LOCK_HELD(l)
+#else
+// And just to be a bit confusing, the always variants are still no-ops when we
+// do not HAVE_PREEMPTION, since then we don't have threads or mutexes at all.
+#define ACQUIRE_LOCK_ALWAYS(l)
+#define TRY_ACQUIRE_LOCK_ALWAYS(l) 0
+#define RELEASE_LOCK_ALWAYS(l)
+#define ASSERT_LOCK_HELD_ALWAYS(l)
+#endif
+
// Processors and affinity
void setThreadAffinity (uint32_t n, uint32_t m);
void setThreadNode (uint32_t node);
@@ -228,6 +288,7 @@ void releaseThreadNode (void);
#else
+// No-ops in the non-threaded RTS. See also the _ALWAYS variants above.
#define ACQUIRE_LOCK(l)
#define TRY_ACQUIRE_LOCK(l) 0
#define RELEASE_LOCK(l)
=====================================
testsuite/tests/codeGen/should_run/T27046.hs
=====================================
@@ -0,0 +1,29 @@
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE ForeignFunctionInterface, GHCForeignImportPrim, UnliftedFFITypes #-}
+
+module Main where
+
+import Control.Monad
+ ( unless )
+import Data.Bits
+ ( shiftL )
+import GHC.Exts
+ ( Int64# )
+import GHC.Int
+ ( Int64(..) )
+
+foreign import prim "test_mul2_clobber"
+ test_mul2_clobber :: Int64# -> Int64# -> Int64#
+
+main :: IO ()
+main = do
+ let
+ I64# x = 1 `shiftL` 32
+ hi = I64# $ test_mul2_clobber x x
+
+ unless ( hi == 1 ) $
+ error $ unlines
+ [ "Incorrect result for Mul2 operation."
+ , "Expected high word: 1"
+ , " Actual high word: " ++ show hi
+ ]
=====================================
testsuite/tests/codeGen/should_run/T27046_cmm.cmm
=====================================
@@ -0,0 +1,13 @@
+#include "Cmm.h"
+
+// Test for #27046
+test_mul2_clobber (bits64 x, bits64 y)
+{
+ bits64 hi, nd;
+
+ // Deliberately alias the destination 'lo' with the source 'x'
+ // This forces the NCG to use the same virtual register for both.
+ (nd, hi, x) = prim %mul2_64(x, y);
+
+ return (hi);
+}
=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -260,6 +260,13 @@ test('T25364', normal, compile_and_run, [''])
test('T26061', normal, compile_and_run, [''])
test('T26537', normal, compile_and_run, ['-O2 -fregs-graph'])
test('T24016', normal, compile_and_run, ['-O1 -fPIC'])
+test('T27046',
+ [ req_cmm
+ , when(arch('i386'), skip) # i386 does not support MO_S_Mul2 W64
+ , when(arch('wasm32'), skip)
+ , js_skip
+ , when(unregisterised(), skip) # pprCallishMachOp_for_C: MO_S_Mul2 W64 not supported
+ ], compile_and_run, ['T27046_cmm.cmm'])
# Check that GHC-generated finalizers run on Darwin. The Apple linker doesn't
# support --wrap, so we can't intercept hs_spt_remove directly. Instead we
=====================================
testsuite/tests/rts/T27131.hs
=====================================
@@ -30,16 +30,22 @@ foreign import ccall unsafe "has_local_stop_after_return"
main :: IO ()
main = do
setNumCapabilities 2
- checkFlag
- "TSO_STOP_NEXT_BREAKPOINT"
- rts_enableStopNextBreakpoint
- rts_disableStopNextBreakpoint
- c_hasLocalStopNextBreakpoint
- checkFlag
- "TSO_STOP_AFTER_RETURN"
- rts_enableStopAfterReturn
- rts_disableStopAfterReturn
- c_hasLocalStopAfterReturn
+ -- Bind to capability 0 so it can't float between capabilities while the
+ -- target thread runs on capability 1.
+ doneVar <- newEmptyMVar
+ _ <- forkOn 0 $ do
+ checkFlag
+ "TSO_STOP_NEXT_BREAKPOINT"
+ rts_enableStopNextBreakpoint
+ rts_disableStopNextBreakpoint
+ c_hasLocalStopNextBreakpoint
+ checkFlag
+ "TSO_STOP_AFTER_RETURN"
+ rts_enableStopAfterReturn
+ rts_disableStopAfterReturn
+ c_hasLocalStopAfterReturn
+ putMVar doneVar ()
+ takeMVar doneVar
checkFlag
:: String
@@ -58,6 +64,7 @@ checkFlag label enable disable isMyThreadFlagSet = do
ThreadId tid# <- forkOn 1 $ do
replicateM_ 2 $ do
replyVar <- takeMVar targetCheckVar
+ yield -- make sure we reprocess the mailbox
isSet <- (/= 0) <$> isMyThreadFlagSet
putMVar replyVar isSet
=====================================
testsuite/tests/rts/T27131.stdout
=====================================
@@ -1,6 +1,6 @@
-(0,False)
+(0,True)
TSO_STOP_NEXT_BREAKPOINT set: ok
TSO_STOP_NEXT_BREAKPOINT unset: ok
-(0,False)
+(0,True)
TSO_STOP_AFTER_RETURN set: ok
TSO_STOP_AFTER_RETURN unset: ok
=====================================
utils/haddock/haddock-api/haddock-api.cabal
=====================================
@@ -97,6 +97,7 @@ library
, filepath
, ghc-boot
, mtl
+ , semaphore-compat
, transformers
, text
=====================================
utils/haddock/haddock-api/src/Haddock.hs
=====================================
@@ -29,6 +29,7 @@ module Haddock (
withGhc
) where
+import Control.Concurrent.MVar (modifyMVar, modifyMVar_, newMVar)
import Control.DeepSeq (force)
import Control.Monad hiding (forM_)
import Control.Monad.IO.Class (MonadIO(..))
@@ -41,6 +42,7 @@ import Data.Maybe
import Data.IORef
import Data.Map.Strict (Map)
import Data.Version (makeVersion)
+import GHC.Conc (getNumProcessors)
import GHC.Parser.Lexer (ParserOpts)
import qualified GHC.Driver.Config.Parser as Parser
import qualified Data.Map.Strict as Map
@@ -84,11 +86,55 @@ import Haddock.Options
import Haddock.Utils
import Haddock.GhcUtils (modifySessionDynFlags, setOutputDir)
import Haddock.Compat (getProcessID)
+import System.Semaphore (AbstractSem(..), openSemaphore, releaseSemaphoreToken, waitOnSemaphore)
--------------------------------------------------------------------------------
-- * Exception handling
--------------------------------------------------------------------------------
+concSemChoiceFromFlags :: [Flag] -> Maybe (Either FilePath (Maybe Int))
+concSemChoiceFromFlags =
+ List.foldl' step Nothing
+ where
+ step _ (Flag_ParCount n) = Just (Right n)
+ step _ (Flag_ParSemaphore sem) = Just (Left sem)
+ step acc _ = acc
+
+-- | Build the render concurrency semaphore selected by Haddock's parallelism flags.
+-- Without an explicit flag, render sequentially; @-j@ uses the host processor
+-- count, @-jN@ uses a local bounded semaphore, and @-jsem@ joins the external
+-- semaphore used for GHC jobserver coordination.
+concSemFromChoice :: Maybe (Either FilePath (Maybe Int)) -> IO AbstractSem
+concSemFromChoice choice =
+ case choice of
+ Nothing -> newBoundedSem 1
+ Just (Right Nothing) -> newBoundedSem =<< getNumProcessors
+ Just (Right (Just n)) -> newBoundedSem n
+ Just (Left semName) -> do
+ openSemaphore semName >>= \case
+ Left err -> throwIO err
+ Right sem -> do
+ tokens <- newMVar []
+ pure
+ AbstractSem
+ { acquireSem = mask $ \restore -> do
+ token <- restore (waitOnSemaphore sem)
+ modifyMVar_ tokens $ \held -> pure (token : held)
+ , releaseSem = mask_ $ do
+ token <- modifyMVar tokens $ \case
+ [] -> pure ([], Nothing)
+ heldToken : heldTokens -> pure (heldTokens, Just heldToken)
+ forM_ token releaseSemaphoreToken
+ }
+
+injectParFlags :: Maybe (Either FilePath (Maybe Int)) -> [Flag] -> [Flag]
+injectParFlags choice flags =
+ case choice of
+ Nothing -> flags
+ Just (Right Nothing) -> Flag_OptGhc "-j" : flags
+ Just (Right (Just n)) -> Flag_OptGhc ("-j" ++ show n) : flags
+ Just (Left sem) -> Flag_OptGhc "-jsem" : Flag_OptGhc sem : flags
+
handleTopExceptions :: IO a -> IO a
handleTopExceptions =
@@ -177,11 +223,12 @@ haddockWithGhc ghc args = handleTopExceptions $ do
Just "YES" | not noCompilation -> return $ Flag_OptGhc "-dynamic-too" : flags
_ -> return flags
- -- Inject `-j` into ghc options, if given to Haddock
- flags' <- pure $ case optParCount flags'' of
- Nothing -> flags''
- Just Nothing -> Flag_OptGhc "-j" : flags''
- Just (Just n) -> Flag_OptGhc ("-j" ++ show n) : flags''
+ let parChoice = concSemChoiceFromFlags flags''
+
+ -- Inject parallelism flags into ghc options, if given to Haddock
+ flags' <- pure $ injectParFlags parChoice flags''
+
+ concSem <- concSemFromChoice parChoice
-- Whether or not to bypass the interface version check
let noChecks = Flag_BypassInterfaceVersonCheck `elem` flags
@@ -238,7 +285,7 @@ haddockWithGhc ghc args = handleTopExceptions $ do
}
-- Render the interfaces.
- liftIO $ renderStep dflags parserOpts logger unit_state flags sinceQual qual packages ifaces
+ liftIO $ renderStep dflags parserOpts logger unit_state flags sinceQual qual concSem packages ifaces
-- If we were not given any input files, error if documentation was
-- requested
@@ -251,7 +298,7 @@ haddockWithGhc ghc args = handleTopExceptions $ do
packages <- liftIO $ readInterfaceFiles name_cache (readIfaceArgs flags) noChecks
-- Render even though there are no input files (usually contents/index).
- liftIO $ renderStep dflags parserOpts logger unit_state flags sinceQual qual packages []
+ liftIO $ renderStep dflags parserOpts logger unit_state flags sinceQual qual concSem packages []
-- | Run the GHC action using a temporary output directory
withTempOutputDir :: Ghc a -> Ghc a
@@ -311,10 +358,11 @@ renderStep
-> [Flag]
-> SinceQual
-> QualOption
+ -> AbstractSem
-> [(DocPaths, Visibility, FilePath, InterfaceFile)]
-> [Interface]
-> IO ()
-renderStep dflags parserOpts logger unit_state flags sinceQual nameQual pkgs interfaces = do
+renderStep dflags parserOpts logger unit_state flags sinceQual nameQual concSem pkgs interfaces = do
updateHTMLXRefs (map (\(docPath, _ifaceFilePath, _showModules, ifaceFile) ->
( case baseUrl flags of
Nothing -> docPathsHtml docPath
@@ -330,7 +378,7 @@ renderStep dflags parserOpts logger unit_state flags sinceQual nameQual pkgs int
(DocPaths {docPathsSources=Just path}, _, _, ifile) <- pkgs
iface <- ifInstalledIfaces ifile
return (instMod iface, path)
- render dflags parserOpts logger unit_state flags sinceQual nameQual interfaces installedIfaces extSrcMap
+ render dflags parserOpts logger unit_state flags sinceQual nameQual concSem interfaces installedIfaces extSrcMap
where
-- get package name from unit-id
packageName :: Unit -> String
@@ -348,11 +396,12 @@ render
-> [Flag]
-> SinceQual
-> QualOption
+ -> AbstractSem
-> [Interface]
-> [(FilePath, PackageInterfaces)]
-> Map Module FilePath
-> IO ()
-render dflags parserOpts logger unit_state flags sinceQual qual ifaces packages extSrcMap = do
+render dflags parserOpts logger unit_state flags sinceQual qual concSem ifaces packages extSrcMap = do
let
packageInfo = PackageInfo { piPackageName = fromMaybe (PackageName mempty)
$ optPackageName flags
@@ -516,7 +565,7 @@ render dflags parserOpts logger unit_state flags sinceQual qual ifaces packages
prologue
themes opt_mathjax sourceUrls' opt_wiki_urls opt_base_url
opt_contents_url opt_index_url unicode sincePkg packageInfo
- qual pretty withQuickjump
+ qual pretty concSem withQuickjump
return ()
unless (withBaseURL || isJust (optOneShot flags)) $ do
copyHtmlBits odir libDir themes withQuickjump
@@ -555,7 +604,7 @@ render dflags parserOpts logger unit_state flags sinceQual qual ifaces packages
when (Flag_HyperlinkedSource `elem` flags && not (null ifaces)) $ do
withTiming logger "ppHyperlinkedSource" (const ()) $ do
_ <- {-# SCC ppHyperlinkedSource #-}
- ppHyperlinkedSource (verbosity flags) (isJust (optOneShot flags)) odir libDir opt_source_css pretty srcMap ifaces
+ ppHyperlinkedSource (verbosity flags) (isJust (optOneShot flags)) odir libDir opt_source_css pretty concSem srcMap ifaces
return ()
@@ -842,4 +891,3 @@ getPrologue parserOpts flags =
rightOrThrowE :: Either String b -> IO b
rightOrThrowE (Left msg) = throwE msg
rightOrThrowE (Right x) = pure x
-
=====================================
utils/haddock/haddock-api/src/Haddock/Backends/Hyperlinker.hs
=====================================
@@ -31,7 +31,8 @@ import Haddock.Backends.Hyperlinker.Utils
import Haddock.Backends.Xhtml.Utils (renderToBuilder)
import Haddock.InterfaceFile
import Haddock.Types
-import Haddock.Utils (Verbosity, out, verbose)
+import Haddock.Utils (Verbosity, out, verbose, mapConcurrentlyWith_)
+import System.Semaphore (AbstractSem)
import qualified Data.ByteString.Builder as Builder
-- | Generate hyperlinked source for given interfaces.
@@ -51,19 +52,21 @@ ppHyperlinkedSource
-- ^ Custom CSS file path
-> Bool
-- ^ Flag indicating whether to pretty-print HTML
+ -> AbstractSem
+ -- ^ Concurrency semaphore for module renders
-> M.Map Module SrcPath
-- ^ Paths to sources
-> [Interface]
-- ^ Interfaces for which we create source
-> IO ()
-ppHyperlinkedSource verbosity isOneShot outdir libdir mstyle pretty srcs' ifaces = do
+ppHyperlinkedSource verbosity isOneShot outdir libdir mstyle pretty concSem srcs' ifaces = do
createDirectoryIfMissing True srcdir
unless isOneShot $ do
let cssFile = fromMaybe (defaultCssFile libdir) mstyle
copyFile cssFile $ srcdir </> srcCssFile
copyFile (libdir </> "html" </> highlightScript) $
srcdir </> highlightScript
- mapM_ (ppHyperlinkedModuleSource verbosity srcdir pretty srcs) ifaces
+ mapConcurrentlyWith_ concSem (ppHyperlinkedModuleSource verbosity srcdir pretty srcs) ifaces
where
srcdir = outdir </> hypSrcDir
srcs = (srcs', M.mapKeys moduleName srcs')
=====================================
utils/haddock/haddock-api/src/Haddock/Backends/Xhtml.hs
=====================================
@@ -69,6 +69,7 @@ import Haddock.ModuleTree
import Haddock.Options (Visibility (..))
import Haddock.Types
import Haddock.Utils
+import System.Semaphore (AbstractSem)
import Haddock.Utils.Json
import Haddock.Version
@@ -115,6 +116,8 @@ ppHtml
-- ^ How to qualify names
-> Bool
-- ^ Output pretty html (newlines and indenting)
+ -> AbstractSem
+ -- ^ Concurrency semaphore for module renders
-> Bool
-- ^ Also write Quickjump index
-> IO ()
@@ -138,6 +141,7 @@ ppHtml
packageInfo
qual
debug
+ concSem
withQuickjump = do
let
visible_ifaces = filter visible ifaces
@@ -192,7 +196,7 @@ ppHtml
visible_ifaces
[]
- mapM_
+ mapConcurrentlyWith_ concSem
( ppHtmlModule
odir
doctitle
=====================================
utils/haddock/haddock-api/src/Haddock/Options.hs
=====================================
@@ -29,6 +29,7 @@ module Haddock.Options
, wikiUrls
, baseUrl
, optParCount
+ , optParSemaphore
, optDumpInterfaceFile
, optShowInterfaceFile
, optLaTeXStyle
@@ -48,7 +49,7 @@ module Haddock.Options
import Control.Applicative
import qualified Data.Char as Char
-import Data.List (dropWhileEnd)
+import Data.List (dropWhileEnd, isPrefixOf)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
@@ -122,6 +123,7 @@ data Flag
| Flag_SinceQualification String
| Flag_IgnoreLinkSymbol String
| Flag_ParCount (Maybe Int)
+ | Flag_ParSemaphore FilePath
| Flag_TraceArgs
| Flag_OneShot String
| Flag_NoCompilation
@@ -406,6 +408,11 @@ options backwardsCompat =
[]
(OptArg (\count -> Flag_ParCount (fmap read count)) "n")
"load modules in parallel"
+ , Option
+ []
+ ["jsem"]
+ (ReqArg Flag_ParSemaphore "SEM")
+ "use semaphore SEM to limit parallelism"
, Option
[]
["trace-args"]
@@ -423,7 +430,7 @@ getUsage = do
parseHaddockOpts :: [String] -> IO ([Flag], [String])
parseHaddockOpts params =
- case getOpt Permute (options True) params of
+ case getOpt Permute (options True) (normalizeJsemArgs params) of
(flags, args, []) -> return (flags, args)
(_, _, errors) -> do
usage <- getUsage
@@ -498,6 +505,18 @@ optMathjax flags = optLast [str | Flag_Mathjax str <- flags]
optParCount :: [Flag] -> Maybe (Maybe Int)
optParCount flags = optLast [n | Flag_ParCount n <- flags]
+optParSemaphore :: [Flag] -> Maybe FilePath
+optParSemaphore flags = optLast [s | Flag_ParSemaphore s <- flags]
+
+normalizeJsemArgs :: [String] -> [String]
+normalizeJsemArgs = map rewrite
+ where
+ rewrite arg
+ | arg == "-jsem" = "--jsem"
+ | "-jsem=" `isPrefixOf` arg = "--jsem=" ++ drop 6 arg
+ | "-jsem" `isPrefixOf` arg = "--jsem=" ++ drop 5 arg
+ | otherwise = arg
+
qualification :: [Flag] -> Either String QualOption
qualification flags =
case map (map Char.toLower) [str | Flag_Qualification str <- flags] of
=====================================
utils/haddock/haddock-api/src/Haddock/Utils.hs
=====================================
@@ -54,6 +54,10 @@ module Haddock.Utils
, replace
, spanWith
+ -- * Concurrency utilities
+ , mapConcurrentlyWith_
+ , newBoundedSem
+
-- * Logging
, parseVerbosity
, Verbosity (..)
@@ -86,6 +90,13 @@ import Haddock.Types
import Data.Text.Lazy (Text)
import qualified Data.Text.Lazy as LText
+import Control.Concurrent (forkFinally)
+import Control.Concurrent.QSem (newQSem, signalQSem, waitQSem)
+import Control.Concurrent.MVar (newEmptyMVar, putMVar, takeMVar)
+import Control.Exception (throwIO)
+import Control.Monad (void)
+import System.Semaphore (AbstractSem (..))
+
--------------------------------------------------------------------------------
-- * Logging
@@ -334,6 +345,43 @@ html_xrefs = unsafePerformIO (readIORef html_xrefs_ref)
html_xrefs' :: Map ModuleName FilePath
html_xrefs' = unsafePerformIO (readIORef html_xrefs_ref')
+-- * Concurrency utilities
+
+--------------------------------------------------------------------------------
+
+mapConcurrentlyWith_ :: AbstractSem -> (a -> IO ()) -> [a] -> IO ()
+mapConcurrentlyWith_ _ _ [] = return ()
+mapConcurrentlyWith_ concSem f xs = do
+ -- Create MVars to wait for completion and collect results
+ resultMVars <- mapM (const newEmptyMVar) xs
+
+ -- Fork a thread for each element
+ mapM_ (forkThread concSem) (zip xs resultMVars)
+
+ -- Wait for all threads and collect any errors
+ results <- mapM takeMVar resultMVars
+
+ -- Re-throw the first exception if any
+ case [err | Left err <- results] of
+ (err:_) -> throwIO err
+ [] -> return ()
+ where
+ forkThread concSem' (x, resultMVar) = do
+ acquireSem concSem'
+ void $ forkFinally (f x) $ \res -> do
+ releaseSem concSem'
+ putMVar resultMVar res
+
+newBoundedSem :: Int -> IO AbstractSem
+newBoundedSem maxThreads = do
+ sem <- newQSem (max 1 maxThreads)
+ pure
+ AbstractSem
+ { acquireSem = waitQSem sem
+ , releaseSem = signalQSem sem
+ }
+
+
-----------------------------------------------------------------------------
-- * List utils
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/1c1bf1e36e7b064c17adf1d75563ad…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/1c1bf1e36e7b064c17adf1d75563ad…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/dcoutts/issue-27105-stopTicker-2] 10 commits: hadrian: Remove old package.conf files when generating new ones
by Duncan Coutts (@dcoutts) 11 Jun '26
by Duncan Coutts (@dcoutts) 11 Jun '26
11 Jun '26
Duncan Coutts pushed to branch wip/dcoutts/issue-27105-stopTicker-2 at Glasgow Haskell Compiler / GHC
Commits:
5ac9ce7d by Zubin Duggal at 2026-06-10T21:26:32+05:30
hadrian: Remove old package.conf files when generating new ones
Old package.conf files might exists with different hashes, causing issues like #26661
Fixes #26661
- - - - -
c9015f09 by sheaf at 2026-06-11T12:40:28-04:00
Fix AArch64 clobbering bug for MUL2
On AArch64, the code generator could clobber one of the input operands
when computing the lower bits of a MUL2 operation. This rendered invalid
the subsequent computation of the high bits.
This commit fixes that by using a temporary register. The register
allocator can remove the redundant move in the common case when the
registers do not conflict.
Fixes #27046
- - - - -
7ab90288 by Rodrigo Mesquita at 2026-06-11T12:41:11-04:00
fix: make T27131 less flaky
It seems that T27131 fails flakily in a race where we check the flag
before the capability had the chance to process the mailbox which sets
the flag. This seemingly should only happen if the capability ends up
being the same for setting and checking the flag.
- - - - -
de196632 by Duncan Coutts at 2026-06-11T22:55:19+01:00
Promote HAVE_PREEMPTION from Timer.c to OSThreads.h
We will want to know about HAVE_PREEMPTION in more places.
HAVE_PREEMPTION tells us that we do have OS threads available,
irrespective of whether THREADED is defined. In particular,
HAVE_PREEMPTION is defined on all proper OSs, but not on WASM (and
hyopthetically may not be true on some other platforms like
micro-controllers, RTOSs, VM hypervisors etc).
- - - - -
9cc10562 by Duncan Coutts at 2026-06-11T22:55:19+01:00
Define ACQUIRE_LOCK_ALWAYS and friends
Fix issue #27335
Like the atomic _ALWAYS variants, these lock actions are always defined,
rather than being dependent on whether we are in the THREADED case. All
the "normal" LOCK macros are defined to be no-ops when !THREADED.
The use case for the _ALWAYS variants is where we are using OS threads
even in the non-threaded RTS. This includes everything to do with the
timer/ticker thread, which is used in the non-threaded RTS too.
In particular, we will want to use this for eventlog things, because the
timer thread performs eventlogging concurrently with the main
capability, even in the non-threaded RTS.
- - - - -
567ac8e7 by Duncan Coutts at 2026-06-11T22:55:19+01:00
Use ACQUIRE/RELEASE_LOCK_ALWAYS with eventBufMutex
Even in the non-threaded RTS the eventBufMutex is needed by both the
main capability and the timer/ticker thread, so always use the mutex.
This should fix #25165 which is about the main capability and the timer
thread posting events to the eventlog buffer concurrently and thereby
corrupting the buffer data.
- - - - -
f59329b3 by Duncan Coutts at 2026-06-11T22:55:19+01:00
Expose eventBufMutex in the EventLog interface/header
We will need it in forkProcess to ensure we don't write to the global
eventlog buffer concurrently with trying to flush eventlog buffers and
do the fork().
- - - - -
0f7eb782 by Duncan Coutts at 2026-06-11T22:55:19+01:00
Split flushAllCapsEventsBufs into safe and unlocked version
Following the convention that unlocked versions have a trailing _
underscore in their name. This one requires the caller to hold the
eventlog global buffer mutex. We will need this in forkProcess.
- - - - -
c3b93527 by Duncan Coutts at 2026-06-11T22:55:19+01:00
Remove redundant use of stopTimer in setNumCapabilities
Historically, the comment here was:
We must stop the interval timer while we are changing the
capabilities array lest handle_tick may try to context switch
an old capability. See #17289.
and
We must disable the timer while we do this since the tick handler may
call contextSwitchAllCapabilities, which may see the capabilities array
as we free it.
What this refers to is that historically, when changing the number of
capabilities, the array of capabilities was reallocated to a new size,
allocating new ones and freeing the old ones, thus invalidating all
existing capbility pointers.
Strangely, for good measure the code used to call stopTimer twice (hence
the two similar comments above).
However, since commit a3eccf06292dd666b24606251a52da2b466a9612, the
capabilities array is no longer reallocated. Instead the array is
allcoated once on RTS startup to the maximum size it could ever be
allowed to be, and then capabilities get enabled/disabled at runtime. So
the capability pointers never become invalid anymore. At worst, they may
point to capabilities that are disabled.
Thus we no longer need to stop the timer (twice) while we change the
number of enabled capabilities. This also partially solves issue #27105,
which notes that stopTimer is being used as if it were synchronous, when
it is not. At least for this case, the solution is that stopTimer is not
needed at all!
- - - - -
86cfb7b6 by Duncan Coutts at 2026-06-11T22:55:19+01:00
Remove redundant use of stopTimer in forkProcess
but replace it with taking the eventlog buffer lock during the fork.
Fixes issue #27105
The original reason to block the timer during a fork was that
historically the timer was implemented using a periodic timer signal,
and the signal itself would interrupt the fork system call (returning
EINTR). For large processes (where fork() takes a while) this could
permanently livelock: the timer always would go off before the fork
could complete, which got retried in a loop forever.
The timer is no longer implemented as a unix signal, but uses threads.
Thus the original problem no longer exists. The only remaining reason to
block the timer tick is to prevent actions taken by the tick from
interfering with the delicate process involved in fork (taking a load of
locks and pausing everything).
The only thing we need to do is to prevent the eventlog from being
written to or flushed while the fork is taking place. To achieve this
all we need to do is hold the mutex for the global eventlog buffer.
This removes the last use of stopTimer that expects stopTimer to work
synchronously (which it was not) and thus solves issue #27105. To be
clear, we solve issue #27105 not by making stopTimer synchronous, but by
eliminating the use sites that expected it to be synchronous.
- - - - -
15 changed files:
- + changelog.d/T27046
- + changelog.d/hadrian-stale-package-confs-26661
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- hadrian/src/Hadrian/Haskell/Cabal/Parse.hs
- rts/Capability.c
- rts/Schedule.c
- rts/Timer.c
- rts/eventlog/EventLog.c
- rts/eventlog/EventLog.h
- rts/include/rts/OSThreads.h
- + testsuite/tests/codeGen/should_run/T27046.hs
- + testsuite/tests/codeGen/should_run/T27046_cmm.cmm
- testsuite/tests/codeGen/should_run/all.T
- testsuite/tests/rts/T27131.hs
- testsuite/tests/rts/T27131.stdout
Changes:
=====================================
changelog.d/T27046
=====================================
@@ -0,0 +1,9 @@
+section: compiler
+issues: #27046
+mrs: !16031
+synopsis:
+ Avoid AArch64 register clobbering bug in MUL2
+description:
+ Fixes an issue in which, on AArch64, code generation for the MUL2 operation
+ could clobber one of the input operands when computing the lower bits, which
+ rendered invalid the subsequent computation of the high bits.
=====================================
changelog.d/hadrian-stale-package-confs-26661
=====================================
@@ -0,0 +1,6 @@
+section: packaging
+synopsis: Hadrian no longer leaves stale `.conf` files in its package databases
+ when rebuilding in the same build root with different settings (e.g. another
+ flavour, or when hashes change with +hash-unit-ids).
+issues: #26661
+mrs: !15186
=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -2300,11 +2300,19 @@ genCCall target dest_regs arg_regs = do
let lo = getRegisterReg platform (CmmLocal dst_lo)
hi = getRegisterReg platform (CmmLocal dst_hi)
nd = getRegisterReg platform (CmmLocal dst_needed)
+
+ -- Generate a fresh virtual register for the low word computation.
+ -- This avoids clobbering reg_a or reg_b in the first MUL instruction,
+ -- which could for example happen if 'lo' and 'reg_a' are the same
+ -- virtual register.
+ tmp_lo <- getNewRegNat II64
+
return $
code_x `appOL`
code_y `snocOL`
- MUL II64 (OpReg W64 lo) (OpReg W64 reg_a) (OpReg W64 reg_b) `snocOL`
+ MUL II64 (OpReg W64 tmp_lo) (OpReg W64 reg_a) (OpReg W64 reg_b) `snocOL`
SMULH (OpReg W64 hi) (OpReg W64 reg_a) (OpReg W64 reg_b) `snocOL`
+ MOV (OpReg W64 lo) (OpReg W64 tmp_lo) `snocOL`
-- Are all high bits equal to the sign bit of the low word?
-- nd = (hi == ASR(lo,width-1)) ? 1 : 0
CMP (OpReg W64 hi) (OpRegShift W64 lo SASR (widthInBits w - 1)) `snocOL`
=====================================
hadrian/src/Hadrian/Haskell/Cabal/Parse.hs
=====================================
@@ -15,6 +15,7 @@ module Hadrian.Haskell.Cabal.Parse (
) where
import Data.Bifunctor
+import Data.Char (isDigit)
import Data.List.Extra
import Development.Shake
import qualified Distribution.Compat.Graph as Graph
@@ -55,6 +56,8 @@ import Builder
import Context
import Settings
import Distribution.Simple.LocalBuildInfo
+import Distribution.Types.LocalBuildInfo (allTargetsInBuildOrder')
+import Distribution.Types.TargetInfo (TargetInfo (..))
import qualified Distribution.Simple.Register as C
import System.Directory (getCurrentDirectory)
import qualified Distribution.InstalledPackageInfo as CP
@@ -394,35 +397,48 @@ registerPackage rs context = do
-- Note: the @cPath@ is ignored. The path that's used is the 'buildDir' path
-- from the local build info @lbi@.
lbi <- liftIO $ C.getPersistBuildConfig Nothing (C.makeSymbolicPath cPath)
- liftIO $ register db_path pid pd lbi
+ -- This runs `ghc --abi-hash`, so do it outside the critical section below.
+ installedPkgInfo <- liftIO $ generateRegistrationInfo pd lbi
+
+ let pkg_name = pkgName (package context)
+ -- Is this a pkg.conf for a previous build?
+ -- we want to match "ghc-9.15.1-abcd.conf" but not "ghc-boot-9.15.1.conf"
+ isPkgConf f = case stripPrefix (pkg_name ++ "-") (takeBaseName f) of
+ Just (c:_) -> isDigit c
+ _ -> takeBaseName f == pkg_name
+
+ -- Unlike `ghc-pkg update/register` (used to populate the inplace and stage0
+ -- databases), writing the .conf file directly doesn't remove units this
+ -- package was previously registered under. Stale .conf files from earlier
+ -- builds make this package's modules ambiguous (#26661), so delete them
+ -- before writing the new .conf file.
+ withResources rs $ do
+ confs <- liftIO $ getDirectoryFilesIO db_path ["*.conf"]
+ mapM_ (removeFile . (db_path </>))
+ [ f | f <- confs, isPkgConf f, takeBaseName f /= pid ]
+ liftIO $ writeUTF8File (db_path </> pid <.> "conf")
+ (CP.showInstalledPackageInfo installedPkgInfo)
-- Then after the register, which just writes the .conf file, do the recache step.
buildWithResources rs $
target context (GhcPkg Recache (stage context)) [] []
-- This is copied and simplified from Cabal, because we want to install the package
-- into a different package database to the one it was configured against.
-register :: FilePath
- -> String -- ^ Package Identifier
- -> C.PackageDescription
- -> LocalBuildInfo
- -> IO ()
-register pkg_db pid pd lbi
- = withLibLBI pd lbi $ \lib clbi -> do
-
- when reloc $ error "register does not support reloc"
- installedPkgInfo <- generateRegistrationInfo pd lbi lib clbi
- writeRegistrationFile installedPkgInfo
-
- where
- regFile = pkg_db </> pid <.> "conf"
- reloc = relocatable lbi
-
- generateRegistrationInfo pkg lbi lib clbi = do
- abi_hash <- C.mkAbiHash <$> GHC.libAbiHash C.silent pkg lbi lib clbi
- return (C.absoluteInstalledPackageInfo pkg abi_hash lib lbi clbi)
-
- writeRegistrationFile installedPkgInfo = do
- writeUTF8File regFile (CP.showInstalledPackageInfo installedPkgInfo)
+-- See generateRegistrationInfo in Distribution.Simple.Register. we can't use it
+-- directly because it computes the abi-hash using Cabal's internal package
+-- database, which hadrian never creates.
+generateRegistrationInfo :: C.PackageDescription
+ -> LocalBuildInfo
+ -> IO Installed.InstalledPackageInfo
+generateRegistrationInfo pd lbi = do
+ when (relocatable lbi) $ error "register does not support reloc"
+ case [ (lib, targetCLBI tgt) | tgt <- allTargetsInBuildOrder' pd lbi
+ , CLib lib <- [targetComponent tgt] ] of
+ [(lib, clbi)] -> do
+ abi_hash <- C.mkAbiHash <$> GHC.libAbiHash C.silent pd lbi lib clbi
+ return (C.absoluteInstalledPackageInfo pd abi_hash lib lbi clbi)
+ libs -> error $ "generateRegistrationInfo: expected exactly one library for "
+ ++ C.display (C.package pd) ++ ", got " ++ show (length libs)
-- | Build autogenerated files @autogen/cabal_macros.h@ and @autogen/Paths_*.hs@.
=====================================
rts/Capability.c
=====================================
@@ -443,13 +443,6 @@ void
moreCapabilities (uint32_t from USED_IF_THREADS, uint32_t to USED_IF_THREADS)
{
#if defined(THREADED_RTS)
- // We must disable the timer while we do this since the tick handler may
- // call contextSwitchAllCapabilities, which may see the capabilities array
- // as we free it. The alternative would be to protect the capabilities
- // array with a lock but this seems more expensive than necessary.
- // See #17289.
- stopTimer();
-
if (to == 1) {
// THREADED_RTS must work on builds that don't have a mutable
// BaseReg (eg. unregisterised), so in this case
@@ -470,8 +463,6 @@ moreCapabilities (uint32_t from USED_IF_THREADS, uint32_t to USED_IF_THREADS)
}
debugTrace(DEBUG_sched, "allocated %d more capabilities", to - from);
-
- startTimer();
#endif
}
=====================================
rts/Schedule.c
=====================================
@@ -37,6 +37,7 @@
#include "win32/AsyncWinIO.h"
#endif
#include "Trace.h"
+#include "eventlog/EventLog.h"
#include "RaiseAsync.h"
#include "Threads.h"
#include "Timer.h"
@@ -2100,24 +2101,31 @@ forkProcess(HsStablePtr *entry
ACQUIRE_LOCK(&all_tasks_mutex);
#endif
- stopTimer(); // See #4074
-
#if defined(TRACING)
- flushAllCapsEventsBufs(); // so that child won't inherit dirty file buffers
+#if defined(HAVE_PREEMPTION)
+ // We must hold the eventlog global mutex over the fork to prevent the
+ // timer thread from trying to post events. While holding the mutex we need
+ // to flush the eventlogs (global and per-cap) so that child won't inherit
+ // dirty eventlog buffers or file buffers.
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
+#endif
+ flushAllCapsEventsBufs_();
#endif
pid = fork();
if (pid) { // parent
- startTimer(); // #4074
-
RELEASE_LOCK(&sched_mutex);
RELEASE_LOCK(&sm_mutex);
RELEASE_LOCK(&stable_ptr_mutex);
RELEASE_LOCK(&stable_name_mutex);
RELEASE_LOCK(&task->lock);
+#if defined(TRACING) && defined(HAVE_PREEMPTION)
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
+#endif
+
#if defined(THREADED_RTS)
/* N.B. releaseCapability_ below may need to take all_tasks_mutex */
RELEASE_LOCK(&all_tasks_mutex);
@@ -2303,12 +2311,6 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
cap = rts_lock();
task = cap->running_task;
-
- // N.B. We must stop the interval timer while we are changing the
- // capabilities array lest handle_tick may try to context switch
- // an old capability. See #17289.
- stopTimer();
-
stopAllCapabilities(&cap, task);
if (new_n_capabilities < enabled_capabilities)
@@ -2364,9 +2366,7 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
tracingAddCapabilities(n_capabilities, new_n_capabilities);
#endif
- // Resize the capabilities array
- // NB. after this, capabilities points somewhere new. Any pointers
- // of type (Capability *) are now invalid.
+ // Allocate and initialise the extra capabilities
moreCapabilities(n_capabilities, new_n_capabilities);
// Resize and update storage manager data structures
@@ -2394,8 +2394,6 @@ setNumCapabilities (uint32_t new_n_capabilities USED_IF_THREADS)
// Notify IO manager that the number of capabilities has changed.
notifyIOManagerCapabilitiesChanged(&cap);
- startTimer();
-
rts_unlock(cap);
#endif // THREADED_RTS
=====================================
rts/Timer.c
=====================================
@@ -28,11 +28,6 @@
#include "RtsSignals.h"
#include "rts/EventLogWriter.h"
-// See Note [No timer on wasm32]
-#if !defined(wasm32_HOST_ARCH)
-#define HAVE_PREEMPTION
-#endif
-
// This global counter is used to allow multiple threads to stop the
// timer temporarily with a stopTimer()/startTimer() pair. If
// timer_enabled == 0 timer is enabled
=====================================
rts/eventlog/EventLog.c
=====================================
@@ -129,8 +129,11 @@ typedef struct _EventsBuf {
static EventsBuf *capEventBuf; // one EventsBuf for each Capability
static EventsBuf eventBuf; // an EventsBuf not associated with any Capability
-#if defined(THREADED_RTS)
-static Mutex eventBufMutex; // protected by this mutex
+#if defined(HAVE_PREEMPTION)
+// Note that this mutex is used even in the non-threaded RTS, since the timer
+// thread posts events and flushes. So _all_ uses of this mutex must use
+// ACQUIRE_LOCK_ALWAYS/RELEASE_LOCK_ALWAYS.
+Mutex eventBufMutex; // protects eventBuf above
#endif
// Event type
@@ -393,8 +396,10 @@ initEventLogging(void)
moreCapEventBufs(0, get_n_capabilities());
initEventsBuf(&eventBuf, EVENT_LOG_SIZE, (EventCapNo)(-1));
-#if defined(THREADED_RTS)
+#if defined(HAVE_PREEMPTION)
initMutex(&eventBufMutex);
+#endif
+#if defined(THREADED_RTS)
initMutex(&state_change_mutex);
#endif
}
@@ -416,7 +421,7 @@ startEventLogging_(void)
{
initEventLogWriter();
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
postHeaderEvents();
/*
@@ -425,7 +430,7 @@ startEventLogging_(void)
*/
printAndClearEventBuf(&eventBuf);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
return true;
}
@@ -495,7 +500,7 @@ endEventLogging(void)
flushEventLog_(NULL);
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
// Mark end of events (data).
postEventTypeNum(&eventBuf, EVENT_DATA_END);
@@ -503,7 +508,7 @@ endEventLogging(void)
// Flush the end of data marker.
printAndClearEventBuf(&eventBuf);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
stopEventLogWriter();
event_log_writer = NULL;
@@ -666,7 +671,7 @@ void
postCapEvent (EventTypeNum tag,
EventCapNo capno)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, tag);
postEventHeader(&eventBuf, tag);
@@ -685,14 +690,14 @@ postCapEvent (EventTypeNum tag,
barf("postCapEvent: unknown event tag %d", tag);
}
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postCapsetEvent (EventTypeNum tag,
EventCapsetID capset,
StgWord info)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, tag);
postEventHeader(&eventBuf, tag);
@@ -726,7 +731,7 @@ void postCapsetEvent (EventTypeNum tag,
barf("postCapsetEvent: unknown event tag %d", tag);
}
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postCapsetStrEvent (EventTypeNum tag,
@@ -740,14 +745,14 @@ void postCapsetStrEvent (EventTypeNum tag,
return;
}
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
if (!hasRoomForVariableEvent(&eventBuf, size)){
printAndClearEventBuf(&eventBuf);
if (!hasRoomForVariableEvent(&eventBuf, size)){
errorBelch("Event size exceeds buffer size, bail out");
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
return;
}
}
@@ -758,7 +763,7 @@ void postCapsetStrEvent (EventTypeNum tag,
postBuf(&eventBuf, (StgWord8*) msg, strsize);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postCapsetVecEvent (EventTypeNum tag,
@@ -783,14 +788,14 @@ void postCapsetVecEvent (EventTypeNum tag,
}
}
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
if (!hasRoomForVariableEvent(&eventBuf, size)){
printAndClearEventBuf(&eventBuf);
if(!hasRoomForVariableEvent(&eventBuf, size)){
errorBelch("Event size exceeds buffer size, bail out");
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
return;
}
}
@@ -804,7 +809,7 @@ void postCapsetVecEvent (EventTypeNum tag,
postBuf(&eventBuf, (StgWord8*) argv[i], 1 + strlen(argv[i]));
}
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postWallClockTime (EventCapsetID capset)
@@ -813,7 +818,7 @@ void postWallClockTime (EventCapsetID capset)
StgWord64 sec;
StgWord32 nsec;
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
/* The EVENT_WALL_CLOCK_TIME event is intended to allow programs
reading the eventlog to match up the event timestamps with wall
@@ -846,7 +851,7 @@ void postWallClockTime (EventCapsetID capset)
postWord64(&eventBuf, sec);
postWord32(&eventBuf, nsec);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
/*
@@ -885,7 +890,7 @@ void postEventHeapInfo (EventCapsetID heap_capset,
W_ mblockSize,
W_ blockSize)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_HEAP_INFO_GHC);
postEventHeader(&eventBuf, EVENT_HEAP_INFO_GHC);
@@ -899,7 +904,7 @@ void postEventHeapInfo (EventCapsetID heap_capset,
postWord64(&eventBuf, mblockSize);
postWord64(&eventBuf, blockSize);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postEventGcStats (Capability *cap,
@@ -952,7 +957,7 @@ void postTaskCreateEvent (EventTaskId taskId,
EventCapNo capno,
EventKernelThreadId tid)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_TASK_CREATE);
postEventHeader(&eventBuf, EVENT_TASK_CREATE);
@@ -961,14 +966,14 @@ void postTaskCreateEvent (EventTaskId taskId,
postCapNo(&eventBuf, capno);
postKernelThreadId(&eventBuf, tid);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postTaskMigrateEvent (EventTaskId taskId,
EventCapNo capno,
EventCapNo new_capno)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_TASK_MIGRATE);
postEventHeader(&eventBuf, EVENT_TASK_MIGRATE);
@@ -977,28 +982,28 @@ void postTaskMigrateEvent (EventTaskId taskId,
postCapNo(&eventBuf, capno);
postCapNo(&eventBuf, new_capno);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postTaskDeleteEvent (EventTaskId taskId)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_TASK_DELETE);
postEventHeader(&eventBuf, EVENT_TASK_DELETE);
/* EVENT_TASK_DELETE (taskID) */
postTaskId(&eventBuf, taskId);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void
postEventNoCap (EventTypeNum tag)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, tag);
postEventHeader(&eventBuf, tag);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void
@@ -1042,9 +1047,9 @@ void postLogMsg(EventsBuf *eb, EventTypeNum type, char *msg, va_list ap)
void postMsg(char *msg, va_list ap)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
postLogMsg(&eventBuf, EVENT_LOG_MSG, msg, ap);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postCapMsg(Capability *cap, char *msg, va_list ap)
@@ -1138,32 +1143,32 @@ void postConcUpdRemSetFlush(Capability *cap)
void postConcMarkEnd(StgWord32 marked_obj_count)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_CONC_MARK_END);
postEventHeader(&eventBuf, EVENT_CONC_MARK_END);
postWord32(&eventBuf, marked_obj_count);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postNonmovingHeapCensus(uint16_t blk_size,
const struct NonmovingAllocCensus *census)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
postEventHeader(&eventBuf, EVENT_NONMOVING_HEAP_CENSUS);
postWord16(&eventBuf, blk_size);
postWord32(&eventBuf, census->n_active_segs);
postWord32(&eventBuf, census->n_filled_segs);
postWord32(&eventBuf, census->n_live_blocks);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postNonmovingPrunedSegments(uint32_t pruned_segments, uint32_t free_segments)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
postEventHeader(&eventBuf, EVENT_NONMOVING_PRUNED_SEGMENTS);
postWord32(&eventBuf, pruned_segments);
postWord32(&eventBuf, free_segments);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void closeBlockMarker (EventsBuf *ebuf)
@@ -1224,7 +1229,7 @@ static HeapProfBreakdown getHeapProfBreakdown(void)
void postHeapProfBegin(void)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
PROFILING_FLAGS *flags = &RtsFlags.ProfFlags;
StgWord modSelector_len =
flags->modSelector ? strlen(flags->modSelector) : 0;
@@ -1258,42 +1263,42 @@ void postHeapProfBegin(void)
postStringLen(&eventBuf, flags->ccsSelector, ccsSelector_len);
postStringLen(&eventBuf, flags->retainerSelector, retainerSelector_len);
postStringLen(&eventBuf, flags->bioSelector, bioSelector_len);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postHeapProfSampleBegin(StgInt era)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_HEAP_PROF_SAMPLE_BEGIN);
postEventHeader(&eventBuf, EVENT_HEAP_PROF_SAMPLE_BEGIN);
postWord64(&eventBuf, era);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postHeapBioProfSampleBegin(StgInt era, StgWord64 time)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_HEAP_BIO_PROF_SAMPLE_BEGIN);
postEventHeader(&eventBuf, EVENT_HEAP_BIO_PROF_SAMPLE_BEGIN);
postWord64(&eventBuf, era);
postWord64(&eventBuf, time);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postHeapProfSampleEnd(StgInt era)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_HEAP_PROF_SAMPLE_END);
postEventHeader(&eventBuf, EVENT_HEAP_PROF_SAMPLE_END);
postWord64(&eventBuf, era);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postHeapProfSampleString(const char *label,
StgWord64 residency)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
StgWord label_len = strlen(label);
StgWord len = 1+8+label_len+1;
CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
@@ -1303,7 +1308,7 @@ void postHeapProfSampleString(const char *label,
postWord8(&eventBuf, 0);
postWord64(&eventBuf, residency);
postStringLen(&eventBuf, label, label_len);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
#if defined(PROFILING)
@@ -1313,7 +1318,7 @@ void postHeapProfCostCentre(StgWord32 ccID,
const char *srcloc,
StgBool is_caf)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
StgWord label_len = strlen(label);
StgWord module_len = strlen(module);
StgWord srcloc_len = strlen(srcloc);
@@ -1326,13 +1331,13 @@ void postHeapProfCostCentre(StgWord32 ccID,
postStringLen(&eventBuf, module, module_len);
postStringLen(&eventBuf, srcloc, srcloc_len);
postWord8(&eventBuf, is_caf);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void postHeapProfSampleCostCentre(CostCentreStack *stack,
StgWord64 residency)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
StgWord depth = 0;
CostCentreStack *ccs;
for (ccs = stack; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack)
@@ -1351,7 +1356,7 @@ void postHeapProfSampleCostCentre(CostCentreStack *stack,
depth>0 && ccs != NULL && ccs != CCS_MAIN;
ccs = ccs->prevStack, depth--)
postWord32(&eventBuf, ccs->cc->ccID);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
@@ -1359,7 +1364,7 @@ void postProfSampleCostCentre(Capability *cap,
CostCentreStack *stack,
StgWord64 tick)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
StgWord depth = 0;
CostCentreStack *ccs;
for (ccs = stack; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack)
@@ -1377,7 +1382,7 @@ void postProfSampleCostCentre(Capability *cap,
depth>0 && ccs != NULL && ccs != CCS_MAIN;
ccs = ccs->prevStack, depth--)
postWord32(&eventBuf, ccs->cc->ccID);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
// This event is output at the start of profiling so the tick interval can
@@ -1385,11 +1390,11 @@ void postProfSampleCostCentre(Capability *cap,
// can be calculated from how many samples there are.
void postProfBegin(void)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
postEventHeader(&eventBuf, EVENT_PROF_BEGIN);
// The interval that each tick was sampled, in nanoseconds
postWord64(&eventBuf, TimeToNS(RtsFlags.MiscFlags.tickInterval));
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
#endif /* PROFILING */
@@ -1415,11 +1420,11 @@ static void postTickyCounterDef(EventsBuf *eb, StgEntCounter *p)
void postTickyCounterDefs(StgEntCounter *counters)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
for (StgEntCounter *p = counters; p != NULL; p = p->link) {
postTickyCounterDef(&eventBuf, p);
}
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
static void postTickyCounterSample(EventsBuf *eb, StgEntCounter *p)
@@ -1443,13 +1448,13 @@ static void postTickyCounterSample(EventsBuf *eb, StgEntCounter *p)
void postTickyCounterSamples(StgEntCounter *counters)
{
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
ensureRoomForEvent(&eventBuf, EVENT_TICKY_COUNTER_SAMPLE);
postEventHeader(&eventBuf, EVENT_TICKY_COUNTER_BEGIN_SAMPLE);
for (StgEntCounter *p = counters; p != NULL; p = p->link) {
postTickyCounterSample(&eventBuf, p);
}
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
#endif /* TICKY_TICKY */
void postIPE(const InfoProvEnt *ipe)
@@ -1459,7 +1464,7 @@ void postIPE(const InfoProvEnt *ipe)
// See Note [Maximum event length].
const StgWord MAX_IPE_STRING_LEN = 65535;
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
StgWord table_name_len = MIN(strlen(ipe->prov.table_name), MAX_IPE_STRING_LEN);
StgWord closure_desc_len = MIN(strlen(closure_desc_buf), MAX_IPE_STRING_LEN);
StgWord ty_desc_len = MIN(strlen(ipe->prov.ty_desc), MAX_IPE_STRING_LEN);
@@ -1489,7 +1494,7 @@ void postIPE(const InfoProvEnt *ipe)
postBuf(&eventBuf, &colon, 1);
postStringLen(&eventBuf, ipe->prov.src_span, src_span_len);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
}
void printAndClearEventBuf (EventsBuf *ebuf)
@@ -1601,14 +1606,21 @@ void flushLocalEventsBuf(Capability *cap)
// Flush all capabilities' event buffers when we already hold all capabilities.
// Used during forkProcess.
void flushAllCapsEventsBufs(void)
+{
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
+ flushAllCapsEventsBufs_();
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
+}
+
+// Unsafe version that does not acquire/release eventBufMutex. You must
+// hold the eventBufMutex, which you must acquire with ACQUIRE_LOCK_ALWAYS!
+void flushAllCapsEventsBufs_(void)
{
if (!event_log_writer) {
return;
}
- ACQUIRE_LOCK(&eventBufMutex);
printAndClearEventBuf(&eventBuf);
- RELEASE_LOCK(&eventBufMutex);
for (unsigned int i=0; i < getNumCapabilities(); i++) {
flushLocalEventsBuf(getCapability(i));
@@ -1641,9 +1653,9 @@ static void flushEventLog_(Capability **cap USED_IF_THREADS)
return;
}
- ACQUIRE_LOCK(&eventBufMutex);
+ ACQUIRE_LOCK_ALWAYS(&eventBufMutex);
printAndClearEventBuf(&eventBuf);
- RELEASE_LOCK(&eventBufMutex);
+ RELEASE_LOCK_ALWAYS(&eventBufMutex);
#if defined(THREADED_RTS)
Task *task = newBoundTask();
=====================================
rts/eventlog/EventLog.h
=====================================
@@ -18,6 +18,13 @@
#if defined(TRACING)
extern bool eventlog_enabled;
+#if defined(HAVE_PREEMPTION)
+// Avoid using this mutex directly if at all possible. It is needed in the
+// implementation of forkProcess.
+//
+// All uses of this mutex must use ACQUIRE_LOCK_ALWAYS/RELEASE_LOCK_ALWAYS.
+extern Mutex eventBufMutex;
+#endif
void initEventLogging(void);
void restartEventLogging(void);
@@ -27,6 +34,7 @@ void abortEventLogging(void); // #4512 - after fork child needs to abort
void moreCapEventBufs (uint32_t from, uint32_t to);
void flushLocalEventsBuf(Capability *cap);
void flushAllCapsEventsBufs(void);
+void flushAllCapsEventsBufs_(void);
void flushAllEventsBufs(Capability *cap);
typedef void (*EventlogInitPost)(void);
=====================================
rts/include/rts/OSThreads.h
=====================================
@@ -14,6 +14,46 @@
#pragma once
+/* Note [Threads and preemption]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ All full-fat OSs that GHC works on have OS threads, and we use them even in
+ the non-threaded RTS for a few features:
+ * Haskell thread preemption;
+ * sample-based profiling;
+ * idle GC;
+ * periodic eventlog flushing.
+
+ We use defined(HAVE_PREEMPTION) to decide if these features are implemented
+ via OS threads.
+
+ On platforms like WASM/js we do not have OS threads in any conventional
+ sense, and the features above are either not available or are implemented
+ differently. See Note [No timer on wasm32].
+
+ In future if GHC is ported to platforms like bare-metal micro-controllers,
+ RTOSs or to run directly under hypervisors then such platforms may also not
+ have threads available and they should not define HAVE_PREEMPTION here. Or
+ for some micro-controller RTOSs like Zeypher one may have a choice about
+ whether to use threads or not (at a size cost). Here would be the right
+ place to control whether the feature list above is supported.
+ */
+#if defined(wasm32_HOST_ARCH)
+ // See Note [No timer on wasm32]
+ // To confuse matters, WASM _does_ have pthread.h but it doesnt work.
+#elif defined(HAVE_PTHREAD_H) || defined(HAVE_WINDOWS_H)
+#define HAVE_PREEMPTION
+#else
+#error Decide if this platform has threads and pre-emption or not.
+#endif
+// And JS does all of this differently, without using this bit of the RTS.
+
+// Configuration sanity check
+#if defined(THREADED_RTS) && !defined(HAVE_PREEMPTION)
+//TODO we would like to be able to assert this:
+// #error Configuration error: THREADED_RTS should imply HAVE_PREEMPTION
+// however at the moment we cannot due to issue #27346.
+#endif
+
#if defined(HAVE_PTHREAD_H) && !defined(mingw32_HOST_OS)
#if defined(CMINUSMINUS)
@@ -210,9 +250,29 @@ extern bool timedWaitCondition ( Condition* pCond, Mutex* pMut, Time timeout)
//
// Mutexes
//
+// Even in the non-threaded RTS we use threads and mutexes! In particular the
+// timer/ticker is implemented using a thread. And using threads needs locks.
+// In particular we need locks for the data shared between the timer/ticker
+// thread and the thread running the main capability.
+#if defined(HAVE_PREEMPTION)
extern void initMutex ( Mutex* pMut );
extern void closeMutex ( Mutex* pMut );
+// The "always" variants do locking in the threaded and non-threaded RTS.
+// The normal variants below are no-ops in the non-threaded RTS.
+#define ACQUIRE_LOCK_ALWAYS(l) OS_ACQUIRE_LOCK(l)
+#define TRY_ACQUIRE_LOCK_ALWAYS(l) OS_TRY_ACQUIRE_LOCK(l)
+#define RELEASE_LOCK_ALWAYS(l) OS_RELEASE_LOCK(l)
+#define ASSERT_LOCK_HELD_ALWAYS(l) OS_ASSERT_LOCK_HELD(l)
+#else
+// And just to be a bit confusing, the always variants are still no-ops when we
+// do not HAVE_PREEMPTION, since then we don't have threads or mutexes at all.
+#define ACQUIRE_LOCK_ALWAYS(l)
+#define TRY_ACQUIRE_LOCK_ALWAYS(l) 0
+#define RELEASE_LOCK_ALWAYS(l)
+#define ASSERT_LOCK_HELD_ALWAYS(l)
+#endif
+
// Processors and affinity
void setThreadAffinity (uint32_t n, uint32_t m);
void setThreadNode (uint32_t node);
@@ -228,6 +288,7 @@ void releaseThreadNode (void);
#else
+// No-ops in the non-threaded RTS. See also the _ALWAYS variants above.
#define ACQUIRE_LOCK(l)
#define TRY_ACQUIRE_LOCK(l) 0
#define RELEASE_LOCK(l)
=====================================
testsuite/tests/codeGen/should_run/T27046.hs
=====================================
@@ -0,0 +1,29 @@
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE ForeignFunctionInterface, GHCForeignImportPrim, UnliftedFFITypes #-}
+
+module Main where
+
+import Control.Monad
+ ( unless )
+import Data.Bits
+ ( shiftL )
+import GHC.Exts
+ ( Int64# )
+import GHC.Int
+ ( Int64(..) )
+
+foreign import prim "test_mul2_clobber"
+ test_mul2_clobber :: Int64# -> Int64# -> Int64#
+
+main :: IO ()
+main = do
+ let
+ I64# x = 1 `shiftL` 32
+ hi = I64# $ test_mul2_clobber x x
+
+ unless ( hi == 1 ) $
+ error $ unlines
+ [ "Incorrect result for Mul2 operation."
+ , "Expected high word: 1"
+ , " Actual high word: " ++ show hi
+ ]
=====================================
testsuite/tests/codeGen/should_run/T27046_cmm.cmm
=====================================
@@ -0,0 +1,13 @@
+#include "Cmm.h"
+
+// Test for #27046
+test_mul2_clobber (bits64 x, bits64 y)
+{
+ bits64 hi, nd;
+
+ // Deliberately alias the destination 'lo' with the source 'x'
+ // This forces the NCG to use the same virtual register for both.
+ (nd, hi, x) = prim %mul2_64(x, y);
+
+ return (hi);
+}
=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -260,6 +260,13 @@ test('T25364', normal, compile_and_run, [''])
test('T26061', normal, compile_and_run, [''])
test('T26537', normal, compile_and_run, ['-O2 -fregs-graph'])
test('T24016', normal, compile_and_run, ['-O1 -fPIC'])
+test('T27046',
+ [ req_cmm
+ , when(arch('i386'), skip) # i386 does not support MO_S_Mul2 W64
+ , when(arch('wasm32'), skip)
+ , js_skip
+ , when(unregisterised(), skip) # pprCallishMachOp_for_C: MO_S_Mul2 W64 not supported
+ ], compile_and_run, ['T27046_cmm.cmm'])
# Check that GHC-generated finalizers run on Darwin. The Apple linker doesn't
# support --wrap, so we can't intercept hs_spt_remove directly. Instead we
=====================================
testsuite/tests/rts/T27131.hs
=====================================
@@ -30,16 +30,22 @@ foreign import ccall unsafe "has_local_stop_after_return"
main :: IO ()
main = do
setNumCapabilities 2
- checkFlag
- "TSO_STOP_NEXT_BREAKPOINT"
- rts_enableStopNextBreakpoint
- rts_disableStopNextBreakpoint
- c_hasLocalStopNextBreakpoint
- checkFlag
- "TSO_STOP_AFTER_RETURN"
- rts_enableStopAfterReturn
- rts_disableStopAfterReturn
- c_hasLocalStopAfterReturn
+ -- Bind to capability 0 so it can't float between capabilities while the
+ -- target thread runs on capability 1.
+ doneVar <- newEmptyMVar
+ _ <- forkOn 0 $ do
+ checkFlag
+ "TSO_STOP_NEXT_BREAKPOINT"
+ rts_enableStopNextBreakpoint
+ rts_disableStopNextBreakpoint
+ c_hasLocalStopNextBreakpoint
+ checkFlag
+ "TSO_STOP_AFTER_RETURN"
+ rts_enableStopAfterReturn
+ rts_disableStopAfterReturn
+ c_hasLocalStopAfterReturn
+ putMVar doneVar ()
+ takeMVar doneVar
checkFlag
:: String
@@ -58,6 +64,7 @@ checkFlag label enable disable isMyThreadFlagSet = do
ThreadId tid# <- forkOn 1 $ do
replicateM_ 2 $ do
replyVar <- takeMVar targetCheckVar
+ yield -- make sure we reprocess the mailbox
isSet <- (/= 0) <$> isMyThreadFlagSet
putMVar replyVar isSet
=====================================
testsuite/tests/rts/T27131.stdout
=====================================
@@ -1,6 +1,6 @@
-(0,False)
+(0,True)
TSO_STOP_NEXT_BREAKPOINT set: ok
TSO_STOP_NEXT_BREAKPOINT unset: ok
-(0,False)
+(0,True)
TSO_STOP_AFTER_RETURN set: ok
TSO_STOP_AFTER_RETURN unset: ok
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/2de7ec638ed584fffbb38612a5d2e6…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/2de7ec638ed584fffbb38612a5d2e6…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/sjakobi/T16720] 5 commits: Simplifier: reduce intermediate lists in abstractFloats
by Simon Jakobi (@sjakobi2) 11 Jun '26
by Simon Jakobi (@sjakobi2) 11 Jun '26
11 Jun '26
Simon Jakobi pushed to branch wip/sjakobi/T16720 at Glasgow Haskell Compiler / GHC
Commits:
14b86d18 by Simon Jakobi at 2026-06-11T16:15:41+02:00
Simplifier: reduce intermediate lists in abstractFloats
Build the SCC triples for depAnal directly from the Rec pairs instead
of going through unzip/map/zip3, and process a CyclicSCC with a single
mapM over the triples instead of unzip3/mapAndUnzipM plus two zips.
No change in behaviour; node order, and hence depAnal determinism, is
preserved.
Co-Authored-By: Claude Fable 5 <noreply(a)anthropic.com>
- - - - -
f27bf621 by Simon Jakobi at 2026-06-11T20:01:19+02:00
Simplifier: thread the subst through abstractFloats' cyclic case
Instead of collecting (id, poly_id, poly_app, rhs) quadruples and then
building an [(id, poly_app)] list for extendSubstList, thread the
substitution through the mk_poly1 pass with mapAccumLM, extending it
directly via extendIdSubst (extendSubstList is just a fold of
extendSubst, which for these RHSs is extendIdSubst). This leaves only
two lists per cyclic group: the (poly_id, rhs) pairs and the result.
Also force the free-var sets in to_sccs: depAnal always demands them
when building edges, so the per-element thunk was pure overhead.
Co-Authored-By: Claude Fable 5 <noreply(a)anthropic.com>
- - - - -
0e3818b6 by Simon Jakobi at 2026-06-11T22:04:52+02:00
testsuite: Repeat output of unexpected failures in the final summary
Previously the end-of-run summary named each unexpectedly failing test
with only a one-line reason such as "bad exit code (2)". The output
explaining *why* the test failed was printed mid-run, where it is
interleaved with unrelated test chatter and, in CI, buried thousands of
lines up the log (#16720).
The driver already captures a failing test's stdout/stderr in TestResult
for junit.xml, so reuse that: after the failure lists, print each
unexpected failure's captured output, truncated to 100 lines per stream
to keep pathological tests from flooding the log.
Also attach the captured run stdout/stderr to bad-exit-code failures in
simple_run, which previously recorded no output at all - neither
junit.xml nor the new summary section could explain such failures. The
interactive-run path already did this.
Addresses #16720.
Co-Authored-By: Claude Fable 5 <noreply(a)anthropic.com>
- - - - -
a1623b26 by Simon Jakobi at 2026-06-11T22:04:57+02:00
DO NOT MERGE: Break some tests to exercise the failure summary
Deliberately break three tests, one per failure mode, to see the new
end-of-run failure output in CI:
* cgrun001: wrong expected stdout -> bad stdout (diff in summary)
* T18619: wrong expected stderr -> bad stderr (diff in summary)
* cgrun002: program now exits 3 and writes to stderr
-> bad exit code, exercising the newly captured
run stdout/stderr in simple_run
Co-Authored-By: Claude Fable 5 <noreply(a)anthropic.com>
- - - - -
d7380efd by Simon Jakobi at 2026-06-11T22:12:00+02:00
testsuite: Colorize the failure-output summary, also in CI
Make the per-test blocks in the "Output of unexpected failures"
section easier to scan:
* the '=====> test(way) [reason]' header is red,
* the 'Captured stdout/stderr:' labels are cyan,
Colors were previously disabled in CI because the driver only emits
them when stdout is a tty, yet the GitLab log viewer renders ANSI
colors fine. Add a --force-colors driver flag and pass it from
.gitlab/ci.sh. Note that config.supports_colors stays tty-based, since
it also guards terminal-title escape sequences, which must not end up
in a CI log.
The SUMMARY header and the new section now honor summary()'s color
parameter, so the plain-text summary file no longer receives escape
codes when colors are enabled.
Co-Authored-By: Claude Fable 5 <noreply(a)anthropic.com>
- - - - -
7 changed files:
- .gitlab/ci.sh
- compiler/GHC/Core/Opt/Simplify/Utils.hs
- testsuite/driver/runtests.py
- testsuite/driver/testlib.py
- testsuite/tests/codeGen/should_run/cgrun001.stdout
- testsuite/tests/codeGen/should_run/cgrun002.hs
- testsuite/tests/numeric/should_run/T18619.stderr
Changes:
=====================================
.gitlab/ci.sh
=====================================
@@ -644,6 +644,10 @@ function test_hadrian() {
check_msys2_deps _build/stage1/bin/ghc --version
check_release_build
+ # GitLab's log viewer renders ANSI colors, but stdout here is not a tty,
+ # so the driver must be told to emit them.
+ RUNTEST_ARGS="${RUNTEST_ARGS:-} --force-colors"
+
# Ensure that statically-linked builds are actually static
if [[ "${BUILD_FLAVOUR}" = *static* ]]; then
bad_execs=""
=====================================
compiler/GHC/Core/Opt/Simplify/Utils.hs
=====================================
@@ -2432,12 +2432,10 @@ abstractFloats uf_opts top_lvl main_tvs floats body
-- for why we need to re-do dependency analysis
to_sccs :: OutBind -> [SCC (Id, CoreExpr, VarSet)]
to_sccs (NonRec id e) = [AcyclicSCC (id, e, emptyVarSet)] -- emptyVarSet: abstract doesn't need it
- to_sccs (Rec prs) = sccs
- where
- (ids,rhss) = unzip prs
- sccs = depAnal (\(id,_rhs,_fvs) -> [getName id])
- (\(_id,_rhs,fvs) -> nonDetStrictFoldVarSet ((:) . getName) [] fvs) -- Wrinkle (AB3)
- (zip3 ids rhss (map exprFreeVars rhss))
+ to_sccs (Rec prs)
+ = depAnal (\(id,_rhs,_fvs) -> [getName id])
+ (\(_id,_rhs,fvs) -> nonDetStrictFoldVarSet ((:) . getName) [] fvs) -- Wrinkle (AB3)
+ [ (id, rhs, fvs) | (id, rhs) <- prs, let !fvs = exprFreeVars rhs ]
abstract :: GHC.Core.Subst.Subst -> SCC (Id, CoreExpr, VarSet) -> SimplM (GHC.Core.Subst.Subst, OutBind)
abstract subst (AcyclicSCC (id, rhs, _empty_var_set))
@@ -2452,14 +2450,15 @@ abstractFloats uf_opts top_lvl main_tvs floats body
tvs_here = choose_tvs (exprSomeFreeVars isTyVar rhs')
abstract subst (CyclicSCC trpls)
- = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly1 tvs_here) ids
- ; let subst' = GHC.Core.Subst.extendSubstList subst (ids `zip` poly_apps)
- poly_pairs = [ mk_poly2 poly_id tvs_here rhs'
- | (poly_id, rhs) <- poly_ids `zip` rhss
- , let rhs' = GHC.Core.Subst.substExpr subst' rhs ]
+ = do { (subst', poly_prs) <- mapAccumLM mk1 subst trpls
+ ; let poly_pairs = [ mk_poly2 poly_id tvs_here
+ (GHC.Core.Subst.substExpr subst' rhs)
+ | (poly_id, rhs) <- poly_prs ]
; return (subst', Rec poly_pairs) }
where
- (ids,rhss,_fvss) = unzip3 trpls
+ mk1 s (id, rhs, _fvs)
+ = do { (poly_id, poly_app) <- mk_poly1 tvs_here id
+ ; return (GHC.Core.Subst.extendIdSubst s id poly_app, (poly_id, rhs)) }
-- tvs_here: see Note [Which type variables to abstract over]
tvs_here = choose_tvs (mapUnionVarSet get_bind_fvs trpls)
=====================================
testsuite/driver/runtests.py
=====================================
@@ -94,6 +94,8 @@ parser.add_argument("--ignore-perf-failures", choices=['increases','decreases','
help="Do not fail due to out-of-tolerance perf tests")
parser.add_argument("--only-report-hadrian-deps", type=Path,
help="Dry run the testsuite and report all extra hadrian dependencies needed on the given file")
+parser.add_argument("--force-colors", action="store_true",
+ help="emit ANSI colors even when stdout is not a tty (e.g. for CI logs)")
args = parser.parse_args()
@@ -259,7 +261,9 @@ def supports_colors():
return True
config.supports_colors = supports_colors()
-term_color.enable_color = config.supports_colors
+# config.supports_colors deliberately stays tty-based: it also guards
+# terminal-title updates, which must not end up in a CI log.
+term_color.enable_color = config.supports_colors or args.force_colors
# This has to come after arg parsing as the args can change the compiler
get_compiler_info()
@@ -587,7 +591,7 @@ else:
print(Perf.allow_changes_string([(m.change, m.stat) for m in t.metrics]))
print('-' * 25)
- summary(t, sys.stdout, color=config.supports_colors)
+ summary(t, sys.stdout, color=term_color.enable_color)
# Write perf stats if any exist or if a metrics file is specified.
stats_metrics = [stat for (_, stat, __) in t.metrics] # type: List[PerfStat]
=====================================
testsuite/driver/testlib.py
=====================================
@@ -2442,7 +2442,9 @@ async def simple_run(name: TestName, way: WayName, prog: str, extra_run_opts: st
dump_stdout(name)
dump_stderr(name)
message = format_bad_exit_code_message(exit_code)
- return failBecause(message)
+ return failBecause(message,
+ stderr=read_stderr(name),
+ stdout=read_stdout(name))
if not (opts.ignore_stderr or await stderr_ok(name, way) or opts.combined_output):
return failBecause('bad stderr',
@@ -3541,7 +3543,8 @@ def summary(t: TestRun, file: TextIO, color=False) -> None:
summary_color = Color.GREEN
assert t.start_time is not None
- file.write(colored(summary_color, 'SUMMARY') + ' for test run started at '
+ summary_header = colored(summary_color, 'SUMMARY') if color else 'SUMMARY'
+ file.write(summary_header + ' for test run started at '
+ t.start_time.strftime("%c %Z") + '\n'
+ str(datetime.datetime.now() - t.start_time).rjust(8)
+ ' spent to go through\n'
@@ -3593,6 +3596,10 @@ def summary(t: TestRun, file: TextIO, color=False) -> None:
file.write('Framework warnings:\n')
printTestInfosSummary(file, t.framework_warnings)
+ if t.unexpected_failures:
+ file.write('Output of unexpected failures:\n\n')
+ printTestOutputSummary(file, t.unexpected_failures, color)
+
if stopping():
file.write('WARNING: Testsuite run was terminated early\n')
@@ -3606,6 +3613,34 @@ def printUnexpectedTests(file: TextIO, testInfoss):
file.write('TEST="' + ' '.join(sorted(unexpected)) + '"\n')
file.write('\n')
+# Per-stream cap on a failing test's output repeated in the final summary.
+MAX_SUMMARY_OUTPUT_LINES = 100
+
+def printTestOutputSummary(file: TextIO, testInfos, color: bool=False) -> None:
+ # Repeat failing tests' captured output in the summary, so one needn't
+ # hunt for it earlier in a possibly very long log; see #16720.
+ for result in sorted(testInfos, key=lambda r: (r.testname.lower(), r.way, r.directory)):
+ header = '=====> {}({}) [{}]'.format(result.testname, result.way, result.reason)
+ if color:
+ header = colored(Color.RED, header)
+ file.write(header + '\n')
+ for stream_name, contents in [('stdout', result.stdout), ('stderr', result.stderr)]:
+ if contents and contents.strip():
+ label = 'Captured {}:'.format(stream_name)
+ if color:
+ label = colored(Color.CYAN, label)
+ lines = contents.rstrip('\n').split('\n')
+ if len(lines) > MAX_SUMMARY_OUTPUT_LINES:
+ omitted = len(lines) - MAX_SUMMARY_OUTPUT_LINES
+ lines = lines[:MAX_SUMMARY_OUTPUT_LINES] \
+ + ['... ({} more lines omitted, see junit.xml)'.format(omitted)]
+ s = label + '\n' + ''.join(l + '\n' for l in lines)
+ # Test output can contain characters that file's encoding
+ # cannot represent; replace rather than crash (cf safe_print).
+ enc = getattr(file, 'encoding', None) or 'utf-8'
+ file.write(s.encode(enc, errors='replace').decode(enc))
+ file.write('\n')
+
def printTestInfosSummary(file: TextIO, testInfos):
maxDirLen = max(len(tr.directory) for tr in testInfos)
for result in sorted(testInfos, key=lambda r: (r.testname.lower(), r.way, r.directory)):
=====================================
testsuite/tests/codeGen/should_run/cgrun001.stdout
=====================================
@@ -1 +1 @@
--42
+-43
=====================================
testsuite/tests/codeGen/should_run/cgrun002.hs
=====================================
@@ -1,4 +1,10 @@
-main = print ((f id2) (10 + thirty_two))
+import System.Exit
+import System.IO
+
+main = do
+ print ((f id2) (10 + thirty_two))
+ hPutStrLn stderr "deliberate breakage to test summary output"
+ exitWith (ExitFailure 3)
where
f x = g x
where
=====================================
testsuite/tests/numeric/should_run/T18619.stderr
=====================================
@@ -1,3 +1,3 @@
T18619: Uncaught exception ghc-internal:GHC.Internal.Exception.Type.ArithException:
-arithmetic overflow
+arithmetic underflow
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/3d1919a5194d22eb960eac3c6a4492…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/3d1919a5194d22eb960eac3c6a4492…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/sjakobi/testsuite-atomic-output] testsuite: Emit each test's output atomically
by Simon Jakobi (@sjakobi2) 11 Jun '26
by Simon Jakobi (@sjakobi2) 11 Jun '26
11 Jun '26
Simon Jakobi pushed to branch wip/sjakobi/testsuite-atomic-output at Glasgow Haskell Compiler / GHC
Commits:
5e4892c2 by Simon Jakobi at 2026-06-11T21:41:49+02:00
testsuite: Emit each test's output atomically
Tests run concurrently (as asyncio tasks) and report progress and
failures with many separate print() calls, some to stdout and some to
stderr. As a result the lines of concurrently failing tests interleave
arbitrarily, and since the two streams have different buffering when
piped (as in CI), even a single test's lines arrive scrambled relative
to each other. CI logs show diff headers separated from their diffs and
failure messages wedged between unrelated tests' progress markers.
Replace sys.stdout/sys.stderr with proxies that redirect writes into a
per-asyncio-task buffer while a test is running (a context variable,
following the existing testopts_ctx_var pattern). When the test
completes, its accumulated output is written to the real stdout in a
single call and flushed, so each test's output appears as one
contiguous, correctly ordered block.
Routing both streams through one buffer also eliminates the
stdout/stderr reordering, and the explicit flush after each test means
output now reaches CI logs as tests finish rather than sitting in a
block buffer until the end of the run (cf #12934).
Writes from contexts without an active buffer (driver preamble and
summary, terminal-title updates from timer threads) pass through to the
real streams unchanged.
Closes #27367.
Co-Authored-By: Claude Fable 5 <noreply(a)anthropic.com>
- - - - -
2 changed files:
- testsuite/driver/runtests.py
- testsuite/driver/testlib.py
Changes:
=====================================
testsuite/driver/runtests.py
=====================================
@@ -487,6 +487,7 @@ if config.list_broken:
print('')
else:
# Now run all the tests
+ install_output_proxies() # avoid interleaved output from concurrent tests
try:
async def run_parallelTests():
sem = asyncio.Semaphore(config.threads)
=====================================
testsuite/driver/testlib.py
=====================================
@@ -88,6 +88,61 @@ def get_all_ways() -> Set[WayName]:
global testopts_ctx_var
testopts_ctx_var = contextvars.ContextVar('testopts_ctx_var') # type: ignore
+# Pipe each test's output into a per-test buffer (set up by runTestAtomically)
+# to avoid interleaving the output of concurrent tests. Writes from contexts
+# with no active buffer pass straight through to the real streams.
+
+output_buffer_ctx_var = contextvars.ContextVar('output_buffer_ctx_var', default=None) # type: contextvars.ContextVar[Optional[io.StringIO]]
+
+class _OutputProxyBuffer:
+ """The .buffer of an _OutputProxy; takes bytes."""
+ def __init__(self, real) -> None:
+ self._real = real
+
+ def write(self, b: bytes) -> None:
+ buf = output_buffer_ctx_var.get()
+ if buf is None:
+ self._real.buffer.write(b)
+ else:
+ buf.write(b.decode('utf-8', errors='backslashreplace'))
+
+ def flush(self) -> None:
+ if output_buffer_ctx_var.get() is None:
+ self._real.buffer.flush()
+
+class _OutputProxy:
+ def __init__(self, real) -> None:
+ self._real = real
+ self.buffer = _OutputProxyBuffer(real)
+
+ @property
+ def encoding(self) -> str:
+ return self._real.encoding
+
+ def write(self, s: str) -> int:
+ buf = output_buffer_ctx_var.get()
+ if buf is None:
+ return self._real.write(s)
+ return buf.write(s)
+
+ def flush(self) -> None:
+ if output_buffer_ctx_var.get() is None:
+ self._real.flush()
+
+ def isatty(self) -> bool:
+ return self._real.isatty()
+
+ def fileno(self) -> int:
+ return self._real.fileno()
+
+def install_output_proxies() -> None:
+ # Both streams feed the same per-task buffer, so a test's stdout and
+ # stderr stay in print order.
+ if not isinstance(sys.stdout, _OutputProxy):
+ sys.stdout = _OutputProxy(sys.stdout)
+ if not isinstance(sys.stderr, _OutputProxy):
+ sys.stderr = _OutputProxy(sys.stderr)
+
def getTestOpts() -> TestOptions:
return testopts_ctx_var.get()
@@ -1502,9 +1557,25 @@ allTestNames = set([]) # type: Set[TestName]
async def runTest(sem, opts, name: TestName, func, args):
if sem is None:
- return await test_common_work(name, opts, func, args)
+ return await runTestAtomically(opts, name, func, args)
async with sem:
+ return await runTestAtomically(opts, name, func, args)
+
+async def runTestAtomically(opts, name: TestName, func, args):
+ # Buffer this test's output and emit it as one block at the end, so that
+ # concurrent tests' output does not interleave.
+ buf = io.StringIO()
+ token = output_buffer_ctx_var.set(buf)
+ try:
return await test_common_work(name, opts, func, args)
+ finally:
+ output_buffer_ctx_var.reset(token)
+ s = buf.getvalue()
+ if s:
+ # The event loop is single-threaded and there is no await between
+ # these calls, so the block is written out atomically.
+ sys.stdout.write(s)
+ sys.stdout.flush()
# name :: String
# setup :: [TestOpt] -> IO ()
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/5e4892c2a223f99e0b4156e2d212f5f…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/5e4892c2a223f99e0b4156e2d212f5f…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc] Pushed new branch wip/sjakobi/abstractFloats-alloc
by Simon Jakobi (@sjakobi2) 11 Jun '26
by Simon Jakobi (@sjakobi2) 11 Jun '26
11 Jun '26
Simon Jakobi pushed new branch wip/sjakobi/abstractFloats-alloc at Glasgow Haskell Compiler / GHC
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/tree/wip/sjakobi/abstractFloats-alloc
You're receiving this email because of your account on gitlab.haskell.org.
1
0