[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 5 commits: Add type family performance test for #26426
Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC Commits: 40764930 by sheaf at 2026-06-12T14:54:43-04:00 Add type family performance test for #26426 Some GHC versions produced large numbers of coercions after typechecking and desugaring when compiling the program in #26426: Version | Typechecker time | Typechecker allocations | Coercions -------:|-----------------:|------------------------:|---------: 9.6 | 47 ms | 48 MB | 110k 9.8 | 1000 ms | 486 MB | 10,437k 9.10 | 922 ms | 489 MB | 10,436k 9.12 | 906 ms | 482 MB | 10,437k 9.14 | 63 ms | 55 MB | 333k 10.0 | 47 ms | 64 MB | 35k The improvement 9.12 -> 9.14 was due to commit 22d11fa818fae2c95c494fc0fac1f8cb4c6e7cb6, while the improvement 9.14 -> 10.0 was due to commit 0b7df6db9e46df40e86fbff1a66dc10440b99db5. As the behaviour of GHC seems better than it's ever been on this program, we declare victory, adding this performance test to ensure we don't regress on this program. On the way, we update Note [Combining equalities] in GHC.Tc.SolveR.Equality with the explanation of the 9.12 -> 9.14 improvement (getting rid of an exponential blowup in coercion sizes), and we update Note [Exploiting closed type families] in GHC.Tc.Solver.FunDeps with the explanation of the 9.14 -> 10.0 improvement (bringing down coercion size growth from cubic to quadratic). - - - - - 0f3d0a71 by Zubin Duggal at 2026-06-12T14:55:30-04:00 compiler: mark tool messages as errors/warnings depending on the exit code Fixes #27370 - - - - - acfc184b by mangoiv at 2026-06-12T15:28:59-04:00 libraries/process: bump submodule to v1.6.30.0 - bump the submodule to the appropriate tag - suppress benign warning resulting from the change - - - - - d1b8a9c2 by David Eichmann at 2026-06-12T15:28:59-04:00 ghc-toolchain: don't throw when candidate executables are not found Fixes #27369 - - - - - 9426fd1f by David Eichmann at 2026-06-12T15:29:00-04:00 CI: lint-changelog checks for no-changelog label in script instead of rules - - - - - 16 changed files: - .gitlab-ci.yml - + changelog.d/tool-messages-27370 - compiler/GHC/SysTools/Process.hs - compiler/GHC/Tc/Solver/Equality.hs - compiler/GHC/Tc/Solver/FunDeps.hs - libraries/process - testsuite/driver/testlib.py - + testsuite/tests/driver/T27370/Makefile - + testsuite/tests/driver/T27370/T27370.hs - + testsuite/tests/driver/T27370/T27370.pp - + testsuite/tests/driver/T27370/T27370.stderr - + testsuite/tests/driver/T27370/all.T - + testsuite/tests/perf/compiler/T26426.hs - testsuite/tests/perf/compiler/all.T - utils/ghc-toolchain/src/GHC/Toolchain/Program.hs - utils/ghc-toolchain/src/GHC/Toolchain/Utils.hs Changes: ===================================== .gitlab-ci.yml ===================================== @@ -255,6 +255,9 @@ lint-changelog: BUILD_FLAVOUR: default CHANGELOG_EXPECT_MR: "$CI_MERGE_REQUEST_IID" script: + # Cancel the job if there is a no-changelog label + - | + [[ ",${CI_MERGE_REQUEST_LABELS}," == *",no-changelog,"* ]] && exit 0 # Check that the MR adds at least one changelog entry - git fetch "$CI_MERGE_REQUEST_PROJECT_URL" "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" - base="$(git merge-base FETCH_HEAD $CI_COMMIT_SHA)" @@ -274,8 +277,6 @@ lint-changelog: rules: - if: '$CI_MERGE_REQUEST_LABELS =~ /.*marge_bot_batch_merge_job.*/' when: never - - if: '$CI_MERGE_REQUEST_LABELS =~ /.*no-changelog.*/' - when: never - if: $CI_MERGE_REQUEST_ID - *drafts-can-fail-lint ===================================== changelog.d/tool-messages-27370 ===================================== @@ -0,0 +1,6 @@ +section: compiler +synopsis: Mark messages from external tools as errors or warnings depending on + the tool's exit code. Previously, output printed to stderr by tools was + unconditionally reported as errors. +issues: #27370 +mrs: !16170 ===================================== compiler/GHC/SysTools/Process.hs ===================================== @@ -26,8 +26,9 @@ import GHC.Utils.Logger import GHC.Utils.TmpFs import GHC.Utils.CliOption -import GHC.Driver.Errors (reportError) +import GHC.Driver.Errors (reportDiagnostic) +import GHC.Types.Error ( DiagnosticReason(..) ) import GHC.Types.SrcLoc ( SrcLoc, mkSrcLoc, mkSrcSpan ) import GHC.Data.FastString @@ -271,9 +272,10 @@ builderMainLoop logger filter_fn pgm real_args mb_cwd mb_env = withPipe $ \ (rea getLocaleEncoding >>= hSetEncoding readEnd hSetNewlineMode readEnd nativeNewlineMode hSetBuffering readEnd LineBuffering - messages <- parseBuildMessages . filter_fn . lines <$> hGetContents readEnd - mapM_ processBuildMessage messages - waitForProcess hProcess + messages <- parseBuildMessages . filter_fn . lines <$> hGetContents' readEnd + code <- waitForProcess hProcess + mapM_ (processBuildMessage code) messages + return code hClose hStdIn case r of Left (SomeException e) -> do @@ -282,13 +284,16 @@ builderMainLoop logger filter_fn pgm real_args mb_cwd mb_env = withPipe $ \ (rea Right s -> do return s where - processBuildMessage :: BuildMessage -> IO () - processBuildMessage msg = do + processBuildMessage :: ExitCode -> BuildMessage -> IO () + processBuildMessage code msg = do case msg of BuildMsg msg -> do logInfo logger $ withPprStyle defaultUserStyle msg BuildError loc msg -> do - reportError logger neverQualify emptyDiagOpts (mkSrcSpan loc loc) msg + let reason = case code of + ExitSuccess -> WarningWithoutFlag + ExitFailure{} -> ErrorWithoutFlag + reportDiagnostic logger neverQualify emptyDiagOpts (mkSrcSpan loc loc) reason msg parseBuildMessages :: [String] -> [BuildMessage] parseBuildMessages str = loop str Nothing ===================================== compiler/GHC/Tc/Solver/Equality.hs ===================================== @@ -2980,15 +2980,30 @@ But it's not so simple: we kick out g1. Now we have two constraints [W] g1 : F a ~ a Int (arising from (F a ~ a Int)) [W] g2{rw:g1} : F a ~ a Int (arising from (F alpha ~ F a)) - If we end up with g2 in the inert set (not g1) we'll get a very confusing - error message that we can solve (F a ~ a Int) - arising from F a ~ F a + If we solve `g1` from `g2` we end up with + g1 := g2 + [W] g2{} : F a ~ a Int (arising from (F alpha ~ F a)) + and hence (since alpha := a) we report that we can't solve (F a ~ a Int) + arising from (F a ~ F a), which is extremely confusing. Moreover, it seems + wrong to "solve" `g1` using `g2` when `g2` has itself been rewritten by `g1`! TL;DR: Better to hang on to `g1` (with no rewriters), in preference to `g2` (which has a rewriter). See (WRW11) in Note [Wanteds rewrite Wanteds: rewriter-sets] in GHC.Tc.Types.Constraint. + + Note that the decision to prefer a constraint without rewriters over one that + has rewriters can also have a /huge/ effect on performance. For instance, it + avoids an **exponential** blow-up in the size of coercions produced when + typechecking in T26426. In that program, we have coercions of the form: + + co_i :: TaggedTypes as `Append` TaggedTypes '[ty] + ~# TaggedTypes (as `Append` '[ty]) + + and each 'co_{i+1}' contains the previous 'co_i' twice. Without preferring + Wanteds with no rewriters, we essentially end up inlining 'co_i' into 'co_{i+1}', + which results in exponentially-sized proof terms, growing like O(2^i). -} tryInertEqs :: EqCt -> SolverStage () ===================================== compiler/GHC/Tc/Solver/FunDeps.hs ===================================== @@ -1198,6 +1198,56 @@ Key point: equations that are not relevant do not need to be considered for fund and [W] I alpha ~ Int |> g2 Here we definiteily want to take advantage of injectivity. +(CF6) This machinery can also have a significant positive effect on the size of + proof terms. For example (simplification of T26426): + + type family (++) a b where { '[] ++ ys = ys; (x:xs) ++ ys = x : (xs ++ ys) } + type family MapId a where { MapId '[] = '[]; MapId (x:xs) = x : MapId xs } + + app :: (MapId xs ++ MapId ys ~ MapId (xs ++ ys)) => Proxy xs -> Proxy ys -> Proxy (xs ++ ys) + + test :: Proxy [ty_1, ..., ty_n] + test = Proxy @'[ty_1] + `app` Proxy @'[ty_2] + ... + `app` Proxy @'[ty_n] + + Every `app` call gives rise to a Wanted of the form: + + [W] MapId acc_i ++ MapId '[ty_i] ~ MapId (acc_i ++ '[ty_i]) + + while the overall result type gives us a Wanted of the form + + [W] acc_n ++ '[ty_n] ~ [ty_1, ..., ty_n] + + By using (CFFA) on this result Wanted, we deduce that we must have + + acc_n ~ [ty_1, ..., ty_{n-1}] + + which is a flat list. Repeating the process, (CFFA) allows us to deduce that + + acc_i ~ [ty_1, ..., ty_{i-1}] + + for all i. This allows the other Wanteds to be solved directly, giving rise to + proof terms with the typical triangular O(n^2) shape + + co_i = (O(i) proof that MapId acc_i ++ MapId '[ty_i] ~ acc_{i+1}) + ; (O(i) proof that acc_{i+1} ~ MapId (acc_i ++ '[ty_i])) + + However, /without/ (CFFA), 'acc_i' is not unified with a flat list but is left + as the nested application: + + acc_i ~ (... (('[ty_1] ++ '[ty_2]) ++ '[ty_3]) ... ++ '[ty_{i-1}]) + + This means that 'MapId acc_i' is stuck until we reduce the above, which takes + O(i^2) type family reduction steps, instead of O(i). The same applies to + the other proof term involving 'MapId (acc_i ++ '[ty_i])'. + Consequently, without (CFFA) the overall coercion size blows up to O(n^3). + + The takeaway is that (CFFA) allows us to push in the (flat) result type, + instead of relying on recursively built sub-proof terms, which brings down + coercion sizes (in certain situations) from O(n^3) to O(n^2). + Note [Cache-caused loops] ~~~~~~~~~~~~~~~~~~~~~~~~~ It is very dangerous to cache a rewritten wanted family equation as 'solved' in ===================================== libraries/process ===================================== @@ -1 +1 @@ -Subproject commit 92deb52c1781bf10ad390296dbc435abe103bfe4 +Subproject commit 11fd247ad33208da7a914acf15d4a09d64a6a4c4 ===================================== testsuite/driver/testlib.py ===================================== @@ -3066,11 +3066,15 @@ def normalise_errmsg(s: str) -> str: # Old emcc warns when we export HEAP8 but new one requires it (see #26290) s = s.replace('warning: invalid item in EXPORTED_RUNTIME_METHODS: HEAP8\nwarning: invalid item in EXPORTED_RUNTIME_METHODS: HEAPU8\nemcc: warning: warnings in JS library compilation [-Wjs-compiler]\n','') - # on newer versions of MacOS X, the shipped ranlib warns about object files with no symbols, - # however, these are completely benign stubs. - # See https://gitlab.haskell.org/ghc/ghc/-/issues/27116 if opsys('darwin'): + # on newer versions of MacOS X, the shipped ranlib warns about object files with no symbols, + # however, these are completely benign stubs. + # See https://gitlab.haskell.org/ghc/ghc/-/issues/27116 s = modify_lines(s, lambda l: re.sub(r'.*ranlib:.*has no symbols', '', l)) + # we also want to remove linker warnings having to do with undefined dynamic_lookup in combination with + # making a single weak symbol undefined as this is dependent on other linker flags + # See also https://github.com/haskell/process/pull/377 + s = drop_lines_containing(s, 'ld: warning: -U option is redundant when using -undefined dynamic_lookup') return s ===================================== testsuite/tests/driver/T27370/Makefile ===================================== @@ -0,0 +1,7 @@ +TOP=../../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk + +T27370: + chmod +x ./T27370.pp + '$(TEST_HC)' $(TEST_HC_OPTS) -c T27370.hs ===================================== testsuite/tests/driver/T27370/T27370.hs ===================================== @@ -0,0 +1,2 @@ +{-# OPTIONS_GHC -F -pgmF ./T27370.pp #-} +module T27370 where ===================================== testsuite/tests/driver/T27370/T27370.pp ===================================== @@ -0,0 +1,5 @@ +#!/bin/sh +cp "$2" "$3" +echo "$1:2:8: a located warning from an external tool" +echo " with a continuation line" +echo "an unlocated line from an external tool" ===================================== testsuite/tests/driver/T27370/T27370.stderr ===================================== @@ -0,0 +1,5 @@ +T27370.hs:2:8: warning: + a located warning from an external tool + with a continuation line + +an unlocated line from an external tool ===================================== testsuite/tests/driver/T27370/all.T ===================================== @@ -0,0 +1,4 @@ +test('T27370', + [extra_files(['T27370.hs', 'T27370.pp']), + when(opsys('mingw32'), skip)], + makefile_test, ['T27370']) ===================================== testsuite/tests/perf/compiler/T26426.hs ===================================== @@ -0,0 +1,66 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE TypeFamilies #-} +module T26426 ( + someTypes +) where + +import Data.Kind (Type) +import GHC.TypeLits (Symbol) + +type family Append (left :: [k]) (right :: [k]) :: [k] where + Append '[] right = right + Append (a : rest) right = a : Append rest right + +type family TaggedTypes (tags :: [(Symbol, Type)]) :: [Type] where + TaggedTypes '[] = '[] + TaggedTypes ('(_, typ) : rest) = typ : TaggedTypes rest + +data Types (types :: [(Symbol, Type)]) = Types + +mkTypes :: forall sym val. val -> Types '[ '(sym, val) ] +mkTypes _ = Types + +appendTypes :: + -- This constraint is the one that causes the issue. If the next line is commented + -- out, then this module compiles quickly + Append (TaggedTypes left) (TaggedTypes right) ~ TaggedTypes (Append left right) => + Types left -> Types right -> Types (Append left right) +appendTypes _ _ = Types + +someTypes :: + Types + [ '("01", Int) + , '("02", Int) + , '("03", Int) + , '("04", Int) + , '("05", Int) + , '("06", Int) + , '("07", Int) + , '("08", Int) + , '("09", Int) + , '("10", Int) + , '("11", Int) + , '("12", Int) + , '("13", Int) + , '("14", Int) + , '("15", Int) + , '("16", Int) + ] + +someTypes = + mkTypes @"01" 1 `appendTypes` + mkTypes @"02" 2 `appendTypes` + mkTypes @"03" 3 `appendTypes` + mkTypes @"04" 4 `appendTypes` + mkTypes @"05" 5 `appendTypes` + mkTypes @"06" 6 `appendTypes` + mkTypes @"07" 7 `appendTypes` + mkTypes @"08" 8 `appendTypes` + mkTypes @"09" 9 `appendTypes` + mkTypes @"10" 10 `appendTypes` + mkTypes @"11" 11 `appendTypes` + mkTypes @"12" 12 `appendTypes` + mkTypes @"13" 13 `appendTypes` + mkTypes @"14" 14 `appendTypes` + mkTypes @"15" 15 `appendTypes` + mkTypes @"16" 16 ===================================== testsuite/tests/perf/compiler/all.T ===================================== @@ -87,6 +87,13 @@ test('T783', ], compile,['']) +test ('T26426' + , [ only_ways(['normal']) + , collect_compiler_stats('bytes allocated',4) ] + , compile + , [''] + ) + test('T5321Fun', [ only_ways(['normal']), # no optimisation for this one collect_compiler_runtime(2), ===================================== utils/ghc-toolchain/src/GHC/Toolchain/Program.hs ===================================== @@ -148,12 +148,14 @@ findProgram description userSpec candidates Just prefix -> map (prefix++) candidates Nothing -> [] candidates' = prefixedCandidates ++ candidates - err = - [ "Failed to find " ++ description ++ "." - , "Looked for one of " ++ show candidates' ++ " in the system search path." - ] - path <- oneOf' err (map findExecutableErr candidates') - return Program { prgPath = path, prgFlags = fromMaybe [] (poFlags userSpec) } + pathMay <- findM doesExecutableExist candidates' + case pathMay of + Nothing -> throwEs + [ "Failed to find " ++ description ++ "." + , "Looked for one of " ++ show candidates' ++ " in the system search path." + ] + Just path -> + return Program { prgPath = path, prgFlags = fromMaybe [] (poFlags userSpec) } -- Note that @configure.ac@ checks these llvm version constants (using @sed@) to -- ensure they are the same as the @$LlvmMinVersion@ and @$LlvmMaxVersion@ @@ -222,21 +224,18 @@ maybeFindProgramFromProgOpts :: String -> ProgOpt -> Maybe (M Program) maybeFindProgramFromProgOpts description userSpec = case poPath userSpec of Nothing -> Nothing Just path -> Just $ do - let err = - [ "Failed to find " ++ description ++ "." - , "Looked for user-specified program '" ++ path ++ "' in the system search path." - ] - path' <- findExecutableErr path <|> throwEs err - return Program { prgPath = path', prgFlags = fromMaybe [] (poFlags userSpec) } - -findExecutableErr :: String -> M FilePath -findExecutableErr name = do - r <- liftIO $ findExecutable name - case r of - Nothing -> throwE $ name ++ " not found in search path" - -- Use the given `prgPath` or candidate name rather than the - -- absolute path returned by `findExecutable`. - Just _x -> return name + exists <- doesExecutableExist path + unless exists $ throwEs + [ "Failed to find " ++ description ++ "." + , "Looked for user-specified program '" ++ path ++ "' in the system search path." + ] + return Program { prgPath = path, prgFlags = fromMaybe [] (poFlags userSpec) } + +doesExecutableExist + :: String -- ^ executable name + -> M Bool +doesExecutableExist name = isJust <$> liftIO (findExecutable name) + -------------------- Compiling utilities -------------------- ===================================== utils/ghc-toolchain/src/GHC/Toolchain/Utils.hs ===================================== @@ -9,6 +9,7 @@ module GHC.Toolchain.Utils , oneOf' , isSuccess , lastLine + , findM ) where import Control.Exception @@ -69,3 +70,10 @@ isSuccess = \case lastLine :: String -> String lastLine = maybe "" snd . unsnoc . lines + +findM :: Monad m => (a -> m Bool) -> [a] -> m (Maybe a) +findM f = \case + [] -> return Nothing + a:as -> do + found <- f a + if found then return (Just a) else findM f as View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/396d6f0e7708ca4d6982b1ad3202077... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/396d6f0e7708ca4d6982b1ad3202077... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Marge Bot (@marge-bot)