Simon Jakobi pushed to branch wip/sjakobi/T27296-stable-simpl at Glasgow Haskell Compiler / GHC
Commits:
f5eb81e6 by Simon Jakobi at 2026-06-09T16:40:04+02:00
Stabilise anonymous float ordering in untidied Core dumps
Anonymous floats are all built with OccName "lvl" and noSrcSpan
(newLvlVar), so the source-span/name sort key is identical for every
one of them; sortOn then falls back to the unique-driven input order --
the very churn -dstable-core-dump-order is meant to remove. (Tidied
dumps like -ddump-simpl are unaffected, as tidy gives the floats
distinct names lvl, lvl1, ...)
Add a content-based, unique-independent tie-break (rhsKey): the floated
literal, if any, then the RHS size statistics.
Add test T27296b pinning the float ordering in an untidied
-ddump-float-out dump. It is a makefile_test that seds the dump down to
just the bindings (collapsing each pass header to a bare "Float out"
separator and dropping the FOS config / size lines), so the six lvl
floats are asserted to come out ordered by literal value.
Co-Authored-By: Claude Opus 4.7
- - - - -
5 changed files:
- compiler/GHC/Core/Ppr.hs
- testsuite/tests/simplCore/should_compile/Makefile
- + testsuite/tests/simplCore/should_compile/T27296b.hs
- + testsuite/tests/simplCore/should_compile/T27296b.stdout
- testsuite/tests/simplCore/should_compile/all.T
Changes:
=====================================
compiler/GHC/Core/Ppr.hs
=====================================
@@ -28,10 +28,10 @@ module GHC.Core.Ppr (
import GHC.Prelude
import GHC.Core
-import GHC.Core.Stats (exprStats)
+import GHC.Core.Stats (CoreStats(..), exprStats)
import GHC.Types.Fixity (LexicalFixity(..))
-import GHC.Types.Literal( pprLiteral )
-import GHC.Types.Name( pprInfixName, pprPrefixName, getOccString, getSrcSpan )
+import GHC.Types.Literal( Literal, pprLiteral )
+import GHC.Types.Name( getOccString, getSrcSpan, pprInfixName, pprPrefixName )
import GHC.Types.Var
import GHC.Types.Id
import GHC.Types.Id.Info
@@ -46,8 +46,8 @@ import GHC.Types.Basic
import GHC.Utils.Misc
import GHC.Utils.Outputable
import GHC.Utils.Panic (panic)
-import GHC.Types.SrcLoc ( SrcSpan(..), srcSpanStartLine, srcSpanStartCol
- , pprUserRealSpan )
+import GHC.Types.SrcLoc ( SrcSpan(..), pprUserRealSpan, srcSpanStartCol
+ , srcSpanStartLine )
import GHC.Types.Tickish
import Data.List ( sortOn )
@@ -99,7 +99,18 @@ Uniques*, so two dumps line up across rebuilds. The sort key is:
whether the OccName *contains* a '$', which marks a derived binder: a worker
is @$wfoo@, but a call-site specialisation is tidied to @bar_$sfoo@ (no
leading '$'), so a leading-'$' test would miss it.
- 3. the OccName string, as a final lexical, deterministic tie-break.
+ 3. the OccName string, as a lexical, deterministic tie-break.
+ 4. a content-based tie-break on the right-hand side ('rhsKey'): the floated
+ literal, if any, then the RHS size statistics. This matters for the
+ anonymous floats: 'newLvlVar' builds them all with OccName "lvl" and
+ noSrcSpan, so keys 1-3 are identical and without it their order would fall
+ back to the Unique-driven input order -- the churn we set out to remove.
+ (Tidied dumps like -ddump-simpl give the floats distinct names lvl,
+ lvl1, ...; this additionally stabilises untidied dumps such as
+ -ddump-simpl-iterations.) It is only a best-effort tie-break -- RHSs
+ agreeing on both components keep their input order -- and Unique-independent
+ for the numeric CAFs we target (a rubbish literal is the exception: its
+ 'cmpLit' falls back to the Unique-dependent 'nonDetCmpType').
Recursive groups are never split: a 'Rec' is one 'CoreBind', placed as a unit by
its earliest-source member, with its members sorted by the same key.
@@ -114,24 +125,36 @@ suffer the cross-module churn this flag addresses.
useful for debugging the compiler itself.
-}
+-- | The sort key for one top-level binder. The trailing 'RhsKey' is a
+-- content-based tiebreak, used only when two binders agree on everything
+-- before it. See Note [Stable Core dump order].
+type DumpSortKey =
+ ( Int -- source-span bucket: 0 = real span, 1 = noSrcSpan (sorts last)
+ , Int -- source-span start line
+ , Int -- source-span start column
+ , Int -- dollar-rank: 0 = derived ($w/$s) binder, 1 = its origin
+ , String -- the OccName string, a lexical tiebreak
+ , RhsKey -- content-based tiebreak (see 'rhsKey')
+ )
+
-- | Reorder a 'CoreProgram' into a stable, source-location-driven order for
-- dumping. See Note [Stable Core dump order]. Used by 'dumpPassResult' when
-- -dstable-core-dump-order is enabled.
sortCoreBindingsForDump :: CoreProgram -> CoreProgram
sortCoreBindingsForDump = sortOn bindKey . map sortRecMembers
where
- sortRecMembers (Rec prs) = Rec (sortOn (bndrKey . fst) prs)
+ sortRecMembers (Rec prs) = Rec (sortOn (uncurry elemKey) prs)
sortRecMembers b = b
- -- 'sortRecMembers' runs first, so a 'Rec' is already sorted by 'bndrKey'
+ -- 'sortRecMembers' runs first, so a 'Rec' is already sorted by 'elemKey'
-- when 'bindKey' sees it; its first member is therefore the minimum key.
- bindKey :: CoreBind -> (Int, Int, Int, Int, String)
- bindKey (NonRec b _) = bndrKey b
- bindKey (Rec ((b,_):_)) = bndrKey b
- bindKey (Rec []) = panic "sortCoreBindingsForDump: empty Rec"
+ bindKey :: CoreBind -> DumpSortKey
+ bindKey (NonRec b rhs) = elemKey b rhs
+ bindKey (Rec ((b,rhs):_)) = elemKey b rhs
+ bindKey (Rec []) = panic "sortCoreBindingsForDump: empty Rec"
- bndrKey :: CoreBndr -> (Int, Int, Int, Int, String)
- bndrKey b = (bucket, line, col, dollar_rank, s)
+ elemKey :: CoreBndr -> CoreExpr -> DumpSortKey
+ elemKey b rhs = (bucket, line, col, dollar_rank, s, rhsKey rhs)
where
s = getOccString b
(bucket, line, col) = case getSrcSpan b of
@@ -145,6 +168,23 @@ sortCoreBindingsForDump = sortOn bindKey . map sortRecMembers
dollar_rank | '$' `elem` s = 0
| otherwise = 1
+-- | A content-based tie-break on a binder's right-hand side: see point 4 of
+-- Note [Stable Core dump order].
+type RhsKey =
+ ( Maybe Literal -- the floated literal, if any (Nothing sorts first)
+ , (Int, Int, Int, Int, Int) -- exprStats counts: terms, types, coercions, value binds, join binds
+ )
+
+rhsKey :: CoreExpr -> RhsKey
+rhsKey rhs = (litOf rhs, statsTuple (exprStats rhs))
+ where
+ statsTuple (CS tm ty co vb jb) = (tm, ty, co, vb, jb)
+ litOf (Lit l) = Just l
+ litOf (App f a) = case a of { Lit l -> Just l; _ -> litOf f }
+ litOf (Cast e _) = litOf e
+ litOf (Tick _ e) = litOf e
+ litOf _ = Nothing
+
instance OutputableBndr b => Outputable (Bind b) where
ppr bind = ppr_bind noAnn bind
=====================================
testsuite/tests/simplCore/should_compile/Makefile
=====================================
@@ -316,3 +316,13 @@ T27296:
-dstable-core-dump-order T27296.hs 2> /dev/null \
| sed -nE 's/^(\$$fEqKey|\$$fOrdKey|\$$fOrdKey_\$$ccompare|size|findI_\$$slookupG|lookupG|member|findI|\$$wrotate|rotate|insertG|insertManyI|insertTwoI|weight|balance|ratios|fromAscI)( .*)?$$/\1/p' \
| uniq
+
+# See T27296b.hs for what this pins and why. The six floated "lvl" constants
+# are scrambled in source order; grep them out and the dump coming out
+# 1000..6000 confirms the stable, value-ordered float ordering.
+T27296b:
+ $(RM) -f T27296b.o T27296b.hi
+ '$(TEST_HC)' $(TEST_HC_OPTS) -O -c -ddump-float-out -dsuppress-uniques \
+ -dsuppress-idinfo -dsuppress-module-prefixes -dno-typeable-binds \
+ -dstable-core-dump-order T27296b.hs 2> /dev/null \
+ | grep '^lvl = I#'
=====================================
testsuite/tests/simplCore/should_compile/T27296b.hs
=====================================
@@ -0,0 +1,21 @@
+-- See Note [Stable Core dump order] in GHC.Core.Ppr.
+--
+-- Companion to T27296 that pins the ordering of *anonymous* top-level floats.
+-- Under -O the boxed Int constants in sel's branches are floated to top level
+-- as separate CAFs, all of which the compiler names "lvl" with noSrcSpan (see
+-- newLvlVar). Before -dstable-core-dump-order their dump order was the
+-- unique-driven processing order; the flag's content-based tie-break (rhsKey)
+-- now orders them by literal value -- here 1000..6000, despite the scrambled
+-- source order. This dump is intentionally *untidied* (-ddump-float-out), the
+-- only place the "lvl" collision is observable; tidied dumps like -ddump-simpl
+-- already give the floats distinct names (lvl, lvl1, ...).
+module T27296b (sel) where
+
+{-# NOINLINE sel #-}
+sel :: Int -> Int
+sel 0 = 5000
+sel 1 = 1000
+sel 2 = 4000
+sel 3 = 2000
+sel 4 = 3000
+sel _ = 6000
=====================================
testsuite/tests/simplCore/should_compile/T27296b.stdout
=====================================
@@ -0,0 +1,6 @@
+lvl = I# 1000#
+lvl = I# 2000#
+lvl = I# 3000#
+lvl = I# 4000#
+lvl = I# 5000#
+lvl = I# 6000#
=====================================
testsuite/tests/simplCore/should_compile/all.T
=====================================
@@ -603,3 +603,4 @@ test('T25718c', normal, compile, ['-O -ddump-simpl -dsuppress-uniques -dsuppress
test('T19166', normal, compile, ['-O -ddump-simpl -dsuppress-uniques -dsuppress-all -dno-typeable-binds'])
test('T27261', [extra_files(['T27261_aux.hs'])], multimod_compile, ['T27261', '-v0 -O'])
test('T27296', [], makefile_test, ['T27296'])
+test('T27296b', [], makefile_test, ['T27296b'])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/f5eb81e6745870feb1a1a2cdc3adec7d...
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/f5eb81e6745870feb1a1a2cdc3adec7d...
You're receiving this email because of your account on gitlab.haskell.org.