[Git][ghc/ghc][wip/T26831] 4 commits: Eta expansion comments
Simon Peyton Jones pushed to branch wip/T26831 at Glasgow Haskell Compiler / GHC
Commits:
1d9f90d5 by Simon Peyton Jones at 2026-03-17T10:39:40+00:00
Eta expansion comments
- - - - -
1660f8f8 by Simon Peyton Jones at 2026-03-17T10:55:37+00:00
Adapt test to coreToStg API change
- - - - -
cd8b676f by Simon Peyton Jones at 2026-03-17T10:56:35+00:00
Update stderr changes
- - - - -
11637c4a by Simon Peyton Jones at 2026-03-17T12:10:00+00:00
Tighten up eta expansion and CBV stuff
- - - - -
12 changed files:
- compiler/GHC/Builtin/PrimOps.hs
- compiler/GHC/Core/Tidy.hs
- compiler/GHC/CoreToStg/Prep.hs
- compiler/GHC/Stg/Lint.hs
- compiler/GHC/Types/Id.hs
- compiler/GHC/Types/Id/Info.hs
- testsuite/tests/arityanal/should_compile/Arity01.stderr
- testsuite/tests/arityanal/should_compile/Arity05.stderr
- testsuite/tests/arityanal/should_compile/Arity08.stderr
- testsuite/tests/arityanal/should_compile/Arity11.stderr
- testsuite/tests/arityanal/should_compile/Arity14.stderr
- testsuite/tests/wasm/should_run/control-flow/LoadCmmGroup.hs
Changes:
=====================================
compiler/GHC/Builtin/PrimOps.hs
=====================================
@@ -807,16 +807,23 @@ the former has an additional type binder. Hmmm....
Note [Eta expanding primops]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
STG requires that primop applications be saturated. This makes code generation
significantly simpler since otherwise we would need to define a calling
convention for curried applications that can accommodate representation
polymorphism.
-To ensure saturation, CorePrep eta expands all primop applications as
-described in Note [Eta expansion of hasNoBinding things in CorePrep] in
+To ensure saturation, CorePrep eta expands all primop applications
+as described in Note [Eta expansion of unsaturated calls] in
GHC.Core.Prep.
+Side note: this decision is somewhat in flux: see comments with `hasNoBinding`.
+The question is: do we generate a trivial wrapper for each primop
+ (+#) x y = (+#) x y
+and now we can call that wrapper unsaturated. But in practice we
+might never call it because in practice Prep eta-expands all partial
+applications!
+
+
Historical Note:
For a short period around GHC 8.8 we rewrote unsaturated primop applications to
=====================================
compiler/GHC/Core/Tidy.hs
=====================================
@@ -165,6 +165,7 @@ computeCbvInfo fun_id rhs
map mkMark val_args
cbv_bndr | any isMarkedCbv cbv_marks
+ -- isMarkedCbv: see (CBV2) in Note [CBV Function Ids: overview]
= cbv_marks `seqList` setIdCbvMarks fun_id cbv_marks
-- seqList: avoid retaining the original rhs
@@ -176,6 +177,7 @@ computeCbvInfo fun_id rhs
-- We don't set CBV marks on functions which take unboxed tuples or sums as
-- arguments. Doing so would require us to compute the result of unarise
-- here in order to properly determine argument positions at runtime.
+ -- See (CBV1) in Note [CBV Function Ids: overview]
--
-- In practice this doesn't matter much. Most "interesting" functions will
-- get a W/W split which will eliminate unboxed tuple arguments, and unboxed
=====================================
compiler/GHC/CoreToStg/Prep.hs
=====================================
@@ -1566,12 +1566,12 @@ maybeSaturate :: Id -> CpeApp
maybeSaturate fn expr n_args unsat_ticks
| isJoinId fn -- Never eta-expand a call to a join point
-- See Note [Do not eta-expand join points]
- = assertPpr (n_args >= mark_arity) (ppr expr) $
+ = assertPpr (not must_eta_expand) (ppr expr) $
-- assertPpr: check that all arguments that need to be passed cbv
-- are visible, so the backend can evalaute them if required
expr
- | hasNoBinding fn || (n_args > 0 && excess_arity > 0)
+ | must_eta_expand || desirable_to_eta_expand
-- n_args > 0: do not eta-expand a naked variable!
= wrapLamBody (mkTicks unsat_ticks) $
cpeEtaExpand excess_arity expr
@@ -1580,6 +1580,15 @@ maybeSaturate fn expr n_args unsat_ticks
= expr
where
+ must_eta_expand
+ = (hasNoBinding fn && fn_arity > n_args)
+ -- hasNoBinding functions must be saturated
+ || (mark_arity > n_args)
+ -- CBV functions must be CBV-saturated
+
+ desirable_to_eta_expand = fn_arity > n_args && n_args > 0
+ -- n_args > 0: do not eta-expand a naked variable unless we have to
+
mark_arity = idCbvMarkArity fn
fn_arity = idArity fn
excess_arity = (max fn_arity mark_arity) - n_args
@@ -1623,8 +1632,11 @@ there are three reasons we might want to eta-expand:
* Must eta-expand: if `f` is a `hasNoBinding` function, we must saturate
it, because the function has no (curried) binding to call. Currently
- this includes: foreign calls, unboxed tuple/sum constructors, and
- representation-polymorphic primitives such as 'coerce' and 'unsafeCoerce#'.
+ this includes:
+ - foreign calls,
+ - unboxed tuple/sum constructors
+ - representation-polymorphic primitives such as 'coerce' and 'unsafeCoerce#'
+ - primops (for now anyway; see comments in `hasNoBinding`)
* Must eta-expand: if `f` has a call-by-value calling convention, we /must/
call it with evaluated arguments. The back end deals with adding the
=====================================
compiler/GHC/Stg/Lint.hs
=====================================
@@ -105,7 +105,7 @@ import GHC.Core ( AltCon(..) )
import GHC.Core.Type
import GHC.Core.Lint ( lintMessage )
-import GHC.Types.Basic ( TopLevelFlag(..), isTopLevel, isMarkedCbv )
+import GHC.Types.Basic ( TopLevelFlag(..), isTopLevel )
import GHC.Types.CostCentre ( isCurrentCCS )
import GHC.Types.Id
import GHC.Types.Var.Set
@@ -123,12 +123,9 @@ import GHC.Unit.Module ( Module )
import GHC.Data.Bag ( Bag, emptyBag, isEmptyBag, snocBag, bagToList )
import Control.Monad
-import Data.Maybe
-import GHC.Utils.Misc
import GHC.Core.Multiplicity (scaledThing)
import GHC.Settings (Platform)
import GHC.Core.TyCon (primRepCompatible, primRepsCompatible)
-import GHC.Utils.Panic.Plain (panic)
lintStgTopBindings :: forall a . (OutputablePass a, BinderP a ~ Id)
=> Platform
@@ -174,36 +171,37 @@ lintStgTopBindings platform logger diag_opts opts extra_vars this_mod unarised w
lint_bind (StgTopStringLit v _) = return [v]
lintStgConArg :: StgArg -> LintM ()
-lintStgConArg arg = do
- unarised <- lf_unarised <$> getLintFlags
- when unarised $ case stgArgRep_maybe arg of
- -- Note [Post-unarisation invariants], invariant 4
- Just [_] -> pure ()
- badRep -> addErrL $
- text "Non-unary constructor arg: " <> ppr arg $$
- text "Its PrimReps are: " <> ppr badRep
-
- case arg of
- StgLitArg _ -> pure ()
- StgVarArg v -> lintStgVar v
+lintStgConArg arg
+ = do { lintStgArg arg
+
+ ; unarised <- lf_unarised <$> getLintFlags
+ ; when unarised $ case stgArgRep_maybe arg of
+ -- Note [Post-unarisation invariants], invariant 4
+ Just [_] -> pure ()
+ badRep -> addErrL $
+ text "Non-unary constructor arg: " <> ppr arg $$
+ text "Its PrimReps are: " <> ppr badRep }
lintStgFunArg :: StgArg -> LintM ()
-lintStgFunArg arg = do
- unarised <- lf_unarised <$> getLintFlags
- when unarised $ case stgArgRep_maybe arg of
- -- Note [Post-unarisation invariants], invariant 3
- Just [] -> pure ()
- Just [_] -> pure ()
- badRep -> addErrL $
- text "Function arg is not unary or void: " <> ppr arg $$
- text "Its PrimReps are: " <> ppr badRep
-
- case arg of
- StgLitArg _ -> pure ()
- StgVarArg v -> lintStgVar v
-
-lintStgVar :: Id -> LintM ()
-lintStgVar id = checkInScope id
+lintStgFunArg arg
+ = do { lintStgArg arg
+
+ ; unarised <- lf_unarised <$> getLintFlags
+ ; when unarised $ case stgArgRep_maybe arg of
+ -- Note [Post-unarisation invariants], invariant 3
+ Just [] -> pure ()
+ Just [_] -> pure ()
+ badRep -> addErrL $
+ text "Function arg is not unary or void: " <> ppr arg $$
+ text "Its PrimReps are: " <> ppr badRep }
+
+lintStgArg :: StgArg -> LintM ()
+lintStgArg (StgLitArg _) = pure ()
+lintStgArg (StgVarArg v) = do { lintStgVarOcc v
+ ; lintAppCbvMarks v [] }
+
+lintStgVarOcc :: Id -> LintM ()
+lintStgVarOcc id = checkInScope id
lintStgBinds
:: (OutputablePass a, BinderP a ~ Id)
@@ -275,13 +273,11 @@ lintStgExpr :: (OutputablePass a, BinderP a ~ Id) => GenStgExpr a -> LintM ()
lintStgExpr (StgLit _) = return ()
-lintStgExpr e@(StgApp fun args) = do
- lintStgVar fun
- mapM_ lintStgFunArg args
- lintAppCbvMarks e
- lintStgAppReps fun args
-
-
+lintStgExpr (StgApp fun args)
+ = do { lintStgVarOcc fun
+ ; mapM_ lintStgFunArg args
+ ; lintAppCbvMarks fun args
+ ; lintStgAppReps fun args }
lintStgExpr app@(StgConApp con _n args _arg_tys) = do
-- unboxed sums should vanish during unarise
@@ -413,22 +409,20 @@ lintStgAppReps fun args = do
match_args actual_arg_reps fun_arg_tys_reps
-lintAppCbvMarks :: OutputablePass pass
- => GenStgExpr pass -> LintM ()
-lintAppCbvMarks e@(StgApp fun args) = do
- lf <- getLintFlags
- when (lf_unarised lf) $ do
+lintAppCbvMarks :: Id -> [StgArg] -> LintM ()
+lintAppCbvMarks fun args
+ | idCbvMarkArity fun > length args
-- A function which expects a unlifted argument as n'th argument
-- always needs to be applied to n arguments.
-- See Note [CBV Function Ids: overview].
- let marks = fromMaybe [] $ idCbvMarks_maybe fun
- when (length (dropWhileEndLE (not . isMarkedCbv) marks) > length args) $ do
- addErrL $ hang (text "Undersatured cbv marked ID in App" <+> ppr e ) 2 $
- (text "marks" <> ppr marks $$
- text "args" <> ppr args $$
- text "arity" <> ppr (idArity fun) $$
- text "join_arity" <> ppr (idJoinPointHood fun))
-lintAppCbvMarks _ = panic "impossible - lintAppCbvMarks"
+ = addErrL $ hang (text "Undersatured cbv marked ID in App" <+> ppr fun)
+ 2 (vcat [ text "marks" <> ppr (idCbvMarks_maybe fun)
+ , text "args" <> ppr args
+ , text "arity" <> ppr (idArity fun)
+ , text "join_arity" <> ppr (idJoinPointHood fun) ])
+
+ | otherwise
+ = return ()
{-
************************************************************************
=====================================
compiler/GHC/Types/Id.hs
=====================================
@@ -852,7 +852,7 @@ idCbvMarks_maybe id = case idDetails id of
_ -> Nothing
-- Id must be called with at least this arity in order to allow arguments to
--- be passed unlifted.
+-- be passed unlifted. Return 0 if there are no CBV marks.
idCbvMarkArity :: Id -> Arity
idCbvMarkArity fn = maybe 0 length (idCbvMarks_maybe fn)
=====================================
compiler/GHC/Types/Id/Info.hs
=====================================
@@ -210,6 +210,7 @@ data IdDetails
-- Can also work as a WorkerLikeId if given `CbvMark`s.
-- See Note [CBV Function Ids: overview]
-- The [CbvMark] is always empty (and ignored) until after Tidy.
+
| WorkerLikeId [CbvMark]
-- ^ An 'Id' for a worker like function, which might expect some arguments to be
-- passed both evaluated and tagged.
@@ -217,8 +218,10 @@ data IdDetails
-- aren't used unapplied.
-- See Note [CBV Function Ids: overview]
-- See Note [EPT enforcement]
- -- The [CbvMark] is always empty (and ignored) until after Tidy for ids from the current
- -- module.
+ -- Invariants:
+ -- - the [CbvMark] is always empty (and ignored) until after Tidy
+ -- for ids from the current module
+ -- - If non-empty, at least is isMarkedCbbv; see (CBV2)
data RecSelInfo
= RSI { rsi_def :: [ConLike] -- Record selector defined for these
@@ -297,9 +300,7 @@ Here's how it all works:
to identify strict arguments. See Note [Call-by-value for worker args] for
how a worker guarantees to be strict in strict datacon fields.
- TODO: We currently don't do this for arguments that are unboxed sums or tuples,
- because then we'd have to predict the result of unarisation. But it would be nice to
- do so. See `computeCbvInfo`.
+ See (CBV1) and (CBV2).
* During CorePrep calls to CBV Ids are eta expanded.
See `GHC.CoreToStg.Prep.maybeSaturate`.
@@ -319,6 +320,16 @@ Here's how it all works:
* Imported functions may be CBV, and then there is no point in eta-reducing
them; we'll just have to eta-expand later; see GHC.Core.Opt.Arity.cantEtaReduceFun.
+Wrinkles
+
+(CBV1) We do not set the CBV-marks for a function that takes an unboxed sum or tuple,
+ as an argument, because then we'd have to predict the result of unarisation.
+ It would be nice to do so in future. See `computeCbvInfo`.
+
+(CBV2) We do not set CBV-marks if none of them are `isMarkedCbv`. Why not?
+ Because if none are CBV then there is nothing special to do for this function;
+ in particular, we don't need to saturate its calls. See `computeCbvInfo`.
+
*** SPJ really? Andreas? ****
We only use this for workers and specialized versions of SpecConstr
But we also check other functions during tidy and potentially turn some of them into
=====================================
testsuite/tests/arityanal/should_compile/Arity01.stderr
=====================================
@@ -5,19 +5,19 @@ Result size of Tidy Core = {terms: 71, types: 43, coercions: 0, joins: 0/0}
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
F1.f2 :: Integer
[GblId, Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
-F1.f2 = GHC.Num.Integer.IS 1#
+F1.f2 = GHC.Internal.Bignum.Integer.IS 1#
Rec {
-- RHS size: {terms: 24, types: 6, coercions: 0, joins: 0/0}
F1.f1_h1 [Occ=LoopBreaker] :: Integer -> Integer -> Integer -> Integer
[GblId, Arity=3, Str=<1L><1L><SL>, Unf=OtherCon []]
F1.f1_h1
- = \ (n :: Integer) (x :: Integer) (eta [OS=OneShot] :: Integer) ->
+ = \ (n :: Integer) (x [OS=OneShot] :: Integer) (eta [OS=OneShot] :: Integer) ->
case x of x1 { __DEFAULT ->
case n of y1 { __DEFAULT ->
- case GHC.Num.Integer.integerLt# x1 y1 of {
+ case GHC.Internal.Bignum.Integer.integerLt# x1 y1 of {
__DEFAULT -> eta;
- 1# -> F1.f1_h1 y1 (GHC.Num.Integer.integerAdd x1 F1.f2) (GHC.Num.Integer.integerAdd x1 eta)
+ 1# -> F1.f1_h1 y1 (GHC.Internal.Bignum.Integer.integerAdd x1 F1.f2) (GHC.Internal.Bignum.Integer.integerAdd x1 eta)
}
}
}
@@ -26,7 +26,7 @@ end Rec }
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
F1.f3 :: Integer
[GblId, Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
-F1.f3 = GHC.Num.Integer.IS 5#
+F1.f3 = GHC.Internal.Bignum.Integer.IS 5#
-- RHS size: {terms: 4, types: 0, coercions: 0, joins: 0/0}
f1 :: Integer
@@ -36,27 +36,27 @@ f1 = F1.f1_h1 F1.f3 F1.f2 F1.f3
-- RHS size: {terms: 14, types: 5, coercions: 0, joins: 0/0}
g :: Integer -> Integer -> Integer -> Integer -> Integer -> Integer
[GblId, Arity=5, Str=<1L><SL><SL><SL><SL>, Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, WorkFree=True, Expandable=True, Guidance=IF_ARGS [0 0 0 0 0] 120 0}]
-g = \ (x1 :: Integer) (x2 :: Integer) (x3 :: Integer) (x4 :: Integer) (x5 :: Integer) -> GHC.Num.Integer.integerAdd (GHC.Num.Integer.integerAdd (GHC.Num.Integer.integerAdd (GHC.Num.Integer.integerAdd x1 x2) x3) x4) x5
+g = \ (x1 :: Integer) (x2 :: Integer) (x3 :: Integer) (x4 :: Integer) (x5 :: Integer) -> GHC.Internal.Bignum.Integer.integerAdd (GHC.Internal.Bignum.Integer.integerAdd (GHC.Internal.Bignum.Integer.integerAdd (GHC.Internal.Bignum.Integer.integerAdd x1 x2) x3) x4) x5
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
F1.s1 :: Integer
[GblId, Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
-F1.s1 = GHC.Num.Integer.IS 3#
+F1.s1 = GHC.Internal.Bignum.Integer.IS 3#
-- RHS size: {terms: 8, types: 7, coercions: 0, joins: 0/0}
s :: forall {t1} {t2}. Num t1 => (t1 -> t2) -> t2
-[GblId, Arity=2, Str=
participants (1)
-
Simon Peyton Jones (@simonpj)