[Git][ghc/ghc][wip/26416] DmdAnal: Take stable unfoldings into account when determining argument demands
Zubin pushed to branch wip/26416 at Glasgow Haskell Compiler / GHC
Commits:
870243e4 by Zubin Duggal at 2026-03-12T17:33:28+05:30
DmdAnal: Take stable unfoldings into account when determining argument demands
Previously, demand analysis only looked at the RHS to compute argument demands.
If the optimised RHS discarded uses of an argument that the stable unfolding
still needed, it would be incorrectly marked absent. Worker/wrapper would then
replace it with LitRubbish, and inlining the stable unfolding would use the
rubbish value, causing a segfault.
To fix, we introduce addUnfoldingDemands which analyses the stable unfolding
with dmdAnal and combines its DmdType with the RHS's via the new `maxDmdType`
which combines the demands of the stable unfolding with the rhs, so we can avoid
any situation where we give an absent demand to something which is still used
by the stable unfolding.
Fixes #26416.
- - - - -
8 changed files:
- compiler/GHC/Core/Opt/DmdAnal.hs
- compiler/GHC/Types/Demand.hs
- testsuite/tests/dmdanal/should_compile/T18894.stderr
- + testsuite/tests/dmdanal/should_run/M1.hs
- + testsuite/tests/dmdanal/should_run/T26416.hs
- + testsuite/tests/dmdanal/should_run/T26416.stdout
- testsuite/tests/dmdanal/should_run/all.T
- testsuite/tests/dmdanal/sigs/T21081.stderr
Changes:
=====================================
compiler/GHC/Core/Opt/DmdAnal.hs
=====================================
@@ -23,7 +23,7 @@ import GHC.Core.DataCon
import GHC.Core.Utils
import GHC.Core.TyCon
import GHC.Core.Type
-import GHC.Core.FVs ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds )
+import GHC.Core.FVs ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds, idRuleVars )
import GHC.Core.Coercion ( Coercion )
import GHC.Core.TyCo.FVs ( coVarsOfCos )
import GHC.Core.TyCo.Compare ( eqType )
@@ -1106,9 +1106,22 @@ dmdAnalRhsSig top_lvl rec_flag env let_sd id rhs
rhs_sd = mkCalledOnceDmds ww_arity adjusted_body_sd
WithDmdType rhs_dmd_ty rhs' = dmdAnal env rhs_sd rhs
- DmdType rhs_env rhs_dmds = rhs_dmd_ty
- (final_rhs_dmds, final_rhs) = finaliseArgBoxities env id ww_arity
- rhs_dmds (de_div rhs_env) rhs'
+
+ -- See Note [Absence analysis for stable unfoldings and RULES], Wrinkle (W3)
+ full_dmd_ty = addUnfoldingDemands env rhs_sd id rhs_dmd_ty
+ DmdType full_rhs_env combined_rhs_dmds = full_dmd_ty
+
+ final_rhs_dmds = finaliseArgBoxities env id ww_arity
+ combined_rhs_dmds (de_div full_rhs_env) rhs'
+
+ -- Attach the final demands to the lambda binders of the RHS.
+ -- IMPORTANT: The lambda binders of final_rhs must carry the final demand
+ -- info, because worker/wrapper drives decisions from the idDemandInfo on
+ -- the lambdas (see mkWwstr_one), NOT from the strictness signature of the
+ -- function. So the demands must reflect both the unfolding combination
+ -- (from addUnfoldingDemands) and the boxity finalisation (from
+ -- finaliseArgBoxities).
+ final_rhs = setLamDmds final_rhs_dmds rhs'
dmd_sig_arity = ww_arity + strictCallArity body_sd
sig = mkDmdSigForArity dmd_sig_arity (DmdType sig_env final_rhs_dmds)
@@ -1132,19 +1145,51 @@ dmdAnalRhsSig top_lvl rec_flag env let_sd id rhs
-- we never get used-once info for FVs of recursive functions.
-- See #14816 where we try to get rid of reuseEnv.
rhs_env1 = case rec_flag of
- Recursive -> reuseEnv rhs_env
- NonRecursive -> rhs_env
+ Recursive -> reuseEnv full_rhs_env
+ NonRecursive -> full_rhs_env
-- See Note [Absence analysis for stable unfoldings and RULES]
- rhs_env2 = rhs_env1 `plusDmdEnv` demandRootSet env (bndrRuleAndUnfoldingIds id)
+ -- The unfolding FVs are already included in full_rhs_env via addUnfoldingDemands.
+ -- Here we only need demandRoots for RULES.
+ rhs_env2 = rhs_env1 `plusDmdEnv` demandRootSet env (filterVarSet isId (idRuleVars id))
-- See Note [Lazy and unleashable free variables]
!(!sig_env, !weak_fvs) = splitWeakDmds rhs_env2
+setLamDmds :: [Demand] -> CoreExpr -> CoreExpr
+-- Attach the demands to the outer lambdas of this expression
+setLamDmds (dmd:dmds) (Lam v e)
+ | isTyVar v = Lam v (setLamDmds (dmd:dmds) e)
+ | otherwise = Lam (v `setIdDemandInfo` dmd) (setLamDmds dmds e)
+setLamDmds dmds (Cast e co) = Cast (setLamDmds dmds e) co
+ -- This case happens for an OPAQUE function, which may look like
+ -- f = (\x y. blah) |> co
+ -- We give it strictness but no boxity (#22502)
+setLamDmds _ e = e
+ -- In the OPAQUE case, the list of demands at this point might be
+ -- non-empty, e.g., when looking at a PAP. Hence don't panic (#22997).
+
splitWeakDmds :: DmdEnv -> (DmdEnv, WeakDmds)
splitWeakDmds (DE fvs div) = (DE sig_fvs div, weak_fvs)
where (!weak_fvs, !sig_fvs) = partitionVarEnv isWeakDmd fvs
+-- | If there is a stable unfolding, combine argument demands and free variable
+-- demands from the unfolding with those from the RHS.
+-- See Note [Absence analysis for stable unfoldings and RULES], Wrinkle (W3).
+-- See Note [Combining demands for stable unfoldings] in GHC.Types.Demand.
+addUnfoldingDemands :: AnalEnv -> SubDemand -> Id -> DmdType -> DmdType
+addUnfoldingDemands env rhs_sd id rhs_dmd_ty
+ | isStableUnfolding unf
+ , Just unf_body <- maybeUnfoldingTemplate unf
+ , let WithDmdType unf_dmd_ty _ = dmdAnal env rhs_sd unf_body
+ = -- pprTrace "addUnfoldingDemands" (ppr id $$ ppr rhs_dmd_ty $$ ppr unf_dmd_ty) $
+ maxDmdType rhs_dmd_ty unf_dmd_ty
+
+ | otherwise
+ = rhs_dmd_ty -- No stable unfolding, nothing to do
+ where
+ unf = realIdUnfolding id
+
-- | The result type after applying 'idArity' many arguments. Returns 'Nothing'
-- when the type doesn't have exactly 'idArity' many arrows.
resultType_maybe :: Id -> Maybe Type
@@ -1482,10 +1527,21 @@ and transform to
Now if f is subsequently inlined, we'll use 'g' and ... disaster.
-SOLUTION: if f has a stable unfolding, treat every free variable as a
-/demand root/, that is: Analyse it as if it was a variable occurring in a
+SOLUTION for stable unfoldings: in `dmdAnalRhsSig`, if the function has a
+stable unfolding, analyse it with `dmdAnal` and combine the resulting `DmdType`
+with the RHS's `DmdType`. This is done by `addUnfoldingDemands`, which uses
+`maxDmdType` to combine both argument demands and free variable demands.
+See Note [Combining demands for stable unfoldings] in GHC.Types.Demand for
+details of the combining operation.
+
+This handles both the free variables and arguments of stable unfoldings in one
+go. For example, in the scenario above, the unfolding's `DmdType` will mention
+`g` as a free variable, so `maxDmdType` will keep it alive.
+
+SOLUTION for RULES: treat every Id free in the RHS of a RULE as a
+/demand root/, that is: analyse it as if it was a variable occurring in a
'topDmd' context. This is done in `demandRoot` (which we also use for exported
-top-level ids). Do the same for Ids free in the RHS of any RULES for f.
+top-level ids).
Wrinkles:
@@ -1502,7 +1558,7 @@ Wrinkles:
this, that actually happened in practice.
(W2) You might wonder why we don't simply take the free vars of the
- unfolding/RULE and map them to topDmd. The reason is that any of the free vars
+ RULE and map them to topDmd. The reason is that any of the free vars
might have demand signatures themselves that in turn demand transitive free
variables and that we hence need to unleash! This came up in #23208.
Consider
@@ -1524,6 +1580,24 @@ Wrinkles:
for `sg`, failing to unleash the signature and hence observed an absent
error instead of the `really important message`.
+ (W3) The stable unfolding solution above handles /free variables/, but
+ what about /arguments/? Consider (#26416)
+
+ fromVector :: (Storable a, KnownNat n) => Vector a -> Vector a
+ fromVector v = ... (uses Storable dictionary) ...
+ {-# INLINABLE fromVector #-}
+
+ Suppose that the optimised RHS of `fromVector` somehow discards the use of
+ the Storable dictionary, but the stable unfolding still uses it. Then the
+ demand signature will say that the Storable dictionary argument is absent,
+ and worker/wrapper will replace it with `LitRubbish`. But when the
+ worker's unfolding is inlined, it will use that rubbish value as a real
+ dictionary, leading to a segfault!
+
+ `addUnfoldingDemands` handles this too: since `maxDmdType` combines both
+ the argument demands and free variable demands from the unfolding's
+ `DmdType` with the RHS's, argument absence is correctly prevented.
+
Note [DmdAnal for DataCon wrappers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We give DataCon wrappers a (necessarily flat) demand signature in
@@ -2001,22 +2075,20 @@ positiveTopBudget (MkB n _) = n >= 0
finaliseArgBoxities :: AnalEnv -> Id -> Arity
-> [Demand] -> Divergence
- -> CoreExpr -> ([Demand], CoreExpr)
+ -> CoreExpr -> [Demand]
-- POSTCONDITION:
--- If: (dmds', rhs') = finaliseArgBoxitities ... dmds .. rhs
+-- If: dmds' = finaliseArgBoxities ... dmds .. rhs
-- Then:
-- dmds' is the same as dmds (including length), except for boxity info
--- rhs' is the same as rhs, except for dmd info on lambda binders
-- NB: For join points, length dmds might be greater than ww_arity
+-- NB: rhs is needed only to count visible binders.
finaliseArgBoxities env fn ww_arity arg_dmds div rhs
-- Check for an OPAQUE function: see Note [OPAQUE pragma]
-- In that case, trim off all boxity info from argument demands
- -- and demand info on lambda binders
-- See Note [The OPAQUE pragma and avoiding the reboxing of arguments]
| isOpaquePragma (idInlinePragma fn)
- , let trimmed_arg_dmds = map trimBoxity arg_dmds
- = (trimmed_arg_dmds, set_lam_dmds trimmed_arg_dmds rhs)
+ = map trimBoxity arg_dmds
-- Check that we have enough visible binders to match the
-- ww arity; if not, we won't do worker/wrapper
@@ -2027,7 +2099,7 @@ finaliseArgBoxities env fn ww_arity arg_dmds div rhs
-- It's a bit of a corner case. Anyway for now we pass on the
-- unadulterated demands from the RHS, without any boxity trimming.
| ww_arity > count isId bndrs
- = (arg_dmds, rhs)
+ = arg_dmds
-- The normal case
| otherwise
@@ -2036,10 +2108,7 @@ finaliseArgBoxities env fn ww_arity arg_dmds div rhs
-- , text "max" <+> ppr max_wkr_args
-- , text "dmds before:" <+> ppr (map idDemandInfo (filter isId bndrs))
-- , text "dmds after: " <+> ppr arg_dmds' ]) $
- (arg_dmds', set_lam_dmds arg_dmds' rhs)
- -- set_lam_dmds: we must attach the final boxities to the lambda-binders
- -- of the function, both because that's kosher, and because CPR analysis
- -- uses the info on the binders directly.
+ arg_dmds'
where
opts = ae_opts env
(bndrs, _body) = collectBinders rhs
@@ -2047,8 +2116,11 @@ finaliseArgBoxities env fn ww_arity arg_dmds div rhs
arg_triples :: [(Type, StrictnessMark, Demand)]
arg_triples = take ww_arity $
- [ (idType bndr, NotMarkedStrict, get_dmd bndr)
- | bndr <- bndrs, isRuntimeVar bndr ]
+ zipWith mk_triple
+ [ bndr | bndr <- bndrs, isRuntimeVar bndr ]
+ arg_dmds
+ where
+ mk_triple bndr arg_dmd = (idType bndr, NotMarkedStrict, get_dmd arg_dmd)
arg_dmds' = ww_arg_dmds ++ map trimBoxity (drop ww_arity arg_dmds)
-- If ww_arity < length arg_dmds, the leftover ones
@@ -2064,12 +2136,10 @@ finaliseArgBoxities env fn ww_arity arg_dmds div rhs
-- This is the budget initialisation step of
-- Note [Worker argument budget]
- get_dmd :: Id -> Demand
- get_dmd bndr
+ get_dmd :: Demand -> Demand
+ get_dmd dmd
| is_bot_fn = unboxDeeplyDmd dmd -- See Note [Boxity for bottoming functions],
| otherwise = dmd -- case (B)
- where
- dmd = idDemandInfo bndr
-- is_bot_fn: see Note [Boxity for bottoming functions]
is_bot_fn = div == botDiv
@@ -2126,19 +2196,6 @@ finaliseArgBoxities env fn ww_arity arg_dmds div rhs
| positiveTopBudget bg_inner' = (bg_inner', dmd')
| otherwise = (bg_inner, trimBoxity dmd)
- set_lam_dmds :: [Demand] -> CoreExpr -> CoreExpr
- -- Attach the demands to the outer lambdas of this expression
- set_lam_dmds (dmd:dmds) (Lam v e)
- | isTyVar v = Lam v (set_lam_dmds (dmd:dmds) e)
- | otherwise = Lam (v `setIdDemandInfo` dmd) (set_lam_dmds dmds e)
- set_lam_dmds dmds (Cast e co) = Cast (set_lam_dmds dmds e) co
- -- This case happens for an OPAQUE function, which may look like
- -- f = (\x y. blah) |> co
- -- We give it strictness but no boxity (#22502)
- set_lam_dmds _ e = e
- -- In the OPAQUE case, the list of demands at this point might be
- -- non-empty, e.g., when looking at a PAP. Hence don't panic (#22997).
-
finaliseLetBoxity
:: AnalEnv
-> Type -- ^ Type of the let-bound Id
=====================================
compiler/GHC/Types/Demand.hs
=====================================
@@ -23,6 +23,8 @@ module GHC.Types.Demand (
lubCard, lubDmd, lubSubDmd,
-- *** Greatest lower bound
glbCard,
+ -- *** Maximum (glb on strictness, lub on usage)
+ maxCard, maxDmd,
-- *** Plus
plusCard, plusDmd, plusSubDmd,
-- *** Multiply
@@ -49,13 +51,13 @@ module GHC.Types.Demand (
-- * Demand environments
DmdEnv(..), addVarDmdEnv, mkTermDmdEnv, nopDmdEnv, plusDmdEnv, plusDmdEnvs,
- multDmdEnv, reuseEnv,
+ lubDmdEnv, multDmdEnv, reuseEnv,
-- * Demand types
DmdType(..), dmdTypeDepth,
-- ** Algebra
nopDmdType, botDmdType,
- lubDmdType, plusDmdType, multDmdType, discardArgDmds,
+ lubDmdType, maxDmdType, plusDmdType, multDmdType, discardArgDmds,
-- ** Other operations
peelFV, findIdDemand, addDemand, splitDmdTy, deferAfterPreciseException,
@@ -864,6 +866,89 @@ lubSubDmd sd1@Poly{} sd2 = lubSubDmd sd2 sd1
-- Otherwise (Call `lub` Prod) return Top
lubSubDmd _ _ = topSubDmd
+{- Note [Combining demands for stable unfoldings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When a function has a stable unfolding, the optimised RHS and the unfolding
+may have different demand signatures for the same arguments. This can happen
+because:
+
+ * The optimised RHS may have had transformations applied that reveal
+ strictness (e.g., inlining exposes a case on an argument).
+ Example:
+ RHS: x
+ Unfolding: head [x]
+ It's clear that the RHS is strict in `x`, but the demand analyser won't
+ spot that when it analyses the unfolding.
+
+ * The optimised RHS may have had transformations applied that drop usage
+ (e.g., a rewrite rule fires that doesn't use an argument, or a seq on
+ a dictionary is dropped because dictionaries are known to terminate).
+ Example:
+ RHS: a
+ Unfolding: fst g
+ where `g` is in scope as `g = (a,b)`.
+
+See Note [Absence analysis for stable unfoldings and RULES] in
+GHC.Core.Opt.DmdAnal for the broader context.
+
+When we inline the stable unfolding at a call site, we get the unfolding's
+behaviour, not the RHS's. So we must be conservative and combine the demands:
+
+ * For strictness (lower bounds): we can take the MAXIMUM (glb).
+ If the RHS reveals that an argument is strict, that strictness was
+ always there semantically - the analysis just couldn't see it in the
+ unfolding. Sound optimisations never make lazy code strict.
+
+ * For usage (upper bounds): we must take the MAXIMUM (lub).
+ If the unfolding uses an argument but the RHS doesn't, we must not
+ mark it absent, or we'll replace it with rubbish that the unfolding
+ will then try to use, causing a segfault. See #26416.
+
+So for cardinality bounds [l1..u1] from RHS and [l2..u2] from unfolding,
+we compute [max(l1,l2)..max(u1,u2)].
+-}
+
+-- | Takes the maximum of both the lower and upper bound of two 'Card's.
+-- Semantically, this is glb on lower (strictness) and lub on upper (usage).
+-- See Note [Combining demands for stable unfoldings].
+maxCard :: Card -> Card -> Card
+-- Given Note [Bit vector representation for Card]:
+-- * bit 0 (strictness): take AND (glb) - 0 means strict, so 0 wins
+-- * bits 1,2 (usage): take OR (lub) - if either uses, result uses
+maxCard (Card a) (Card b) = Card ((a .&. b .&. 0b001) .|. ((a .|. b) .&. 0b110))
+
+-- | Takes the maximum of both the lower and upper bounds of two 'Demand's.
+-- Semantically, glb on lower (strictness) and lub on upper (usage).
+-- See Note [Combining demands for stable unfoldings].
+maxDmd :: Demand -> Demand -> Demand
+maxDmd BotDmd dmd2 = dmd2
+maxDmd dmd1 BotDmd = dmd1
+maxDmd (n1 :* sd1) (n2 :* sd2) =
+ maxCard n1 n2 :* maxSubDmd sd1 sd2
+
+maxSubDmd :: SubDemand -> SubDemand -> SubDemand
+-- Shortcuts for neutral and absorbing elements.
+maxSubDmd (Poly Unboxed C_00) sd = sd
+maxSubDmd sd (Poly Unboxed C_00) = sd
+maxSubDmd sd@(Poly Boxed C_1N) _ = sd
+maxSubDmd _ sd@(Poly Boxed C_1N) = sd
+-- Prod
+maxSubDmd (Prod b1 ds1) (Poly b2 n2)
+ | let !d = polyFieldDmd b2 n2
+ = mkProd (lubBoxity b1 b2) (strictMap (maxDmd d) ds1)
+maxSubDmd (Prod b1 ds1) (Prod b2 ds2)
+ | equalLength ds1 ds2
+ = mkProd (lubBoxity b1 b2) (strictZipWith maxDmd ds1 ds2)
+-- Handle Call
+maxSubDmd (Call n1 sd1) (viewCall -> Just (n2, sd2)) =
+ mkCall (maxCard n1 n2) (maxSubDmd sd1 sd2)
+-- Handle Poly
+maxSubDmd (Poly b1 n1) (Poly b2 n2) = Poly (lubBoxity b1 b2) (maxCard n1 n2)
+-- Other Poly case by commutativity
+maxSubDmd sd1@Poly{} sd2 = maxSubDmd sd2 sd1
+-- Otherwise (Call `max` Prod) return Top
+maxSubDmd _ _ = topSubDmd
+
-- | Denotes '+' on 'Demand'.
plusDmd :: Demand -> Demand -> Demand
plusDmd AbsDmd dmd2 = dmd2
@@ -1834,6 +1919,26 @@ lubDmdType d1 d2 = DmdType lub_fv lub_ds
lub_ds = zipWithEqual lubDmd ds1 ds2
lub_fv = lubDmdEnv fv1 fv2
+-- | Combine two 'DmdType's for stable unfolding analysis.
+-- See Note [Combining demands for stable unfoldings].
+maxDmdType :: DmdType -> DmdType -> DmdType
+maxDmdType (DmdType fv1 ds1) (DmdType fv2 ds2)
+ = DmdType combined_fv combined_ds
+ where
+ combined_fv = maxDmdEnv fv1 fv2
+ combined_ds = go ds1 ds2
+ -- If lists have different lengths, keep remaining ds1 (from RHS)
+ go rhs [] = rhs
+ go [] _ = []
+ go (r:rhs) (u:unfs) = maxDmd r u : go rhs unfs
+
+-- | See Note [Combining demands for stable unfoldings].
+maxDmdEnv :: DmdEnv -> DmdEnv -> DmdEnv
+maxDmdEnv (DE fv1 d1) (DE fv2 d2) = DE combined_fv combined_div
+ where
+ combined_fv = plusVarEnv_CD maxDmd fv1 (defaultFvDmd d1) fv2 (defaultFvDmd d2)
+ combined_div = lubDivergence d1 d2
+
discardArgDmds :: DmdType -> DmdEnv
discardArgDmds (DmdType fv _) = fv
=====================================
testsuite/tests/dmdanal/should_compile/T18894.stderr
=====================================
@@ -399,7 +399,8 @@ lvl :: (Int, Int)
lvl = case $wg1 2# of { (# ww, ww #) -> (GHC.Types.I# ww, ww) }
-- RHS size: {terms: 22, types: 16, coercions: 0, joins: 0/0}
-$wh1 [InlPrag=[2]] :: GHC.Prim.Int# -> Int
+$wh1 [InlPrag=[2], Dmd=LC(S,!P(L))]
+ :: GHC.Prim.Int# -> Int
[LclId[StrictWorker([])],
Arity=1,
Str=<1L>,
=====================================
testsuite/tests/dmdanal/should_run/M1.hs
=====================================
@@ -0,0 +1,17 @@
+-- Short module name is essential, or else f doesn't inline
+module M1 where
+{-# INLINABLE [2] f #-}
+f :: Int -> Int -> Float
+f !dummy x = if times dummy 0 x == 1
+ then 3.0 else 4.0
+
+{-# INLINE [0] times #-}
+times :: Int -> Int -> Int -> Int
+times dummy 0 x = x `seq` ( 0 + big dummy )
+times _ a b = a * b
+
+{-# RULES "times" [1] forall dummy x. times dummy 0 x = 0 + big dummy #-}
+
+big :: Int -> Int
+big x = succ . succ . succ . succ . succ . succ . succ . succ . succ $ x
+{-# INLINE big #-}
=====================================
testsuite/tests/dmdanal/should_run/T26416.hs
=====================================
@@ -0,0 +1,3 @@
+module Main where
+import M1 ( f )
+main = print (f 19 12)
=====================================
testsuite/tests/dmdanal/should_run/T26416.stdout
=====================================
@@ -0,0 +1 @@
+4.0
=====================================
testsuite/tests/dmdanal/should_run/all.T
=====================================
@@ -35,3 +35,4 @@ test('T22549', normal, compile_and_run, ['-fdicts-strict -fno-specialise'])
test('T23208', exit_code(1), multimod_compile_and_run, ['T23208_Lib', 'T23208'])
test('T25439', normal, compile_and_run, [''])
test('T26748', normal, compile_and_run, [''])
+test('T26416', [extra_files(['M1.hs'])], multimod_compile_and_run, ['T26416','M1.hs'])
=====================================
testsuite/tests/dmdanal/sigs/T21081.stderr
=====================================
@@ -62,7 +62,7 @@ T21081.g: <ML>
T21081.h:
T21081.myfoldl:
participants (1)
-
Zubin (@wz1000)