| ... |
... |
@@ -23,7 +23,7 @@ import GHC.Core.DataCon |
|
23
|
23
|
import GHC.Core.Utils
|
|
24
|
24
|
import GHC.Core.TyCon
|
|
25
|
25
|
import GHC.Core.Type
|
|
26
|
|
-import GHC.Core.FVs ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds )
|
|
|
26
|
+import GHC.Core.FVs ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds, idRuleVars )
|
|
27
|
27
|
import GHC.Core.Coercion ( Coercion )
|
|
28
|
28
|
import GHC.Core.TyCo.FVs ( coVarsOfCos )
|
|
29
|
29
|
import GHC.Core.TyCo.Compare ( eqType )
|
| ... |
... |
@@ -1106,9 +1106,22 @@ dmdAnalRhsSig top_lvl rec_flag env let_sd id rhs |
|
1106
|
1106
|
rhs_sd = mkCalledOnceDmds ww_arity adjusted_body_sd
|
|
1107
|
1107
|
|
|
1108
|
1108
|
WithDmdType rhs_dmd_ty rhs' = dmdAnal env rhs_sd rhs
|
|
1109
|
|
- DmdType rhs_env rhs_dmds = rhs_dmd_ty
|
|
1110
|
|
- (final_rhs_dmds, final_rhs) = finaliseArgBoxities env id ww_arity
|
|
1111
|
|
- rhs_dmds (de_div rhs_env) rhs'
|
|
|
1109
|
+
|
|
|
1110
|
+ -- See Note [Absence analysis for stable unfoldings and RULES], Wrinkle (W3)
|
|
|
1111
|
+ full_dmd_ty = addUnfoldingDemands env rhs_sd id rhs_dmd_ty
|
|
|
1112
|
+ DmdType full_rhs_env combined_rhs_dmds = full_dmd_ty
|
|
|
1113
|
+
|
|
|
1114
|
+ final_rhs_dmds = finaliseArgBoxities env id ww_arity
|
|
|
1115
|
+ combined_rhs_dmds (de_div full_rhs_env) rhs'
|
|
|
1116
|
+
|
|
|
1117
|
+ -- Attach the final demands to the lambda binders of the RHS.
|
|
|
1118
|
+ -- IMPORTANT: The lambda binders of final_rhs must carry the final demand
|
|
|
1119
|
+ -- info, because worker/wrapper drives decisions from the idDemandInfo on
|
|
|
1120
|
+ -- the lambdas (see mkWwstr_one), NOT from the strictness signature of the
|
|
|
1121
|
+ -- function. So the demands must reflect both the unfolding combination
|
|
|
1122
|
+ -- (from addUnfoldingDemands) and the boxity finalisation (from
|
|
|
1123
|
+ -- finaliseArgBoxities).
|
|
|
1124
|
+ final_rhs = setLamDmds final_rhs_dmds rhs'
|
|
1112
|
1125
|
|
|
1113
|
1126
|
dmd_sig_arity = ww_arity + strictCallArity body_sd
|
|
1114
|
1127
|
sig = mkDmdSigForArity dmd_sig_arity (DmdType sig_env final_rhs_dmds)
|
| ... |
... |
@@ -1132,19 +1145,51 @@ dmdAnalRhsSig top_lvl rec_flag env let_sd id rhs |
|
1132
|
1145
|
-- we never get used-once info for FVs of recursive functions.
|
|
1133
|
1146
|
-- See #14816 where we try to get rid of reuseEnv.
|
|
1134
|
1147
|
rhs_env1 = case rec_flag of
|
|
1135
|
|
- Recursive -> reuseEnv rhs_env
|
|
1136
|
|
- NonRecursive -> rhs_env
|
|
|
1148
|
+ Recursive -> reuseEnv full_rhs_env
|
|
|
1149
|
+ NonRecursive -> full_rhs_env
|
|
1137
|
1150
|
|
|
1138
|
1151
|
-- See Note [Absence analysis for stable unfoldings and RULES]
|
|
1139
|
|
- rhs_env2 = rhs_env1 `plusDmdEnv` demandRootSet env (bndrRuleAndUnfoldingIds id)
|
|
|
1152
|
+ -- The unfolding FVs are already included in full_rhs_env via addUnfoldingDemands.
|
|
|
1153
|
+ -- Here we only need demandRoots for RULES.
|
|
|
1154
|
+ rhs_env2 = rhs_env1 `plusDmdEnv` demandRootSet env (filterVarSet isId (idRuleVars id))
|
|
1140
|
1155
|
|
|
1141
|
1156
|
-- See Note [Lazy and unleashable free variables]
|
|
1142
|
1157
|
!(!sig_env, !weak_fvs) = splitWeakDmds rhs_env2
|
|
1143
|
1158
|
|
|
|
1159
|
+setLamDmds :: [Demand] -> CoreExpr -> CoreExpr
|
|
|
1160
|
+-- Attach the demands to the outer lambdas of this expression
|
|
|
1161
|
+setLamDmds (dmd:dmds) (Lam v e)
|
|
|
1162
|
+ | isTyVar v = Lam v (setLamDmds (dmd:dmds) e)
|
|
|
1163
|
+ | otherwise = Lam (v `setIdDemandInfo` dmd) (setLamDmds dmds e)
|
|
|
1164
|
+setLamDmds dmds (Cast e co) = Cast (setLamDmds dmds e) co
|
|
|
1165
|
+ -- This case happens for an OPAQUE function, which may look like
|
|
|
1166
|
+ -- f = (\x y. blah) |> co
|
|
|
1167
|
+ -- We give it strictness but no boxity (#22502)
|
|
|
1168
|
+setLamDmds _ e = e
|
|
|
1169
|
+ -- In the OPAQUE case, the list of demands at this point might be
|
|
|
1170
|
+ -- non-empty, e.g., when looking at a PAP. Hence don't panic (#22997).
|
|
|
1171
|
+
|
|
1144
|
1172
|
splitWeakDmds :: DmdEnv -> (DmdEnv, WeakDmds)
|
|
1145
|
1173
|
splitWeakDmds (DE fvs div) = (DE sig_fvs div, weak_fvs)
|
|
1146
|
1174
|
where (!weak_fvs, !sig_fvs) = partitionVarEnv isWeakDmd fvs
|
|
1147
|
1175
|
|
|
|
1176
|
+-- | If there is a stable unfolding, combine argument demands and free variable
|
|
|
1177
|
+-- demands from the unfolding with those from the RHS.
|
|
|
1178
|
+-- See Note [Absence analysis for stable unfoldings and RULES], Wrinkle (W3).
|
|
|
1179
|
+-- See Note [Combining demands for stable unfoldings] in GHC.Types.Demand.
|
|
|
1180
|
+addUnfoldingDemands :: AnalEnv -> SubDemand -> Id -> DmdType -> DmdType
|
|
|
1181
|
+addUnfoldingDemands env rhs_sd id rhs_dmd_ty
|
|
|
1182
|
+ | isStableUnfolding unf
|
|
|
1183
|
+ , Just unf_body <- maybeUnfoldingTemplate unf
|
|
|
1184
|
+ , let WithDmdType unf_dmd_ty _ = dmdAnal env rhs_sd unf_body
|
|
|
1185
|
+ = -- pprTrace "addUnfoldingDemands" (ppr id $$ ppr rhs_dmd_ty $$ ppr unf_dmd_ty) $
|
|
|
1186
|
+ maxDmdType rhs_dmd_ty unf_dmd_ty
|
|
|
1187
|
+
|
|
|
1188
|
+ | otherwise
|
|
|
1189
|
+ = rhs_dmd_ty -- No stable unfolding, nothing to do
|
|
|
1190
|
+ where
|
|
|
1191
|
+ unf = realIdUnfolding id
|
|
|
1192
|
+
|
|
1148
|
1193
|
-- | The result type after applying 'idArity' many arguments. Returns 'Nothing'
|
|
1149
|
1194
|
-- when the type doesn't have exactly 'idArity' many arrows.
|
|
1150
|
1195
|
resultType_maybe :: Id -> Maybe Type
|
| ... |
... |
@@ -1482,10 +1527,21 @@ and transform to |
|
1482
|
1527
|
|
|
1483
|
1528
|
Now if f is subsequently inlined, we'll use 'g' and ... disaster.
|
|
1484
|
1529
|
|
|
1485
|
|
-SOLUTION: if f has a stable unfolding, treat every free variable as a
|
|
1486
|
|
-/demand root/, that is: Analyse it as if it was a variable occurring in a
|
|
|
1530
|
+SOLUTION for stable unfoldings: in `dmdAnalRhsSig`, if the function has a
|
|
|
1531
|
+stable unfolding, analyse it with `dmdAnal` and combine the resulting `DmdType`
|
|
|
1532
|
+with the RHS's `DmdType`. This is done by `addUnfoldingDemands`, which uses
|
|
|
1533
|
+`maxDmdType` to combine both argument demands and free variable demands.
|
|
|
1534
|
+See Note [Combining demands for stable unfoldings] in GHC.Types.Demand for
|
|
|
1535
|
+details of the combining operation.
|
|
|
1536
|
+
|
|
|
1537
|
+This handles both the free variables and arguments of stable unfoldings in one
|
|
|
1538
|
+go. For example, in the scenario above, the unfolding's `DmdType` will mention
|
|
|
1539
|
+`g` as a free variable, so `maxDmdType` will keep it alive.
|
|
|
1540
|
+
|
|
|
1541
|
+SOLUTION for RULES: treat every Id free in the RHS of a RULE as a
|
|
|
1542
|
+/demand root/, that is: analyse it as if it was a variable occurring in a
|
|
1487
|
1543
|
'topDmd' context. This is done in `demandRoot` (which we also use for exported
|
|
1488
|
|
-top-level ids). Do the same for Ids free in the RHS of any RULES for f.
|
|
|
1544
|
+top-level ids).
|
|
1489
|
1545
|
|
|
1490
|
1546
|
Wrinkles:
|
|
1491
|
1547
|
|
| ... |
... |
@@ -1502,7 +1558,7 @@ Wrinkles: |
|
1502
|
1558
|
this, that actually happened in practice.
|
|
1503
|
1559
|
|
|
1504
|
1560
|
(W2) You might wonder why we don't simply take the free vars of the
|
|
1505
|
|
- unfolding/RULE and map them to topDmd. The reason is that any of the free vars
|
|
|
1561
|
+ RULE and map them to topDmd. The reason is that any of the free vars
|
|
1506
|
1562
|
might have demand signatures themselves that in turn demand transitive free
|
|
1507
|
1563
|
variables and that we hence need to unleash! This came up in #23208.
|
|
1508
|
1564
|
Consider
|
| ... |
... |
@@ -1524,6 +1580,24 @@ Wrinkles: |
|
1524
|
1580
|
for `sg`, failing to unleash the signature and hence observed an absent
|
|
1525
|
1581
|
error instead of the `really important message`.
|
|
1526
|
1582
|
|
|
|
1583
|
+ (W3) The stable unfolding solution above handles /free variables/, but
|
|
|
1584
|
+ what about /arguments/? Consider (#26416)
|
|
|
1585
|
+
|
|
|
1586
|
+ fromVector :: (Storable a, KnownNat n) => Vector a -> Vector a
|
|
|
1587
|
+ fromVector v = ... (uses Storable dictionary) ...
|
|
|
1588
|
+ {-# INLINABLE fromVector #-}
|
|
|
1589
|
+
|
|
|
1590
|
+ Suppose that the optimised RHS of `fromVector` somehow discards the use of
|
|
|
1591
|
+ the Storable dictionary, but the stable unfolding still uses it. Then the
|
|
|
1592
|
+ demand signature will say that the Storable dictionary argument is absent,
|
|
|
1593
|
+ and worker/wrapper will replace it with `LitRubbish`. But when the
|
|
|
1594
|
+ worker's unfolding is inlined, it will use that rubbish value as a real
|
|
|
1595
|
+ dictionary, leading to a segfault!
|
|
|
1596
|
+
|
|
|
1597
|
+ `addUnfoldingDemands` handles this too: since `maxDmdType` combines both
|
|
|
1598
|
+ the argument demands and free variable demands from the unfolding's
|
|
|
1599
|
+ `DmdType` with the RHS's, argument absence is correctly prevented.
|
|
|
1600
|
+
|
|
1527
|
1601
|
Note [DmdAnal for DataCon wrappers]
|
|
1528
|
1602
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
1529
|
1603
|
We give DataCon wrappers a (necessarily flat) demand signature in
|
| ... |
... |
@@ -2001,22 +2075,20 @@ positiveTopBudget (MkB n _) = n >= 0 |
|
2001
|
2075
|
|
|
2002
|
2076
|
finaliseArgBoxities :: AnalEnv -> Id -> Arity
|
|
2003
|
2077
|
-> [Demand] -> Divergence
|
|
2004
|
|
- -> CoreExpr -> ([Demand], CoreExpr)
|
|
|
2078
|
+ -> CoreExpr -> [Demand]
|
|
2005
|
2079
|
-- POSTCONDITION:
|
|
2006
|
|
--- If: (dmds', rhs') = finaliseArgBoxitities ... dmds .. rhs
|
|
|
2080
|
+-- If: dmds' = finaliseArgBoxities ... dmds .. rhs
|
|
2007
|
2081
|
-- Then:
|
|
2008
|
2082
|
-- dmds' is the same as dmds (including length), except for boxity info
|
|
2009
|
|
--- rhs' is the same as rhs, except for dmd info on lambda binders
|
|
2010
|
2083
|
-- NB: For join points, length dmds might be greater than ww_arity
|
|
|
2084
|
+-- NB: rhs is needed only to count visible binders.
|
|
2011
|
2085
|
finaliseArgBoxities env fn ww_arity arg_dmds div rhs
|
|
2012
|
2086
|
|
|
2013
|
2087
|
-- Check for an OPAQUE function: see Note [OPAQUE pragma]
|
|
2014
|
2088
|
-- In that case, trim off all boxity info from argument demands
|
|
2015
|
|
- -- and demand info on lambda binders
|
|
2016
|
2089
|
-- See Note [The OPAQUE pragma and avoiding the reboxing of arguments]
|
|
2017
|
2090
|
| isOpaquePragma (idInlinePragma fn)
|
|
2018
|
|
- , let trimmed_arg_dmds = map trimBoxity arg_dmds
|
|
2019
|
|
- = (trimmed_arg_dmds, set_lam_dmds trimmed_arg_dmds rhs)
|
|
|
2091
|
+ = map trimBoxity arg_dmds
|
|
2020
|
2092
|
|
|
2021
|
2093
|
-- Check that we have enough visible binders to match the
|
|
2022
|
2094
|
-- ww arity; if not, we won't do worker/wrapper
|
| ... |
... |
@@ -2027,7 +2099,7 @@ finaliseArgBoxities env fn ww_arity arg_dmds div rhs |
|
2027
|
2099
|
-- It's a bit of a corner case. Anyway for now we pass on the
|
|
2028
|
2100
|
-- unadulterated demands from the RHS, without any boxity trimming.
|
|
2029
|
2101
|
| ww_arity > count isId bndrs
|
|
2030
|
|
- = (arg_dmds, rhs)
|
|
|
2102
|
+ = arg_dmds
|
|
2031
|
2103
|
|
|
2032
|
2104
|
-- The normal case
|
|
2033
|
2105
|
| otherwise
|
| ... |
... |
@@ -2036,10 +2108,7 @@ finaliseArgBoxities env fn ww_arity arg_dmds div rhs |
|
2036
|
2108
|
-- , text "max" <+> ppr max_wkr_args
|
|
2037
|
2109
|
-- , text "dmds before:" <+> ppr (map idDemandInfo (filter isId bndrs))
|
|
2038
|
2110
|
-- , text "dmds after: " <+> ppr arg_dmds' ]) $
|
|
2039
|
|
- (arg_dmds', set_lam_dmds arg_dmds' rhs)
|
|
2040
|
|
- -- set_lam_dmds: we must attach the final boxities to the lambda-binders
|
|
2041
|
|
- -- of the function, both because that's kosher, and because CPR analysis
|
|
2042
|
|
- -- uses the info on the binders directly.
|
|
|
2111
|
+ arg_dmds'
|
|
2043
|
2112
|
where
|
|
2044
|
2113
|
opts = ae_opts env
|
|
2045
|
2114
|
(bndrs, _body) = collectBinders rhs
|
| ... |
... |
@@ -2047,8 +2116,11 @@ finaliseArgBoxities env fn ww_arity arg_dmds div rhs |
|
2047
|
2116
|
|
|
2048
|
2117
|
arg_triples :: [(Type, StrictnessMark, Demand)]
|
|
2049
|
2118
|
arg_triples = take ww_arity $
|
|
2050
|
|
- [ (idType bndr, NotMarkedStrict, get_dmd bndr)
|
|
2051
|
|
- | bndr <- bndrs, isRuntimeVar bndr ]
|
|
|
2119
|
+ zipWith mk_triple
|
|
|
2120
|
+ [ bndr | bndr <- bndrs, isRuntimeVar bndr ]
|
|
|
2121
|
+ arg_dmds
|
|
|
2122
|
+ where
|
|
|
2123
|
+ mk_triple bndr arg_dmd = (idType bndr, NotMarkedStrict, get_dmd arg_dmd)
|
|
2052
|
2124
|
|
|
2053
|
2125
|
arg_dmds' = ww_arg_dmds ++ map trimBoxity (drop ww_arity arg_dmds)
|
|
2054
|
2126
|
-- If ww_arity < length arg_dmds, the leftover ones
|
| ... |
... |
@@ -2064,12 +2136,10 @@ finaliseArgBoxities env fn ww_arity arg_dmds div rhs |
|
2064
|
2136
|
-- This is the budget initialisation step of
|
|
2065
|
2137
|
-- Note [Worker argument budget]
|
|
2066
|
2138
|
|
|
2067
|
|
- get_dmd :: Id -> Demand
|
|
2068
|
|
- get_dmd bndr
|
|
|
2139
|
+ get_dmd :: Demand -> Demand
|
|
|
2140
|
+ get_dmd dmd
|
|
2069
|
2141
|
| is_bot_fn = unboxDeeplyDmd dmd -- See Note [Boxity for bottoming functions],
|
|
2070
|
2142
|
| otherwise = dmd -- case (B)
|
|
2071
|
|
- where
|
|
2072
|
|
- dmd = idDemandInfo bndr
|
|
2073
|
2143
|
|
|
2074
|
2144
|
-- is_bot_fn: see Note [Boxity for bottoming functions]
|
|
2075
|
2145
|
is_bot_fn = div == botDiv
|
| ... |
... |
@@ -2126,19 +2196,6 @@ finaliseArgBoxities env fn ww_arity arg_dmds div rhs |
|
2126
|
2196
|
| positiveTopBudget bg_inner' = (bg_inner', dmd')
|
|
2127
|
2197
|
| otherwise = (bg_inner, trimBoxity dmd)
|
|
2128
|
2198
|
|
|
2129
|
|
- set_lam_dmds :: [Demand] -> CoreExpr -> CoreExpr
|
|
2130
|
|
- -- Attach the demands to the outer lambdas of this expression
|
|
2131
|
|
- set_lam_dmds (dmd:dmds) (Lam v e)
|
|
2132
|
|
- | isTyVar v = Lam v (set_lam_dmds (dmd:dmds) e)
|
|
2133
|
|
- | otherwise = Lam (v `setIdDemandInfo` dmd) (set_lam_dmds dmds e)
|
|
2134
|
|
- set_lam_dmds dmds (Cast e co) = Cast (set_lam_dmds dmds e) co
|
|
2135
|
|
- -- This case happens for an OPAQUE function, which may look like
|
|
2136
|
|
- -- f = (\x y. blah) |> co
|
|
2137
|
|
- -- We give it strictness but no boxity (#22502)
|
|
2138
|
|
- set_lam_dmds _ e = e
|
|
2139
|
|
- -- In the OPAQUE case, the list of demands at this point might be
|
|
2140
|
|
- -- non-empty, e.g., when looking at a PAP. Hence don't panic (#22997).
|
|
2141
|
|
-
|
|
2142
|
2199
|
finaliseLetBoxity
|
|
2143
|
2200
|
:: AnalEnv
|
|
2144
|
2201
|
-> Type -- ^ Type of the let-bound Id
|