| ... |
... |
@@ -1243,14 +1243,15 @@ specExpr env (Let bind body) |
|
1243
|
1243
|
-- Note [Fire rules in the specialiser]
|
|
1244
|
1244
|
fireRewriteRules :: SpecEnv -> InExpr -> [OutExpr] -> (InExpr, [OutExpr])
|
|
1245
|
1245
|
fireRewriteRules env (Var f) args
|
|
1246
|
|
- | Just (rule, expr) <- specLookupRule env f args InitialPhase (getRules (se_rules env) f)
|
|
|
1246
|
+ | let rules = getRules (se_rules env) f
|
|
|
1247
|
+ , Just (rule, expr) <- specLookupRule env f args activeInInitialPhase rules
|
|
1247
|
1248
|
, let rest_args = drop (ruleArity rule) args -- See Note [Extra args in the target]
|
|
1248
|
1249
|
zapped_subst = Core.zapSubst (se_subst env)
|
|
1249
|
1250
|
expr' = simpleOptExprWith defaultSimpleOpts zapped_subst expr
|
|
1250
|
1251
|
-- simplOptExpr needed because lookupRule returns
|
|
1251
|
1252
|
-- (\x y. rhs) arg1 arg2
|
|
1252
|
|
- , (fun, args) <- collectArgs expr'
|
|
1253
|
|
- = fireRewriteRules env fun (args++rest_args)
|
|
|
1253
|
+ , (fun', args') <- collectArgs expr'
|
|
|
1254
|
+ = fireRewriteRules env fun' (args'++rest_args)
|
|
1254
|
1255
|
fireRewriteRules _ fun args = (fun, args)
|
|
1255
|
1256
|
|
|
1256
|
1257
|
--------------
|
| ... |
... |
@@ -1620,7 +1621,7 @@ specCalls :: Bool -- True => specialising imported fn |
|
1620
|
1621
|
|
|
1621
|
1622
|
-- This function checks existing rules, and does not create
|
|
1622
|
1623
|
-- duplicate ones. So the caller does not need to do this filtering.
|
|
1623
|
|
--- See 'already_covered'
|
|
|
1624
|
+-- See `alreadyCovered`
|
|
1624
|
1625
|
|
|
1625
|
1626
|
type SpecInfo = ( [CoreRule] -- Specialisation rules
|
|
1626
|
1627
|
, [(Id,CoreExpr)] -- Specialised definition
|
| ... |
... |
@@ -1644,15 +1645,13 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs |
|
1644
|
1645
|
|
|
1645
|
1646
|
= -- pprTrace "specCalls: some" (vcat
|
|
1646
|
1647
|
-- [ text "function" <+> ppr fn
|
|
1647
|
|
- -- , text "calls:" <+> ppr calls_for_me
|
|
1648
|
|
- -- , text "subst" <+> ppr (se_subst env) ]) $
|
|
|
1648
|
+ -- , text "calls:" <+> ppr calls_for_me
|
|
|
1649
|
+ -- , text "subst" <+> ppr (se_subst env) ]) $
|
|
1649
|
1650
|
foldlM spec_call ([], [], emptyUDs) calls_for_me
|
|
1650
|
1651
|
|
|
1651
|
1652
|
| otherwise -- No calls or RHS doesn't fit our preconceptions
|
|
1652
|
|
- = warnPprTrace (not (exprIsTrivial rhs) && notNull calls_for_me && not (isClassOpId fn))
|
|
|
1653
|
+ = warnPprTrace (not (exprIsTrivial rhs) && notNull calls_for_me)
|
|
1653
|
1654
|
"Missed specialisation opportunity for" (ppr fn $$ trace_doc) $
|
|
1654
|
|
- -- isClassOpId: class-op Ids never inline; we specialise them
|
|
1655
|
|
- -- through fireRewriteRules. So don't complain about missed opportunities
|
|
1656
|
1655
|
-- Note [Specialisation shape]
|
|
1657
|
1656
|
-- pprTrace "specCalls: none" (ppr fn <+> ppr calls_for_me) $
|
|
1658
|
1657
|
return ([], [], emptyUDs)
|
| ... |
... |
@@ -1664,6 +1663,10 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs |
|
1664
|
1663
|
fn_unf = realIdUnfolding fn -- Ignore loop-breaker-ness here
|
|
1665
|
1664
|
inl_prag = idInlinePragma fn
|
|
1666
|
1665
|
inl_act = inlinePragmaActivation inl_prag
|
|
|
1666
|
+ is_active = isActive (beginPhase inl_act) :: Activation -> Bool
|
|
|
1667
|
+ -- is_active: inl_act is the activation we are going to put in the new
|
|
|
1668
|
+ -- SPEC rule; so we want to see if it is covered by another rule with
|
|
|
1669
|
+ -- that same activation.
|
|
1667
|
1670
|
is_local = isLocalId fn
|
|
1668
|
1671
|
is_dfun = isDFunId fn
|
|
1669
|
1672
|
dflags = se_dflags env
|
| ... |
... |
@@ -1674,16 +1677,6 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs |
|
1674
|
1677
|
(rhs_bndrs, rhs_body) = collectBindersPushingCo rhs
|
|
1675
|
1678
|
-- See Note [Account for casts in binding]
|
|
1676
|
1679
|
|
|
1677
|
|
- already_covered :: SpecEnv -> [CoreRule] -> [CoreExpr] -> Bool
|
|
1678
|
|
- already_covered env new_rules args -- Note [Specialisations already covered]
|
|
1679
|
|
- = isJust (specLookupRule env fn args (beginPhase inl_act)
|
|
1680
|
|
- (new_rules ++ existing_rules))
|
|
1681
|
|
- -- Rules: we look both in the new_rules (generated by this invocation
|
|
1682
|
|
- -- of specCalls), and in existing_rules (passed in to specCalls)
|
|
1683
|
|
- -- inl_act: is the activation we are going to put in the new SPEC
|
|
1684
|
|
- -- rule; so we want to see if it is covered by another rule with
|
|
1685
|
|
- -- that same activation.
|
|
1686
|
|
-
|
|
1687
|
1680
|
----------------------------------------------------------
|
|
1688
|
1681
|
-- Specialise to one particular call pattern
|
|
1689
|
1682
|
spec_call :: SpecInfo -- Accumulating parameter
|
| ... |
... |
@@ -1717,8 +1710,12 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs |
|
1717
|
1710
|
-- , ppr dx_binds ]) $
|
|
1718
|
1711
|
-- return ()
|
|
1719
|
1712
|
|
|
|
1713
|
+ ; let all_rules = rules_acc ++ existing_rules
|
|
|
1714
|
+ -- all_rules: we look both in the rules_acc (generated by this invocation
|
|
|
1715
|
+ -- of specCalls), and in existing_rules (passed in to specCalls)
|
|
1720
|
1716
|
; if not useful -- No useful specialisation
|
|
1721
|
|
- || already_covered rhs_env2 rules_acc rule_lhs_args
|
|
|
1717
|
+ || alreadyCovered rhs_env2 rule_bndrs fn rule_lhs_args is_active all_rules
|
|
|
1718
|
+ -- See (SC1) in Note [Specialisations already covered]
|
|
1722
|
1719
|
then return spec_acc
|
|
1723
|
1720
|
else
|
|
1724
|
1721
|
do { -- Run the specialiser on the specialised RHS
|
| ... |
... |
@@ -1780,7 +1777,7 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs |
|
1780
|
1777
|
spec_fn_details
|
|
1781
|
1778
|
= case idDetails fn of
|
|
1782
|
1779
|
JoinId join_arity _ -> JoinId (join_arity - join_arity_decr) Nothing
|
|
1783
|
|
- DFunId is_nt -> DFunId is_nt
|
|
|
1780
|
+ DFunId unary -> DFunId unary
|
|
1784
|
1781
|
_ -> VanillaId
|
|
1785
|
1782
|
|
|
1786
|
1783
|
; spec_fn <- newSpecIdSM (idName fn) spec_fn_ty spec_fn_details spec_fn_info
|
| ... |
... |
@@ -1804,6 +1801,8 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs |
|
1804
|
1801
|
, ppr spec_fn <+> dcolon <+> ppr spec_fn_ty
|
|
1805
|
1802
|
, ppr rhs_bndrs, ppr call_args
|
|
1806
|
1803
|
, ppr spec_rule
|
|
|
1804
|
+ , text "acc" <+> ppr rules_acc
|
|
|
1805
|
+ , text "existing" <+> ppr existing_rules
|
|
1807
|
1806
|
]
|
|
1808
|
1807
|
|
|
1809
|
1808
|
; -- pprTrace "spec_call: rule" _rule_trace_doc
|
| ... |
... |
@@ -1812,19 +1811,35 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs |
|
1812
|
1811
|
, spec_uds `thenUDs` uds_acc
|
|
1813
|
1812
|
) } }
|
|
1814
|
1813
|
|
|
|
1814
|
+alreadyCovered :: SpecEnv
|
|
|
1815
|
+ -> [Var] -> Id -> [CoreExpr] -- LHS of possible new rule
|
|
|
1816
|
+ -> (Activation -> Bool) -- Which rules are active
|
|
|
1817
|
+ -> [CoreRule] -> Bool
|
|
|
1818
|
+-- Note [Specialisations already covered] esp (SC2)
|
|
|
1819
|
+alreadyCovered env bndrs fn args is_active rules
|
|
|
1820
|
+ = case specLookupRule env fn args is_active rules of
|
|
|
1821
|
+ Nothing -> False
|
|
|
1822
|
+ Just (rule, _)
|
|
|
1823
|
+ | isAutoRule rule -> -- Discard identical rules
|
|
|
1824
|
+ -- We know that (fn args) is an instance of RULE
|
|
|
1825
|
+ -- Check if RULE is an instance of (fn args)
|
|
|
1826
|
+ ruleLhsIsMoreSpecific in_scope bndrs args rule
|
|
|
1827
|
+ | otherwise -> True -- User rules dominate
|
|
|
1828
|
+ where
|
|
|
1829
|
+ in_scope = substInScopeSet (se_subst env)
|
|
|
1830
|
+
|
|
1815
|
1831
|
-- Convenience function for invoking lookupRule from Specialise
|
|
1816
|
1832
|
-- The SpecEnv's InScopeSet should include all the Vars in the [CoreExpr]
|
|
1817
|
1833
|
specLookupRule :: SpecEnv -> Id -> [CoreExpr]
|
|
1818
|
|
- -> CompilerPhase -- Look up rules as if we were in this phase
|
|
|
1834
|
+ -> (Activation -> Bool) -- Which rules are active
|
|
1819
|
1835
|
-> [CoreRule] -> Maybe (CoreRule, CoreExpr)
|
|
1820
|
|
-specLookupRule env fn args phase rules
|
|
|
1836
|
+specLookupRule env fn args is_active rules
|
|
1821
|
1837
|
= lookupRule ropts in_scope_env is_active fn args rules
|
|
1822
|
1838
|
where
|
|
1823
|
1839
|
dflags = se_dflags env
|
|
1824
|
1840
|
in_scope = substInScopeSet (se_subst env)
|
|
1825
|
1841
|
in_scope_env = ISE in_scope (whenActiveUnfoldingFun is_active)
|
|
1826
|
1842
|
ropts = initRuleOpts dflags
|
|
1827
|
|
- is_active = isActive phase
|
|
1828
|
1843
|
|
|
1829
|
1844
|
{- Note [Specialising DFuns]
|
|
1830
|
1845
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| ... |
... |
@@ -2323,21 +2338,24 @@ This plan is implemented in the Rec case of specBindItself. |
|
2323
|
2338
|
Note [Specialisations already covered]
|
|
2324
|
2339
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
2325
|
2340
|
We obviously don't want to generate two specialisations for the same
|
|
2326
|
|
-argument pattern. There are two wrinkles
|
|
2327
|
|
-
|
|
2328
|
|
-1. We do the already-covered test in specDefn, not when we generate
|
|
2329
|
|
-the CallInfo in mkCallUDs. We used to test in the latter place, but
|
|
2330
|
|
-we now iterate the specialiser somewhat, and the Id at the call site
|
|
2331
|
|
-might therefore not have all the RULES that we can see in specDefn
|
|
2332
|
|
-
|
|
2333
|
|
-2. What about two specialisations where the second is an *instance*
|
|
2334
|
|
-of the first? If the more specific one shows up first, we'll generate
|
|
2335
|
|
-specialisations for both. If the *less* specific one shows up first,
|
|
2336
|
|
-we *don't* currently generate a specialisation for the more specific
|
|
2337
|
|
-one. (See the call to lookupRule in already_covered.) Reasons:
|
|
2338
|
|
- (a) lookupRule doesn't say which matches are exact (bad reason)
|
|
2339
|
|
- (b) if the earlier specialisation is user-provided, it's
|
|
2340
|
|
- far from clear that we should auto-specialise further
|
|
|
2341
|
+argument pattern. Wrinkles
|
|
|
2342
|
+
|
|
|
2343
|
+(SC1) We do the already-covered test in specDefn, not when we generate
|
|
|
2344
|
+ the CallInfo in mkCallUDs. We used to test in the latter place, but
|
|
|
2345
|
+ we now iterate the specialiser somewhat, and the Id at the call site
|
|
|
2346
|
+ might therefore not have all the RULES that we can see in specDefn
|
|
|
2347
|
+
|
|
|
2348
|
+(SC2) What about two specialisations where the second is an *instance*
|
|
|
2349
|
+ of the first? It's a bit arbitrary, but here's what we do:
|
|
|
2350
|
+ * If the existing one is user-specified, via a SPECIALISE pragma, we
|
|
|
2351
|
+ suppress the further specialisation.
|
|
|
2352
|
+ * If the existing one is auto-generated, we generate a second RULE
|
|
|
2353
|
+ for the more specialised version.
|
|
|
2354
|
+ The latter is important because we don't want the accidental order
|
|
|
2355
|
+ of calls to determine what specialisations we generate.
|
|
|
2356
|
+
|
|
|
2357
|
+(SC3) Annoyingly, we /also/ eliminate duplicates in `filterCalls`.
|
|
|
2358
|
+ See (MP3) in Note [Specialising polymorphic dictionaries]
|
|
2341
|
2359
|
|
|
2342
|
2360
|
Note [Auto-specialisation and RULES]
|
|
2343
|
2361
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| ... |
... |
@@ -2800,12 +2818,10 @@ non-dictionary bindings too. |
|
2800
|
2818
|
|
|
2801
|
2819
|
Note [Specialising polymorphic dictionaries]
|
|
2802
|
2820
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
2803
|
|
-
|
|
2804
|
2821
|
Note June 2023: This has proved to be quite a tricky optimisation to get right
|
|
2805
|
2822
|
see (#23469, #23109, #21229, #23445) so it is now guarded by a flag
|
|
2806
|
2823
|
`-fpolymorphic-specialisation`.
|
|
2807
|
2824
|
|
|
2808
|
|
-
|
|
2809
|
2825
|
Consider
|
|
2810
|
2826
|
class M a where { foo :: a -> Int }
|
|
2811
|
2827
|
|
| ... |
... |
@@ -2845,11 +2861,26 @@ Here are the moving parts: |
|
2845
|
2861
|
function.
|
|
2846
|
2862
|
|
|
2847
|
2863
|
(MP3) If we have f :: forall m. Monoid m => blah, and two calls
|
|
2848
|
|
- (f @(Endo b) (d :: Monoid (Endo b))
|
|
2849
|
|
- (f @(Endo (c->c)) (d :: Monoid (Endo (c->c)))
|
|
|
2864
|
+ (f @(Endo b) (d1 :: Monoid (Endo b))
|
|
|
2865
|
+ (f @(Endo (c->c)) (d2 :: Monoid (Endo (c->c)))
|
|
2850
|
2866
|
we want to generate a specialisation only for the first. The second
|
|
2851
|
2867
|
is just a substitution instance of the first, with no greater specialisation.
|
|
2852
|
|
- Hence the call to `remove_dups` in `filterCalls`.
|
|
|
2868
|
+ Hence the use of `removeDupCalls` in `filterCalls`.
|
|
|
2869
|
+
|
|
|
2870
|
+ You might wonder if `d2` might be more specialised than `d1`; but no.
|
|
|
2871
|
+ This `removeDupCalls` thing is at the definition site of `f`, and both `d1`
|
|
|
2872
|
+ and `d2` are in scope. So `d1` is simply more polymorphic than `d2`, but
|
|
|
2873
|
+ is just as specialised.
|
|
|
2874
|
+
|
|
|
2875
|
+ This distinction is sadly lost once we build a RULE, so `alreadyCovered`
|
|
|
2876
|
+ can't be so clever. E.g if we have an existing RULE
|
|
|
2877
|
+ forall @a (d1:Ord Int) (d2: Eq a). f @a @Int d1 d2 = ...
|
|
|
2878
|
+ and a putative new rule
|
|
|
2879
|
+ forall (d1:Ord Int) (d2: Eq Int). f @Int @Int d1 d2 = ...
|
|
|
2880
|
+ we /don't/ want the existing rule to subsume the new one.
|
|
|
2881
|
+
|
|
|
2882
|
+ So we sadly put up with having two rather different places where we
|
|
|
2883
|
+ eliminate duplicates: `alreadyCovered` and `removeDupCalls`.
|
|
2853
|
2884
|
|
|
2854
|
2885
|
All this arose in #13873, in the unexpected form that a SPECIALISE
|
|
2855
|
2886
|
pragma made the program slower! The reason was that the specialised
|
| ... |
... |
@@ -2947,16 +2978,29 @@ data CallInfoSet = CIS Id (Bag CallInfo) |
|
2947
|
2978
|
-- The list of types and dictionaries is guaranteed to
|
|
2948
|
2979
|
-- match the type of f
|
|
2949
|
2980
|
-- The Bag may contain duplicate calls (i.e. f @T and another f @T)
|
|
2950
|
|
- -- These dups are eliminated by already_covered in specCalls
|
|
|
2981
|
+ -- These dups are eliminated by alreadyCovered in specCalls
|
|
2951
|
2982
|
|
|
2952
|
2983
|
data CallInfo
|
|
2953
|
|
- = CI { ci_key :: [SpecArg] -- All arguments
|
|
|
2984
|
+ = CI { ci_key :: [SpecArg] -- Arguments of the call
|
|
|
2985
|
+ -- See Note [The (CI-KEY) invariant]
|
|
|
2986
|
+
|
|
2954
|
2987
|
, ci_fvs :: IdSet -- Free Ids of the ci_key call
|
|
2955
|
2988
|
-- /not/ including the main id itself, of course
|
|
2956
|
2989
|
-- NB: excluding tyvars:
|
|
2957
|
2990
|
-- See Note [Specialising polymorphic dictionaries]
|
|
2958
|
2991
|
}
|
|
2959
|
2992
|
|
|
|
2993
|
+{- Note [The (CI-KEY) invariant]
|
|
|
2994
|
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
2995
|
+Invariant (CI-KEY):
|
|
|
2996
|
+ In the `ci_key :: [SpecArg]` field of `CallInfo`,
|
|
|
2997
|
+ * The list is non-empty
|
|
|
2998
|
+ * The least element is always a `SpecDict`
|
|
|
2999
|
+
|
|
|
3000
|
+In this way the RULE has as few args as possible, which broadens its
|
|
|
3001
|
+applicability, since rules only fire when saturated.
|
|
|
3002
|
+-}
|
|
|
3003
|
+
|
|
2960
|
3004
|
type DictExpr = CoreExpr
|
|
2961
|
3005
|
|
|
2962
|
3006
|
ciSetFilter :: (CallInfo -> Bool) -> CallInfoSet -> CallInfoSet
|
| ... |
... |
@@ -3045,10 +3089,7 @@ mkCallUDs' env f args |
|
3045
|
3089
|
ci_key :: [SpecArg]
|
|
3046
|
3090
|
ci_key = dropWhileEndLE (not . isSpecDict) $
|
|
3047
|
3091
|
zipWith mk_spec_arg args pis
|
|
3048
|
|
- -- Drop trailing args until we get to a SpecDict
|
|
3049
|
|
- -- In this way the RULE has as few args as possible,
|
|
3050
|
|
- -- which broadens its applicability, since rules only
|
|
3051
|
|
- -- fire when saturated
|
|
|
3092
|
+ -- Establish (CI-KEY): drop trailing args until we get to a SpecDict
|
|
3052
|
3093
|
|
|
3053
|
3094
|
mk_spec_arg :: OutExpr -> PiTyBinder -> SpecArg
|
|
3054
|
3095
|
mk_spec_arg arg (Named bndr)
|
| ... |
... |
@@ -3086,34 +3127,76 @@ site, so we only look through ticks that RULE matching looks through |
|
3086
|
3127
|
-}
|
|
3087
|
3128
|
|
|
3088
|
3129
|
wantCallsFor :: SpecEnv -> Id -> Bool
|
|
3089
|
|
-wantCallsFor _env _f = True
|
|
3090
|
|
- -- We could reduce the size of the UsageDetails by being less eager
|
|
3091
|
|
- -- about collecting calls for LocalIds: there is no point for
|
|
3092
|
|
- -- ones that are lambda-bound. We can't decide this by looking at
|
|
3093
|
|
- -- the (absence of an) unfolding, because unfoldings for local
|
|
3094
|
|
- -- functions are discarded by cloneBindSM, so no local binder will
|
|
3095
|
|
- -- have an unfolding at this stage. We'd have to keep a candidate
|
|
3096
|
|
- -- set of let-binders.
|
|
3097
|
|
- --
|
|
3098
|
|
- -- Not many lambda-bound variables have dictionary arguments, so
|
|
3099
|
|
- -- this would make little difference anyway.
|
|
3100
|
|
- --
|
|
3101
|
|
- -- For imported Ids we could check for an unfolding, but we have to
|
|
3102
|
|
- -- do so anyway in canSpecImport, and it seems better to have it
|
|
3103
|
|
- -- all in one place. So we simply collect usage info for imported
|
|
3104
|
|
- -- overloaded functions.
|
|
3105
|
|
-
|
|
3106
|
|
-{- Note [Interesting dictionary arguments]
|
|
3107
|
|
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
3108
|
|
-Consider this
|
|
3109
|
|
- \a.\d:Eq a. let f = ... in ...(f d)...
|
|
3110
|
|
-There really is not much point in specialising f wrt the dictionary d,
|
|
3111
|
|
-because the code for the specialised f is not improved at all, because
|
|
3112
|
|
-d is lambda-bound. We simply get junk specialisations.
|
|
3113
|
|
-
|
|
3114
|
|
-What is "interesting"? Just that it has *some* structure. But what about
|
|
3115
|
|
-variables? We look in the variable's /unfolding/. And that means
|
|
3116
|
|
-that we must be careful to ensure that dictionaries have unfoldings,
|
|
|
3130
|
+-- See Note [wantCallsFor]
|
|
|
3131
|
+wantCallsFor _env f
|
|
|
3132
|
+ = case idDetails f of
|
|
|
3133
|
+ RecSelId {} -> False
|
|
|
3134
|
+ DataConWorkId {} -> False
|
|
|
3135
|
+ DataConWrapId {} -> False
|
|
|
3136
|
+ ClassOpId {} -> False
|
|
|
3137
|
+ PrimOpId {} -> False
|
|
|
3138
|
+ FCallId {} -> False
|
|
|
3139
|
+ TickBoxOpId {} -> False
|
|
|
3140
|
+ CoVarId {} -> False
|
|
|
3141
|
+
|
|
|
3142
|
+ DFunId {} -> True
|
|
|
3143
|
+ VanillaId {} -> True
|
|
|
3144
|
+ JoinId {} -> True
|
|
|
3145
|
+ WorkerLikeId {} -> True
|
|
|
3146
|
+ RepPolyId {} -> True
|
|
|
3147
|
+
|
|
|
3148
|
+{- Note [wantCallsFor]
|
|
|
3149
|
+~~~~~~~~~~~~~~~~~~~~~~
|
|
|
3150
|
+`wantCallsFor env f` says whether the Specialiser should collect calls for
|
|
|
3151
|
+function `f`; other thing being equal, the fewer calls we collect the better. It
|
|
|
3152
|
+is False for things we can't specialise:
|
|
|
3153
|
+
|
|
|
3154
|
+* ClassOpId: never inline and we don't have a defn to specialise; we specialise
|
|
|
3155
|
+ them through fireRewriteRules.
|
|
|
3156
|
+* PrimOpId: are never overloaded
|
|
|
3157
|
+* Data constructors: we never specialise them
|
|
|
3158
|
+
|
|
|
3159
|
+We could reduce the size of the UsageDetails by being less eager about
|
|
|
3160
|
+collecting calls for some LocalIds: there is no point for ones that are
|
|
|
3161
|
+lambda-bound. We can't decide this by looking at the (absence of an) unfolding,
|
|
|
3162
|
+because unfoldings for local functions are discarded by cloneBindSM, so no local
|
|
|
3163
|
+binder will have an unfolding at this stage. We'd have to keep a candidate set
|
|
|
3164
|
+of let-binders.
|
|
|
3165
|
+
|
|
|
3166
|
+Not many lambda-bound variables have dictionary arguments, so this would make
|
|
|
3167
|
+little difference anyway.
|
|
|
3168
|
+
|
|
|
3169
|
+For imported Ids we could check for an unfolding, but we have to do so anyway in
|
|
|
3170
|
+canSpecImport, and it seems better to have it all in one place. So we simply
|
|
|
3171
|
+collect usage info for imported overloaded functions.
|
|
|
3172
|
+
|
|
|
3173
|
+Note [Interesting dictionary arguments]
|
|
|
3174
|
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
3175
|
+In `mkCallUDs` we only use `SpecDict` for dictionaries of which
|
|
|
3176
|
+`interestingDict` holds. Otherwise we use `UnspecArg`. Two reasons:
|
|
|
3177
|
+
|
|
|
3178
|
+* Consider this
|
|
|
3179
|
+ \a.\d:Eq a. let f = ... in ...(f d)...
|
|
|
3180
|
+ There really is not much point in specialising f wrt the dictionary d,
|
|
|
3181
|
+ because the code for the specialised f is not improved at all, because
|
|
|
3182
|
+ d is lambda-bound. We simply get junk specialisations.
|
|
|
3183
|
+
|
|
|
3184
|
+* Consider this (#25703):
|
|
|
3185
|
+ f :: (Eq a, Show b) => a -> b -> INt
|
|
|
3186
|
+ goo :: forall x. (Eq x) => x -> blah
|
|
|
3187
|
+ goo @x (d:Eq x) (arg:x) = ...(f @x @Int d $fShowInt)...
|
|
|
3188
|
+ If we built a `ci_key` with a (SpecDict d) for `d`, we would end up
|
|
|
3189
|
+ discarding the call at the `\d`. But if we use `UnspecArg` for that
|
|
|
3190
|
+ uninteresting `d`, we'll get a `ci_key` of
|
|
|
3191
|
+ f @x @Int UnspecArg (SpecDict $fShowInt)
|
|
|
3192
|
+ and /that/ can float out to f's definition and specialise nicely.
|
|
|
3193
|
+ Hooray. (NB: the call can float only if `-fpolymorphic-specialisation`
|
|
|
3194
|
+ is on; otherwise it'll be trapped by the `\@x -> ...`.)(
|
|
|
3195
|
+
|
|
|
3196
|
+What is "interesting"? (See `interestingDict`.) Just that it has *some*
|
|
|
3197
|
+structure. But what about variables? We look in the variable's /unfolding/.
|
|
|
3198
|
+And that means that we must be careful to ensure that dictionaries /have/
|
|
|
3199
|
+unfoldings,
|
|
3117
|
3200
|
|
|
3118
|
3201
|
* cloneBndrSM discards non-Stable unfoldings
|
|
3119
|
3202
|
* specBind updates the unfolding after specialisation
|
| ... |
... |
@@ -3159,7 +3242,7 @@ Now `f` turns into: |
|
3159
|
3242
|
meth @a dc ....
|
|
3160
|
3243
|
|
|
3161
|
3244
|
When we specialise `f`, at a=Int say, that superclass selection can
|
|
3162
|
|
-nfire (via rewiteClassOps), but that info (that 'dc' is now a
|
|
|
3245
|
+fire (via rewiteClassOps), but that info (that 'dc' is now a
|
|
3163
|
3246
|
particular dictionary `C`, of type `C Int`) must be available to
|
|
3164
|
3247
|
the call `meth @a dc`, so that we can fire the `meth` class-op, and
|
|
3165
|
3248
|
thence specialise `wombat`.
|
| ... |
... |
@@ -3286,7 +3369,11 @@ dumpUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, OrdList DictBind) |
|
3286
|
3369
|
-- Used at a lambda or case binder; just dump anything mentioning the binder
|
|
3287
|
3370
|
dumpUDs bndrs uds@(MkUD { ud_binds = orig_dbs, ud_calls = orig_calls })
|
|
3288
|
3371
|
| null bndrs = (uds, nilOL) -- Common in case alternatives
|
|
3289
|
|
- | otherwise = -- pprTrace "dumpUDs" (ppr bndrs $$ ppr free_uds $$ ppr dump_dbs) $
|
|
|
3372
|
+ | otherwise = -- pprTrace "dumpUDs" (vcat
|
|
|
3373
|
+ -- [ text "bndrs" <+> ppr bndrs
|
|
|
3374
|
+ -- , text "uds" <+> ppr uds
|
|
|
3375
|
+ -- , text "free_uds" <+> ppr free_uds
|
|
|
3376
|
+ -- , text "dump-dbs" <+> ppr dump_dbs ]) $
|
|
3290
|
3377
|
(free_uds, dump_dbs)
|
|
3291
|
3378
|
where
|
|
3292
|
3379
|
free_uds = uds { ud_binds = free_dbs, ud_calls = free_calls }
|
| ... |
... |
@@ -3325,20 +3412,17 @@ callsForMe fn uds@MkUD { ud_binds = orig_dbs, ud_calls = orig_calls } |
|
3325
|
3412
|
calls_for_me = case lookupDVarEnv orig_calls fn of
|
|
3326
|
3413
|
Nothing -> []
|
|
3327
|
3414
|
Just cis -> filterCalls cis orig_dbs
|
|
3328
|
|
- -- filterCalls: drop calls that (directly or indirectly)
|
|
3329
|
|
- -- refer to fn. See Note [Avoiding loops (DFuns)]
|
|
3330
|
3415
|
|
|
3331
|
3416
|
----------------------
|
|
3332
|
3417
|
filterCalls :: CallInfoSet -> FloatedDictBinds -> [CallInfo]
|
|
3333
|
|
--- Remove dominated calls (Note [Specialising polymorphic dictionaries])
|
|
3334
|
|
--- and loopy DFuns (Note [Avoiding loops (DFuns)])
|
|
|
3418
|
+-- Remove
|
|
|
3419
|
+-- (a) dominated calls: (MP3) in Note [Specialising polymorphic dictionaries]
|
|
|
3420
|
+-- (b) loopy DFuns: Note [Avoiding loops (DFuns)]
|
|
3335
|
3421
|
filterCalls (CIS fn call_bag) (FDB { fdb_binds = dbs })
|
|
3336
|
|
- | isDFunId fn -- Note [Avoiding loops (DFuns)] applies only to DFuns
|
|
3337
|
|
- = filter ok_call de_dupd_calls
|
|
3338
|
|
- | otherwise -- Do not apply it to non-DFuns
|
|
3339
|
|
- = de_dupd_calls -- See Note [Avoiding loops (non-DFuns)]
|
|
|
3422
|
+ | isDFunId fn = filter ok_call de_dupd_calls -- Deals with (b)
|
|
|
3423
|
+ | otherwise = de_dupd_calls
|
|
3340
|
3424
|
where
|
|
3341
|
|
- de_dupd_calls = remove_dups call_bag
|
|
|
3425
|
+ de_dupd_calls = removeDupCalls call_bag -- Deals with (a)
|
|
3342
|
3426
|
|
|
3343
|
3427
|
dump_set = foldl' go (unitVarSet fn) dbs
|
|
3344
|
3428
|
-- This dump-set could also be computed by splitDictBinds
|
| ... |
... |
@@ -3352,10 +3436,10 @@ filterCalls (CIS fn call_bag) (FDB { fdb_binds = dbs }) |
|
3352
|
3436
|
|
|
3353
|
3437
|
ok_call (CI { ci_fvs = fvs }) = fvs `disjointVarSet` dump_set
|
|
3354
|
3438
|
|
|
3355
|
|
-remove_dups :: Bag CallInfo -> [CallInfo]
|
|
|
3439
|
+removeDupCalls :: Bag CallInfo -> [CallInfo]
|
|
3356
|
3440
|
-- Calls involving more generic instances beat more specific ones.
|
|
3357
|
3441
|
-- See (MP3) in Note [Specialising polymorphic dictionaries]
|
|
3358
|
|
-remove_dups calls = foldr add [] calls
|
|
|
3442
|
+removeDupCalls calls = foldr add [] calls
|
|
3359
|
3443
|
where
|
|
3360
|
3444
|
add :: CallInfo -> [CallInfo] -> [CallInfo]
|
|
3361
|
3445
|
add ci [] = [ci]
|
| ... |
... |
@@ -3364,12 +3448,20 @@ remove_dups calls = foldr add [] calls |
|
3364
|
3448
|
| otherwise = ci2 : add ci1 cis
|
|
3365
|
3449
|
|
|
3366
|
3450
|
beats_or_same :: CallInfo -> CallInfo -> Bool
|
|
|
3451
|
+-- (beats_or_same ci1 ci2) is True if specialising on ci1 subsumes ci2
|
|
|
3452
|
+-- That is: ci1's types are less specialised than ci2
|
|
|
3453
|
+-- ci1 specialises on the same dict args as ci2
|
|
3367
|
3454
|
beats_or_same (CI { ci_key = args1 }) (CI { ci_key = args2 })
|
|
3368
|
3455
|
= go args1 args2
|
|
3369
|
3456
|
where
|
|
3370
|
|
- go [] _ = True
|
|
|
3457
|
+ go [] [] = True
|
|
3371
|
3458
|
go (arg1:args1) (arg2:args2) = go_arg arg1 arg2 && go args1 args2
|
|
3372
|
|
- go (_:_) [] = False
|
|
|
3459
|
+
|
|
|
3460
|
+ -- If one or the other runs dry, the other must still have a SpecDict
|
|
|
3461
|
+ -- because of the (CI-KEY) invariant. So neither subsumes the other;
|
|
|
3462
|
+ -- one is more specialised (faster code) but the other is more generally
|
|
|
3463
|
+ -- applicable.
|
|
|
3464
|
+ go _ _ = False
|
|
3373
|
3465
|
|
|
3374
|
3466
|
go_arg (SpecType ty1) (SpecType ty2) = isJust (tcMatchTy ty1 ty2)
|
|
3375
|
3467
|
go_arg UnspecType UnspecType = True
|