[Git][ghc/ghc][master] Fix infelicities in the Specialiser

28 Apr 2025

Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
ce616f49 by Simon Peyton Jones at 2025-04-27T21:10:25+01:00
Fix infelicities in the Specialiser

On the way to #23109 (unary classes) I discovered some infelicities
(or maybe tiny bugs, I forget) in the type-class specialiser.

I also tripped over #25965, an outright bug in the rule matcher

Specifically:

* Refactor: I enhanced `wantCallsFor`, whih previously always said
`True`, to discard calls of class-ops, data constructors etc.  This is
a bit more efficient; and it means we don't need to worry about
filtering them out later.

* Fix: I tidied up some tricky logic that eliminated redundant
  specialisations.  It wasn't working correctly.  See the expanded
  Note [Specialisations already covered], and
  (MP3) in Note [Specialising polymorphic dictionaries].

  See also the new top-level `alreadyCovered`
  function, which now goes via `GHC.Core.Rules.ruleLhsIsMoreSpecific`

  I also added a useful Note [The (CI-KEY) invariant]

* Fix #25965: fixed a tricky bug in the `go_fam_fam` in
  `GHC.Core.Unify.uVarOrFam`, which allows matching to succeed
  without binding all type varibles.

  I enhanced Note [Apartness and type families] some more

* #25703. This ticket "just works" with -fpolymorphic-specialisation;
  but I was surprised that it worked! In this MR I added documentation
  to Note [Interesting dictionary arguments] to explain; and tests to
  ensure it stays fixed.

- - - - -


11 changed files:

- compiler/GHC/Core/Opt/Specialise.hs
- compiler/GHC/Core/Rules.hs
- compiler/GHC/Core/Unify.hs
- compiler/GHC/Tc/Solver/Equality.hs
- compiler/GHC/Types/Basic.hs
- + testsuite/tests/simplCore/should_compile/T25703.hs
- + testsuite/tests/simplCore/should_compile/T25703.stderr
- + testsuite/tests/simplCore/should_compile/T25703a.hs
- + testsuite/tests/simplCore/should_compile/T25703a.stderr
- + testsuite/tests/simplCore/should_compile/T25965.hs
- testsuite/tests/simplCore/should_compile/all.T


Changes:

=====================================
compiler/GHC/Core/Opt/Specialise.hs
=====================================
@@ -1243,14 +1243,15 @@ specExpr env (Let bind body)
 --     Note [Fire rules in the specialiser]
 fireRewriteRules :: SpecEnv -> InExpr -> [OutExpr] -> (InExpr, [OutExpr])
 fireRewriteRules env (Var f) args
-  | Just (rule, expr) <- specLookupRule env f args InitialPhase (getRules (se_rules env) f)
+  | let rules = getRules (se_rules env) f
+  , Just (rule, expr) <- specLookupRule env f args activeInInitialPhase rules
   , let rest_args    = drop (ruleArity rule) args -- See Note [Extra args in the target]
         zapped_subst = Core.zapSubst (se_subst env)
         expr'        = simpleOptExprWith defaultSimpleOpts zapped_subst expr
                        -- simplOptExpr needed because lookupRule returns
                        --   (\x y. rhs) arg1 arg2
-  , (fun, args) <- collectArgs expr'
-  = fireRewriteRules env fun (args++rest_args)
+  , (fun', args') <- collectArgs expr'
+  = fireRewriteRules env fun' (args'++rest_args)
 fireRewriteRules _ fun args = (fun, args)
 
 --------------
@@ -1620,7 +1621,7 @@ specCalls :: Bool              -- True  =>  specialising imported fn
 
 -- This function checks existing rules, and does not create
 -- duplicate ones. So the caller does not need to do this filtering.

--- See 'already_covered'
+-- See `alreadyCovered`
 
 type SpecInfo = ( [CoreRule]       -- Specialisation rules
                 , [(Id,CoreExpr)]  -- Specialised definition
@@ -1644,15 +1645,13 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
 
   = -- pprTrace "specCalls: some" (vcat
     --   [ text "function" <+> ppr fn
-    --   , text "calls:" <+> ppr calls_for_me
-    --   , text "subst" <+> ppr (se_subst env) ]) $
+    --    , text "calls:" <+> ppr calls_for_me
+    --    , text "subst" <+> ppr (se_subst env) ]) $
     foldlM spec_call ([], [], emptyUDs) calls_for_me
 
   | otherwise   -- No calls or RHS doesn't fit our preconceptions
-  = warnPprTrace (not (exprIsTrivial rhs) && notNull calls_for_me && not (isClassOpId fn))
+  = warnPprTrace (not (exprIsTrivial rhs) && notNull calls_for_me)
           "Missed specialisation opportunity for" (ppr fn $$ trace_doc) $
-          -- isClassOpId: class-op Ids never inline; we specialise them
-          -- through fireRewriteRules. So don't complain about missed opportunities
           -- Note [Specialisation shape]
     -- pprTrace "specCalls: none" (ppr fn <+> ppr calls_for_me) $
     return ([], [], emptyUDs)
@@ -1664,6 +1663,10 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
     fn_unf    = realIdUnfolding fn  -- Ignore loop-breaker-ness here
     inl_prag  = idInlinePragma fn
     inl_act   = inlinePragmaActivation inl_prag
+    is_active = isActive (beginPhase inl_act) :: Activation -> Bool
+         -- is_active: inl_act is the activation we are going to put in the new
+         --   SPEC rule; so we want to see if it is covered by another rule with
+         --   that same activation.
     is_local  = isLocalId fn
     is_dfun   = isDFunId fn
     dflags    = se_dflags env
@@ -1674,16 +1677,6 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
     (rhs_bndrs, rhs_body) = collectBindersPushingCo rhs
                             -- See Note [Account for casts in binding]
 
-    already_covered :: SpecEnv -> [CoreRule] -> [CoreExpr] -> Bool
-    already_covered env new_rules args      -- Note [Specialisations already covered]
-       = isJust (specLookupRule env fn args (beginPhase inl_act)
-                                (new_rules ++ existing_rules))
-         -- Rules: we look both in the new_rules (generated by this invocation
-         --   of specCalls), and in existing_rules (passed in to specCalls)
-         -- inl_act: is the activation we are going to put in the new SPEC
-         --   rule; so we want to see if it is covered by another rule with
-         --   that same activation.
-
     ----------------------------------------------------------
         -- Specialise to one particular call pattern
     spec_call :: SpecInfo                         -- Accumulating parameter
@@ -1717,8 +1710,12 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
 --                , ppr dx_binds ]) $
 --             return ()
 
+           ; let all_rules = rules_acc ++ existing_rules
+                 -- all_rules: we look both in the rules_acc (generated by this invocation
+                 --   of specCalls), and in existing_rules (passed in to specCalls)
            ; if not useful  -- No useful specialisation
-                || already_covered rhs_env2 rules_acc rule_lhs_args
+                || alreadyCovered rhs_env2 rule_bndrs fn rule_lhs_args is_active all_rules
+                   -- See (SC1) in Note [Specialisations already covered]
              then return spec_acc
              else
         do { -- Run the specialiser on the specialised RHS
@@ -1780,7 +1777,7 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
                  spec_fn_details
                    = case idDetails fn of
                        JoinId join_arity _ -> JoinId (join_arity - join_arity_decr) Nothing
-                       DFunId is_nt        -> DFunId is_nt
+                       DFunId unary        -> DFunId unary
                        _                   -> VanillaId
 
            ; spec_fn <- newSpecIdSM (idName fn) spec_fn_ty spec_fn_details spec_fn_info
@@ -1804,6 +1801,8 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
                                        , ppr spec_fn  <+> dcolon <+> ppr spec_fn_ty
                                        , ppr rhs_bndrs, ppr call_args
                                        , ppr spec_rule
+                                       , text "acc" <+> ppr rules_acc
+                                       , text "existing" <+> ppr existing_rules
                                        ]
 
            ; -- pprTrace "spec_call: rule" _rule_trace_doc
@@ -1812,19 +1811,35 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
                     , spec_uds           `thenUDs` uds_acc
                     ) } }
 
+alreadyCovered :: SpecEnv
+               -> [Var] -> Id -> [CoreExpr]   -- LHS of possible new rule
+               -> (Activation -> Bool)        -- Which rules are active
+               -> [CoreRule] -> Bool
+-- Note [Specialisations already covered] esp (SC2)
+alreadyCovered env bndrs fn args is_active rules
+  = case specLookupRule env fn args is_active rules of
+      Nothing             -> False
+      Just (rule, _)
+        | isAutoRule rule -> -- Discard identical rules
+                             -- We know that (fn args) is an instance of RULE
+                             -- Check if RULE is an instance of (fn args)
+                             ruleLhsIsMoreSpecific in_scope bndrs args rule
+        | otherwise       -> True  -- User rules dominate
+  where
+    in_scope = substInScopeSet (se_subst env)
+
 -- Convenience function for invoking lookupRule from Specialise
 -- The SpecEnv's InScopeSet should include all the Vars in the [CoreExpr]
 specLookupRule :: SpecEnv -> Id -> [CoreExpr]
-               -> CompilerPhase  -- Look up rules as if we were in this phase
+               -> (Activation -> Bool)  -- Which rules are active
                -> [CoreRule] -> Maybe (CoreRule, CoreExpr)
-specLookupRule env fn args phase rules
+specLookupRule env fn args is_active rules
   = lookupRule ropts in_scope_env is_active fn args rules
   where
     dflags       = se_dflags env
     in_scope     = substInScopeSet (se_subst env)
     in_scope_env = ISE in_scope (whenActiveUnfoldingFun is_active)
     ropts        = initRuleOpts dflags
-    is_active    = isActive phase
 
 {- Note [Specialising DFuns]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2323,21 +2338,24 @@ This plan is implemented in the Rec case of specBindItself.
 Note [Specialisations already covered]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We obviously don't want to generate two specialisations for the same
-argument pattern.  There are two wrinkles
-
-1. We do the already-covered test in specDefn, not when we generate
-the CallInfo in mkCallUDs.  We used to test in the latter place, but
-we now iterate the specialiser somewhat, and the Id at the call site
-might therefore not have all the RULES that we can see in specDefn
-
-2. What about two specialisations where the second is an *instance*
-of the first?  If the more specific one shows up first, we'll generate
-specialisations for both.  If the *less* specific one shows up first,
-we *don't* currently generate a specialisation for the more specific
-one.  (See the call to lookupRule in already_covered.)  Reasons:
-  (a) lookupRule doesn't say which matches are exact (bad reason)
-  (b) if the earlier specialisation is user-provided, it's
-      far from clear that we should auto-specialise further
+argument pattern.  Wrinkles
+
+(SC1) We do the already-covered test in specDefn, not when we generate
+    the CallInfo in mkCallUDs.  We used to test in the latter place, but
+    we now iterate the specialiser somewhat, and the Id at the call site
+    might therefore not have all the RULES that we can see in specDefn
+
+(SC2) What about two specialisations where the second is an *instance*
+   of the first?  It's a bit arbitrary, but here's what we do:
+   * If the existing one is user-specified, via a SPECIALISE pragma, we
+     suppress the further specialisation.
+   * If the existing one is auto-generated, we generate a second RULE
+     for the more specialised version.
+   The latter is important because we don't want the accidental order
+   of calls to determine what specialisations we generate.
+
+(SC3) Annoyingly, we /also/ eliminate duplicates in `filterCalls`.
+   See (MP3) in Note [Specialising polymorphic dictionaries]
 
 Note [Auto-specialisation and RULES]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2800,12 +2818,10 @@ non-dictionary bindings too.
 
 Note [Specialising polymorphic dictionaries]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 Note June 2023: This has proved to be quite a tricky optimisation to get right
 see (#23469, #23109, #21229, #23445) so it is now guarded by a flag
 `-fpolymorphic-specialisation`.
 
-
 Consider
     class M a where { foo :: a -> Int }
 
@@ -2845,11 +2861,26 @@ Here are the moving parts:
       function.
 
 (MP3) If we have f :: forall m. Monoid m => blah, and two calls
-        (f @(Endo b)      (d :: Monoid (Endo b))
-        (f @(Endo (c->c)) (d :: Monoid (Endo (c->c)))
+        (f @(Endo b)      (d1 :: Monoid (Endo b))
+        (f @(Endo (c->c)) (d2 :: Monoid (Endo (c->c)))
       we want to generate a specialisation only for the first.  The second
       is just a substitution instance of the first, with no greater specialisation.
-      Hence the call to `remove_dups` in `filterCalls`.
+      Hence the use of `removeDupCalls` in `filterCalls`.
+
+      You might wonder if `d2` might be more specialised than `d1`; but no.
+      This `removeDupCalls` thing is at the definition site of `f`, and both `d1`
+      and `d2` are in scope. So `d1` is simply more polymorphic than `d2`, but
+      is just as specialised.
+
+      This distinction is sadly lost once we build a RULE, so `alreadyCovered`
+      can't be so clever.  E.g if we have an existing RULE
+            forall @a (d1:Ord Int) (d2: Eq a). f @a @Int d1 d2 = ...
+      and a putative new rule
+            forall (d1:Ord Int) (d2: Eq Int). f @Int @Int d1 d2 = ...
+      we /don't/ want the existing rule to subsume the new one.
+
+      So we sadly put up with having two rather different places where we
+      eliminate duplicates: `alreadyCovered` and `removeDupCalls`.
 
 All this arose in #13873, in the unexpected form that a SPECIALISE
 pragma made the program slower!  The reason was that the specialised
@@ -2947,16 +2978,29 @@ data CallInfoSet = CIS Id (Bag CallInfo)
   -- The list of types and dictionaries is guaranteed to
   -- match the type of f
   -- The Bag may contain duplicate calls (i.e. f @T and another f @T)
-  -- These dups are eliminated by already_covered in specCalls
+  -- These dups are eliminated by alreadyCovered in specCalls
 
 data CallInfo
-  = CI { ci_key  :: [SpecArg]   -- All arguments
+  = CI { ci_key  :: [SpecArg]   -- Arguments of the call
+                                -- See Note [The (CI-KEY) invariant]
+
        , ci_fvs  :: IdSet       -- Free Ids of the ci_key call
                                 -- /not/ including the main id itself, of course
                                 -- NB: excluding tyvars:
                                 --     See Note [Specialising polymorphic dictionaries]
     }
 
+{- Note [The (CI-KEY) invariant]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Invariant (CI-KEY):
+   In the `ci_key :: [SpecArg]` field of `CallInfo`,
+     * The list is non-empty
+     * The least element is always a `SpecDict`
+
+In this way the RULE has as few args as possible, which broadens its
+applicability, since rules only fire when saturated.
+-}
+
 type DictExpr = CoreExpr
 
 ciSetFilter :: (CallInfo -> Bool) -> CallInfoSet -> CallInfoSet
@@ -3045,10 +3089,7 @@ mkCallUDs' env f args
     ci_key :: [SpecArg]
     ci_key = dropWhileEndLE (not . isSpecDict) $
              zipWith mk_spec_arg args pis
-             -- Drop trailing args until we get to a SpecDict
-             -- In this way the RULE has as few args as possible,
-             -- which broadens its applicability, since rules only
-             -- fire when saturated
+             -- Establish (CI-KEY): drop trailing args until we get to a SpecDict
 
     mk_spec_arg :: OutExpr -> PiTyBinder -> SpecArg
     mk_spec_arg arg (Named bndr)
@@ -3086,34 +3127,76 @@ site, so we only look through ticks that RULE matching looks through
 -}
 
 wantCallsFor :: SpecEnv -> Id -> Bool
-wantCallsFor _env _f = True
- -- We could reduce the size of the UsageDetails by being less eager
- -- about collecting calls for LocalIds: there is no point for
- -- ones that are lambda-bound.  We can't decide this by looking at
- -- the (absence of an) unfolding, because unfoldings for local
- -- functions are discarded by cloneBindSM, so no local binder will
- -- have an unfolding at this stage.  We'd have to keep a candidate
- -- set of let-binders.
- --
- -- Not many lambda-bound variables have dictionary arguments, so
- -- this would make little difference anyway.
- --
- -- For imported Ids we could check for an unfolding, but we have to
- -- do so anyway in canSpecImport, and it seems better to have it
- -- all in one place.  So we simply collect usage info for imported
- -- overloaded functions.
-
-{- Note [Interesting dictionary arguments]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider this
-         \a.\d:Eq a.  let f = ... in ...(f d)...
-There really is not much point in specialising f wrt the dictionary d,
-because the code for the specialised f is not improved at all, because
-d is lambda-bound.  We simply get junk specialisations.
-
-What is "interesting"?  Just that it has *some* structure.  But what about
-variables?  We look in the variable's /unfolding/.  And that means
-that we must be careful to ensure that dictionaries have unfoldings,
+-- See Note [wantCallsFor]
+wantCallsFor _env f
+  = case idDetails f of
+      RecSelId {}      -> False
+      DataConWorkId {} -> False
+      DataConWrapId {} -> False
+      ClassOpId {}     -> False
+      PrimOpId {}      -> False
+      FCallId {}       -> False
+      TickBoxOpId {}   -> False
+      CoVarId {}       -> False
+
+      DFunId {}        -> True
+      VanillaId {}     -> True
+      JoinId {}        -> True
+      WorkerLikeId {}  -> True
+      RepPolyId {}     -> True
+
+{- Note [wantCallsFor]
+~~~~~~~~~~~~~~~~~~~~~~
+`wantCallsFor env f` says whether the Specialiser should collect calls for
+function `f`; other thing being equal, the fewer calls we collect the better. It
+is False for things we can't specialise:
+
+* ClassOpId: never inline and we don't have a defn to specialise; we specialise
+  them through fireRewriteRules.
+* PrimOpId: are never overloaded
+* Data constructors: we never specialise them
+
+We could reduce the size of the UsageDetails by being less eager about
+collecting calls for some LocalIds: there is no point for ones that are
+lambda-bound.  We can't decide this by looking at the (absence of an) unfolding,
+because unfoldings for local functions are discarded by cloneBindSM, so no local
+binder will have an unfolding at this stage.  We'd have to keep a candidate set
+of let-binders.
+
+Not many lambda-bound variables have dictionary arguments, so this would make
+little difference anyway.
+
+For imported Ids we could check for an unfolding, but we have to do so anyway in
+canSpecImport, and it seems better to have it all in one place.  So we simply
+collect usage info for imported overloaded functions.
+
+Note [Interesting dictionary arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In `mkCallUDs` we only use `SpecDict` for dictionaries of which
+`interestingDict` holds.  Otherwise we use `UnspecArg`.  Two reasons:
+
+* Consider this
+       \a.\d:Eq a.  let f = ... in ...(f d)...
+  There really is not much point in specialising f wrt the dictionary d,
+  because the code for the specialised f is not improved at all, because
+  d is lambda-bound.  We simply get junk specialisations.
+
+* Consider this (#25703):
+     f :: (Eq a, Show b) => a -> b -> INt
+     goo :: forall x. (Eq x) => x -> blah
+     goo @x (d:Eq x) (arg:x) = ...(f @x @Int d $fShowInt)...
+  If we built a `ci_key` with a (SpecDict d) for `d`, we would end up
+  discarding the call at the `\d`.  But if we use `UnspecArg` for that
+  uninteresting `d`, we'll get a `ci_key` of
+      f @x @Int UnspecArg (SpecDict $fShowInt)
+  and /that/ can float out to f's definition and specialise nicely.
+  Hooray.  (NB: the call can float only if `-fpolymorphic-specialisation`
+  is on; otherwise it'll be trapped by the `\@x -> ...`.)(
+
+What is "interesting"?  (See `interestingDict`.)  Just that it has *some*
+structure.  But what about variables?  We look in the variable's /unfolding/.
+And that means that we must be careful to ensure that dictionaries /have/
+unfoldings,
 
 * cloneBndrSM discards non-Stable unfoldings
 * specBind updates the unfolding after specialisation
@@ -3159,7 +3242,7 @@ Now `f` turns into:
           meth @a dc ....
 
 When we specialise `f`, at a=Int say, that superclass selection can
-nfire (via rewiteClassOps), but that info (that 'dc' is now a
+fire (via rewiteClassOps), but that info (that 'dc' is now a
 particular dictionary `C`, of type `C Int`) must be available to
 the call `meth @a dc`, so that we can fire the `meth` class-op, and
 thence specialise `wombat`.
@@ -3286,7 +3369,11 @@ dumpUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, OrdList DictBind)
 -- Used at a lambda or case binder; just dump anything mentioning the binder
 dumpUDs bndrs uds@(MkUD { ud_binds = orig_dbs, ud_calls = orig_calls })
   | null bndrs = (uds, nilOL)  -- Common in case alternatives
-  | otherwise  = -- pprTrace "dumpUDs" (ppr bndrs $$ ppr free_uds $$ ppr dump_dbs) $
+  | otherwise  = -- pprTrace "dumpUDs" (vcat
+                 --    [ text "bndrs" <+> ppr bndrs
+                 --    , text "uds" <+> ppr uds
+                 --    , text "free_uds" <+> ppr free_uds
+                 --    , text "dump-dbs" <+> ppr dump_dbs ]) $
                  (free_uds, dump_dbs)
   where
     free_uds = uds { ud_binds = free_dbs, ud_calls = free_calls }
@@ -3325,20 +3412,17 @@ callsForMe fn uds@MkUD { ud_binds = orig_dbs, ud_calls = orig_calls }
     calls_for_me = case lookupDVarEnv orig_calls fn of
                         Nothing -> []
                         Just cis -> filterCalls cis orig_dbs
-         -- filterCalls: drop calls that (directly or indirectly)
-         -- refer to fn.  See Note [Avoiding loops (DFuns)]
 
 ----------------------
 filterCalls :: CallInfoSet -> FloatedDictBinds -> [CallInfo]
--- Remove dominated calls (Note [Specialising polymorphic dictionaries])
--- and loopy DFuns (Note [Avoiding loops (DFuns)])
+-- Remove
+--   (a) dominated calls: (MP3) in Note [Specialising polymorphic dictionaries]
+--   (b) loopy DFuns: Note [Avoiding loops (DFuns)]
 filterCalls (CIS fn call_bag) (FDB { fdb_binds = dbs })
-  | isDFunId fn  -- Note [Avoiding loops (DFuns)] applies only to DFuns
-  = filter ok_call de_dupd_calls
-  | otherwise         -- Do not apply it to non-DFuns
-  = de_dupd_calls  -- See Note [Avoiding loops (non-DFuns)]
+  | isDFunId fn  = filter ok_call de_dupd_calls  -- Deals with (b)
+  | otherwise    = de_dupd_calls
   where
-    de_dupd_calls = remove_dups call_bag
+    de_dupd_calls = removeDupCalls call_bag -- Deals with (a)
 
     dump_set = foldl' go (unitVarSet fn) dbs
       -- This dump-set could also be computed by splitDictBinds
@@ -3352,10 +3436,10 @@ filterCalls (CIS fn call_bag) (FDB { fdb_binds = dbs })
 
     ok_call (CI { ci_fvs = fvs }) = fvs `disjointVarSet` dump_set
 
-remove_dups :: Bag CallInfo -> [CallInfo]
+removeDupCalls :: Bag CallInfo -> [CallInfo]
 -- Calls involving more generic instances beat more specific ones.
 -- See (MP3) in Note [Specialising polymorphic dictionaries]
-remove_dups calls = foldr add [] calls
+removeDupCalls calls = foldr add [] calls
   where
     add :: CallInfo -> [CallInfo] -> [CallInfo]
     add ci [] = [ci]
@@ -3364,12 +3448,20 @@ remove_dups calls = foldr add [] calls
                       | otherwise               = ci2 : add ci1 cis
 
 beats_or_same :: CallInfo -> CallInfo -> Bool
+-- (beats_or_same ci1 ci2) is True if specialising on ci1 subsumes ci2
+-- That is: ci1's types are less specialised than ci2
+--          ci1   specialises on the same dict args as ci2
 beats_or_same (CI { ci_key = args1 }) (CI { ci_key = args2 })
   = go args1 args2
   where
-    go [] _ = True
+    go []           []           = True
     go (arg1:args1) (arg2:args2) = go_arg arg1 arg2 && go args1 args2
-    go (_:_)        []           = False
+
+    -- If one or the other runs dry, the other must still have a SpecDict
+    -- because of the (CI-KEY) invariant.  So neither subsumes the other;
+    -- one is more specialised (faster code) but the other is more generally
+    -- applicable.
+    go  _ _ = False
 
     go_arg (SpecType ty1) (SpecType ty2) = isJust (tcMatchTy ty1 ty2)
     go_arg UnspecType     UnspecType     = True


=====================================
compiler/GHC/Core/Rules.hs
=====================================
@@ -9,7 +9,7 @@
 -- The 'CoreRule' datatype itself is declared elsewhere.
 module GHC.Core.Rules (
         -- ** Looking up rules
-        lookupRule, matchExprs,
+        lookupRule, matchExprs, ruleLhsIsMoreSpecific,
 
         -- ** RuleBase, RuleEnv
         RuleBase, RuleEnv(..), mkRuleEnv, emptyRuleEnv,
@@ -587,8 +587,8 @@ findBest :: InScopeSet -> (Id, [CoreExpr])
 
 findBest _        _      (rule,ans)   [] = (rule,ans)
 findBest in_scope target (rule1,ans1) ((rule2,ans2):prs)
-  | isMoreSpecific in_scope rule1 rule2 = findBest in_scope target (rule1,ans1) prs
-  | isMoreSpecific in_scope rule2 rule1 = findBest in_scope target (rule2,ans2) prs
+  | ruleIsMoreSpecific in_scope rule1 rule2 = findBest in_scope target (rule1,ans1) prs
+  | ruleIsMoreSpecific in_scope rule2 rule1 = findBest in_scope target (rule2,ans2) prs
   | debugIsOn = let pp_rule rule
                       = ifPprDebug (ppr rule)
                                    (doubleQuotes (ftext (ruleName rule)))
@@ -603,15 +603,25 @@ findBest in_scope target (rule1,ans1) ((rule2,ans2):prs)
   where
     (fn,args) = target
 
-isMoreSpecific :: InScopeSet -> CoreRule -> CoreRule -> Bool
--- The call (rule1 `isMoreSpecific` rule2)
+ruleIsMoreSpecific :: InScopeSet -> CoreRule -> CoreRule -> Bool
+-- The call (rule1 `ruleIsMoreSpecific` rule2)
 -- sees if rule2 can be instantiated to look like rule1
--- See Note [isMoreSpecific]
-isMoreSpecific _        (BuiltinRule {}) _                = False
-isMoreSpecific _        (Rule {})        (BuiltinRule {}) = True
-isMoreSpecific in_scope (Rule { ru_bndrs = bndrs1, ru_args = args1 })
-                        (Rule { ru_bndrs = bndrs2, ru_args = args2 })
-  = isJust (matchExprs in_scope_env bndrs2 args2 args1)
+-- See Note [ruleIsMoreSpecific]
+ruleIsMoreSpecific in_scope rule1 rule2
+  = case rule1 of
+       BuiltinRule {} -> False
+       Rule { ru_bndrs = bndrs1, ru_args = args1 }
+                      -> ruleLhsIsMoreSpecific in_scope bndrs1 args1 rule2
+
+ruleLhsIsMoreSpecific :: InScopeSet
+                      -> [Var] -> [CoreExpr]  -- LHS of a possible new rule
+                      -> CoreRule             -- An existing rule
+                      -> Bool                 -- New one is more specific
+ruleLhsIsMoreSpecific in_scope bndrs1 args1 rule2
+  = case rule2 of
+       BuiltinRule {} -> True
+       Rule { ru_bndrs = bndrs2, ru_args = args2 }
+                      -> isJust (matchExprs in_scope_env bndrs2 args2 args1)
   where
    full_in_scope = in_scope `extendInScopeSetList` bndrs1
    in_scope_env  = ISE full_in_scope noUnfoldingFun
@@ -620,9 +630,9 @@ isMoreSpecific in_scope (Rule { ru_bndrs = bndrs1, ru_args = args1 })
 noBlackList :: Activation -> Bool
 noBlackList _ = False           -- Nothing is black listed
 
-{- Note [isMoreSpecific]
+{- Note [ruleIsMoreSpecific]
 ~~~~~~~~~~~~~~~~~~~~~~~~
-The call (rule1 `isMoreSpecific` rule2)
+The call (rule1 `ruleIsMoreSpecific` rule2)
 sees if rule2 can be instantiated to look like rule1.
 
 Wrinkle:
@@ -825,7 +835,7 @@ bound on the LHS:
 
   The rule looks like
     forall (a::*) (d::Eq Char) (x :: Foo a Char).
-         f (Foo a Char) d x = True
+         f @(Foo a Char) d x = True
 
   Matching the rule won't bind 'a', and legitimately so.  We fudge by
   pretending that 'a' is bound to (Any :: *).


=====================================
compiler/GHC/Core/Unify.hs
=====================================
@@ -331,35 +331,57 @@ Wrinkles
    `DontBindMe`, the unifier must return `SurelyApart`, not `MaybeApart`.  See
    `go_fam` in `uVarOrFam`
 
-(ATF6) You might think that when /matching/ the um_fam_env will always be empty,
-   because type-class-instance and type-family-instance heads can't include type
-   families.  E.g.   instance C (F a) where ...   -- Illegal
-
-   But you'd be wrong: when "improving" type family constraint we may have a
-   type family on the LHS of a match. Consider
+(ATF6) When /matching/ can we ever have a type-family application on the LHS, in
+   the template?  You might think not, because type-class-instance and
+   type-family-instance heads can't include type families.  E.g.
+            instance C (F a) where ...  -- Illegal
+
+   But you'd be wrong: even when matching, we can see type families in the LHS template:
+   * In `checkValidClass`, in `check_dm` we check that the default method has the
+      right type, using matching, both ways.  And that type may have type-family
+      applications in it. Example in test CoOpt_Singletons.
+
+   * In the specialiser: see the call to `tcMatchTy` in
+     `GHC.Core.Opt.Specialise.beats_or_same`
+
+   * With -fpolymorphic-specialsation, we might get a specialiation rule like
+         RULE forall a (d :: Eq (Maybe (F a))) .
+                 f @(Maybe (F a)) d = ...
+     See #25965.
+
+   * A user-written RULE could conceivably have a type-family application
+     in the template.  It might not be a good rule, but I don't think we currently
+     check for this.
+
+    In all these cases we are only interested in finding a substitution /for
+    type variables/ that makes the match work.  So we simply want to recurse into
+    the arguments of the type family.  E.g.
+       Template:   forall a.  Maybe (F a)
+       Target:     Mabybe (F Int)
+    We want to succeed with substitution [a :-> Int].  See (ATF9).
+
+    Conclusion: where we enter via `tcMatchTy`, `tcMatchTys`, `tc_match_tys`,
+    etc, we always end up in `tc_match_tys_x`.  There we invoke the unifier
+    but we do not distinguish between `SurelyApart` and `MaybeApart`. So in
+    these cases we can set `um_bind_fam_fun` to `neverBindFam`.
+
+(ATF7) There is one other, very special case of matching where we /do/ want to
+   bind type families in `um_fam_env`, namely in GHC.Tc.Solver.Equality, the call
+   to `tcUnifyTyForInjectivity False` in `improve_injective_wanted_top`.
+   Consider
+   of a match. Consider
       type family G6 a = r | r -> a
       type instance G6 [a]  = [G a]
       type instance G6 Bool = Int
-   and the Wanted constraint [W] G6 alpha ~ [Int].  We /match/ each type instance
-   RHS against [Int]!  So we try
-        [G a] ~ [Int]
+   and suppose we haev a Wanted constraint
+      [W] G6 alpha ~ [Int]
+.  According to Section 5.2 of "Injective type families for Haskell", we /match/
+   the RHS each type instance [Int].  So we try
+        Template: [G a]    Target: [Int]
    and we want to succeed with MaybeApart, so that we can generate the improvement
-   constraint  [W] alpha ~ [beta]  where beta is fresh.
-   See Section 5.2 of "Injective type families for Haskell".
-
-   A second place that we match with type-fams on the LHS is in `checkValidClass`.
-   In `check_dm` we check that the default method has the right type, using matching,
-   both ways.  And that type may have type-family applications in it. Example in
-   test CoOpt_Singletons.
-
-(ATF7) You might think that (ATF6) is a very special case, and in /other/ uses of
-  matching, where we enter via `tc_match_tys_x` we will never see a type-family
-  in the template. But actually we do see that case in the specialiser: see
-  the call to `tcMatchTy` in `GHC.Core.Opt.Specialise.beats_or_same`
-
-  Also: a user-written RULE could conceivably have a type-family application
-  in the template.  It might not be a good rule, but I don't think we currently
-  check for this.
+   constraint
+        [W] alpha ~ [beta]
+   where beta is fresh.  We do this by binding [G a :-> Int]
 
 (ATF8) The treatment of type families is governed by
          um_bind_fam_fun :: BindFamFun
@@ -399,6 +421,8 @@ Wrinkles
   Key point: when decomposing (F tys1 ~ F tys2), we should /also/ extend the
   type-family substitution.
 
+  (ATF11-1) All this cleverness only matters when unifying, not when matching
+
 (ATF12) There is a horrid exception for the injectivity check. See (UR1) in
   in Note [Specification of unification].
 
@@ -595,7 +619,7 @@ tc_match_tys_x :: HasDebugCallStack
                -> [Type]
                -> Maybe Subst
 tc_match_tys_x bind_tv match_kis (Subst in_scope id_env tv_env cv_env) tys1 tys2
-  = case tc_unify_tys alwaysBindFam  -- (ATF7) in Note [Apartness and type families]
+  = case tc_unify_tys neverBindFam  -- (ATF7) in Note [Apartness and type families]
                       bind_tv
                       False  -- Matching, not unifying
                       False  -- Not an injectivity check
@@ -1857,6 +1881,7 @@ uVarOrFam env ty1 ty2 kco
       = go_fam_fam tc1 tys1 tys2 kco
 
       -- Now check if we can bind the (F tys) to the RHS
+      -- This can happen even when matching: see (ATF7)
       | BindMe <- um_bind_fam_fun env tc1 tys1 rhs
       = -- ToDo: do we need an occurs check here?
         do { extendFamEnv tc1 tys1 rhs
@@ -1881,11 +1906,6 @@ uVarOrFam env ty1 ty2 kco
     -- go_fam_fam: LHS and RHS are both saturated type-family applications,
     --             for the same type-family F
     go_fam_fam tc tys1 tys2 kco
-      | tcEqTyConAppArgs tys1 tys2
-      -- Detect (F tys ~ F tys); otherwise we'd build an infinite substitution
-      = return ()
-
-      | otherwise
        -- Decompose (F tys1 ~ F tys2): (ATF9)
        -- Use injectivity information of F: (ATF10)
        -- But first bind the type-fam if poss: (ATF11)
@@ -1902,13 +1922,19 @@ uVarOrFam env ty1 ty2 kco
        (inj_tys1, noninj_tys1) = partitionByList inj tys1
        (inj_tys2, noninj_tys2) = partitionByList inj tys2
 
-       bind_fam_if_poss | BindMe <- um_bind_fam_fun env tc tys1 rhs1
-                        = extendFamEnv tc tys1 rhs1
-                        | um_unif env
-                        , BindMe <- um_bind_fam_fun env tc tys2 rhs2
-                        = extendFamEnv tc tys2 rhs2
-                        | otherwise
-                        = return ()
+       bind_fam_if_poss
+         | not (um_unif env)  -- Not when matching (ATF11-1)
+         = return ()
+         | tcEqTyConAppArgs tys1 tys2   -- Detect (F tys ~ F tys);
+         = return ()                    -- otherwise we'd build an infinite substitution
+         | BindMe <- um_bind_fam_fun env tc tys1 rhs1
+         = extendFamEnv tc tys1 rhs1
+         | um_unif env
+         , BindMe <- um_bind_fam_fun env tc tys2 rhs2
+         = extendFamEnv tc tys2 rhs2
+         | otherwise
+         = return ()
+
        rhs1 = mkTyConApp tc tys2 `mkCastTy` mkSymCo kco
        rhs2 = mkTyConApp tc tys1 `mkCastTy` kco
 
@@ -1993,7 +2019,7 @@ data UMState = UMState
   -- in um_foralls; i.e. variables bound by foralls inside the types being unified
 
   -- When /matching/ um_fam_env is usually empty; but not quite always.
-  -- See (ATF6) and (ATF7) of Note [Apartness and type families]
+  -- See (ATF7) of Note [Apartness and type families]
 
 newtype UM a
   = UM' { unUM :: UMState -> UnifyResultM (UMState, a) }


=====================================
compiler/GHC/Tc/Solver/Equality.hs
=====================================
@@ -3017,6 +3017,7 @@ improve_wanted_top_fun_eqs fam_tc lhs_tys rhs_ty
 
 improve_injective_wanted_top :: FamInstEnvs -> [Bool] -> TyCon -> [TcType] -> Xi -> TcS [TypeEqn]
 -- Interact with top-level instance declarations
+-- See Section 5.2 in the Injective Type Families paper
 improve_injective_wanted_top fam_envs inj_args fam_tc lhs_tys rhs_ty
   = concatMapM do_one branches
   where
@@ -3035,6 +3036,7 @@ improve_injective_wanted_top fam_envs inj_args fam_tc lhs_tys rhs_ty
     do_one branch@(CoAxBranch { cab_tvs = branch_tvs, cab_lhs = branch_lhs_tys, cab_rhs = branch_rhs })
       | let in_scope1 = in_scope `extendInScopeSetList` branch_tvs
       , Just subst <- tcUnifyTyForInjectivity False in_scope1 branch_rhs rhs_ty
+                      -- False: matching, not unifying
       = do { let inSubst tv = tv `elemVarEnv` getTvSubstEnv subst
                  unsubstTvs = filterOut inSubst branch_tvs
                  -- The order of unsubstTvs is important; it must be


=====================================
compiler/GHC/Types/Basic.hs
=====================================
@@ -87,7 +87,7 @@ module GHC.Types.Basic (
         CompilerPhase(..), PhaseNum, beginPhase, nextPhase, laterPhase,
 
         Activation(..), isActive, competesWith,
-        isNeverActive, isAlwaysActive, activeInFinalPhase,
+        isNeverActive, isAlwaysActive, activeInFinalPhase, activeInInitialPhase,
         activateAfterInitial, activateDuringFinal, activeAfter,
 
         RuleMatchInfo(..), isConLike, isFunLike,


=====================================
testsuite/tests/simplCore/should_compile/T25703.hs
=====================================
@@ -0,0 +1,7 @@
+module T25703 where
+
+f :: (Eq a, Show b) => a -> b -> Int
+f x y = f x y
+
+goo :: forall x. (Eq x) => x -> Int
+goo arg = f arg (3::Int)


=====================================
testsuite/tests/simplCore/should_compile/T25703.stderr
=====================================
@@ -0,0 +1,2 @@
+Rule fired: SPEC f @_ @Int (T25703)
+Rule fired: SPEC f @_ @Int (T25703)


=====================================
testsuite/tests/simplCore/should_compile/T25703a.hs
=====================================
@@ -0,0 +1,69 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE GADTs #-}
+
+{-# OPTIONS_GHC -O2 -fspecialise-aggressively #-}
+
+-- This pragma is just here to pretend that the function body of 'foo' is huge
+-- and should never be inlined.
+{-# OPTIONS_GHC -funfolding-use-threshold=-200 #-}
+
+module T25703a where
+
+import Data.Kind
+import Data.Type.Equality
+import Data.Proxy
+import GHC.TypeNats
+
+-- Pretend this is some big dictionary that absolutely must get
+-- specialised away for performance reasons.
+type C :: Nat -> Constraint
+class C i where
+  meth :: Proxy i -> Double
+instance C 0 where
+  meth _ = 0.1
+instance C 1 where
+  meth _ = 1.1
+instance C 2 where
+  meth _ = 2.1
+
+{-# INLINEABLE foo #-}
+foo :: forall a (n :: Nat) (m :: Nat)
+    .  ( Eq a, C n, C m )
+    => a -> ( Proxy n, Proxy m ) -> Int -> Double
+-- Pretend this is a big complicated function, too big to inline,
+-- for which we absolutely must specialise away the 'C n', 'C m'
+-- dictionaries for performance reasons.
+foo a b c
+  = if a == a
+    then meth @n Proxy + fromIntegral c
+    else 2 * meth @m Proxy
+
+-- Runtime dispatch to a specialisation of 'foo'
+foo_spec :: forall a (n :: Nat) (m :: Nat)
+         .  ( Eq a, KnownNat n, KnownNat m )
+         => a -> ( Proxy n, Proxy m ) -> Int -> Double
+foo_spec a b c
+  | Just Refl <- sameNat @n @0 Proxy Proxy
+  , Just Refl <- sameNat @m @0 Proxy Proxy
+  = foo @a @0 @0 a b c
+  | Just Refl <- sameNat @n @0 Proxy Proxy
+  , Just Refl <- sameNat @m @1 Proxy Proxy
+  = foo @a @0 @1 a b c
+  | Just Refl <- sameNat @n @1 Proxy Proxy
+  , Just Refl <- sameNat @m @1 Proxy Proxy
+  = foo @a @1 @1 a b c
+  | Just Refl <- sameNat @n @0 Proxy Proxy
+  , Just Refl <- sameNat @m @2 Proxy Proxy
+  = foo @a @0 @2 a b c
+  | Just Refl <- sameNat @n @1 Proxy Proxy
+  , Just Refl <- sameNat @m @2 Proxy Proxy
+  = foo @a @1 @2 a b c
+  | Just Refl <- sameNat @n @2 Proxy Proxy
+  , Just Refl <- sameNat @m @2 Proxy Proxy
+  = foo @a @2 @2 a b c
+  | otherwise
+  = error $ unlines
+      [ "f: no specialisation"
+      , "n: " ++ show (natVal @n Proxy)
+      , "m: " ++ show (natVal @m Proxy)
+      ]


=====================================
testsuite/tests/simplCore/should_compile/T25703a.stderr
=====================================
@@ -0,0 +1,6 @@
+Rule fired: SPEC foo @_ @2 @2 (T25703a)
+Rule fired: SPEC foo @_ @1 @2 (T25703a)
+Rule fired: SPEC foo @_ @0 @2 (T25703a)
+Rule fired: SPEC foo @_ @1 @1 (T25703a)
+Rule fired: SPEC foo @_ @0 @1 (T25703a)
+Rule fired: SPEC foo @_ @0 @0 (T25703a)


=====================================
testsuite/tests/simplCore/should_compile/T25965.hs
=====================================
@@ -0,0 +1,18 @@
+{-# LANGUAGE TypeFamilies #-}
+{-# OPTIONS_GHC -O -fpolymorphic-specialisation #-}
+
+module Foo where
+
+type family F a
+
+data T a = T1
+
+instance Eq (T a) where { (==) x y = False }
+
+foo :: Eq a => a -> Bool
+foo x | x==x = True
+      | otherwise = foo x
+
+bar :: forall b. b -> T (F b) -> Bool
+bar y x = foo x
+


=====================================
testsuite/tests/simplCore/should_compile/all.T
=====================================
@@ -543,3 +543,8 @@ test('T25883c', normal, compile_grep_core, [''])
 test('T25883d', [extra_files(['T25883d_import.hs'])], multimod_compile_filter, ['T25883d', '-O -ddump-simpl -dno-typeable-binds -dsuppress-all -dsuppress-uniques', r'grep -e "y ="'])
 
 test('T25976', [grep_errmsg('Dead Code')], compile, ['-O -ddump-simpl -dsuppress-uniques -dno-typeable-binds'])
+
+test('T25965', normal, compile, ['-O'])
+test('T25703',  [grep_errmsg(r'SPEC')], compile, ['-O -fpolymorphic-specialisation -ddump-rule-firings'])
+test('T25703a', [grep_errmsg(r'SPEC')], compile, ['-O -fpolymorphic-specialisation -ddump-rule-firings'])
+



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/ce616f4976b6bdf9093372eb454787d1...

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/ce616f4976b6bdf9093372eb454787d1...
You're receiving this email because of your account on gitlab.haskell.org.

    

Marge Bot (＠marge-bot)

tags

participants (1)