Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC

Commits:

11 changed files:

Changes:

  • compiler/GHC/Core/Opt/Specialise.hs
    ... ... @@ -1243,14 +1243,15 @@ specExpr env (Let bind body)
    1243 1243
     --     Note [Fire rules in the specialiser]
    
    1244 1244
     fireRewriteRules :: SpecEnv -> InExpr -> [OutExpr] -> (InExpr, [OutExpr])
    
    1245 1245
     fireRewriteRules env (Var f) args
    
    1246
    -  | Just (rule, expr) <- specLookupRule env f args InitialPhase (getRules (se_rules env) f)
    
    1246
    +  | let rules = getRules (se_rules env) f
    
    1247
    +  , Just (rule, expr) <- specLookupRule env f args activeInInitialPhase rules
    
    1247 1248
       , let rest_args    = drop (ruleArity rule) args -- See Note [Extra args in the target]
    
    1248 1249
             zapped_subst = Core.zapSubst (se_subst env)
    
    1249 1250
             expr'        = simpleOptExprWith defaultSimpleOpts zapped_subst expr
    
    1250 1251
                            -- simplOptExpr needed because lookupRule returns
    
    1251 1252
                            --   (\x y. rhs) arg1 arg2
    
    1252
    -  , (fun, args) <- collectArgs expr'
    
    1253
    -  = fireRewriteRules env fun (args++rest_args)
    
    1253
    +  , (fun', args') <- collectArgs expr'
    
    1254
    +  = fireRewriteRules env fun' (args'++rest_args)
    
    1254 1255
     fireRewriteRules _ fun args = (fun, args)
    
    1255 1256
     
    
    1256 1257
     --------------
    
    ... ... @@ -1620,7 +1621,7 @@ specCalls :: Bool -- True => specialising imported fn
    1620 1621
     
    
    1621 1622
     -- This function checks existing rules, and does not create
    
    1622 1623
     -- duplicate ones. So the caller does not need to do this filtering.
    
    1623
    --- See 'already_covered'
    
    1624
    +-- See `alreadyCovered`
    
    1624 1625
     
    
    1625 1626
     type SpecInfo = ( [CoreRule]       -- Specialisation rules
    
    1626 1627
                     , [(Id,CoreExpr)]  -- Specialised definition
    
    ... ... @@ -1644,15 +1645,13 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
    1644 1645
     
    
    1645 1646
       = -- pprTrace "specCalls: some" (vcat
    
    1646 1647
         --   [ text "function" <+> ppr fn
    
    1647
    -    --   , text "calls:" <+> ppr calls_for_me
    
    1648
    -    --   , text "subst" <+> ppr (se_subst env) ]) $
    
    1648
    +    --    , text "calls:" <+> ppr calls_for_me
    
    1649
    +    --    , text "subst" <+> ppr (se_subst env) ]) $
    
    1649 1650
         foldlM spec_call ([], [], emptyUDs) calls_for_me
    
    1650 1651
     
    
    1651 1652
       | otherwise   -- No calls or RHS doesn't fit our preconceptions
    
    1652
    -  = warnPprTrace (not (exprIsTrivial rhs) && notNull calls_for_me && not (isClassOpId fn))
    
    1653
    +  = warnPprTrace (not (exprIsTrivial rhs) && notNull calls_for_me)
    
    1653 1654
               "Missed specialisation opportunity for" (ppr fn $$ trace_doc) $
    
    1654
    -          -- isClassOpId: class-op Ids never inline; we specialise them
    
    1655
    -          -- through fireRewriteRules. So don't complain about missed opportunities
    
    1656 1655
               -- Note [Specialisation shape]
    
    1657 1656
         -- pprTrace "specCalls: none" (ppr fn <+> ppr calls_for_me) $
    
    1658 1657
         return ([], [], emptyUDs)
    
    ... ... @@ -1664,6 +1663,10 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
    1664 1663
         fn_unf    = realIdUnfolding fn  -- Ignore loop-breaker-ness here
    
    1665 1664
         inl_prag  = idInlinePragma fn
    
    1666 1665
         inl_act   = inlinePragmaActivation inl_prag
    
    1666
    +    is_active = isActive (beginPhase inl_act) :: Activation -> Bool
    
    1667
    +         -- is_active: inl_act is the activation we are going to put in the new
    
    1668
    +         --   SPEC rule; so we want to see if it is covered by another rule with
    
    1669
    +         --   that same activation.
    
    1667 1670
         is_local  = isLocalId fn
    
    1668 1671
         is_dfun   = isDFunId fn
    
    1669 1672
         dflags    = se_dflags env
    
    ... ... @@ -1674,16 +1677,6 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
    1674 1677
         (rhs_bndrs, rhs_body) = collectBindersPushingCo rhs
    
    1675 1678
                                 -- See Note [Account for casts in binding]
    
    1676 1679
     
    
    1677
    -    already_covered :: SpecEnv -> [CoreRule] -> [CoreExpr] -> Bool
    
    1678
    -    already_covered env new_rules args      -- Note [Specialisations already covered]
    
    1679
    -       = isJust (specLookupRule env fn args (beginPhase inl_act)
    
    1680
    -                                (new_rules ++ existing_rules))
    
    1681
    -         -- Rules: we look both in the new_rules (generated by this invocation
    
    1682
    -         --   of specCalls), and in existing_rules (passed in to specCalls)
    
    1683
    -         -- inl_act: is the activation we are going to put in the new SPEC
    
    1684
    -         --   rule; so we want to see if it is covered by another rule with
    
    1685
    -         --   that same activation.
    
    1686
    -
    
    1687 1680
         ----------------------------------------------------------
    
    1688 1681
             -- Specialise to one particular call pattern
    
    1689 1682
         spec_call :: SpecInfo                         -- Accumulating parameter
    
    ... ... @@ -1717,8 +1710,12 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
    1717 1710
     --                , ppr dx_binds ]) $
    
    1718 1711
     --             return ()
    
    1719 1712
     
    
    1713
    +           ; let all_rules = rules_acc ++ existing_rules
    
    1714
    +                 -- all_rules: we look both in the rules_acc (generated by this invocation
    
    1715
    +                 --   of specCalls), and in existing_rules (passed in to specCalls)
    
    1720 1716
                ; if not useful  -- No useful specialisation
    
    1721
    -                || already_covered rhs_env2 rules_acc rule_lhs_args
    
    1717
    +                || alreadyCovered rhs_env2 rule_bndrs fn rule_lhs_args is_active all_rules
    
    1718
    +                   -- See (SC1) in Note [Specialisations already covered]
    
    1722 1719
                  then return spec_acc
    
    1723 1720
                  else
    
    1724 1721
             do { -- Run the specialiser on the specialised RHS
    
    ... ... @@ -1780,7 +1777,7 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
    1780 1777
                      spec_fn_details
    
    1781 1778
                        = case idDetails fn of
    
    1782 1779
                            JoinId join_arity _ -> JoinId (join_arity - join_arity_decr) Nothing
    
    1783
    -                       DFunId is_nt        -> DFunId is_nt
    
    1780
    +                       DFunId unary        -> DFunId unary
    
    1784 1781
                            _                   -> VanillaId
    
    1785 1782
     
    
    1786 1783
                ; spec_fn <- newSpecIdSM (idName fn) spec_fn_ty spec_fn_details spec_fn_info
    
    ... ... @@ -1804,6 +1801,8 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
    1804 1801
                                            , ppr spec_fn  <+> dcolon <+> ppr spec_fn_ty
    
    1805 1802
                                            , ppr rhs_bndrs, ppr call_args
    
    1806 1803
                                            , ppr spec_rule
    
    1804
    +                                       , text "acc" <+> ppr rules_acc
    
    1805
    +                                       , text "existing" <+> ppr existing_rules
    
    1807 1806
                                            ]
    
    1808 1807
     
    
    1809 1808
                ; -- pprTrace "spec_call: rule" _rule_trace_doc
    
    ... ... @@ -1812,19 +1811,35 @@ specCalls spec_imp env existing_rules calls_for_me fn rhs
    1812 1811
                         , spec_uds           `thenUDs` uds_acc
    
    1813 1812
                         ) } }
    
    1814 1813
     
    
    1814
    +alreadyCovered :: SpecEnv
    
    1815
    +               -> [Var] -> Id -> [CoreExpr]   -- LHS of possible new rule
    
    1816
    +               -> (Activation -> Bool)        -- Which rules are active
    
    1817
    +               -> [CoreRule] -> Bool
    
    1818
    +-- Note [Specialisations already covered] esp (SC2)
    
    1819
    +alreadyCovered env bndrs fn args is_active rules
    
    1820
    +  = case specLookupRule env fn args is_active rules of
    
    1821
    +      Nothing             -> False
    
    1822
    +      Just (rule, _)
    
    1823
    +        | isAutoRule rule -> -- Discard identical rules
    
    1824
    +                             -- We know that (fn args) is an instance of RULE
    
    1825
    +                             -- Check if RULE is an instance of (fn args)
    
    1826
    +                             ruleLhsIsMoreSpecific in_scope bndrs args rule
    
    1827
    +        | otherwise       -> True  -- User rules dominate
    
    1828
    +  where
    
    1829
    +    in_scope = substInScopeSet (se_subst env)
    
    1830
    +
    
    1815 1831
     -- Convenience function for invoking lookupRule from Specialise
    
    1816 1832
     -- The SpecEnv's InScopeSet should include all the Vars in the [CoreExpr]
    
    1817 1833
     specLookupRule :: SpecEnv -> Id -> [CoreExpr]
    
    1818
    -               -> CompilerPhase  -- Look up rules as if we were in this phase
    
    1834
    +               -> (Activation -> Bool)  -- Which rules are active
    
    1819 1835
                    -> [CoreRule] -> Maybe (CoreRule, CoreExpr)
    
    1820
    -specLookupRule env fn args phase rules
    
    1836
    +specLookupRule env fn args is_active rules
    
    1821 1837
       = lookupRule ropts in_scope_env is_active fn args rules
    
    1822 1838
       where
    
    1823 1839
         dflags       = se_dflags env
    
    1824 1840
         in_scope     = substInScopeSet (se_subst env)
    
    1825 1841
         in_scope_env = ISE in_scope (whenActiveUnfoldingFun is_active)
    
    1826 1842
         ropts        = initRuleOpts dflags
    
    1827
    -    is_active    = isActive phase
    
    1828 1843
     
    
    1829 1844
     {- Note [Specialising DFuns]
    
    1830 1845
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -2323,21 +2338,24 @@ This plan is implemented in the Rec case of specBindItself.
    2323 2338
     Note [Specialisations already covered]
    
    2324 2339
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    2325 2340
     We obviously don't want to generate two specialisations for the same
    
    2326
    -argument pattern.  There are two wrinkles
    
    2327
    -
    
    2328
    -1. We do the already-covered test in specDefn, not when we generate
    
    2329
    -the CallInfo in mkCallUDs.  We used to test in the latter place, but
    
    2330
    -we now iterate the specialiser somewhat, and the Id at the call site
    
    2331
    -might therefore not have all the RULES that we can see in specDefn
    
    2332
    -
    
    2333
    -2. What about two specialisations where the second is an *instance*
    
    2334
    -of the first?  If the more specific one shows up first, we'll generate
    
    2335
    -specialisations for both.  If the *less* specific one shows up first,
    
    2336
    -we *don't* currently generate a specialisation for the more specific
    
    2337
    -one.  (See the call to lookupRule in already_covered.)  Reasons:
    
    2338
    -  (a) lookupRule doesn't say which matches are exact (bad reason)
    
    2339
    -  (b) if the earlier specialisation is user-provided, it's
    
    2340
    -      far from clear that we should auto-specialise further
    
    2341
    +argument pattern.  Wrinkles
    
    2342
    +
    
    2343
    +(SC1) We do the already-covered test in specDefn, not when we generate
    
    2344
    +    the CallInfo in mkCallUDs.  We used to test in the latter place, but
    
    2345
    +    we now iterate the specialiser somewhat, and the Id at the call site
    
    2346
    +    might therefore not have all the RULES that we can see in specDefn
    
    2347
    +
    
    2348
    +(SC2) What about two specialisations where the second is an *instance*
    
    2349
    +   of the first?  It's a bit arbitrary, but here's what we do:
    
    2350
    +   * If the existing one is user-specified, via a SPECIALISE pragma, we
    
    2351
    +     suppress the further specialisation.
    
    2352
    +   * If the existing one is auto-generated, we generate a second RULE
    
    2353
    +     for the more specialised version.
    
    2354
    +   The latter is important because we don't want the accidental order
    
    2355
    +   of calls to determine what specialisations we generate.
    
    2356
    +
    
    2357
    +(SC3) Annoyingly, we /also/ eliminate duplicates in `filterCalls`.
    
    2358
    +   See (MP3) in Note [Specialising polymorphic dictionaries]
    
    2341 2359
     
    
    2342 2360
     Note [Auto-specialisation and RULES]
    
    2343 2361
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -2800,12 +2818,10 @@ non-dictionary bindings too.
    2800 2818
     
    
    2801 2819
     Note [Specialising polymorphic dictionaries]
    
    2802 2820
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    2803
    -
    
    2804 2821
     Note June 2023: This has proved to be quite a tricky optimisation to get right
    
    2805 2822
     see (#23469, #23109, #21229, #23445) so it is now guarded by a flag
    
    2806 2823
     `-fpolymorphic-specialisation`.
    
    2807 2824
     
    
    2808
    -
    
    2809 2825
     Consider
    
    2810 2826
         class M a where { foo :: a -> Int }
    
    2811 2827
     
    
    ... ... @@ -2845,11 +2861,26 @@ Here are the moving parts:
    2845 2861
           function.
    
    2846 2862
     
    
    2847 2863
     (MP3) If we have f :: forall m. Monoid m => blah, and two calls
    
    2848
    -        (f @(Endo b)      (d :: Monoid (Endo b))
    
    2849
    -        (f @(Endo (c->c)) (d :: Monoid (Endo (c->c)))
    
    2864
    +        (f @(Endo b)      (d1 :: Monoid (Endo b))
    
    2865
    +        (f @(Endo (c->c)) (d2 :: Monoid (Endo (c->c)))
    
    2850 2866
           we want to generate a specialisation only for the first.  The second
    
    2851 2867
           is just a substitution instance of the first, with no greater specialisation.
    
    2852
    -      Hence the call to `remove_dups` in `filterCalls`.
    
    2868
    +      Hence the use of `removeDupCalls` in `filterCalls`.
    
    2869
    +
    
    2870
    +      You might wonder if `d2` might be more specialised than `d1`; but no.
    
    2871
    +      This `removeDupCalls` thing is at the definition site of `f`, and both `d1`
    
    2872
    +      and `d2` are in scope. So `d1` is simply more polymorphic than `d2`, but
    
    2873
    +      is just as specialised.
    
    2874
    +
    
    2875
    +      This distinction is sadly lost once we build a RULE, so `alreadyCovered`
    
    2876
    +      can't be so clever.  E.g if we have an existing RULE
    
    2877
    +            forall @a (d1:Ord Int) (d2: Eq a). f @a @Int d1 d2 = ...
    
    2878
    +      and a putative new rule
    
    2879
    +            forall (d1:Ord Int) (d2: Eq Int). f @Int @Int d1 d2 = ...
    
    2880
    +      we /don't/ want the existing rule to subsume the new one.
    
    2881
    +
    
    2882
    +      So we sadly put up with having two rather different places where we
    
    2883
    +      eliminate duplicates: `alreadyCovered` and `removeDupCalls`.
    
    2853 2884
     
    
    2854 2885
     All this arose in #13873, in the unexpected form that a SPECIALISE
    
    2855 2886
     pragma made the program slower!  The reason was that the specialised
    
    ... ... @@ -2947,16 +2978,29 @@ data CallInfoSet = CIS Id (Bag CallInfo)
    2947 2978
       -- The list of types and dictionaries is guaranteed to
    
    2948 2979
       -- match the type of f
    
    2949 2980
       -- The Bag may contain duplicate calls (i.e. f @T and another f @T)
    
    2950
    -  -- These dups are eliminated by already_covered in specCalls
    
    2981
    +  -- These dups are eliminated by alreadyCovered in specCalls
    
    2951 2982
     
    
    2952 2983
     data CallInfo
    
    2953
    -  = CI { ci_key  :: [SpecArg]   -- All arguments
    
    2984
    +  = CI { ci_key  :: [SpecArg]   -- Arguments of the call
    
    2985
    +                                -- See Note [The (CI-KEY) invariant]
    
    2986
    +
    
    2954 2987
            , ci_fvs  :: IdSet       -- Free Ids of the ci_key call
    
    2955 2988
                                     -- /not/ including the main id itself, of course
    
    2956 2989
                                     -- NB: excluding tyvars:
    
    2957 2990
                                     --     See Note [Specialising polymorphic dictionaries]
    
    2958 2991
         }
    
    2959 2992
     
    
    2993
    +{- Note [The (CI-KEY) invariant]
    
    2994
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    2995
    +Invariant (CI-KEY):
    
    2996
    +   In the `ci_key :: [SpecArg]` field of `CallInfo`,
    
    2997
    +     * The list is non-empty
    
    2998
    +     * The least element is always a `SpecDict`
    
    2999
    +
    
    3000
    +In this way the RULE has as few args as possible, which broadens its
    
    3001
    +applicability, since rules only fire when saturated.
    
    3002
    +-}
    
    3003
    +
    
    2960 3004
     type DictExpr = CoreExpr
    
    2961 3005
     
    
    2962 3006
     ciSetFilter :: (CallInfo -> Bool) -> CallInfoSet -> CallInfoSet
    
    ... ... @@ -3045,10 +3089,7 @@ mkCallUDs' env f args
    3045 3089
         ci_key :: [SpecArg]
    
    3046 3090
         ci_key = dropWhileEndLE (not . isSpecDict) $
    
    3047 3091
                  zipWith mk_spec_arg args pis
    
    3048
    -             -- Drop trailing args until we get to a SpecDict
    
    3049
    -             -- In this way the RULE has as few args as possible,
    
    3050
    -             -- which broadens its applicability, since rules only
    
    3051
    -             -- fire when saturated
    
    3092
    +             -- Establish (CI-KEY): drop trailing args until we get to a SpecDict
    
    3052 3093
     
    
    3053 3094
         mk_spec_arg :: OutExpr -> PiTyBinder -> SpecArg
    
    3054 3095
         mk_spec_arg arg (Named bndr)
    
    ... ... @@ -3086,34 +3127,76 @@ site, so we only look through ticks that RULE matching looks through
    3086 3127
     -}
    
    3087 3128
     
    
    3088 3129
     wantCallsFor :: SpecEnv -> Id -> Bool
    
    3089
    -wantCallsFor _env _f = True
    
    3090
    - -- We could reduce the size of the UsageDetails by being less eager
    
    3091
    - -- about collecting calls for LocalIds: there is no point for
    
    3092
    - -- ones that are lambda-bound.  We can't decide this by looking at
    
    3093
    - -- the (absence of an) unfolding, because unfoldings for local
    
    3094
    - -- functions are discarded by cloneBindSM, so no local binder will
    
    3095
    - -- have an unfolding at this stage.  We'd have to keep a candidate
    
    3096
    - -- set of let-binders.
    
    3097
    - --
    
    3098
    - -- Not many lambda-bound variables have dictionary arguments, so
    
    3099
    - -- this would make little difference anyway.
    
    3100
    - --
    
    3101
    - -- For imported Ids we could check for an unfolding, but we have to
    
    3102
    - -- do so anyway in canSpecImport, and it seems better to have it
    
    3103
    - -- all in one place.  So we simply collect usage info for imported
    
    3104
    - -- overloaded functions.
    
    3105
    -
    
    3106
    -{- Note [Interesting dictionary arguments]
    
    3107
    -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    3108
    -Consider this
    
    3109
    -         \a.\d:Eq a.  let f = ... in ...(f d)...
    
    3110
    -There really is not much point in specialising f wrt the dictionary d,
    
    3111
    -because the code for the specialised f is not improved at all, because
    
    3112
    -d is lambda-bound.  We simply get junk specialisations.
    
    3113
    -
    
    3114
    -What is "interesting"?  Just that it has *some* structure.  But what about
    
    3115
    -variables?  We look in the variable's /unfolding/.  And that means
    
    3116
    -that we must be careful to ensure that dictionaries have unfoldings,
    
    3130
    +-- See Note [wantCallsFor]
    
    3131
    +wantCallsFor _env f
    
    3132
    +  = case idDetails f of
    
    3133
    +      RecSelId {}      -> False
    
    3134
    +      DataConWorkId {} -> False
    
    3135
    +      DataConWrapId {} -> False
    
    3136
    +      ClassOpId {}     -> False
    
    3137
    +      PrimOpId {}      -> False
    
    3138
    +      FCallId {}       -> False
    
    3139
    +      TickBoxOpId {}   -> False
    
    3140
    +      CoVarId {}       -> False
    
    3141
    +
    
    3142
    +      DFunId {}        -> True
    
    3143
    +      VanillaId {}     -> True
    
    3144
    +      JoinId {}        -> True
    
    3145
    +      WorkerLikeId {}  -> True
    
    3146
    +      RepPolyId {}     -> True
    
    3147
    +
    
    3148
    +{- Note [wantCallsFor]
    
    3149
    +~~~~~~~~~~~~~~~~~~~~~~
    
    3150
    +`wantCallsFor env f` says whether the Specialiser should collect calls for
    
    3151
    +function `f`; other thing being equal, the fewer calls we collect the better. It
    
    3152
    +is False for things we can't specialise:
    
    3153
    +
    
    3154
    +* ClassOpId: never inline and we don't have a defn to specialise; we specialise
    
    3155
    +  them through fireRewriteRules.
    
    3156
    +* PrimOpId: are never overloaded
    
    3157
    +* Data constructors: we never specialise them
    
    3158
    +
    
    3159
    +We could reduce the size of the UsageDetails by being less eager about
    
    3160
    +collecting calls for some LocalIds: there is no point for ones that are
    
    3161
    +lambda-bound.  We can't decide this by looking at the (absence of an) unfolding,
    
    3162
    +because unfoldings for local functions are discarded by cloneBindSM, so no local
    
    3163
    +binder will have an unfolding at this stage.  We'd have to keep a candidate set
    
    3164
    +of let-binders.
    
    3165
    +
    
    3166
    +Not many lambda-bound variables have dictionary arguments, so this would make
    
    3167
    +little difference anyway.
    
    3168
    +
    
    3169
    +For imported Ids we could check for an unfolding, but we have to do so anyway in
    
    3170
    +canSpecImport, and it seems better to have it all in one place.  So we simply
    
    3171
    +collect usage info for imported overloaded functions.
    
    3172
    +
    
    3173
    +Note [Interesting dictionary arguments]
    
    3174
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    3175
    +In `mkCallUDs` we only use `SpecDict` for dictionaries of which
    
    3176
    +`interestingDict` holds.  Otherwise we use `UnspecArg`.  Two reasons:
    
    3177
    +
    
    3178
    +* Consider this
    
    3179
    +       \a.\d:Eq a.  let f = ... in ...(f d)...
    
    3180
    +  There really is not much point in specialising f wrt the dictionary d,
    
    3181
    +  because the code for the specialised f is not improved at all, because
    
    3182
    +  d is lambda-bound.  We simply get junk specialisations.
    
    3183
    +
    
    3184
    +* Consider this (#25703):
    
    3185
    +     f :: (Eq a, Show b) => a -> b -> INt
    
    3186
    +     goo :: forall x. (Eq x) => x -> blah
    
    3187
    +     goo @x (d:Eq x) (arg:x) = ...(f @x @Int d $fShowInt)...
    
    3188
    +  If we built a `ci_key` with a (SpecDict d) for `d`, we would end up
    
    3189
    +  discarding the call at the `\d`.  But if we use `UnspecArg` for that
    
    3190
    +  uninteresting `d`, we'll get a `ci_key` of
    
    3191
    +      f @x @Int UnspecArg (SpecDict $fShowInt)
    
    3192
    +  and /that/ can float out to f's definition and specialise nicely.
    
    3193
    +  Hooray.  (NB: the call can float only if `-fpolymorphic-specialisation`
    
    3194
    +  is on; otherwise it'll be trapped by the `\@x -> ...`.)(
    
    3195
    +
    
    3196
    +What is "interesting"?  (See `interestingDict`.)  Just that it has *some*
    
    3197
    +structure.  But what about variables?  We look in the variable's /unfolding/.
    
    3198
    +And that means that we must be careful to ensure that dictionaries /have/
    
    3199
    +unfoldings,
    
    3117 3200
     
    
    3118 3201
     * cloneBndrSM discards non-Stable unfoldings
    
    3119 3202
     * specBind updates the unfolding after specialisation
    
    ... ... @@ -3159,7 +3242,7 @@ Now `f` turns into:
    3159 3242
               meth @a dc ....
    
    3160 3243
     
    
    3161 3244
     When we specialise `f`, at a=Int say, that superclass selection can
    
    3162
    -nfire (via rewiteClassOps), but that info (that 'dc' is now a
    
    3245
    +fire (via rewiteClassOps), but that info (that 'dc' is now a
    
    3163 3246
     particular dictionary `C`, of type `C Int`) must be available to
    
    3164 3247
     the call `meth @a dc`, so that we can fire the `meth` class-op, and
    
    3165 3248
     thence specialise `wombat`.
    
    ... ... @@ -3286,7 +3369,11 @@ dumpUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, OrdList DictBind)
    3286 3369
     -- Used at a lambda or case binder; just dump anything mentioning the binder
    
    3287 3370
     dumpUDs bndrs uds@(MkUD { ud_binds = orig_dbs, ud_calls = orig_calls })
    
    3288 3371
       | null bndrs = (uds, nilOL)  -- Common in case alternatives
    
    3289
    -  | otherwise  = -- pprTrace "dumpUDs" (ppr bndrs $$ ppr free_uds $$ ppr dump_dbs) $
    
    3372
    +  | otherwise  = -- pprTrace "dumpUDs" (vcat
    
    3373
    +                 --    [ text "bndrs" <+> ppr bndrs
    
    3374
    +                 --    , text "uds" <+> ppr uds
    
    3375
    +                 --    , text "free_uds" <+> ppr free_uds
    
    3376
    +                 --    , text "dump-dbs" <+> ppr dump_dbs ]) $
    
    3290 3377
                      (free_uds, dump_dbs)
    
    3291 3378
       where
    
    3292 3379
         free_uds = uds { ud_binds = free_dbs, ud_calls = free_calls }
    
    ... ... @@ -3325,20 +3412,17 @@ callsForMe fn uds@MkUD { ud_binds = orig_dbs, ud_calls = orig_calls }
    3325 3412
         calls_for_me = case lookupDVarEnv orig_calls fn of
    
    3326 3413
                             Nothing -> []
    
    3327 3414
                             Just cis -> filterCalls cis orig_dbs
    
    3328
    -         -- filterCalls: drop calls that (directly or indirectly)
    
    3329
    -         -- refer to fn.  See Note [Avoiding loops (DFuns)]
    
    3330 3415
     
    
    3331 3416
     ----------------------
    
    3332 3417
     filterCalls :: CallInfoSet -> FloatedDictBinds -> [CallInfo]
    
    3333
    --- Remove dominated calls (Note [Specialising polymorphic dictionaries])
    
    3334
    --- and loopy DFuns (Note [Avoiding loops (DFuns)])
    
    3418
    +-- Remove
    
    3419
    +--   (a) dominated calls: (MP3) in Note [Specialising polymorphic dictionaries]
    
    3420
    +--   (b) loopy DFuns: Note [Avoiding loops (DFuns)]
    
    3335 3421
     filterCalls (CIS fn call_bag) (FDB { fdb_binds = dbs })
    
    3336
    -  | isDFunId fn  -- Note [Avoiding loops (DFuns)] applies only to DFuns
    
    3337
    -  = filter ok_call de_dupd_calls
    
    3338
    -  | otherwise         -- Do not apply it to non-DFuns
    
    3339
    -  = de_dupd_calls  -- See Note [Avoiding loops (non-DFuns)]
    
    3422
    +  | isDFunId fn  = filter ok_call de_dupd_calls  -- Deals with (b)
    
    3423
    +  | otherwise    = de_dupd_calls
    
    3340 3424
       where
    
    3341
    -    de_dupd_calls = remove_dups call_bag
    
    3425
    +    de_dupd_calls = removeDupCalls call_bag -- Deals with (a)
    
    3342 3426
     
    
    3343 3427
         dump_set = foldl' go (unitVarSet fn) dbs
    
    3344 3428
           -- This dump-set could also be computed by splitDictBinds
    
    ... ... @@ -3352,10 +3436,10 @@ filterCalls (CIS fn call_bag) (FDB { fdb_binds = dbs })
    3352 3436
     
    
    3353 3437
         ok_call (CI { ci_fvs = fvs }) = fvs `disjointVarSet` dump_set
    
    3354 3438
     
    
    3355
    -remove_dups :: Bag CallInfo -> [CallInfo]
    
    3439
    +removeDupCalls :: Bag CallInfo -> [CallInfo]
    
    3356 3440
     -- Calls involving more generic instances beat more specific ones.
    
    3357 3441
     -- See (MP3) in Note [Specialising polymorphic dictionaries]
    
    3358
    -remove_dups calls = foldr add [] calls
    
    3442
    +removeDupCalls calls = foldr add [] calls
    
    3359 3443
       where
    
    3360 3444
         add :: CallInfo -> [CallInfo] -> [CallInfo]
    
    3361 3445
         add ci [] = [ci]
    
    ... ... @@ -3364,12 +3448,20 @@ remove_dups calls = foldr add [] calls
    3364 3448
                           | otherwise               = ci2 : add ci1 cis
    
    3365 3449
     
    
    3366 3450
     beats_or_same :: CallInfo -> CallInfo -> Bool
    
    3451
    +-- (beats_or_same ci1 ci2) is True if specialising on ci1 subsumes ci2
    
    3452
    +-- That is: ci1's types are less specialised than ci2
    
    3453
    +--          ci1   specialises on the same dict args as ci2
    
    3367 3454
     beats_or_same (CI { ci_key = args1 }) (CI { ci_key = args2 })
    
    3368 3455
       = go args1 args2
    
    3369 3456
       where
    
    3370
    -    go [] _ = True
    
    3457
    +    go []           []           = True
    
    3371 3458
         go (arg1:args1) (arg2:args2) = go_arg arg1 arg2 && go args1 args2
    
    3372
    -    go (_:_)        []           = False
    
    3459
    +
    
    3460
    +    -- If one or the other runs dry, the other must still have a SpecDict
    
    3461
    +    -- because of the (CI-KEY) invariant.  So neither subsumes the other;
    
    3462
    +    -- one is more specialised (faster code) but the other is more generally
    
    3463
    +    -- applicable.
    
    3464
    +    go  _ _ = False
    
    3373 3465
     
    
    3374 3466
         go_arg (SpecType ty1) (SpecType ty2) = isJust (tcMatchTy ty1 ty2)
    
    3375 3467
         go_arg UnspecType     UnspecType     = True
    

  • compiler/GHC/Core/Rules.hs
    ... ... @@ -9,7 +9,7 @@
    9 9
     -- The 'CoreRule' datatype itself is declared elsewhere.
    
    10 10
     module GHC.Core.Rules (
    
    11 11
             -- ** Looking up rules
    
    12
    -        lookupRule, matchExprs,
    
    12
    +        lookupRule, matchExprs, ruleLhsIsMoreSpecific,
    
    13 13
     
    
    14 14
             -- ** RuleBase, RuleEnv
    
    15 15
             RuleBase, RuleEnv(..), mkRuleEnv, emptyRuleEnv,
    
    ... ... @@ -587,8 +587,8 @@ findBest :: InScopeSet -> (Id, [CoreExpr])
    587 587
     
    
    588 588
     findBest _        _      (rule,ans)   [] = (rule,ans)
    
    589 589
     findBest in_scope target (rule1,ans1) ((rule2,ans2):prs)
    
    590
    -  | isMoreSpecific in_scope rule1 rule2 = findBest in_scope target (rule1,ans1) prs
    
    591
    -  | isMoreSpecific in_scope rule2 rule1 = findBest in_scope target (rule2,ans2) prs
    
    590
    +  | ruleIsMoreSpecific in_scope rule1 rule2 = findBest in_scope target (rule1,ans1) prs
    
    591
    +  | ruleIsMoreSpecific in_scope rule2 rule1 = findBest in_scope target (rule2,ans2) prs
    
    592 592
       | debugIsOn = let pp_rule rule
    
    593 593
                           = ifPprDebug (ppr rule)
    
    594 594
                                        (doubleQuotes (ftext (ruleName rule)))
    
    ... ... @@ -603,15 +603,25 @@ findBest in_scope target (rule1,ans1) ((rule2,ans2):prs)
    603 603
       where
    
    604 604
         (fn,args) = target
    
    605 605
     
    
    606
    -isMoreSpecific :: InScopeSet -> CoreRule -> CoreRule -> Bool
    
    607
    --- The call (rule1 `isMoreSpecific` rule2)
    
    606
    +ruleIsMoreSpecific :: InScopeSet -> CoreRule -> CoreRule -> Bool
    
    607
    +-- The call (rule1 `ruleIsMoreSpecific` rule2)
    
    608 608
     -- sees if rule2 can be instantiated to look like rule1
    
    609
    --- See Note [isMoreSpecific]
    
    610
    -isMoreSpecific _        (BuiltinRule {}) _                = False
    
    611
    -isMoreSpecific _        (Rule {})        (BuiltinRule {}) = True
    
    612
    -isMoreSpecific in_scope (Rule { ru_bndrs = bndrs1, ru_args = args1 })
    
    613
    -                        (Rule { ru_bndrs = bndrs2, ru_args = args2 })
    
    614
    -  = isJust (matchExprs in_scope_env bndrs2 args2 args1)
    
    609
    +-- See Note [ruleIsMoreSpecific]
    
    610
    +ruleIsMoreSpecific in_scope rule1 rule2
    
    611
    +  = case rule1 of
    
    612
    +       BuiltinRule {} -> False
    
    613
    +       Rule { ru_bndrs = bndrs1, ru_args = args1 }
    
    614
    +                      -> ruleLhsIsMoreSpecific in_scope bndrs1 args1 rule2
    
    615
    +
    
    616
    +ruleLhsIsMoreSpecific :: InScopeSet
    
    617
    +                      -> [Var] -> [CoreExpr]  -- LHS of a possible new rule
    
    618
    +                      -> CoreRule             -- An existing rule
    
    619
    +                      -> Bool                 -- New one is more specific
    
    620
    +ruleLhsIsMoreSpecific in_scope bndrs1 args1 rule2
    
    621
    +  = case rule2 of
    
    622
    +       BuiltinRule {} -> True
    
    623
    +       Rule { ru_bndrs = bndrs2, ru_args = args2 }
    
    624
    +                      -> isJust (matchExprs in_scope_env bndrs2 args2 args1)
    
    615 625
       where
    
    616 626
        full_in_scope = in_scope `extendInScopeSetList` bndrs1
    
    617 627
        in_scope_env  = ISE full_in_scope noUnfoldingFun
    
    ... ... @@ -620,9 +630,9 @@ isMoreSpecific in_scope (Rule { ru_bndrs = bndrs1, ru_args = args1 })
    620 630
     noBlackList :: Activation -> Bool
    
    621 631
     noBlackList _ = False           -- Nothing is black listed
    
    622 632
     
    
    623
    -{- Note [isMoreSpecific]
    
    633
    +{- Note [ruleIsMoreSpecific]
    
    624 634
     ~~~~~~~~~~~~~~~~~~~~~~~~
    
    625
    -The call (rule1 `isMoreSpecific` rule2)
    
    635
    +The call (rule1 `ruleIsMoreSpecific` rule2)
    
    626 636
     sees if rule2 can be instantiated to look like rule1.
    
    627 637
     
    
    628 638
     Wrinkle:
    
    ... ... @@ -825,7 +835,7 @@ bound on the LHS:
    825 835
     
    
    826 836
       The rule looks like
    
    827 837
         forall (a::*) (d::Eq Char) (x :: Foo a Char).
    
    828
    -         f (Foo a Char) d x = True
    
    838
    +         f @(Foo a Char) d x = True
    
    829 839
     
    
    830 840
       Matching the rule won't bind 'a', and legitimately so.  We fudge by
    
    831 841
       pretending that 'a' is bound to (Any :: *).
    

  • compiler/GHC/Core/Unify.hs
    ... ... @@ -331,35 +331,57 @@ Wrinkles
    331 331
        `DontBindMe`, the unifier must return `SurelyApart`, not `MaybeApart`.  See
    
    332 332
        `go_fam` in `uVarOrFam`
    
    333 333
     
    
    334
    -(ATF6) You might think that when /matching/ the um_fam_env will always be empty,
    
    335
    -   because type-class-instance and type-family-instance heads can't include type
    
    336
    -   families.  E.g.   instance C (F a) where ...   -- Illegal
    
    337
    -
    
    338
    -   But you'd be wrong: when "improving" type family constraint we may have a
    
    339
    -   type family on the LHS of a match. Consider
    
    334
    +(ATF6) When /matching/ can we ever have a type-family application on the LHS, in
    
    335
    +   the template?  You might think not, because type-class-instance and
    
    336
    +   type-family-instance heads can't include type families.  E.g.
    
    337
    +            instance C (F a) where ...  -- Illegal
    
    338
    +
    
    339
    +   But you'd be wrong: even when matching, we can see type families in the LHS template:
    
    340
    +   * In `checkValidClass`, in `check_dm` we check that the default method has the
    
    341
    +      right type, using matching, both ways.  And that type may have type-family
    
    342
    +      applications in it. Example in test CoOpt_Singletons.
    
    343
    +
    
    344
    +   * In the specialiser: see the call to `tcMatchTy` in
    
    345
    +     `GHC.Core.Opt.Specialise.beats_or_same`
    
    346
    +
    
    347
    +   * With -fpolymorphic-specialsation, we might get a specialiation rule like
    
    348
    +         RULE forall a (d :: Eq (Maybe (F a))) .
    
    349
    +                 f @(Maybe (F a)) d = ...
    
    350
    +     See #25965.
    
    351
    +
    
    352
    +   * A user-written RULE could conceivably have a type-family application
    
    353
    +     in the template.  It might not be a good rule, but I don't think we currently
    
    354
    +     check for this.
    
    355
    +
    
    356
    +    In all these cases we are only interested in finding a substitution /for
    
    357
    +    type variables/ that makes the match work.  So we simply want to recurse into
    
    358
    +    the arguments of the type family.  E.g.
    
    359
    +       Template:   forall a.  Maybe (F a)
    
    360
    +       Target:     Mabybe (F Int)
    
    361
    +    We want to succeed with substitution [a :-> Int].  See (ATF9).
    
    362
    +
    
    363
    +    Conclusion: where we enter via `tcMatchTy`, `tcMatchTys`, `tc_match_tys`,
    
    364
    +    etc, we always end up in `tc_match_tys_x`.  There we invoke the unifier
    
    365
    +    but we do not distinguish between `SurelyApart` and `MaybeApart`. So in
    
    366
    +    these cases we can set `um_bind_fam_fun` to `neverBindFam`.
    
    367
    +
    
    368
    +(ATF7) There is one other, very special case of matching where we /do/ want to
    
    369
    +   bind type families in `um_fam_env`, namely in GHC.Tc.Solver.Equality, the call
    
    370
    +   to `tcUnifyTyForInjectivity False` in `improve_injective_wanted_top`.
    
    371
    +   Consider
    
    372
    +   of a match. Consider
    
    340 373
           type family G6 a = r | r -> a
    
    341 374
           type instance G6 [a]  = [G a]
    
    342 375
           type instance G6 Bool = Int
    
    343
    -   and the Wanted constraint [W] G6 alpha ~ [Int].  We /match/ each type instance
    
    344
    -   RHS against [Int]!  So we try
    
    345
    -        [G a] ~ [Int]
    
    376
    +   and suppose we haev a Wanted constraint
    
    377
    +      [W] G6 alpha ~ [Int]
    
    378
    +.  According to Section 5.2 of "Injective type families for Haskell", we /match/
    
    379
    +   the RHS each type instance [Int].  So we try
    
    380
    +        Template: [G a]    Target: [Int]
    
    346 381
        and we want to succeed with MaybeApart, so that we can generate the improvement
    
    347
    -   constraint  [W] alpha ~ [beta]  where beta is fresh.
    
    348
    -   See Section 5.2 of "Injective type families for Haskell".
    
    349
    -
    
    350
    -   A second place that we match with type-fams on the LHS is in `checkValidClass`.
    
    351
    -   In `check_dm` we check that the default method has the right type, using matching,
    
    352
    -   both ways.  And that type may have type-family applications in it. Example in
    
    353
    -   test CoOpt_Singletons.
    
    354
    -
    
    355
    -(ATF7) You might think that (ATF6) is a very special case, and in /other/ uses of
    
    356
    -  matching, where we enter via `tc_match_tys_x` we will never see a type-family
    
    357
    -  in the template. But actually we do see that case in the specialiser: see
    
    358
    -  the call to `tcMatchTy` in `GHC.Core.Opt.Specialise.beats_or_same`
    
    359
    -
    
    360
    -  Also: a user-written RULE could conceivably have a type-family application
    
    361
    -  in the template.  It might not be a good rule, but I don't think we currently
    
    362
    -  check for this.
    
    382
    +   constraint
    
    383
    +        [W] alpha ~ [beta]
    
    384
    +   where beta is fresh.  We do this by binding [G a :-> Int]
    
    363 385
     
    
    364 386
     (ATF8) The treatment of type families is governed by
    
    365 387
              um_bind_fam_fun :: BindFamFun
    
    ... ... @@ -399,6 +421,8 @@ Wrinkles
    399 421
       Key point: when decomposing (F tys1 ~ F tys2), we should /also/ extend the
    
    400 422
       type-family substitution.
    
    401 423
     
    
    424
    +  (ATF11-1) All this cleverness only matters when unifying, not when matching
    
    425
    +
    
    402 426
     (ATF12) There is a horrid exception for the injectivity check. See (UR1) in
    
    403 427
       in Note [Specification of unification].
    
    404 428
     
    
    ... ... @@ -595,7 +619,7 @@ tc_match_tys_x :: HasDebugCallStack
    595 619
                    -> [Type]
    
    596 620
                    -> Maybe Subst
    
    597 621
     tc_match_tys_x bind_tv match_kis (Subst in_scope id_env tv_env cv_env) tys1 tys2
    
    598
    -  = case tc_unify_tys alwaysBindFam  -- (ATF7) in Note [Apartness and type families]
    
    622
    +  = case tc_unify_tys neverBindFam  -- (ATF7) in Note [Apartness and type families]
    
    599 623
                           bind_tv
    
    600 624
                           False  -- Matching, not unifying
    
    601 625
                           False  -- Not an injectivity check
    
    ... ... @@ -1857,6 +1881,7 @@ uVarOrFam env ty1 ty2 kco
    1857 1881
           = go_fam_fam tc1 tys1 tys2 kco
    
    1858 1882
     
    
    1859 1883
           -- Now check if we can bind the (F tys) to the RHS
    
    1884
    +      -- This can happen even when matching: see (ATF7)
    
    1860 1885
           | BindMe <- um_bind_fam_fun env tc1 tys1 rhs
    
    1861 1886
           = -- ToDo: do we need an occurs check here?
    
    1862 1887
             do { extendFamEnv tc1 tys1 rhs
    
    ... ... @@ -1881,11 +1906,6 @@ uVarOrFam env ty1 ty2 kco
    1881 1906
         -- go_fam_fam: LHS and RHS are both saturated type-family applications,
    
    1882 1907
         --             for the same type-family F
    
    1883 1908
         go_fam_fam tc tys1 tys2 kco
    
    1884
    -      | tcEqTyConAppArgs tys1 tys2
    
    1885
    -      -- Detect (F tys ~ F tys); otherwise we'd build an infinite substitution
    
    1886
    -      = return ()
    
    1887
    -
    
    1888
    -      | otherwise
    
    1889 1909
            -- Decompose (F tys1 ~ F tys2): (ATF9)
    
    1890 1910
            -- Use injectivity information of F: (ATF10)
    
    1891 1911
            -- But first bind the type-fam if poss: (ATF11)
    
    ... ... @@ -1902,13 +1922,19 @@ uVarOrFam env ty1 ty2 kco
    1902 1922
            (inj_tys1, noninj_tys1) = partitionByList inj tys1
    
    1903 1923
            (inj_tys2, noninj_tys2) = partitionByList inj tys2
    
    1904 1924
     
    
    1905
    -       bind_fam_if_poss | BindMe <- um_bind_fam_fun env tc tys1 rhs1
    
    1906
    -                        = extendFamEnv tc tys1 rhs1
    
    1907
    -                        | um_unif env
    
    1908
    -                        , BindMe <- um_bind_fam_fun env tc tys2 rhs2
    
    1909
    -                        = extendFamEnv tc tys2 rhs2
    
    1910
    -                        | otherwise
    
    1911
    -                        = return ()
    
    1925
    +       bind_fam_if_poss
    
    1926
    +         | not (um_unif env)  -- Not when matching (ATF11-1)
    
    1927
    +         = return ()
    
    1928
    +         | tcEqTyConAppArgs tys1 tys2   -- Detect (F tys ~ F tys);
    
    1929
    +         = return ()                    -- otherwise we'd build an infinite substitution
    
    1930
    +         | BindMe <- um_bind_fam_fun env tc tys1 rhs1
    
    1931
    +         = extendFamEnv tc tys1 rhs1
    
    1932
    +         | um_unif env
    
    1933
    +         , BindMe <- um_bind_fam_fun env tc tys2 rhs2
    
    1934
    +         = extendFamEnv tc tys2 rhs2
    
    1935
    +         | otherwise
    
    1936
    +         = return ()
    
    1937
    +
    
    1912 1938
            rhs1 = mkTyConApp tc tys2 `mkCastTy` mkSymCo kco
    
    1913 1939
            rhs2 = mkTyConApp tc tys1 `mkCastTy` kco
    
    1914 1940
     
    
    ... ... @@ -1993,7 +2019,7 @@ data UMState = UMState
    1993 2019
       -- in um_foralls; i.e. variables bound by foralls inside the types being unified
    
    1994 2020
     
    
    1995 2021
       -- When /matching/ um_fam_env is usually empty; but not quite always.
    
    1996
    -  -- See (ATF6) and (ATF7) of Note [Apartness and type families]
    
    2022
    +  -- See (ATF7) of Note [Apartness and type families]
    
    1997 2023
     
    
    1998 2024
     newtype UM a
    
    1999 2025
       = UM' { unUM :: UMState -> UnifyResultM (UMState, a) }
    

  • compiler/GHC/Tc/Solver/Equality.hs
    ... ... @@ -3017,6 +3017,7 @@ improve_wanted_top_fun_eqs fam_tc lhs_tys rhs_ty
    3017 3017
     
    
    3018 3018
     improve_injective_wanted_top :: FamInstEnvs -> [Bool] -> TyCon -> [TcType] -> Xi -> TcS [TypeEqn]
    
    3019 3019
     -- Interact with top-level instance declarations
    
    3020
    +-- See Section 5.2 in the Injective Type Families paper
    
    3020 3021
     improve_injective_wanted_top fam_envs inj_args fam_tc lhs_tys rhs_ty
    
    3021 3022
       = concatMapM do_one branches
    
    3022 3023
       where
    
    ... ... @@ -3035,6 +3036,7 @@ improve_injective_wanted_top fam_envs inj_args fam_tc lhs_tys rhs_ty
    3035 3036
         do_one branch@(CoAxBranch { cab_tvs = branch_tvs, cab_lhs = branch_lhs_tys, cab_rhs = branch_rhs })
    
    3036 3037
           | let in_scope1 = in_scope `extendInScopeSetList` branch_tvs
    
    3037 3038
           , Just subst <- tcUnifyTyForInjectivity False in_scope1 branch_rhs rhs_ty
    
    3039
    +                      -- False: matching, not unifying
    
    3038 3040
           = do { let inSubst tv = tv `elemVarEnv` getTvSubstEnv subst
    
    3039 3041
                      unsubstTvs = filterOut inSubst branch_tvs
    
    3040 3042
                      -- The order of unsubstTvs is important; it must be
    

  • compiler/GHC/Types/Basic.hs
    ... ... @@ -87,7 +87,7 @@ module GHC.Types.Basic (
    87 87
             CompilerPhase(..), PhaseNum, beginPhase, nextPhase, laterPhase,
    
    88 88
     
    
    89 89
             Activation(..), isActive, competesWith,
    
    90
    -        isNeverActive, isAlwaysActive, activeInFinalPhase,
    
    90
    +        isNeverActive, isAlwaysActive, activeInFinalPhase, activeInInitialPhase,
    
    91 91
             activateAfterInitial, activateDuringFinal, activeAfter,
    
    92 92
     
    
    93 93
             RuleMatchInfo(..), isConLike, isFunLike,
    

  • testsuite/tests/simplCore/should_compile/T25703.hs
    1
    +module T25703 where
    
    2
    +
    
    3
    +f :: (Eq a, Show b) => a -> b -> Int
    
    4
    +f x y = f x y
    
    5
    +
    
    6
    +goo :: forall x. (Eq x) => x -> Int
    
    7
    +goo arg = f arg (3::Int)

  • testsuite/tests/simplCore/should_compile/T25703.stderr
    1
    +Rule fired: SPEC f @_ @Int (T25703)
    
    2
    +Rule fired: SPEC f @_ @Int (T25703)

  • testsuite/tests/simplCore/should_compile/T25703a.hs
    1
    +{-# LANGUAGE DataKinds #-}
    
    2
    +{-# LANGUAGE GADTs #-}
    
    3
    +
    
    4
    +{-# OPTIONS_GHC -O2 -fspecialise-aggressively #-}
    
    5
    +
    
    6
    +-- This pragma is just here to pretend that the function body of 'foo' is huge
    
    7
    +-- and should never be inlined.
    
    8
    +{-# OPTIONS_GHC -funfolding-use-threshold=-200 #-}
    
    9
    +
    
    10
    +module T25703a where
    
    11
    +
    
    12
    +import Data.Kind
    
    13
    +import Data.Type.Equality
    
    14
    +import Data.Proxy
    
    15
    +import GHC.TypeNats
    
    16
    +
    
    17
    +-- Pretend this is some big dictionary that absolutely must get
    
    18
    +-- specialised away for performance reasons.
    
    19
    +type C :: Nat -> Constraint
    
    20
    +class C i where
    
    21
    +  meth :: Proxy i -> Double
    
    22
    +instance C 0 where
    
    23
    +  meth _ = 0.1
    
    24
    +instance C 1 where
    
    25
    +  meth _ = 1.1
    
    26
    +instance C 2 where
    
    27
    +  meth _ = 2.1
    
    28
    +
    
    29
    +{-# INLINEABLE foo #-}
    
    30
    +foo :: forall a (n :: Nat) (m :: Nat)
    
    31
    +    .  ( Eq a, C n, C m )
    
    32
    +    => a -> ( Proxy n, Proxy m ) -> Int -> Double
    
    33
    +-- Pretend this is a big complicated function, too big to inline,
    
    34
    +-- for which we absolutely must specialise away the 'C n', 'C m'
    
    35
    +-- dictionaries for performance reasons.
    
    36
    +foo a b c
    
    37
    +  = if a == a
    
    38
    +    then meth @n Proxy + fromIntegral c
    
    39
    +    else 2 * meth @m Proxy
    
    40
    +
    
    41
    +-- Runtime dispatch to a specialisation of 'foo'
    
    42
    +foo_spec :: forall a (n :: Nat) (m :: Nat)
    
    43
    +         .  ( Eq a, KnownNat n, KnownNat m )
    
    44
    +         => a -> ( Proxy n, Proxy m ) -> Int -> Double
    
    45
    +foo_spec a b c
    
    46
    +  | Just Refl <- sameNat @n @0 Proxy Proxy
    
    47
    +  , Just Refl <- sameNat @m @0 Proxy Proxy
    
    48
    +  = foo @a @0 @0 a b c
    
    49
    +  | Just Refl <- sameNat @n @0 Proxy Proxy
    
    50
    +  , Just Refl <- sameNat @m @1 Proxy Proxy
    
    51
    +  = foo @a @0 @1 a b c
    
    52
    +  | Just Refl <- sameNat @n @1 Proxy Proxy
    
    53
    +  , Just Refl <- sameNat @m @1 Proxy Proxy
    
    54
    +  = foo @a @1 @1 a b c
    
    55
    +  | Just Refl <- sameNat @n @0 Proxy Proxy
    
    56
    +  , Just Refl <- sameNat @m @2 Proxy Proxy
    
    57
    +  = foo @a @0 @2 a b c
    
    58
    +  | Just Refl <- sameNat @n @1 Proxy Proxy
    
    59
    +  , Just Refl <- sameNat @m @2 Proxy Proxy
    
    60
    +  = foo @a @1 @2 a b c
    
    61
    +  | Just Refl <- sameNat @n @2 Proxy Proxy
    
    62
    +  , Just Refl <- sameNat @m @2 Proxy Proxy
    
    63
    +  = foo @a @2 @2 a b c
    
    64
    +  | otherwise
    
    65
    +  = error $ unlines
    
    66
    +      [ "f: no specialisation"
    
    67
    +      , "n: " ++ show (natVal @n Proxy)
    
    68
    +      , "m: " ++ show (natVal @m Proxy)
    
    69
    +      ]

  • testsuite/tests/simplCore/should_compile/T25703a.stderr
    1
    +Rule fired: SPEC foo @_ @2 @2 (T25703a)
    
    2
    +Rule fired: SPEC foo @_ @1 @2 (T25703a)
    
    3
    +Rule fired: SPEC foo @_ @0 @2 (T25703a)
    
    4
    +Rule fired: SPEC foo @_ @1 @1 (T25703a)
    
    5
    +Rule fired: SPEC foo @_ @0 @1 (T25703a)
    
    6
    +Rule fired: SPEC foo @_ @0 @0 (T25703a)

  • testsuite/tests/simplCore/should_compile/T25965.hs
    1
    +{-# LANGUAGE TypeFamilies #-}
    
    2
    +{-# OPTIONS_GHC -O -fpolymorphic-specialisation #-}
    
    3
    +
    
    4
    +module Foo where
    
    5
    +
    
    6
    +type family F a
    
    7
    +
    
    8
    +data T a = T1
    
    9
    +
    
    10
    +instance Eq (T a) where { (==) x y = False }
    
    11
    +
    
    12
    +foo :: Eq a => a -> Bool
    
    13
    +foo x | x==x = True
    
    14
    +      | otherwise = foo x
    
    15
    +
    
    16
    +bar :: forall b. b -> T (F b) -> Bool
    
    17
    +bar y x = foo x
    
    18
    +

  • testsuite/tests/simplCore/should_compile/all.T
    ... ... @@ -543,3 +543,8 @@ test('T25883c', normal, compile_grep_core, [''])
    543 543
     test('T25883d', [extra_files(['T25883d_import.hs'])], multimod_compile_filter, ['T25883d', '-O -ddump-simpl -dno-typeable-binds -dsuppress-all -dsuppress-uniques', r'grep -e "y ="'])
    
    544 544
     
    
    545 545
     test('T25976', [grep_errmsg('Dead Code')], compile, ['-O -ddump-simpl -dsuppress-uniques -dno-typeable-binds'])
    
    546
    +
    
    547
    +test('T25965', normal, compile, ['-O'])
    
    548
    +test('T25703',  [grep_errmsg(r'SPEC')], compile, ['-O -fpolymorphic-specialisation -ddump-rule-firings'])
    
    549
    +test('T25703a', [grep_errmsg(r'SPEC')], compile, ['-O -fpolymorphic-specialisation -ddump-rule-firings'])
    
    550
    +