sheaf pushed to branch wip/andreask/ticked_joins at Glasgow Haskell Compiler / GHC

Commits:

10 changed files:

Changes:

  • compiler/GHC/Core/Lint.hs
    ... ... @@ -672,7 +672,7 @@ lintRhs :: Id -> CoreExpr -> LintM (OutType, UsageEnv)
    672 672
     lintRhs bndr rhs
    
    673 673
         | JoinPoint arity <- idJoinPointHood bndr
    
    674 674
         = lintJoinLams arity (Just bndr) rhs
    
    675
    -    | AlwaysTailCalled arity <- tailCallInfo (idOccInfo bndr)
    
    675
    +    | AlwaysTailCalled { tailCallArity = arity } <- tailCallInfo (idOccInfo bndr)
    
    676 676
         = lintJoinLams arity Nothing rhs
    
    677 677
     
    
    678 678
     -- Allow applications of the data constructor @StaticPtr@ at the top
    
    ... ... @@ -929,9 +929,12 @@ lintCoreExpr (Tick tickish expr)
    929 929
       = do { case tickish of
    
    930 930
                Breakpoint _ _ ids -> forM_ ids $ \id -> lintIdOcc id 0
    
    931 931
                _                  -> return ()
    
    932
    -       ; markAllJoinsBadIf block_joins $ lintCoreExpr expr }
    
    932
    +       ; expr_l <- lintCoreExpr expr
    
    933
    +       ; r <- markAllJoinsBadIf block_joins $ pure expr_l
    
    934
    +      --  ; when block_joins
    
    935
    +       ; pure r}
    
    933 936
       where
    
    934
    -    block_joins = not (tickish `tickishScopesLike` SoftScope)
    
    937
    +    block_joins = not (tickishCanScopeJoin tickish)
    
    935 938
           -- TODO Consider whether this is the correct rule. It is consistent with
    
    936 939
           -- the simplifier's behaviour - cost-centre-scoped ticks become part of
    
    937 940
           -- the continuation, and thus they behave like part of an evaluation
    

  • compiler/GHC/Core/Opt/Arity.hs
    ... ... @@ -90,7 +90,6 @@ import GHC.Utils.Misc
    90 90
     
    
    91 91
     import Data.List.NonEmpty ( nonEmpty )
    
    92 92
     import qualified Data.List.NonEmpty as NE
    
    93
    -import Data.Maybe( isJust )
    
    94 93
     
    
    95 94
     {-
    
    96 95
     ************************************************************************
    
    ... ... @@ -2835,22 +2834,6 @@ tryEtaReduce rec_ids bndrs body eval_sd
    2835 2834
     
    
    2836 2835
         ok_arg _ _ _ _ = Nothing
    
    2837 2836
     
    
    2838
    --- | Can we eta-reduce the given function
    
    2839
    --- See Note [Eta reduction soundness], criteria (B), (J), and (W).
    
    2840
    -cantEtaReduceFun :: Id -> Bool
    
    2841
    -cantEtaReduceFun fun
    
    2842
    -  =    hasNoBinding fun -- (B)
    
    2843
    -       -- Don't undersaturate functions with no binding.
    
    2844
    -
    
    2845
    -    ||  isJoinId fun    -- (J)
    
    2846
    -       -- Don't undersaturate join points.
    
    2847
    -       -- See Note [Invariants on join points] in GHC.Core, and #20599
    
    2848
    -
    
    2849
    -    || (isJust (idCbvMarks_maybe fun)) -- (W)
    
    2850
    -       -- Don't undersaturate StrictWorkerIds.
    
    2851
    -       -- See Note [CBV Function Ids: overview] in GHC.Types.Id.Info.
    
    2852
    -
    
    2853
    -
    
    2854 2837
     {- *********************************************************************
    
    2855 2838
     *                                                                      *
    
    2856 2839
                   The "push rules"
    

  • compiler/GHC/Core/Opt/OccurAnal.hs
    ... ... @@ -797,10 +797,10 @@ function call and a jump by looking at the occurrence (because the same pass
    797 797
     changes the 'IdDetails' and propagates the binders to their occurrence sites).
    
    798 798
     
    
    799 799
     To track potential join points, we use the 'occ_tail' field of OccInfo. A value
    
    800
    -of `AlwaysTailCalled n` indicates that every occurrence of the variable is a
    
    801
    -tail call with `n` arguments (counting both value and type arguments). Otherwise
    
    802
    -'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the
    
    803
    -rest of 'OccInfo' until it goes on the binder.
    
    800
    +of `AlwaysTailCalled { tailCallArity = n }` indicates that every occurrence of
    
    801
    +the variable is a tail call with `n` arguments (counting both value and type
    
    802
    +arguments). Otherwise 'occ_tail' will be 'NoTailCallInfo'. The tail call info
    
    803
    +flows bottom-up with the rest of 'OccInfo' until it goes on the binder.
    
    804 804
     
    
    805 805
     Note [Join arity prediction based on joinRhsArity]
    
    806 806
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -2585,13 +2585,21 @@ But it is not necessary to gather CoVars from the types of other binders.
    2585 2585
     occAnal env (Tick tickish body)
    
    2586 2586
       = WUD usage' (Tick tickish body')
    
    2587 2587
       where
    
    2588
    -    WUD usage body' = occAnal env body
    
    2588
    +    WUD usage body' = occAnal env' body
    
    2589
    +
    
    2590
    +    env' = case tickish of
    
    2591
    +      -- setInsideProfTick: join points under profiling ticks turn
    
    2592
    +      -- into quasi-join points. See Note [Quasi join points]
    
    2593
    +      ProfNote {} -> setInsideProfTick env
    
    2594
    +      _           -> env
    
    2589 2595
     
    
    2590 2596
         usage'
    
    2591
    -      | tickish `tickishScopesLike` SoftScope
    
    2597
    +      | tickishCanScopeJoin tickish
    
    2592 2598
           = usage  -- For soft-scoped ticks (including SourceNotes) we don't want
    
    2593 2599
                    -- to lose join-point-hood, so we don't mess with `usage` (#24078)
    
    2594 2600
     
    
    2601
    +               -- Similarly for cost centres. (#26157)
    
    2602
    +
    
    2595 2603
           -- For a non-soft tick scope, we can inline lambdas only, so we
    
    2596 2604
           -- abandon tail calls, and do markAllInsideLam too: usage_lam
    
    2597 2605
     
    
    ... ... @@ -2613,11 +2621,12 @@ occAnal env (Tick tickish body)
    2613 2621
         -- See #14242.
    
    2614 2622
     
    
    2615 2623
     occAnal env (Cast expr co)
    
    2616
    -  = let  (WUD usage expr') = occAnal env expr
    
    2617
    -         usage1 = addManyOccs usage (coVarsOfCo co)
    
    2618
    -             -- usage2: see Note [Gather occurrences of coercion variables]
    
    2619
    -         usage2 = markAllNonTail usage1
    
    2620
    -             -- usage3: calls inside expr aren't tail calls any more
    
    2624
    +  = let (WUD usage expr') = occAnal (setInsideCast env) expr
    
    2625
    +          -- setInsideCast: join points inside casts turn into quasi join points
    
    2626
    +          -- See Note [Quasi join points]
    
    2627
    +        usage1 = addManyOccs usage (coVarsOfCo co)
    
    2628
    +            -- usage2: see Note [Gather occurrences of coercion variables]
    
    2629
    +        usage2 = markAllNonTail usage1
    
    2621 2630
         in WUD usage2 (Cast expr' co)
    
    2622 2631
     
    
    2623 2632
     occAnal env app@(App _ _)
    
    ... ... @@ -2927,6 +2936,8 @@ scrutinised y).
    2927 2936
     
    
    2928 2937
     data OccEnv
    
    2929 2938
       = OccEnv { occ_encl       :: !OccEncl      -- Enclosing context information
    
    2939
    +           , occ_prof_ticks :: !Int -- ^ How many profiling ticks are we under? See Note [Quasi join points]
    
    2940
    +           , occ_casts      :: !Int -- ^ How many casts are we under? See Note [Quasi join points]
    
    2930 2941
                , occ_one_shots  :: !OneShots     -- See Note [OneShots]
    
    2931 2942
                , occ_unf_act    :: Id -> Bool    -- Which Id unfoldings are active
    
    2932 2943
                , occ_rule_act   :: ActivationGhc -> Bool  -- Which rules are active
    
    ... ... @@ -2992,6 +3003,8 @@ type OneShots = [OneShotInfo]
    2992 3003
     initOccEnv :: OccEnv
    
    2993 3004
     initOccEnv
    
    2994 3005
       = OccEnv { occ_encl      = OccVanilla
    
    3006
    +           , occ_prof_ticks = 0
    
    3007
    +           , occ_casts      = 0
    
    2995 3008
                , occ_one_shots = []
    
    2996 3009
     
    
    2997 3010
                      -- To be conservative, we say that all
    
    ... ... @@ -3070,6 +3083,12 @@ setTailCtxt !env = env { occ_encl = OccVanilla }
    3070 3083
         -- Preserve occ_one_shots, occ_join points
    
    3071 3084
         -- Do not use OccRhs for the RHS of a join point (which is a tail ctxt):
    
    3072 3085
     
    
    3086
    +setInsideProfTick :: OccEnv -> OccEnv
    
    3087
    +setInsideProfTick !env = env { occ_prof_ticks = 1 + occ_prof_ticks env }
    
    3088
    +
    
    3089
    +setInsideCast :: OccEnv -> OccEnv
    
    3090
    +setInsideCast !env = env { occ_casts = 1 + occ_casts env }
    
    3091
    +
    
    3073 3092
     mkRhsOccEnv :: OccEnv -> RecFlag -> OccEncl -> JoinPointHood -> Id -> CoreExpr -> OccEnv
    
    3074 3093
     -- See Note [The OccEnv for a right hand side]
    
    3075 3094
     -- For a join point:
    
    ... ... @@ -3696,7 +3715,7 @@ type OccInfoEnv = IdEnv LocalOcc -- A finite map from an expression's
    3696 3715
     data LocalOcc  -- See Note [LocalOcc]
    
    3697 3716
          = OneOccL { lo_n_br  :: {-# UNPACK #-} !BranchCount  -- Number of syntactic occurrences
    
    3698 3717
                    , lo_tail  :: !TailCallInfo
    
    3699
    -                   -- Combining (AlwaysTailCalled 2) and (AlwaysTailCalled 3)
    
    3718
    +                   -- NB: combining 'TailCallInfo's with different arities
    
    3700 3719
                        -- gives NoTailCallInfo
    
    3701 3720
                   , lo_int_cxt :: !InterestingCxt }
    
    3702 3721
     
    
    ... ... @@ -3789,9 +3808,20 @@ mkOneOcc !env id int_cxt arity
    3789 3808
       = mkSimpleDetails (unitVarEnv id occ)
    
    3790 3809
     
    
    3791 3810
       where
    
    3792
    -    occ = OneOccL { lo_n_br = 1
    
    3793
    -                  , lo_int_cxt = int_cxt
    
    3794
    -                  , lo_tail = AlwaysTailCalled arity }
    
    3811
    +    occ =
    
    3812
    +      OneOccL
    
    3813
    +        { lo_n_br = 1
    
    3814
    +        , lo_int_cxt = int_cxt
    
    3815
    +        , lo_tail =
    
    3816
    +            AlwaysTailCalled
    
    3817
    +              { tailCallArity = arity
    
    3818
    +
    
    3819
    +                -- See Note [Quasi join points] for justification of these
    
    3820
    +                -- two fields.
    
    3821
    +              , tailCallUnderProfTicks = occ_prof_ticks env
    
    3822
    +              , tailCallUnderCasts     = occ_casts env
    
    3823
    +              }
    
    3824
    +        }
    
    3795 3825
     
    
    3796 3826
     -- Add several occurrences, assumed not to be tail calls
    
    3797 3827
     add_many_occ :: Var -> OccInfoEnv -> OccInfoEnv
    
    ... ... @@ -3844,13 +3874,14 @@ delBndrsFromUDs bndrs (UD { ud_env = env, ud_z_many = z_many
    3844 3874
            , ud_z_tail    = z_tail   `delVarEnvList` bndrs }
    
    3845 3875
     
    
    3846 3876
     markAllMany, markAllInsideLam, markAllNonTail, markAllManyNonTail
    
    3847
    -  :: UsageDetails -> UsageDetails
    
    3877
    +  :: HasDebugCallStack => UsageDetails -> UsageDetails
    
    3848 3878
     markAllMany      ud@(UD { ud_env = env }) = ud { ud_z_many   = env }
    
    3849 3879
     markAllInsideLam ud@(UD { ud_env = env }) = ud { ud_z_in_lam = env }
    
    3850
    -markAllNonTail   ud@(UD { ud_env = env }) = ud { ud_z_tail   = env }
    
    3851 3880
     markAllManyNonTail = markAllMany . markAllNonTail -- effectively sets to noOccInfo
    
    3852 3881
     
    
    3853
    -markAllInsideLamIf, markAllNonTailIf :: Bool -> UsageDetails -> UsageDetails
    
    3882
    +markAllNonTail ud@(UD { ud_env = env }) = ud { ud_z_tail   = env }
    
    3883
    +
    
    3884
    +markAllInsideLamIf, markAllNonTailIf :: HasDebugCallStack => Bool -> UsageDetails -> UsageDetails
    
    3854 3885
     
    
    3855 3886
     markAllInsideLamIf  True  ud = markAllInsideLam ud
    
    3856 3887
     markAllInsideLamIf  False ud = ud
    
    ... ... @@ -3947,7 +3978,7 @@ adjustNonRecRhs mb_join_arity (WTUD (TUD rhs_ja uds) rhs)
    3947 3978
       where
    
    3948 3979
         exact_join = mb_join_arity == JoinPoint rhs_ja
    
    3949 3980
     
    
    3950
    -adjustTailUsage :: Bool        -- True <=> Exactly-matching join point; don't do markNonTail
    
    3981
    +adjustTailUsage :: HasDebugCallStack => Bool        -- True <=> Exactly-matching join point; don't do markNonTail
    
    3951 3982
                     -> CoreExpr    -- Rhs usage, AFTER occAnalLamTail
    
    3952 3983
                     -> UsageDetails
    
    3953 3984
                     -> UsageDetails
    
    ... ... @@ -3959,7 +3990,7 @@ adjustTailUsage exact_join rhs uds
    3959 3990
       where
    
    3960 3991
         one_shot   = isOneShotFun rhs
    
    3961 3992
     
    
    3962
    -adjustTailArity :: JoinPointHood -> TailUsageDetails -> UsageDetails
    
    3993
    +adjustTailArity :: HasDebugCallStack => JoinPointHood -> TailUsageDetails -> UsageDetails
    
    3963 3994
     adjustTailArity mb_rhs_ja (TUD ja usage)
    
    3964 3995
       = markAllNonTailIf (mb_rhs_ja /= JoinPoint ja) usage
    
    3965 3996
     
    
    ... ... @@ -3993,7 +4024,7 @@ tagNonRecBinder :: TopLevelFlag -- At top level?
    3993 4024
     -- Precondition: OccInfo is not IAmDead
    
    3994 4025
     tagNonRecBinder lvl occ bndr
    
    3995 4026
       | okForJoinPoint lvl bndr tail_call_info
    
    3996
    -  , AlwaysTailCalled ar <- tail_call_info
    
    4027
    +  , AlwaysTailCalled { tailCallArity = ar } <- tail_call_info
    
    3997 4028
       = (setBinderOcc occ bndr,        JoinPoint ar)
    
    3998 4029
       | otherwise
    
    3999 4030
       = (setBinderOcc zapped_occ bndr, NotJoinPoint)
    
    ... ... @@ -4080,7 +4111,7 @@ okForJoinPoint lvl bndr tail_call_info
    4080 4111
       = False
    
    4081 4112
       where
    
    4082 4113
         valid_join | NotTopLevel <- lvl
    
    4083
    -               , AlwaysTailCalled arity <- tail_call_info
    
    4114
    +               , AlwaysTailCalled { tailCallArity = arity } <- tail_call_info
    
    4084 4115
     
    
    4085 4116
                    , -- Invariant 1 as applied to LHSes of rules
    
    4086 4117
                      all (ok_rule arity) (idCoreRules bndr)
    
    ... ... @@ -4097,9 +4128,9 @@ okForJoinPoint lvl bndr tail_call_info
    4097 4128
     
    
    4098 4129
         lost_join | JoinPoint ja <- idJoinPointHood bndr
    
    4099 4130
                   = not valid_join ||
    
    4100
    -                (case tail_call_info of  -- Valid join but arity differs
    
    4101
    -                   AlwaysTailCalled ja' -> ja /= ja'
    
    4102
    -                   _                    -> False)
    
    4131
    +                (case tail_call_info of -- Valid join but arity differs
    
    4132
    +                   AlwaysTailCalled { tailCallArity = ja' } -> ja /= ja'
    
    4133
    +                   _ -> False)
    
    4103 4134
                   | otherwise = False
    
    4104 4135
     
    
    4105 4136
         ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans
    
    ... ... @@ -4121,7 +4152,7 @@ okForJoinPoint lvl bndr tail_call_info
    4121 4152
                  , text "tc:" <+> ppr tail_call_info
    
    4122 4153
                  , text "rules:" <+> ppr (idCoreRules bndr)
    
    4123 4154
                  , case tail_call_info of
    
    4124
    -                 AlwaysTailCalled arity ->
    
    4155
    +                 AlwaysTailCalled { tailCallArity = arity } ->
    
    4125 4156
                         vcat [ text "ok_unf:" <+> ppr (ok_unfolding arity (realIdUnfolding bndr))
    
    4126 4157
                              , text "ok_type:" <+> ppr (isValidJoinPointType arity (idType bndr)) ]
    
    4127 4158
                      _ -> empty ]
    
    ... ... @@ -4184,6 +4215,6 @@ orLocalOcc (OneOccL { lo_n_br = nbr1, lo_int_cxt = int_cxt1, lo_tail = tci1 })
    4184 4215
     orLocalOcc occ1 occ2 = andLocalOcc occ1 occ2
    
    4185 4216
     
    
    4186 4217
     andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo
    
    4187
    -andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2)
    
    4188
    -  | arity1 == arity2 = info
    
    4218
    +andTailCallInfo (AlwaysTailCalled arity1 p1 c1) (AlwaysTailCalled arity2 p2 c2)
    
    4219
    +  | arity1 == arity2 = AlwaysTailCalled arity1 (max p1 p2) (max c1 c2)
    
    4189 4220
     andTailCallInfo _ _  = NoTailCallInfo

  • compiler/GHC/Core/Opt/Simplify/Env.hs
    ... ... @@ -201,6 +201,9 @@ data SimplEnv
    201 201
     
    
    202 202
           , seCaseDepth   :: !Int  -- Depth of multi-branch case alternatives
    
    203 203
     
    
    204
    +      , seProfTicks   :: !Int  -- Current depth of profiling ticks; see Note [Quasi join points]
    
    205
    +      , seCasts       :: !Int  -- Current depth of casts; see Note [Quasi join points]
    
    206
    +
    
    204 207
           , seInlineDepth :: !Int  -- 0 initially, 1 when we inline an already-simplified
    
    205 208
                                    -- unfolding, and simplify again; and so on
    
    206 209
                                    -- See Note [Inline depth]
    
    ... ... @@ -590,6 +593,8 @@ mkSimplEnv mode fam_envs
    590 593
                  , seIdSubst     = emptyVarEnv
    
    591 594
                  , seRecIds      = emptyUnVarSet
    
    592 595
                  , seCaseDepth   = 0
    
    596
    +             , seProfTicks   = 0
    
    597
    +             , seCasts       = 0
    
    593 598
                  , seInlineDepth = 0 }
    
    594 599
             -- The top level "enclosing CC" is "SUBSUMED".
    
    595 600
     
    

  • compiler/GHC/Core/Opt/Simplify/Iteration.hs
    ... ... @@ -39,7 +39,7 @@ import GHC.Core.Opt.Arity ( ArityType, exprArity, arityTypeBotSigs_maybe
    39 39
                               , pushCoTyArg, pushCoValArg, exprIsDeadEnd
    
    40 40
                               , typeArity, arityTypeArity, etaExpandAT )
    
    41 41
     import GHC.Core.SimpleOpt ( exprIsConApp_maybe, joinPointBinding_maybe, joinPointBindings_maybe )
    
    42
    -import GHC.Core.FVs     ( mkRuleInfo {- exprsFreeIds -} )
    
    42
    +import GHC.Core.FVs     ( mkRuleInfo )
    
    43 43
     import GHC.Core.Rules   ( lookupRule, getRules )
    
    44 44
     import GHC.Core.Multiplicity
    
    45 45
     
    
    ... ... @@ -57,6 +57,7 @@ import GHC.Types.Unique ( hasKey )
    57 57
     import GHC.Types.Basic
    
    58 58
     import GHC.Types.Tickish
    
    59 59
     import GHC.Types.Var    ( isTyCoVar )
    
    60
    +
    
    60 61
     import GHC.Builtin.Types.Prim( realWorldStatePrimTy )
    
    61 62
     import GHC.Builtin.Names( runRWKey, seqHashKey )
    
    62 63
     
    
    ... ... @@ -1442,7 +1443,10 @@ simplTick env tickish expr cont
    1442 1443
     
    
    1443 1444
       no_floating_past_tick =
    
    1444 1445
         do { let (inc,outc) = splitCont cont
    
    1445
    -       ; (floats, expr1) <- simplExprF env expr inc
    
    1446
    +             env' = case tickish of
    
    1447
    +                  ProfNote {} -> env { seProfTicks = seProfTicks env + 1 }
    
    1448
    +                  _ -> env
    
    1449
    +       ; (floats, expr1) <- simplExprF env' expr inc
    
    1446 1450
            ; let expr2    = wrapFloats floats expr1
    
    1447 1451
                  tickish' = simplTickish env tickish
    
    1448 1452
            ; rebuild env (mkTick tickish' expr2) outc
    
    ... ... @@ -1680,39 +1684,54 @@ optOutCoercion env co already_optimised
    1680 1684
         empty_subst = mkEmptySubst (seInScope env)
    
    1681 1685
         opts = seOptCoercionOpts env
    
    1682 1686
     
    
    1687
    +-- | Number of casts we are adding around an expression as we process a 'Cast'.
    
    1688
    +--
    
    1689
    +-- We need the cast depth to implement the logic of Note [Quasi join points].
    
    1690
    +type NbCastsAdded = Int
    
    1691
    +
    
    1683 1692
     simplCast :: SimplEnv -> InExpr -> InCoercion -> SimplCont
    
    1684 1693
               -> SimplM (SimplFloats, OutExpr)
    
    1685 1694
     simplCast env body co0 cont0
    
    1686 1695
       = do  { co1   <- {-#SCC "simplCast-simplCoercion" #-} simplCoercion env co0
    
    1687
    -        ; cont1 <- {-#SCC "simplCast-addCoerce" #-}
    
    1688
    -                   if isReflCo co1
    
    1689
    -                   then return cont0  -- See Note [Optimising reflexivity]
    
    1690
    -                   else addCoerce co1 True cont0
    
    1691
    -                        -- True <=> co1 is optimised
    
    1692
    -        ; {-#SCC "simplCast-simplExprF" #-} simplExprF env body cont1 }
    
    1696
    +        ; (cont1, nbAddedCasts) <- {-#SCC "simplCast-addCoerce" #-}
    
    1697
    +            if isReflCo co1
    
    1698
    +            then return (cont0, 0) -- See Note [Optimising reflexivity]
    
    1699
    +            else addCoerce co1 True cont0
    
    1700
    +                 -- True <=> co1 is optimised
    
    1701
    +
    
    1702
    +          -- Keep track of how many casts we have added, because we need this
    
    1703
    +          -- information for Note [Quasi join points].
    
    1704
    +        ; let env' = env { seCasts = seCasts env + nbAddedCasts }
    
    1705
    +        ; {-#SCC "simplCast-simplExprF" #-} simplExprF env' body cont1 }
    
    1693 1706
       where
    
    1694 1707
     
    
    1695 1708
             -- If the first parameter is MRefl, then simplifying revealed a
    
    1696 1709
             -- reflexive coercion. Omit.
    
    1697
    -        addCoerceM :: MOutCoercion -> Bool -> SimplCont -> SimplM SimplCont
    
    1698
    -        addCoerceM MRefl    _   cont = return cont
    
    1710
    +        addCoerceM :: MOutCoercion -> Bool -> SimplCont -> SimplM (SimplCont, NbCastsAdded)
    
    1711
    +        addCoerceM MRefl    _   cont = return (cont, 0)
    
    1699 1712
             addCoerceM (MCo co) opt cont = addCoerce co opt cont
    
    1700 1713
     
    
    1701
    -        addCoerce :: OutCoercion -> Bool -> SimplCont -> SimplM SimplCont
    
    1714
    +        addCoerce :: OutCoercion -> Bool -> SimplCont -> SimplM (SimplCont, NbCastsAdded)
    
    1702 1715
             addCoerce co1 _ (CastIt { sc_co = co2, sc_cont = cont })  -- See Note [Optimising reflexivity]
    
    1703
    -          = addCoerce (mkTransCo co1 co2) False cont
    
    1704
    -                      -- False: (mkTransCo co1 co2) is not fully optimised
    
    1705
    -                      -- See Note [Avoid re-simplifying coercions]
    
    1716
    +          = do { (cont', nbCastsAdded) <- addCoerce (mkTransCo co1 co2) False cont
    
    1717
    +                                -- False: (mkTransCo co1 co2) is not fully optimised
    
    1718
    +                                -- See Note [Avoid re-simplifying coercions]
    
    1719
    +               ; return (cont', nbCastsAdded - 1)
    
    1720
    +                  -- -1: the coercion coalesced with an existing coercion.
    
    1721
    +               }
    
    1706 1722
     
    
    1707 1723
             addCoerce co co_is_opt (ApplyToTy { sc_arg_ty = arg_ty, sc_cont = tail })
    
    1708 1724
               | Just (arg_ty', m_co') <- pushCoTyArg co arg_ty
    
    1709 1725
               = {-#SCC "addCoerce-pushCoTyArg" #-}
    
    1710
    -            do { tail' <- addCoerceM m_co' co_is_opt tail
    
    1711
    -               ; return (ApplyToTy { sc_arg_ty  = arg_ty'
    
    1712
    -                                   , sc_cont    = tail'
    
    1713
    -                                   , sc_hole_ty = coercionLKind co }) }
    
    1714
    -                                        -- NB!  As the cast goes past, the
    
    1715
    -                                        -- type of the hole changes (#16312)
    
    1726
    +            do { (tail', nbCastsAdded) <- addCoerceM m_co' co_is_opt tail
    
    1727
    +               ; return ( ApplyToTy { sc_arg_ty  = arg_ty'
    
    1728
    +                                    , sc_cont    = tail'
    
    1729
    +                                    , sc_hole_ty = coercionLKind co }
    
    1730
    +                                      -- NB!  As the cast goes past, the
    
    1731
    +                                      -- type of the hole changes (#16312)
    
    1732
    +                        , nbCastsAdded )
    
    1733
    +               }
    
    1734
    +
    
    1716 1735
             -- (f |> co) e   ===>   (f (e |> co1)) |> co2
    
    1717 1736
             -- where   co :: (s1->s2) ~ (t1->t2)
    
    1718 1737
             --         co1 :: t1 ~ s1
    
    ... ... @@ -1725,10 +1744,12 @@ simplCast env body co0 cont0
    1725 1744
     
    
    1726 1745
               | Just (m_co1, m_co2) <- pushCoValArg co
    
    1727 1746
               = {-#SCC "addCoerce-pushCoValArg" #-}
    
    1728
    -            do { tail' <- addCoerceM m_co2 co_is_opt tail
    
    1747
    +            do { (tail', nbCastsAdded) <- addCoerceM m_co2 co_is_opt tail
    
    1729 1748
                    ; case m_co1 of {
    
    1730
    -                   MRefl -> return (cont { sc_cont = tail'
    
    1731
    -                                         , sc_hole_ty = coercionLKind co }) ;
    
    1749
    +                   MRefl -> return
    
    1750
    +                     ( cont { sc_cont = tail'
    
    1751
    +                            , sc_hole_ty = coercionLKind co }
    
    1752
    +                     , nbCastsAdded ) ;
    
    1732 1753
                           -- See Note [Avoiding simplifying repeatedly]
    
    1733 1754
     
    
    1734 1755
                        MCo co1 ->
    
    ... ... @@ -1738,17 +1759,23 @@ simplCast env body co0 cont0
    1738 1759
                         -- to make it all consistent.  It's a bit messy.
    
    1739 1760
                         -- But it isn't a common case.
    
    1740 1761
                         -- Example of use: #995
    
    1741
    -               ; return (ApplyToVal { sc_arg  = mkCast arg' co1
    
    1742
    -                                    , sc_env  = arg_se'
    
    1743
    -                                    , sc_dup  = dup'
    
    1744
    -                                    , sc_cont = tail'
    
    1745
    -                                    , sc_hole_ty = coercionLKind co }) } } }
    
    1762
    +               ; return
    
    1763
    +                   ( ApplyToVal { sc_arg  = mkCast arg' co1
    
    1764
    +                                , sc_env  = arg_se'
    
    1765
    +                                , sc_dup  = dup'
    
    1766
    +                                , sc_cont = tail'
    
    1767
    +                                , sc_hole_ty = coercionLKind co }
    
    1768
    +                   , nbCastsAdded ) } } }
    
    1746 1769
     
    
    1747 1770
             addCoerce co co_is_opt cont
    
    1748
    -          | isReflCo co = return cont  -- Having this at the end makes a huge
    
    1749
    -                                       -- difference in T12227, for some reason
    
    1750
    -                                       -- See Note [Optimising reflexivity]
    
    1751
    -          | otherwise = return (CastIt { sc_co = co, sc_opt = co_is_opt, sc_cont = cont })
    
    1771
    +          | isReflCo co = return (cont, 0 :: NbCastsAdded )
    
    1772
    +            -- Having this at the end makes a huge
    
    1773
    +            -- difference in T12227, for some reason
    
    1774
    +            -- See Note [Optimising reflexivity]
    
    1775
    +          | otherwise =
    
    1776
    +            return
    
    1777
    +              ( CastIt { sc_co = co, sc_opt = co_is_opt, sc_cont = cont }
    
    1778
    +              , 1 :: NbCastsAdded )
    
    1752 1779
     
    
    1753 1780
     simplLazyArg :: SimplEnvIS              -- ^ Used only for its InScopeSet
    
    1754 1781
                  -> DupFlag
    
    ... ... @@ -2051,8 +2078,8 @@ simplNonRecJoinPoint :: SimplEnv -> InId -> InExpr
    2051 2078
                          -> InExpr -> SimplCont
    
    2052 2079
                          -> SimplM (SimplFloats, OutExpr)
    
    2053 2080
     simplNonRecJoinPoint env bndr rhs body cont
    
    2054
    -   = assert (isJoinId bndr ) $
    
    2055
    -     wrapJoinCont env cont $ \ env cont ->
    
    2081
    +   = assert (isJoinId bndr) $
    
    2082
    +     wrapJoinCont do_case_case env cont $ \ env cont ->
    
    2056 2083
          do { -- We push join_cont into the join RHS and the body;
    
    2057 2084
               -- and wrap wrap_cont around the whole thing
    
    2058 2085
             ; let mult   = contHoleScaling cont
    
    ... ... @@ -2062,14 +2089,17 @@ simplNonRecJoinPoint env bndr rhs body cont
    2062 2089
             ; (floats1, env3)  <- simplJoinBind NonRecursive cont (bndr,env) (bndr2,env2) (rhs,env)
    
    2063 2090
             ; (floats2, body') <- simplExprF env3 body cont
    
    2064 2091
             ; return (floats1 `addFloats` floats2, body') }
    
    2092
    +  where
    
    2093
    +    do_case_case =
    
    2094
    +      if isTrueJoinPoint env bndr
    
    2095
    +      then seCaseCase env
    
    2096
    +      else False
    
    2065 2097
     
    
    2066
    -
    
    2067
    -------------------
    
    2068 2098
     simplRecJoinPoint :: SimplEnv -> [(InId, InExpr)]
    
    2069 2099
                       -> InExpr -> SimplCont
    
    2070 2100
                       -> SimplM (SimplFloats, OutExpr)
    
    2071 2101
     simplRecJoinPoint env pairs body cont
    
    2072
    -  = wrapJoinCont env cont $ \ env cont ->
    
    2102
    +  = wrapJoinCont do_case_case env cont $ \ env cont ->
    
    2073 2103
         do { let bndrs  = map fst pairs
    
    2074 2104
                  mult   = contHoleScaling cont
    
    2075 2105
                  res_ty = contResultType cont
    
    ... ... @@ -2079,30 +2109,53 @@ simplRecJoinPoint env pairs body cont
    2079 2109
            ; (floats1, env2)  <- simplRecBind env1 (BC_Join Recursive cont) pairs
    
    2080 2110
            ; (floats2, body') <- simplExprF env2 body cont
    
    2081 2111
            ; return (floats1 `addFloats` floats2, body') }
    
    2112
    +  where
    
    2113
    +    do_case_case =
    
    2114
    +      if all (isTrueJoinPoint env . fst) pairs
    
    2115
    +      then seCaseCase env
    
    2116
    +      else False
    
    2117
    +
    
    2118
    +-- | Is this a true join point, or only a quasi join point?
    
    2119
    +--
    
    2120
    +-- See Note [Quasi join points]
    
    2121
    +isTrueJoinPoint :: SimplEnv -> InId -> Bool
    
    2122
    +isTrueJoinPoint env id
    
    2123
    +  | Just occMaxProfTicks <- occursUnderProfTicks (idOccInfo id)
    
    2124
    +  , occMaxProfTicks > seProfTicks env
    
    2125
    +  -- The join point occurs under more profiling ticks that its binding.
    
    2126
    +  = False
    
    2127
    +  | Just occMaxCasts <- occursUnderCasts (idOccInfo id)
    
    2128
    +  , occMaxCasts > seCasts env
    
    2129
    +  -- The join point occurs under more casts than its binding.
    
    2130
    +  = False
    
    2131
    +  | otherwise
    
    2132
    +  = True
    
    2082 2133
     
    
    2083 2134
     --------------------
    
    2084
    -wrapJoinCont :: SimplEnv -> SimplCont
    
    2135
    +wrapJoinCont :: Bool
    
    2136
    +             -> SimplEnv -> SimplCont
    
    2085 2137
                  -> (SimplEnv -> SimplCont -> SimplM (SimplFloats, OutExpr))
    
    2086 2138
                  -> SimplM (SimplFloats, OutExpr)
    
    2087 2139
     -- Deal with making the continuation duplicable if necessary,
    
    2088 2140
     -- and with the no-case-of-case situation.
    
    2089
    -wrapJoinCont env cont thing_inside
    
    2141
    +wrapJoinCont do_case_case env cont thing_inside
    
    2090 2142
       | contIsStop cont        -- Common case; no need for fancy footwork
    
    2091 2143
       = thing_inside env cont
    
    2092 2144
     
    
    2093
    -  | not (seCaseCase env)
    
    2094
    -    -- See Note [Join points with -fno-case-of-case]
    
    2095
    -  = do { (floats1, expr1) <- thing_inside env (mkBoringStop (contHoleType cont))
    
    2096
    -       ; let (floats2, expr2) = wrapJoinFloatsX floats1 expr1
    
    2097
    -       ; (floats3, expr3) <- rebuild (env `setInScopeFromF` floats2) expr2 cont
    
    2098
    -       ; return (floats2 `addFloats` floats3, expr3) }
    
    2099
    -
    
    2100
    -  | otherwise
    
    2101
    -    -- Normal case; see Note [Join points and case-of-case]
    
    2145
    +  | do_case_case
    
    2146
    +    -- Normal situation: do the "case-of-case" transformation.
    
    2147
    +    -- See Note [Join points and case-of-case].
    
    2102 2148
       = do { (floats1, cont')  <- mkDupableCont env cont
    
    2103 2149
            ; (floats2, result) <- thing_inside (env `setInScopeFromF` floats1) cont'
    
    2104 2150
            ; return (floats1 `addFloats` floats2, result) }
    
    2105 2151
     
    
    2152
    +  | otherwise
    
    2153
    +    -- No "case-of-case" transformation.
    
    2154
    +    -- See Note [Join points with -fno-case-of-case].
    
    2155
    +  = do { (floats1, expr1) <- thing_inside env (mkBoringStop (contHoleType cont))
    
    2156
    +       ; let (floats2, expr2) = wrapJoinFloatsX floats1 expr1
    
    2157
    +       ; (floats3, expr3) <- rebuild (env `setInScopeFromF` floats2) expr2 cont
    
    2158
    +       ; return (floats2 `addFloats` floats3, expr3) }
    
    2106 2159
     
    
    2107 2160
     --------------------
    
    2108 2161
     trimJoinCont :: Id         -- Used only in error message
    
    ... ... @@ -2151,15 +2204,18 @@ evaluation context E):
    2151 2204
     
    
    2152 2205
     As is evident from the example, there are two components to this behavior:
    
    2153 2206
     
    
    2154
    -  1. When entering the RHS of a join point, copy the context inside.
    
    2155
    -  2. When a join point is invoked, discard the outer context.
    
    2207
    +  (wrapJoinCont) When entering the RHS of a join point, copy the context inside.
    
    2208
    +  (trimJoinCont) When a join point is invoked, discard the outer context.
    
    2156 2209
     
    
    2157 2210
     We need to be very careful here to remain consistent---neither part is
    
    2158 2211
     optional!
    
    2159 2212
     
    
    2160
    -We need do make the continuation E duplicable (since we are duplicating it)
    
    2213
    +We need to make the continuation E duplicable (since we are duplicating it)
    
    2161 2214
     with mkDupableCont.
    
    2162 2215
     
    
    2216
    +Note that not all join points support this transformation:
    
    2217
    +see Note [Quasi join points].
    
    2218
    +
    
    2163 2219
     
    
    2164 2220
     Note [Join points with -fno-case-of-case]
    
    2165 2221
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -2184,7 +2240,8 @@ case-of-case we may then end up with this totally bogus result
    2184 2240
     This would be OK in the language of the paper, but not in GHC: j is no longer
    
    2185 2241
     a join point.  We can only do the "push continuation into the RHS of the
    
    2186 2242
     join point j" if we also push the continuation right down to the /jumps/ to
    
    2187
    -j, so that it can evaporate there.  If we are doing case-of-case, we'll get to
    
    2243
    +j, so that it can evaporate there (trimJoinCont). Then, if we are doing
    
    2244
    +case-of-case, we'll get to
    
    2188 2245
     
    
    2189 2246
         join x = case <j-rhs> of <outer-alts> in
    
    2190 2247
         case y of
    
    ... ... @@ -2199,6 +2256,105 @@ inwards altogether at any join point. Instead simplify the (join ... in ...)
    2199 2256
     with a Stop continuation, and wrap the original continuation around the
    
    2200 2257
     outside.  Surprisingly tricky!
    
    2201 2258
     
    
    2259
    +Note [Quasi join points]
    
    2260
    +~~~~~~~~~~~~~~~~~~~~~~~~
    
    2261
    +We currently classify join points into two separate categories
    
    2262
    +
    
    2263
    +  - true join points
    
    2264
    +  - quasi join points
    
    2265
    +
    
    2266
    +Definition:
    
    2267
    +  A join point binding defines a *quasi* join point if any of the join point
    
    2268
    +  binders occur under profiling ticks or casts.
    
    2269
    +
    
    2270
    +  If a join point binding is not a quasi join point, it is a *true* join point.
    
    2271
    +
    
    2272
    +We can push continuations into true join points, as described in
    
    2273
    +Note [Join points and case-of-case]:
    
    2274
    +
    
    2275
    +  K[ join j = rhs in body ]  -->   join j = K[ rhs ] in K[ body ]
    
    2276
    +
    
    2277
    +This transformation is not valid if the occurrences of 'j' in 'body' appear:
    
    2278
    +
    
    2279
    +  1. under casts, see #26422
    
    2280
    +  2. under profiling ticks, see #26693 #26157 #26642
    
    2281
    +
    
    2282
    +For example, consider (a minimisation of) the program in #26693:
    
    2283
    +
    
    2284
    +  join { j :: Bool -> IO (); j _ = guts }
    
    2285
    +  in case pass of
    
    2286
    +    False -> scctick<foo> jump j True
    
    2287
    +    True  ->              jump j False
    
    2288
    +
    
    2289
    +Let's try to push the application to an argument 'arg' into this expression.
    
    2290
    +As per Note [Join points and case-of-case], we proceed by first applying the
    
    2291
    +argument to both the join point RHS and the case alternatives:
    
    2292
    +
    
    2293
    +  join { j :: Bool -> IO (); j _ = guts arg ] }
    
    2294
    +    in case pass of
    
    2295
    +      False -> (scctick<foo> jump j True) arg
    
    2296
    +      True  ->               jump j False arg
    
    2297
    +
    
    2298
    +Then we rely on 'trimJoinCont' to remove the argument. In this case, this fails
    
    2299
    +for the first branch, because 'trimJoinCont' doesn't look through profiling
    
    2300
    +ticks. Were we to address this, it's still not clear what code we would want to
    
    2301
    +end up with, as we don't want to misattribute profiling costs.
    
    2302
    +We could plausibly transform to the following:
    
    2303
    +
    
    2304
    +  join { j :: Bool -> IO (); j scc_or_null _ = (setSCC# scc_or_null guts) arg ] }
    
    2305
    +    in case pass of
    
    2306
    +      False -> jump j <foo> True
    
    2307
    +      True  -> jump j null  False
    
    2308
    +
    
    2309
    +where `setSCC#` is a new primop that would set the current cost centre pointer
    
    2310
    +(or no-op if the given pointer is null).
    
    2311
    +However:
    
    2312
    +  - this primop doesn't exist today,
    
    2313
    +  - it requires adding an argument to the join point (hence changing its arity)
    
    2314
    +So instead, for now, we simply disallow the case-of-case transformation for 'j'.
    
    2315
    +
    
    2316
    +Similarly for casts:
    
    2317
    +
    
    2318
    +    join { j = blah }
    
    2319
    +      in case e of
    
    2320
    +        False -> j True  |> co1
    
    2321
    +        True  -> j False |> co2
    
    2322
    +
    
    2323
    +if we want to apply this to an argument 'arg', we would need to perform the
    
    2324
    +following transformation:
    
    2325
    +
    
    2326
    +    join { j co = ( blah |> co ) arg }
    
    2327
    +      in case e of
    
    2328
    +        False -> j co1 True
    
    2329
    +        True  -> j co2 False
    
    2330
    +
    
    2331
    +in which we add a coercion argument to the join point. Again, this is not a
    
    2332
    +transformation we currently implement, so we instead prevent case-of-case for
    
    2333
    +such join points.
    
    2334
    +
    
    2335
    +To figure out whether a join point is a true join point or a quasi join point,
    
    2336
    +we proceed as follows:
    
    2337
    +
    
    2338
    +  1. In occurrence analysis, we compute how many profiling ticks/casts each
    
    2339
    +     join point Id occurs under.
    
    2340
    +
    
    2341
    +     This is stored in the 'tailCallUnderProfTicks' and 'tailCallUnderCasts'
    
    2342
    +     fields of 'TailCallInfo', and populated by keeping track of how many
    
    2343
    +     profiling ticks and casts we are under when doing occurrence analysis
    
    2344
    +     (see 'occ_prof_ticks' and 'occ_casts').
    
    2345
    +
    
    2346
    +  2. In the simplifier, we keep track of how many profiling ticks/casts we are
    
    2347
    +     currently inside.  See 'seProfTicks' and 'seCasts', which are updated
    
    2348
    +     in 'simplTick' and 'simplCast', respectively.
    
    2349
    +
    
    2350
    +  3. In the simplifier, when we come across a join point binding (in either
    
    2351
    +     'simplNonRecJoinPoint' or 'simplRecJoinPoint'), we compare the current
    
    2352
    +     cast depth/profiling tick depth with the cast depth/profiling tick depth
    
    2353
    +     of the occurrences of the join point binders.
    
    2354
    +
    
    2355
    +     If a join point binder occurs under more profiling ticks/casts than its
    
    2356
    +     binding site, then it is a quasi join point and we switch off the
    
    2357
    +     case-of-case transformation.
    
    2202 2358
     
    
    2203 2359
     ************************************************************************
    
    2204 2360
     *                                                                      *
    

  • compiler/GHC/Core/SimpleOpt.hs
    ... ... @@ -1076,7 +1076,7 @@ joinPointBinding_maybe bndr rhs
    1076 1076
       | isJoinId bndr
    
    1077 1077
       = Just (bndr, rhs)
    
    1078 1078
     
    
    1079
    -  | AlwaysTailCalled join_arity <- tailCallInfo (idOccInfo bndr)
    
    1079
    +  | AlwaysTailCalled { tailCallArity = join_arity } <- tailCallInfo (idOccInfo bndr)
    
    1080 1080
       , (bndrs, body) <- etaExpandToJoinPoint join_arity rhs
    
    1081 1081
       , let str_sig   = idDmdSig bndr
    
    1082 1082
             str_arity = count isId bndrs  -- Strictness demands are for Ids only
    

  • compiler/GHC/Core/Utils.hs
    ... ... @@ -35,6 +35,7 @@ module GHC.Core.Utils (
    35 35
             exprIsTopLevelBindable,
    
    36 36
             exprIsUnaryClassFun, isUnaryClassId,
    
    37 37
             altsAreExhaustive, etaExpansionTick,
    
    38
    +        cantEtaReduceFun,
    
    38 39
     
    
    39 40
             -- * Equality
    
    40 41
             cheapEqExpr, cheapEqExpr', diffBinds,
    
    ... ... @@ -2081,9 +2082,24 @@ altsAreExhaustive (Alt con1 _ _ : alts)
    2081 2082
     -- Takes the function we are applying as argument.
    
    2082 2083
     etaExpansionTick :: Id -> GenTickish pass -> Bool
    
    2083 2084
     etaExpansionTick id t
    
    2084
    -  = hasNoBinding id &&
    
    2085
    +  = ( cantEtaReduceFun id ) &&
    
    2085 2086
         ( tickishFloatable t || isProfTick t )
    
    2086 2087
     
    
    2088
    +-- | Can we eta-reduce the given function?
    
    2089
    +-- See Note [Eta reduction soundness], criteria (B), (J), and (W).
    
    2090
    +cantEtaReduceFun :: Id -> Bool
    
    2091
    +cantEtaReduceFun fun
    
    2092
    +  =    hasNoBinding fun -- (B)
    
    2093
    +       -- Don't undersaturate functions with no binding.
    
    2094
    +
    
    2095
    +    || isJoinId fun    -- (J)
    
    2096
    +       -- Don't undersaturate join points.
    
    2097
    +       -- See Note [Invariants on join points] in GHC.Core, and #20599
    
    2098
    +
    
    2099
    +    || isJust (idCbvMarks_maybe fun) -- (W)
    
    2100
    +       -- Don't undersaturate StrictWorkerIds.
    
    2101
    +       -- See Note [CBV Function Ids] in GHC.Types.Id.Info.
    
    2102
    +
    
    2087 2103
     {- Note [exprOkForSpeculation and type classes]
    
    2088 2104
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    2089 2105
     Consider (#22745, #15205)
    

  • compiler/GHC/CoreToStg/Prep.hs
    ... ... @@ -1130,7 +1130,10 @@ cpeApp top_env expr
    1130 1130
                      hd = getIdFromTrivialExpr_maybe e2
    
    1131 1131
                      -- Determine number of required arguments. See Note [Ticks and mandatory eta expansion]
    
    1132 1132
                      min_arity = case hd of
    
    1133
    -                   Just v_hd -> if hasNoBinding v_hd then Just $! (idArity v_hd) else Nothing
    
    1133
    +                   Just v_hd ->
    
    1134
    +                     if cantEtaReduceFun v_hd
    
    1135
    +                     then Just $! idArity v_hd
    
    1136
    +                     else Nothing
    
    1134 1137
                        Nothing -> Nothing
    
    1135 1138
               --  ; pprTraceM "cpe_app:stricts:" (ppr v <+> ppr args $$ ppr stricts $$ ppr (idCbvMarks_maybe v))
    
    1136 1139
                ; (app, floats, unsat_ticks) <- rebuild_app env args e2 emptyFloats stricts min_arity
    

  • compiler/GHC/Types/Basic.hs
    ... ... @@ -70,7 +70,7 @@ module GHC.Types.Basic (
    70 70
             BranchCount, oneBranch,
    
    71 71
             InterestingCxt(..),
    
    72 72
             TailCallInfo(..), tailCallInfo, zapOccTailCallInfo,
    
    73
    -        isAlwaysTailCalled,
    
    73
    +        isAlwaysTailCalled, occursUnderProfTicks, occursUnderCasts,
    
    74 74
     
    
    75 75
             EP(..),
    
    76 76
     
    
    ... ... @@ -1149,8 +1149,14 @@ instance Monoid InsideLam where
    1149 1149
       mappend = (Semi.<>)
    
    1150 1150
     
    
    1151 1151
     -----------------
    
    1152
    +
    
    1153
    +-- | See Note [TailCallInfo]
    
    1152 1154
     data TailCallInfo
    
    1153
    -  = AlwaysTailCalled {-# UNPACK #-} !JoinArity -- See Note [TailCallInfo]
    
    1155
    +  = AlwaysTailCalled
    
    1156
    +     { tailCallArity :: {-# UNPACK #-} !JoinArity
    
    1157
    +     , tailCallUnderProfTicks :: !Int -- See Note [Quasi join points]
    
    1158
    +     , tailCallUnderCasts     :: !Int -- See Note [Quasi join points]
    
    1159
    +     }
    
    1154 1160
       | NoTailCallInfo
    
    1155 1161
       deriving (Eq)
    
    1156 1162
     
    
    ... ... @@ -1167,9 +1173,26 @@ isAlwaysTailCalled occ
    1167 1173
       = case tailCallInfo occ of AlwaysTailCalled{} -> True
    
    1168 1174
                                  NoTailCallInfo     -> False
    
    1169 1175
     
    
    1176
    +-- | If this 'Id' is always tail called, how many profiling ticks does
    
    1177
    +-- it occur under? See Note [Quasi join points].
    
    1178
    +occursUnderProfTicks :: OccInfo -> Maybe Int
    
    1179
    +occursUnderProfTicks occ =
    
    1180
    +  case tailCallInfo occ of
    
    1181
    +    AlwaysTailCalled { tailCallUnderProfTicks = nb } -> Just nb
    
    1182
    +    NoTailCallInfo -> Nothing
    
    1183
    +
    
    1184
    +-- | If this 'Id' is always tail called, how many casts does
    
    1185
    +-- it occur under? See Note [Quasi join points].
    
    1186
    +occursUnderCasts :: OccInfo -> Maybe Int
    
    1187
    +occursUnderCasts occ =
    
    1188
    +  case tailCallInfo occ of
    
    1189
    +    AlwaysTailCalled { tailCallUnderCasts = nb } -> Just nb
    
    1190
    +    NoTailCallInfo -> Nothing
    
    1191
    +
    
    1170 1192
     instance Outputable TailCallInfo where
    
    1171
    -  ppr (AlwaysTailCalled ar) = sep [ text "Tail", int ar ]
    
    1172
    -  ppr _                     = empty
    
    1193
    +  ppr (AlwaysTailCalled ar p c) =
    
    1194
    +    sep [ text "Tail", brackets (int p <> comma <> int c), int ar ]
    
    1195
    +  ppr NoTailCallInfo = text "NoTailCallInfo"
    
    1173 1196
     
    
    1174 1197
     -----------------
    
    1175 1198
     strongLoopBreaker, weakLoopBreaker :: OccInfo
    
    ... ... @@ -1217,7 +1240,10 @@ instance Outputable OccInfo where
    1217 1240
               pp_tail                = pprShortTailCallInfo tail_info
    
    1218 1241
     
    
    1219 1242
     pprShortTailCallInfo :: TailCallInfo -> SDoc
    
    1220
    -pprShortTailCallInfo (AlwaysTailCalled ar) = char 'T' <> brackets (int ar)
    
    1243
    +pprShortTailCallInfo (AlwaysTailCalled ar p c)
    
    1244
    +  = char 'T' <> (brackets (text "P" <+> int p))
    
    1245
    +             <> (brackets (text "C" <+> int c))
    
    1246
    +             <> brackets (int ar)
    
    1221 1247
     pprShortTailCallInfo NoTailCallInfo        = empty
    
    1222 1248
     
    
    1223 1249
     {-
    
    ... ... @@ -1251,6 +1277,9 @@ point can also be invoked from other join points, not just from case branches:
    1251 1277
     Here both 'j1' and 'j2' will get marked AlwaysTailCalled, but j1 will get
    
    1252 1278
     ManyOccs and j2 will get `OneOcc { occ_n_br = 2 }`.
    
    1253 1279
     
    
    1280
    +We also store how many profiling ticks and casts the join point occurs under.
    
    1281
    +The rationale is described in Note [Quasi join points].
    
    1282
    +
    
    1254 1283
     ************************************************************************
    
    1255 1284
     *                                                                      *
    
    1256 1285
                     Default method specification
    

  • compiler/GHC/Types/Tickish.hs
    ... ... @@ -11,6 +11,7 @@ module GHC.Types.Tickish (
    11 11
       tickishScopesLike,
    
    12 12
       tickishFloatable,
    
    13 13
       tickishCanSplit,
    
    14
    +  tickishCanScopeJoin,
    
    14 15
       mkNoCount,
    
    15 16
       mkNoScope,
    
    16 17
       tickishIsCode,
    
    ... ... @@ -326,6 +327,14 @@ tickishCanSplit ProfNote{profNoteScope = True, profNoteCount = True}
    326 327
                        = True
    
    327 328
     tickishCanSplit _  = False
    
    328 329
     
    
    330
    +-- | Is @join f x in <tick> jump f x@ valid?
    
    331
    +tickishCanScopeJoin :: GenTickish pass -> Bool
    
    332
    +tickishCanScopeJoin tick = case tick of
    
    333
    +  ProfNote{} -> True
    
    334
    +  HpcTick{} -> False
    
    335
    +  Breakpoint{} -> False
    
    336
    +  SourceNote{} -> True
    
    337
    +
    
    329 338
     mkNoCount :: GenTickish pass -> GenTickish pass
    
    330 339
     mkNoCount n | not (tickishCounts n)   = n
    
    331 340
                 | not (tickishCanSplit n) = panic "mkNoCount: Cannot split!"