Simon Peyton Jones pushed to branch wip/T23109a at Glasgow Haskell Compiler / GHC

Commits:

10 changed files:

Changes:

  • compiler/GHC/Core/Opt/OccurAnal.hs
    ... ... @@ -55,7 +55,7 @@ import GHC.Types.Tickish
    55 55
     import GHC.Types.Var.Set
    
    56 56
     import GHC.Types.Var.Env
    
    57 57
     import GHC.Types.Var
    
    58
    -import GHC.Types.Demand ( argOneShots, argsOneShots, isDeadEndSig )
    
    58
    +import GHC.Types.Demand ( argOneShots, argsOneShots {- , isDeadEndSig -} )
    
    59 59
     
    
    60 60
     import GHC.Utils.Outputable
    
    61 61
     import GHC.Utils.Panic
    
    ... ... @@ -1096,14 +1096,14 @@ mkNonRecRhsCtxt lvl bndr unf
    1096 1096
         certainly_inline -- See Note [Cascading inlines]
    
    1097 1097
           = -- mkNonRecRhsCtxt is only used for non-join points, so occAnalBind
    
    1098 1098
             -- has set the OccInfo for this binder before calling occAnalNonRecRhs
    
    1099
    +        -- Distressing delicacy ... has to line up with preInlineUnconditionally
    
    1099 1100
             case idOccInfo bndr of
    
    1100 1101
               OneOcc { occ_in_lam = NotInsideLam, occ_n_br = 1 }
    
    1101
    -            -> active && not stable_unf && not top_bottoming
    
    1102
    +            -> active && not (isTopLevel lvl) && not stable_unf
    
    1102 1103
               _ -> False
    
    1103 1104
     
    
    1104 1105
         active     = isAlwaysActive (idInlineActivation bndr)
    
    1105 1106
         stable_unf = isStableUnfolding unf
    
    1106
    -    top_bottoming = isTopLevel lvl && isDeadEndId bndr
    
    1107 1107
     
    
    1108 1108
     -----------------
    
    1109 1109
     occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
    
    ... ... @@ -2580,8 +2580,9 @@ occAnalArgs !env fun args !one_shots
    2580 2580
     
    
    2581 2581
         -- Make bottoming functions interesting
    
    2582 2582
         -- See Note [Bottoming function calls]
    
    2583
    -    encl | Var f <- fun, isDeadEndSig (idDmdSig f) = OccScrut
    
    2584
    -         | otherwise                               = OccVanilla
    
    2583
    +--    encl | Var f <- fun, isDeadEndSig (idDmdSig f) = OccScrut
    
    2584
    +--         | otherwise                               = OccVanilla
    
    2585
    +    encl = OccVanilla
    
    2585 2586
     
    
    2586 2587
         go uds fun [] _ = WUD uds fun
    
    2587 2588
         go uds fun (arg:args) one_shots
    
    ... ... @@ -2606,7 +2607,8 @@ Consider
    2606 2607
        let x = (a,b) in
    
    2607 2608
        case p of
    
    2608 2609
           A -> ...(error x)..
    
    2609
    -      B -> ...(ertor x)...
    
    2610
    +      B -> ...(error x)...
    
    2611
    +      C -> ..blah...
    
    2610 2612
     
    
    2611 2613
     postInlineUnconditionally may duplicate x's binding, but sometimes it
    
    2612 2614
     does so only if the use site IsInteresting.  Pushing allocation into error
    
    ... ... @@ -2616,6 +2618,9 @@ setting occ_encl = OccScrut for such calls.
    2616 2618
     The slightly-artificial test T21128 is a good example.  It's probably
    
    2617 2619
     not a huge deal.
    
    2618 2620
     
    
    2621
    +ToDo!!!  Fix comment.   Now postinlineUnconditionally ignores intersting-ness for
    
    2622
    +non-top-level things.
    
    2623
    +
    
    2619 2624
     Note [Arguments of let-bound constructors]
    
    2620 2625
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    2621 2626
     Consider
    

  • compiler/GHC/Core/Opt/Pipeline.hs
    ... ... @@ -217,7 +217,7 @@ getCoreToDo dflags hpt_rule_base extra_vars
    217 217
             if full_laziness then
    
    218 218
                CoreDoFloatOutwards $ FloatOutSwitches
    
    219 219
                     { floatOutLambdas     = Just 0
    
    220
    -                , floatOutConstants   = True
    
    220
    +                , floatOutConstants   = False  -- Initially
    
    221 221
                     , floatOutOverSatApps = False
    
    222 222
                     , floatToTopLevelOnly = False
    
    223 223
                     , floatJoinsToTop     = False  -- Initially, don't float join points at all
    
    ... ... @@ -284,7 +284,7 @@ getCoreToDo dflags hpt_rule_base extra_vars
    284 284
             --        f_el22 (f_el21 r_midblock)
    
    285 285
             runWhen full_laziness $ CoreDoFloatOutwards $ FloatOutSwitches
    
    286 286
                    { floatOutLambdas     = floatLamArgs dflags
    
    287
    -               , floatOutConstants   = True
    
    287
    +               , floatOutConstants   = True  -- For SpecConstr and CSE
    
    288 288
                    , floatOutOverSatApps = True
    
    289 289
                    , floatToTopLevelOnly = False
    
    290 290
                    , floatJoinsToTop     = True },
    

  • compiler/GHC/Core/Opt/SetLevels.hs
    ... ... @@ -406,7 +406,7 @@ lvlApp env orig_expr ((_,AnnVar fn), args)
    406 406
       , arity < n_val_args
    
    407 407
       , Nothing <- isClassOpId_maybe fn
    
    408 408
       =  do { rargs' <- mapM (lvlNonTailMFE env False) rargs
    
    409
    -        ; lapp'  <- lvlNonTailMFE env False lapp
    
    409
    +        ; lapp'  <- lvlNonTailMFE env True lapp
    
    410 410
             ; return (foldl' App lapp' rargs') }
    
    411 411
     
    
    412 412
       | otherwise
    
    ... ... @@ -482,14 +482,14 @@ Consider this:
    482 482
       f :: T Int -> blah
    
    483 483
       f x vs = case x of { MkT y ->
    
    484 484
                  let f vs = ...(case y of I# w -> e)...f..
    
    485
    -             in f vs
    
    485
    +             in f vs }
    
    486 486
     
    
    487 487
     Here we can float the (case y ...) out, because y is sure
    
    488 488
     to be evaluated, to give
    
    489 489
       f x vs = case x of { MkT y ->
    
    490
    -           case y of I# w ->
    
    490
    +           case y of { I# w ->
    
    491 491
                  let f vs = ...(e)...f..
    
    492
    -             in f vs
    
    492
    +             in f vs }}
    
    493 493
     
    
    494 494
     That saves unboxing it every time round the loop.  It's important in
    
    495 495
     some DPH stuff where we really want to avoid that repeated unboxing in
    
    ... ... @@ -614,7 +614,8 @@ lvlMFE env strict_ctxt e@(_, AnnCase {})
    614 614
       = lvlExpr env e     -- See Note [Case MFEs]
    
    615 615
     
    
    616 616
     lvlMFE env strict_ctxt ann_expr
    
    617
    -  | not float_me
    
    617
    +  |  notWorthFloating expr abs_vars
    
    618
    +  || not float_me
    
    618 619
       || floatTopLvlOnly env && not (isTopLvl dest_lvl)
    
    619 620
              -- Only floating to the top level is allowed.
    
    620 621
       || hasFreeJoin env fvs   -- If there is a free join, don't float
    
    ... ... @@ -623,9 +624,6 @@ lvlMFE env strict_ctxt ann_expr
    623 624
              -- We can't let-bind an expression if we don't know
    
    624 625
              -- how it will be represented at runtime.
    
    625 626
              -- See Note [Representation polymorphism invariants] in GHC.Core
    
    626
    -  || notWorthFloating expr abs_vars
    
    627
    -         -- Test notWorhtFloating last;
    
    628
    -         -- See Note [Large free-variable sets]
    
    629 627
       = -- Don't float it out
    
    630 628
         lvlExpr env ann_expr
    
    631 629
     
    
    ... ... @@ -676,12 +674,11 @@ lvlMFE env strict_ctxt ann_expr
    676 674
         is_function  = isFunction ann_expr
    
    677 675
         mb_bot_str   = exprBotStrictness_maybe expr
    
    678 676
                                -- See Note [Bottoming floats]
    
    679
    -                           -- esp Bottoming floats (2)
    
    677
    +                           -- esp Bottoming floats (BF2)
    
    680 678
         expr_ok_for_spec = exprOkForSpeculation expr
    
    681 679
         abs_vars = abstractVars dest_lvl env fvs
    
    682 680
         dest_lvl = destLevel env fvs fvs_ty is_function is_bot_lam
    
    683
    -               -- NB: is_bot_lam not is_bot; see (3) in
    
    684
    -               --     Note [Bottoming floats]
    
    681
    +               -- NB: is_bot_lam not is_bot; see (BF2) in Note [Bottoming floats]
    
    685 682
     
    
    686 683
         -- float_is_new_lam: the floated thing will be a new value lambda
    
    687 684
         -- replacing, say (g (x+4)) by (lvl x).  No work is saved, nor is
    
    ... ... @@ -698,20 +695,32 @@ lvlMFE env strict_ctxt ann_expr
    698 695
     
    
    699 696
             -- A decision to float entails let-binding this thing, and we only do
    
    700 697
             -- that if we'll escape a value lambda, or will go to the top level.
    
    698
    +        -- Never float trivial expressions;
    
    699
    +        --   notably, save_work might be true of a lone evaluated variable.
    
    701 700
         float_me = saves_work || saves_alloc || is_mk_static
    
    702 701
     
    
    703 702
         -- See Note [Saving work]
    
    704
    -    saves_work = escapes_value_lam        -- (a)
    
    705
    -                 && not (exprIsHNF expr)  -- (b)
    
    706
    -                 && not float_is_new_lam  -- (c)
    
    703
    +    is_hnf = exprIsHNF expr
    
    704
    +    saves_work = escapes_value_lam        -- (SW-a)
    
    705
    +                 && not is_hnf            -- (SW-b)
    
    706
    +                 && not float_is_new_lam  -- (SW-c)
    
    707 707
         escapes_value_lam = dest_lvl `ltMajLvl` (le_ctxt_lvl env)
    
    708 708
     
    
    709
    -    -- See Note [Saving allocation] and Note [Floating to the top]
    
    710
    -    saves_alloc =  isTopLvl dest_lvl
    
    711
    -                && floatConsts env
    
    712
    -                && (   not strict_ctxt                     -- (a)
    
    713
    -                    || exprIsHNF expr                      -- (b)
    
    714
    -                    || (is_bot_lam && escapes_value_lam))  -- (c)
    
    709
    +    -- See Note [Floating to the top]
    
    710
    +--    is_con_app = isSaturatedConApp expr  -- True of literal strings too
    
    711
    +    saves_alloc = isTopLvl dest_lvl
    
    712
    +               && (escapes_value_lam || floatConsts env)
    
    713
    +                  -- Always float allocation out of a value lambda
    
    714
    +                  --    if it gets to top level
    
    715
    +               && (not strict_ctxt || is_hnf || is_bot_lam)
    
    716
    +        -- is_con_app: don't float PAPs to the top; they may well end
    
    717
    +        -- up getting eta-expanded and re-inlined
    
    718
    +        -- E.g.   f = \x -> (++) ys
    
    719
    +        -- If we float, then eta-expand we get
    
    720
    +        --      lvl = (++) ys
    
    721
    +        --      f = \x \zs -> lvl zs
    
    722
    +        -- and now we'll inline lvl.  Silly.
    
    723
    +
    
    715 724
     
    
    716 725
     hasFreeJoin :: LevelEnv -> DVarSet -> Bool
    
    717 726
     -- Has a free join point which is not being floated to top level.
    
    ... ... @@ -726,22 +735,22 @@ hasFreeJoin env fvs
    726 735
     The key idea in let-floating is to
    
    727 736
       * float a redex out of a (value) lambda
    
    728 737
     Doing so can save an unbounded amount of work.
    
    729
    -But see also Note [Saving allocation].
    
    738
    +But see also Note [Floating to the top].
    
    730 739
     
    
    731 740
     So we definitely float an expression out if
    
    732
    -(a) It will escape a value lambda (escapes_value_lam)
    
    733
    -(b) The expression is not a head-normal form (exprIsHNF); see (SW1, SW2).
    
    734
    -(c) Floating does not require wrapping it in value lambdas (float_is_new_lam).
    
    741
    +(SW-a) It will escape a value lambda (escapes_value_lam)
    
    742
    +(SW-b) The expression is not a head-normal form (exprIsHNF); see (SW1, SW2).
    
    743
    +(SW-c) Floating does not require wrapping it in value lambdas (float_is_new_lam).
    
    735 744
         See (SW3) below
    
    736 745
     
    
    737 746
     Wrinkles:
    
    738 747
     
    
    739
    -(SW1) Concerning (b) I experimented with using `exprIsCheap` rather than
    
    748
    +(SW1) Concerning (SW-b) I experimented with using `exprIsCheap` rather than
    
    740 749
           `exprIsHNF` but the latter seems better, according to nofib
    
    741 750
           (`spectral/mate` got 10% worse with exprIsCheap).  It's really a bit of a
    
    742 751
           heuristic.
    
    743 752
     
    
    744
    -(SW2) What about omitting (b), and hence floating HNFs as well?  The danger of
    
    753
    +(SW2) What about omitting (SW-b), and hence floating HNFs as well?  The danger of
    
    745 754
           doing so is that we end up floating out a HNF from a cold path (where it
    
    746 755
           might never get allocated at all) and allocating it all the time
    
    747 756
           regardless.  Example
    
    ... ... @@ -760,7 +769,7 @@ Wrinkles:
    760 769
            - Occasionally decreases runtime allocation (T12996 -2.5%)
    
    761 770
            - Slightly mixed effect on nofib: (puzzle -10%, mate -5%, cichelli +5%)
    
    762 771
              but geometric mean is -0.09%.
    
    763
    -      Overall, a win.
    
    772
    +      Overall, a small win.
    
    764 773
     
    
    765 774
     (SW3) Concerning (c), if we are wrapping the thing in extra value lambdas (in
    
    766 775
           abs_vars), then nothing is saved.  E.g.
    
    ... ... @@ -771,10 +780,12 @@ Wrinkles:
    771 780
           we have saved nothing: one pair will still be allocated for each
    
    772 781
           call of `f`.  Hence the (not float_is_new_lam) in saves_work.
    
    773 782
     
    
    774
    -Note [Saving allocation]
    
    775
    -~~~~~~~~~~~~~~~~~~~~~~~~
    
    783
    +Note [Floating to the top]
    
    784
    +~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    776 785
     Even if `saves_work` is false, we we may want to float even cheap/HNF
    
    777
    -expressions out of value lambdas, for several reasons:
    
    786
    +expressions out of value lambdas. Data suggests, however, that it is better
    
    787
    +/only/ to do so, /if/ they can go to top level. If the expression goes to top
    
    788
    +level we don't pay the cost of allocating cold-path thunks described in (SW2).
    
    778 789
     
    
    779 790
     * Doing so may save allocation. Consider
    
    780 791
             f = \x.  .. (\y.e) ...
    
    ... ... @@ -782,6 +793,11 @@ expressions out of value lambdas, for several reasons:
    782 793
       (assuming e does not mention x). An example where this really makes a
    
    783 794
       difference is simplrun009.
    
    784 795
     
    
    796
    +* In principle this would be true even if the (\y.e) didn't go to top level; but
    
    797
    +  in practice we only float a HNF if it goes all way to the top.  We don't pay
    
    798
    +  /any/ allocation cost for a top-level floated expression; it just becomes
    
    799
    +  static data.
    
    800
    +
    
    785 801
     * It may allow SpecContr to fire on functions. Consider
    
    786 802
             f = \x. ....(f (\y.e))....
    
    787 803
       After floating we get
    
    ... ... @@ -793,21 +809,7 @@ expressions out of value lambdas, for several reasons:
    793 809
       a big difference for string literals and bottoming expressions: see Note
    
    794 810
       [Floating to the top]
    
    795 811
     
    
    796
    -Data suggests, however, that it is better /only/ to float HNFS, /if/ they can go
    
    797
    -to top level. See (SW2) of Note [Saving work].  If the expression goes to top
    
    798
    -level we don't pay the cost of allocating cold-path thunks described in (SW2).
    
    799
    -
    
    800
    -Hence `isTopLvl dest_lvl` in `saves_alloc`.
    
    801
    -
    
    802
    -Note [Floating to the top]
    
    803
    -~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    804
    -Even though Note [Saving allocation] suggests that we should not, in
    
    805
    -general, float HNFs, the balance change if it goes to the top:
    
    806
    -
    
    807
    -* We don't pay an allocation cost for the floated expression; it
    
    808
    -  just becomes static data.
    
    809
    -
    
    810
    -* Floating string literal is valuable -- no point in duplicating the
    
    812
    +* Floating string literals is valuable -- no point in duplicating the
    
    811 813
       at each call site!
    
    812 814
     
    
    813 815
     * Floating bottoming expressions is valuable: they are always cold
    
    ... ... @@ -815,32 +817,32 @@ general, float HNFs, the balance change if it goes to the top:
    815 817
       can be quite big, inhibiting inlining. See Note [Bottoming floats]
    
    816 818
     
    
    817 819
     So we float an expression to the top if:
    
    818
    -  (a) the context is lazy (so we get allocation), or
    
    819
    -  (b) the expression is a HNF (so we get allocation), or
    
    820
    -  (c) the expression is bottoming and floating would escape a
    
    821
    -      value lambda (NB: if the expression itself is a lambda, (b)
    
    822
    -      will apply; so this case only catches bottoming thunks)
    
    820
    +  (FT1) the context is lazy (so we get allocation), or
    
    821
    +  (FT2) the expression is a HNF (so we get allocation), or
    
    822
    +  (FT3) the expression is bottoming and floating would escape a
    
    823
    +         value lambda (NB: if the expression itself is a lambda, (b)
    
    824
    +         will apply; so this case only catches bottoming thunks)
    
    823 825
     
    
    824 826
     Examples:
    
    825 827
     
    
    826
    -* (a) Strict.  Case scrutinee
    
    828
    +* (FT1) Strict.  Case scrutinee
    
    827 829
           f = case g True of ....
    
    828 830
       Don't float (g True) to top level; then we have the admin of a
    
    829 831
       top-level thunk to worry about, with zero gain.
    
    830 832
     
    
    831
    -* (a) Strict.  Case alternative
    
    833
    +* (FT1) Strict.  Case alternative
    
    832 834
           h = case y of
    
    833 835
                  True  -> g True
    
    834 836
                  False -> False
    
    835 837
       Don't float (g True) to the top level
    
    836 838
     
    
    837
    -* (b) HNF
    
    839
    +* (FT2) HNF
    
    838 840
           f = case y of
    
    839 841
                 True  -> p:q
    
    840 842
                 False -> blah
    
    841 843
       We may as well float the (p:q) so it becomes a static data structure.
    
    842 844
     
    
    843
    -* (c) Bottoming expressions; see also Note [Bottoming floats]
    
    845
    +* (FT3) Bottoming expressions; see also Note [Bottoming floats]
    
    844 846
           f x = case x of
    
    845 847
                   0 -> error <big thing>
    
    846 848
                   _ -> x+1
    
    ... ... @@ -853,7 +855,7 @@ Examples:
    853 855
       'foo' anyway.  So float bottoming things only if they escape
    
    854 856
       a lambda.
    
    855 857
     
    
    856
    -* Arguments
    
    858
    +* (FT4) Arguments
    
    857 859
          t = f (g True)
    
    858 860
       Prior to Apr 22 we didn't float (g True) to the top if f was strict.
    
    859 861
       But (a) this only affected CAFs, because if it escapes a value lambda
    
    ... ... @@ -868,28 +870,6 @@ early loses opportunities for RULES which (needless to say) are
    868 870
     important in some nofib programs (gcd is an example).  [SPJ note:
    
    869 871
     I think this is obsolete; the flag seems always on.]
    
    870 872
     
    
    871
    -Note [Large free-variable sets]
    
    872
    -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    873
    -In #24471 we had something like
    
    874
    -     x1 = I# 1
    
    875
    -     ...
    
    876
    -     x1000 = I# 1000
    
    877
    -     foo = f x1 (f x2 (f x3 ....))
    
    878
    -So every sub-expression in `foo` has lots and lots of free variables.  But
    
    879
    -none of these sub-expressions float anywhere; the entire float-out pass is a
    
    880
    -no-op.
    
    881
    -
    
    882
    -In lvlMFE, we want to find out quickly if the MFE is not-floatable; that is
    
    883
    -the common case.  In #24471 it turned out that we were testing `abs_vars` (a
    
    884
    -relatively complicated calculation that takes at least O(n-free-vars) time to
    
    885
    -compute) for every sub-expression.
    
    886
    -
    
    887
    -Better instead to test `float_me` early. That still involves looking at
    
    888
    -dest_lvl, which means looking at every free variable, but the constant factor
    
    889
    -is a lot better.
    
    890
    -
    
    891
    -ToDo: find a way to fix the bad asymptotic complexity.
    
    892
    -
    
    893 873
     Note [Floating join point bindings]
    
    894 874
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    895 875
     Mostly we don't float join points at all -- we want them to /stay/ join points.
    
    ... ... @@ -1053,30 +1033,36 @@ we'd like to float the call to error, to get
    1053 1033
     
    
    1054 1034
     But, as ever, we need to be careful:
    
    1055 1035
     
    
    1056
    -(1) We want to float a bottoming
    
    1036
    +(BF1) We want to float a bottoming
    
    1057 1037
         expression even if it has free variables:
    
    1058 1038
             f = \x. g (let v = h x in error ("urk" ++ v))
    
    1059 1039
         Then we'd like to abstract over 'x', and float the whole arg of g:
    
    1060 1040
             lvl = \x. let v = h x in error ("urk" ++ v)
    
    1061 1041
             f = \x. g (lvl x)
    
    1062
    -    To achieve this we pass is_bot to destLevel
    
    1063
    -
    
    1064
    -(2) We do not do this for lambdas that return
    
    1065
    -    bottom.  Instead we treat the /body/ of such a function specially,
    
    1066
    -    via point (1).  For example:
    
    1042
    +    To achieve this we pass `is_bot` to destLevel
    
    1043
    +
    
    1044
    +(BF2) We do the same for /lambdas/ that return bottom.
    
    1045
    +    Suppose the original lambda had /no/ free vars:
    
    1046
    +        f = \x. ....(\y z. error (y++z))...
    
    1047
    +    then we'd like to float that whole lambda
    
    1048
    +        lvl = \y z. error (y++z)
    
    1049
    +        f = \x. ....lvl....
    
    1050
    +    If we just floated its bottom-valued body, we might abstract the arguments in
    
    1051
    +    the "wrong" order and end up with this bad result
    
    1052
    +        lvl = \z y. error (y++z)
    
    1053
    +        f = \x. ....(\y z. lvl z y)....
    
    1054
    +
    
    1055
    +    If the lambda does have free vars, this will happen:
    
    1067 1056
             f = \x. ....(\y z. if x then error y else error z)....
    
    1068
    -    If we float the whole lambda thus
    
    1057
    +    We float the whole lambda thus
    
    1069 1058
             lvl = \x. \y z. if x then error y else error z
    
    1070 1059
             f = \x. ...(lvl x)...
    
    1071
    -    we may well end up eta-expanding that PAP to
    
    1060
    +    And we may well end up eta-expanding that PAP to
    
    1061
    +        lvl = \x. \y z. if b then error y else error z
    
    1072 1062
             f = \x. ...(\y z. lvl x y z)...
    
    1063
    +    so we get a (small) closure.  So be it.
    
    1073 1064
     
    
    1074
    -    ===>
    
    1075
    -        lvl = \x z y. if b then error y else error z
    
    1076
    -        f = \x. ...(\y z. lvl x z y)...
    
    1077
    -    (There is no guarantee that we'll choose the perfect argument order.)
    
    1078
    -
    
    1079
    -(3) If we have a /binding/ that returns bottom, we want to float it to top
    
    1065
    +(BF3) If we have a /binding/ that returns bottom, we want to float it to top
    
    1080 1066
         level, even if it has free vars (point (1)), and even it has lambdas.
    
    1081 1067
         Example:
    
    1082 1068
            ... let { v = \y. error (show x ++ show y) } in ...
    
    ... ... @@ -1092,7 +1078,6 @@ But, as ever, we need to be careful:
    1092 1078
         join points (#24768), and floating to the top would abstract over those join
    
    1093 1079
         points, which we should never do.
    
    1094 1080
     
    
    1095
    -
    
    1096 1081
     See Maessen's paper 1999 "Bottom extraction: factoring error handling out
    
    1097 1082
     of functional programs" (unpublished I think).
    
    1098 1083
     
    
    ... ... @@ -1135,7 +1120,6 @@ float the case (as advocated here) we won't float the (build ...y..)
    1135 1120
     either, so fusion will happen.  It can be a big effect, esp in some
    
    1136 1121
     artificial benchmarks (e.g. integer, queens), but there is no perfect
    
    1137 1122
     answer.
    
    1138
    -
    
    1139 1123
     -}
    
    1140 1124
     
    
    1141 1125
     annotateBotStr :: Id -> Arity -> Maybe (Arity, DmdSig, CprSig) -> Id
    
    ... ... @@ -1152,69 +1136,124 @@ annotateBotStr id n_extra mb_bot_str
    1152 1136
       = id
    
    1153 1137
     
    
    1154 1138
     notWorthFloating :: CoreExpr -> [Var] -> Bool
    
    1155
    --- Returns True if the expression would be replaced by
    
    1156
    --- something bigger than it is now.  For example:
    
    1157
    ---   abs_vars = tvars only:  return True if e is trivial,
    
    1158
    ---                           but False for anything bigger
    
    1159
    ---   abs_vars = [x] (an Id): return True for trivial, or an application (f x)
    
    1160
    ---                           but False for (f x x)
    
    1161
    ---
    
    1162
    --- One big goal is that floating should be idempotent.  Eg if
    
    1163
    --- we replace e with (lvl79 x y) and then run FloatOut again, don't want
    
    1164
    --- to replace (lvl79 x y) with (lvl83 x y)!
    
    1165
    -
    
    1139
    +--  See Note [notWorthFloating]
    
    1166 1140
     notWorthFloating e abs_vars
    
    1167
    -  = go e (count isId abs_vars)
    
    1141
    +  = go e 0
    
    1168 1142
       where
    
    1169
    -    go (Var {}) n               = n >= 0
    
    1170
    -    go (Lit lit) n              = assert (n==0) $
    
    1171
    -                                  litIsTrivial lit   -- Note [Floating literals]
    
    1172
    -    go (Type {}) _              = True
    
    1173
    -    go (Coercion {}) _          = True
    
    1143
    +    n_abs_vars = count isId abs_vars  -- See (NWF5)
    
    1144
    +
    
    1145
    +    go :: CoreExpr -> Int -> Bool
    
    1146
    +    -- (go e n) return True if (e x1 .. xn) is not worth floating
    
    1147
    +    -- where `e` has n trivial value arguments x1..xn
    
    1148
    +    -- See (NWF4)
    
    1149
    +    go (Lit lit) n         = assert (n==0) $
    
    1150
    +                             litIsTrivial lit   -- See (NWF1)
    
    1151
    +    go (Type {}) _         = True
    
    1152
    +    go (Tick t e) n        = not (tickishIsCode t) && go e n
    
    1153
    +    go (Cast e _) n        = n==0 || go e n     -- See (NWF3)
    
    1154
    +    go (Coercion {}) _     = True
    
    1174 1155
         go (App e arg) n
    
    1175
    -       -- See Note [Floating applications to coercions]
    
    1176
    -       | not (isRuntimeArg arg) = go e n
    
    1177
    -       | n==0                   = False
    
    1178
    -       | exprIsTrivial arg      = go e (n-1) -- NB: exprIsTrivial arg = go arg 0
    
    1179
    -       | otherwise              = False
    
    1180
    -    go (Tick t e) n             = not (tickishIsCode t) && go e n
    
    1181
    -    go (Cast e _)  n            = go e n
    
    1182
    -    go (Case e b _ as) n
    
    1156
    +       | Type {} <- arg    = go e n    -- Just types, not coercions (NWF2)
    
    1157
    +       | exprIsTrivial arg = go e (n+1)
    
    1158
    +       | otherwise         = False  -- (f non-triv) is worth floating
    
    1159
    +
    
    1160
    +    go (Case e b _ as) _
    
    1161
    +      -- Do not float the `case` part of trivial cases (NWF3)
    
    1162
    +      -- We'll have a look at the RHS when we get there
    
    1183 1163
           | null as
    
    1184
    -      = go e n     -- See Note [Empty case is trivial]
    
    1185
    -      | Just rhs <- isUnsafeEqualityCase e b as
    
    1186
    -      = go rhs n   -- See (U2) of Note [Implementing unsafeCoerce] in base:Unsafe.Coerce
    
    1187
    -    go _ _                      = False
    
    1164
    +      = True   -- See Note [Empty case is trivial]
    
    1165
    +      | Just {} <- isUnsafeEqualityCase e b as
    
    1166
    +      = True   -- See (U2) of Note [Implementing unsafeCoerce] in base:Unsafe.Coerce
    
    1167
    +      | otherwise
    
    1168
    +      = False
    
    1188 1169
     
    
    1189
    -{-
    
    1190
    -Note [Floating literals]
    
    1191
    -~~~~~~~~~~~~~~~~~~~~~~~~
    
    1192
    -It's important to float Integer literals, so that they get shared,
    
    1193
    -rather than being allocated every time round the loop.
    
    1194
    -Hence the litIsTrivial.
    
    1170
    +    go (Var _) n
    
    1171
    +      | n==0             = True   -- Naked variable
    
    1172
    +      | n <= n_abs_vars  = True   -- (f a b c) is not worth floating if
    
    1173
    +      | otherwise        = False  -- a,b,c are all abstracted; see (NWF5)
    
    1195 1174
     
    
    1196
    -Ditto literal strings (LitString), which we'd like to float to top
    
    1197
    -level, which is now possible.
    
    1175
    +    go _ _ = False  -- Let etc is worth floating
    
    1198 1176
     
    
    1199
    -Note [Floating applications to coercions]
    
    1200
    -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1201
    -We don’t float out variables applied only to type arguments, since the
    
    1202
    -extra binding would be pointless: type arguments are completely erased.
    
    1203
    -But *coercion* arguments aren’t (see Note [Coercion tokens] in
    
    1204
    -"GHC.CoreToStg" and Note [inlineBoringOk] in"GHC.Core.Unfold"),
    
    1205
    -so we still want to float out variables applied only to
    
    1206
    -coercion arguments.
    
    1177
    +{- Note [notWorthFloating]
    
    1178
    +~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1179
    +`notWorthFloating` returns True if the expression would be replaced by something
    
    1180
    +bigger than it is now.  One big goal is that floating should be idempotent.  Eg
    
    1181
    +if we replace e with (lvl79 x y) and then run FloatOut again, don't want to
    
    1182
    +replace (lvl79 x y) with (lvl83 x y)!
    
    1207 1183
     
    
    1184
    +For example:
    
    1185
    +  abs_vars = tvars only:  return True if e is trivial,
    
    1186
    +                          but False for anything bigger
    
    1187
    +  abs_vars = [x] (an Id): return True for trivial, or an application (f x)
    
    1188
    +                          but False for (f x x)
    
    1189
    +
    
    1190
    +(NWF1) It's important to float Integer literals, so that they get shared, rather
    
    1191
    +  than being allocated every time round the loop.  Hence the litIsTrivial.
    
    1192
    +
    
    1193
    +  Ditto literal strings (LitString), which we'd like to float to top
    
    1194
    +  level, which is now possible.
    
    1195
    +
    
    1196
    +(NWF2) We don’t float out variables applied only to type arguments, since the
    
    1197
    +  extra binding would be pointless: type arguments are completely erased.
    
    1198
    +  But *coercion* arguments aren’t (see Note [Coercion tokens] in
    
    1199
    +  "GHC.CoreToStg" and Note [inlineBoringOk] in"GHC.Core.Unfold"),
    
    1200
    +  so we still want to float out variables applied only to
    
    1201
    +  coercion arguments.
    
    1202
    +
    
    1203
    +(NWF3) Some expressions have trivial wrappers:
    
    1204
    +     - Casts (e |> co)
    
    1205
    +     - Unary-class applications:
    
    1206
    +          - Dictionary applications (MkC meth)
    
    1207
    +          - Class-op applictions    (op dict)
    
    1208
    +     - Case of empty alts
    
    1209
    +     - Unsafe-equality case
    
    1210
    +  In all these cases we say "not worth floating", and we do so /regardless/
    
    1211
    +  of the wrapped expression.  The SetLevels stuff may subsequently float the
    
    1212
    +  components of the expression.
    
    1213
    +
    
    1214
    +  Example:  is it worth floating (f x |> co)?  No!  If we did we'd get
    
    1215
    +     lvl = f x |> co
    
    1216
    +     ...lvl....
    
    1217
    +  Then we'd do cast worker/wrapper and end up with.
    
    1218
    +     lvl' = f x
    
    1219
    +     ...(lvl' |> co)...
    
    1220
    +  Silly!  Better not to float it in the first place.  If we say "no" here,
    
    1221
    +  we'll subsequently say "yes" for (f x) and get
    
    1222
    +     lvl = f x
    
    1223
    +     ....(lvl |> co)...
    
    1224
    +  which is what we want.  In short: don't float trivial wrappers.
    
    1225
    +
    
    1226
    +(NWF4) The only non-trivial expression that we say "not worth floating" for
    
    1227
    +  is an application
    
    1228
    +             f x y z
    
    1229
    +  where the number of value arguments is <= the number of abstracted Ids.
    
    1230
    +  This is what makes floating idempotent.  Hence counting the number of
    
    1231
    +  value arguments in `go`
    
    1232
    +
    
    1233
    +(NWF5) In #24471 we had something like
    
    1234
    +     x1 = I# 1
    
    1235
    +     ...
    
    1236
    +     x1000 = I# 1000
    
    1237
    +     foo = f x1 (f x2 (f x3 ....))
    
    1238
    +  So every sub-expression in `foo` has lots and lots of free variables.  But
    
    1239
    +  none of these sub-expressions float anywhere; the entire float-out pass is a
    
    1240
    +  no-op.
    
    1208 1241
     
    
    1209
    -************************************************************************
    
    1210
    -*                                                                      *
    
    1211
    -\subsection{Bindings}
    
    1212
    -*                                                                      *
    
    1213
    -************************************************************************
    
    1242
    +  So `notWorthFloating` tries to avoid evaluating `n_abs_vars`, in cases where
    
    1243
    +  it obviously /is/ worth floating.  (In #24471 it turned out that we were
    
    1244
    +  testing `abs_vars` (a relatively complicated calculation that takes at least
    
    1245
    +  O(n-free-vars) time to compute) for every sub-expression.)
    
    1214 1246
     
    
    1215
    -The binding stuff works for top level too.
    
    1247
    +  Hence testing `n_abs_vars only` at the very end.
    
    1216 1248
     -}
    
    1217 1249
     
    
    1250
    +{- *********************************************************************
    
    1251
    +*                                                                      *
    
    1252
    +                       Bindings
    
    1253
    +        This binding stuff works for top level too.
    
    1254
    +*                                                                      *
    
    1255
    +********************************************************************* -}
    
    1256
    +
    
    1218 1257
     lvlBind :: LevelEnv
    
    1219 1258
             -> CoreBindWithFVs
    
    1220 1259
             -> LvlM (LevelledBind, LevelEnv)
    
    ... ... @@ -1261,7 +1300,7 @@ lvlBind env (AnnNonRec bndr rhs)
    1261 1300
             -- is_bot_lam: looks like (\xy. bot), maybe zero lams
    
    1262 1301
             -- NB: not isBottomThunk!
    
    1263 1302
             -- NB: not is_join: don't send bottoming join points to the top.
    
    1264
    -        -- See Note [Bottoming floats] point (3)
    
    1303
    +        -- See Note [Bottoming floats] (BF3)
    
    1265 1304
     
    
    1266 1305
         is_top_bindable = exprIsTopLevelBindable deann_rhs bndr_ty
    
    1267 1306
         n_extra       = count isId abs_vars
    
    ... ... @@ -1552,9 +1591,8 @@ destLevel env fvs fvs_ty is_function is_bot
    1552 1591
                                   -- See Note [Floating join point bindings]
    
    1553 1592
       = tOP_LEVEL
    
    1554 1593
     
    
    1555
    -  | is_bot              -- Send bottoming bindings to the top
    
    1556
    -  = as_far_as_poss      -- regardless; see Note [Bottoming floats]
    
    1557
    -                        -- Esp Bottoming floats (1) and (3)
    
    1594
    +  | is_bot              -- Send bottoming bindings to the top regardless;
    
    1595
    +  = as_far_as_poss      -- see (BF1) and (BF2) in Note [Bottoming floats]
    
    1558 1596
     
    
    1559 1597
       | Just n_args <- floatLams env
    
    1560 1598
       , n_args > 0  -- n=0 case handled uniformly by the 'otherwise' case
    
    ... ... @@ -1568,8 +1606,13 @@ destLevel env fvs fvs_ty is_function is_bot
    1568 1606
         max_fv_id_level = maxFvLevel isId env fvs -- Max over Ids only; the
    
    1569 1607
                                                   -- tyvars will be abstracted
    
    1570 1608
     
    
    1609
    +    -- as_far_as_poss: destination level depends only on the free Ids (more
    
    1610
    +    -- precisely, free CoVars) of the /type/, not the free Ids of the /term/.
    
    1611
    +    -- Why worry about the free CoVars?  See Note [Floating and kind casts]
    
    1612
    +    --
    
    1613
    +    -- There may be free Ids in the term, but then we'll just
    
    1614
    +    -- lambda-abstract over them
    
    1571 1615
         as_far_as_poss = maxFvLevel' isId env fvs_ty
    
    1572
    -                     -- See Note [Floating and kind casts]
    
    1573 1616
     
    
    1574 1617
     {- Note [Floating and kind casts]
    
    1575 1618
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -1732,10 +1775,9 @@ maxFvLevel max_me env var_set
    1732 1775
         -- It's OK to use a non-deterministic fold here because maxIn commutes.
    
    1733 1776
     
    
    1734 1777
     maxFvLevel' :: (Var -> Bool) -> LevelEnv -> TyCoVarSet -> Level
    
    1735
    --- Same but for TyCoVarSet
    
    1778
    +-- Precisely the same as `maxFvLevel` but for TyCoVarSet rather than DVarSet
    
    1736 1779
     maxFvLevel' max_me env var_set
    
    1737 1780
       = nonDetStrictFoldUniqSet (maxIn max_me env) tOP_LEVEL var_set
    
    1738
    -    -- It's OK to use a non-deterministic fold here because maxIn commutes.
    
    1739 1781
     
    
    1740 1782
     maxIn :: (Var -> Bool) -> LevelEnv -> InVar -> Level -> Level
    
    1741 1783
     maxIn max_me (LE { le_lvl_env = lvl_env, le_env = id_env }) in_var lvl
    

  • compiler/GHC/Core/Opt/Simplify/Env.hs
    ... ... @@ -11,7 +11,7 @@ module GHC.Core.Opt.Simplify.Env (
    11 11
             SimplMode(..), updMode, smPlatform,
    
    12 12
     
    
    13 13
             -- * Environments
    
    14
    -        SimplEnv(..), pprSimplEnv,   -- Temp not abstract
    
    14
    +        SimplEnv(..), StaticEnv, pprSimplEnv,
    
    15 15
             seArityOpts, seCaseCase, seCaseFolding, seCaseMerge, seCastSwizzle,
    
    16 16
             seDoEtaReduction, seEtaExpand, seFloatEnable, seInline, seNames,
    
    17 17
             seOptCoercionOpts, sePhase, sePlatform, sePreInline,
    
    ... ... @@ -170,6 +170,8 @@ coercion we don't apply optCoercion to it if seInlineDepth>0.
    170 170
     Reason: it has already been optimised once, no point in doing so again.
    
    171 171
     -}
    
    172 172
     
    
    173
    +type StaticEnv = SimplEnv       -- Just the static part is relevant
    
    174
    +
    
    173 175
     data SimplEnv
    
    174 176
       = SimplEnv {
    
    175 177
          ----------- Static part of the environment -----------
    
    ... ... @@ -407,7 +409,6 @@ data SimplSR
    407 409
            -- and  ja = Just a <=> x is a join-point of arity a
    
    408 410
            -- See Note [Join arity in SimplIdSubst]
    
    409 411
     
    
    410
    -
    
    411 412
       | DoneId OutId
    
    412 413
            -- If  x :-> DoneId v   is in the SimplIdSubst
    
    413 414
            -- then replace occurrences of x by v
    
    ... ... @@ -778,7 +779,7 @@ emptyJoinFloats = nilOL
    778 779
     isEmptyJoinFloats :: JoinFloats -> Bool
    
    779 780
     isEmptyJoinFloats = isNilOL
    
    780 781
     
    
    781
    -unitLetFloat :: OutBind -> LetFloats
    
    782
    +unitLetFloat :: HasDebugCallStack => OutBind -> LetFloats
    
    782 783
     -- This key function constructs a singleton float with the right form
    
    783 784
     unitLetFloat bind = assert (all (not . isJoinId) (bindersOf bind)) $
    
    784 785
                         LetFloats (unitOL bind) (flag bind)
    

  • compiler/GHC/Core/Opt/Simplify/Iteration.hs
    ... ... @@ -447,7 +447,7 @@ we want to do something very similar to worker/wrapper:
    447 447
     
    
    448 448
     We call this making a cast worker/wrapper in tryCastWorkerWrapper.
    
    449 449
     
    
    450
    -The main motivaiton is that x can be inlined freely.  There's a chance
    
    450
    +The main motivation is that x can be inlined freely.  There's a chance
    
    451 451
     that e will be a constructor application or function, or something
    
    452 452
     like that, so moving the coercion to the usage site may well cancel
    
    453 453
     the coercions and lead to further optimisation.  Example:
    
    ... ... @@ -576,11 +576,13 @@ Note [Concrete types] in GHC.Tc.Utils.Concrete.
    576 576
     -}
    
    577 577
     
    
    578 578
     tryCastWorkerWrapper :: SimplEnv -> BindContext
    
    579
    -                     -> InId -> OutId -> OutExpr
    
    580
    -                     -> SimplM (SimplFloats, SimplEnv)
    
    579
    +                     -> OutId -> OutExpr
    
    580
    +                     -> SimplM (Maybe (LetFloats, OutId, OutExpr))
    
    581 581
     -- See Note [Cast worker/wrapper]
    
    582
    -tryCastWorkerWrapper env bind_cxt old_bndr bndr (Cast rhs co)
    
    583
    -  | BC_Let top_lvl is_rec <- bind_cxt  -- Not join points
    
    582
    +-- Given input x = rhs |> co, the result will be
    
    583
    +--    (x' = rhs, x, x' |> co))
    
    584
    +tryCastWorkerWrapper env bind_cxt bndr (Cast rhs co)
    
    585
    +  | BC_Let top_lvl _ <- bind_cxt  -- Not join points
    
    584 586
       , not (isDFunId bndr) -- nor DFuns; cast w/w is no help, and we can't transform
    
    585 587
                             --            a DFunUnfolding in mk_worker_unfolding
    
    586 588
       , not (exprIsTrivial rhs)        -- Not x = y |> co; Wrinkle 1
    
    ... ... @@ -588,38 +590,23 @@ tryCastWorkerWrapper env bind_cxt old_bndr bndr (Cast rhs co)
    588 590
       , typeHasFixedRuntimeRep work_ty    -- Don't peel off a cast if doing so would
    
    589 591
                                           -- lose the underlying runtime representation.
    
    590 592
                                           -- See Note [Preserve RuntimeRep info in cast w/w]
    
    591
    -  , not (isOpaquePragma (idInlinePragma old_bndr)) -- Not for OPAQUE bindings
    
    592
    -                                                   -- See Note [OPAQUE pragma]
    
    593
    +  , not (isOpaquePragma (idInlinePragma bndr)) -- Not for OPAQUE bindings
    
    594
    +                                               -- See Note [OPAQUE pragma]
    
    593 595
       = do  { uniq <- getUniqueM
    
    594 596
             ; let work_name = mkSystemVarName uniq occ_fs
    
    595 597
                   work_id   = mkLocalIdWithInfo work_name ManyTy work_ty work_info
    
    596
    -              is_strict = isStrictId bndr
    
    597 598
     
    
    598
    -        ; (rhs_floats, work_rhs) <- prepareBinding env top_lvl is_rec is_strict
    
    599
    -                                                   work_id (emptyFloats env) rhs
    
    600
    -
    
    601
    -        ; work_unf <- mk_worker_unfolding top_lvl work_id work_rhs
    
    599
    +        ; work_unf <- mk_worker_unfolding top_lvl work_id rhs
    
    602 600
             ; let  work_id_w_unf = work_id `setIdUnfolding` work_unf
    
    603
    -               floats   = rhs_floats `addLetFloats`
    
    604
    -                          unitLetFloat (NonRec work_id_w_unf work_rhs)
    
    605
    -
    
    606
    -               triv_rhs = Cast (Var work_id_w_unf) co
    
    607
    -
    
    608
    -        ; if postInlineUnconditionally env bind_cxt old_bndr bndr triv_rhs
    
    609
    -             -- Almost always True, because the RHS is trivial
    
    610
    -             -- In that case we want to eliminate the binding fast
    
    611
    -             -- We conservatively use postInlineUnconditionally so that we
    
    612
    -             -- check all the right things
    
    613
    -          then do { tick (PostInlineUnconditionally bndr)
    
    614
    -                  ; return ( floats
    
    615
    -                           , extendIdSubst (setInScopeFromF env floats) old_bndr $
    
    616
    -                             DoneEx triv_rhs NotJoinPoint ) }
    
    617
    -
    
    618
    -          else do { wrap_unf <- mkLetUnfolding env top_lvl VanillaSrc bndr False triv_rhs
    
    619
    -                  ; let bndr' = bndr `setInlinePragma` mkCastWrapperInlinePrag (idInlinePragma bndr)
    
    620
    -                                `setIdUnfolding`  wrap_unf
    
    621
    -                        floats' = floats `extendFloats` NonRec bndr' triv_rhs
    
    622
    -                  ; return ( floats', setInScopeFromF env floats' ) } }
    
    601
    +               work_bind     = NonRec work_id_w_unf rhs
    
    602
    +               triv_rhs      = Cast (Var work_id_w_unf) co
    
    603
    +
    
    604
    +        ; wrap_unf <- mkLetUnfolding env top_lvl VanillaSrc bndr False triv_rhs
    
    605
    +        ; let wrap_prag = mkCastWrapperInlinePrag (inlinePragInfo info)
    
    606
    +              bndr' = bndr `setInlinePragma` wrap_prag
    
    607
    +                           `setIdUnfolding`  wrap_unf
    
    608
    +
    
    609
    +        ; return (Just (unitLetFloat work_bind, bndr', triv_rhs)) }
    
    623 610
       where
    
    624 611
         -- Force the occ_fs so that the old Id is not retained in the new Id.
    
    625 612
         !occ_fs = getOccFS bndr
    
    ... ... @@ -647,10 +634,10 @@ tryCastWorkerWrapper env bind_cxt old_bndr bndr (Cast rhs co)
    647 634
                  | isStableSource src -> return (unf { uf_tmpl = mkCast unf_rhs (mkSymCo co) })
    
    648 635
                _ -> mkLetUnfolding env top_lvl VanillaSrc work_id False work_rhs
    
    649 636
     
    
    650
    -tryCastWorkerWrapper env _ _ bndr rhs  -- All other bindings
    
    637
    +tryCastWorkerWrapper _ _ bndr rhs  -- All other bindings
    
    651 638
       = do { traceSmpl "tcww:no" (vcat [ text "bndr:" <+> ppr bndr
    
    652 639
                                        , text "rhs:" <+> ppr rhs ])
    
    653
    -        ; return (mkFloatBind env (NonRec bndr rhs)) }
    
    640
    +       ; return Nothing }
    
    654 641
     
    
    655 642
     mkCastWrapperInlinePrag :: InlinePragma -> InlinePragma
    
    656 643
     -- See Note [Cast worker/wrapper]
    
    ... ... @@ -810,39 +797,40 @@ makeTrivial :: HasDebugCallStack
    810 797
     -- Binds the expression to a variable, if it's not trivial, returning the variable
    
    811 798
     -- For the Demand argument, see Note [Keeping demand info in StrictArg Plan A]
    
    812 799
     makeTrivial env top_lvl dmd occ_fs expr
    
    813
    -  | exprIsTrivial expr                          -- Already trivial
    
    814
    -  || not (bindingOk top_lvl expr expr_ty)       -- Cannot trivialise
    
    815
    -                                                --   See Note [Cannot trivialise]
    
    800
    +  | exprIsTrivial expr                  -- Already trivial
    
    816 801
       = return (emptyLetFloats, expr)
    
    817 802
     
    
    818
    -  | Cast expr' co <- expr
    
    819
    -  = do { (floats, triv_expr) <- makeTrivial env top_lvl dmd occ_fs expr'
    
    820
    -       ; return (floats, Cast triv_expr co) }
    
    803
    +  | not (bindingOk top_lvl expr expr_ty)  -- Cannot trivialise
    
    804
    +  = return (emptyLetFloats, expr)         -- See Note [Cannot trivialise]
    
    821 805
     
    
    822
    -  | otherwise -- 'expr' is not of form (Cast e co)
    
    806
    +  | otherwise
    
    823 807
       = do  { (floats, expr1) <- prepareRhs env top_lvl occ_fs expr
    
    824 808
             ; uniq <- getUniqueM
    
    825 809
             ; let name = mkSystemVarName uniq occ_fs
    
    826
    -              var  = mkLocalIdWithInfo name ManyTy expr_ty id_info
    
    810
    +              bndr = mkLocalIdWithInfo name ManyTy expr_ty id_info
    
    811
    +              bind_ctxt = BC_Let top_lvl NonRecursive
    
    827 812
     
    
    828 813
             -- Now something very like completeBind,
    
    829 814
             -- but without the postInlineUnconditionally part
    
    830
    -        ; (arity_type, expr2) <- tryEtaExpandRhs env (BC_Let top_lvl NonRecursive) var expr1
    
    815
    +        ; (arity_type, expr2) <- tryEtaExpandRhs env bind_ctxt bndr expr1
    
    831 816
               -- Technically we should extend the in-scope set in 'env' with
    
    832 817
               -- the 'floats' from prepareRHS; but they are all fresh, so there is
    
    833 818
               -- no danger of introducing name shadowing in eta expansion
    
    834 819
     
    
    835
    -        ; unf <- mkLetUnfolding env top_lvl VanillaSrc var False expr2
    
    836
    -
    
    837
    -        ; let final_id = addLetBndrInfo var arity_type unf
    
    838
    -              bind     = NonRec final_id expr2
    
    820
    +        ; unf <- mkLetUnfolding env top_lvl VanillaSrc bndr False expr2
    
    821
    +        ; let bndr'    = addLetBndrInfo bndr arity_type unf
    
    822
    +              anf_bind = NonRec bndr' expr2
    
    839 823
     
    
    840
    -        ; traceSmpl "makeTrivial" (vcat [text "final_id" <+> ppr final_id, text "rhs" <+> ppr expr2 ])
    
    841
    -        ; return ( floats `addLetFlts` unitLetFloat bind, Var final_id ) }
    
    824
    +        ; mb_cast_ww <- tryCastWorkerWrapper env bind_ctxt bndr' expr2
    
    825
    +        ; case mb_cast_ww of
    
    826
    +            Nothing -> return (floats `addLetFlts` unitLetFloat anf_bind, Var bndr')
    
    827
    +            Just (work_flts, _, triv_rhs)
    
    828
    +                    -> return (floats `addLetFlts` work_flts, triv_rhs) }
    
    842 829
       where
    
    843 830
         id_info = vanillaIdInfo `setDemandInfo` dmd
    
    844 831
         expr_ty = exprType expr
    
    845 832
     
    
    833
    +
    
    846 834
     bindingOk :: TopLevelFlag -> CoreExpr -> Type -> Bool
    
    847 835
     -- True iff we can have a binding of this expression at this level
    
    848 836
     -- Precondition: the type is the type of the expression
    
    ... ... @@ -936,26 +924,50 @@ completeBind bind_cxt (old_bndr, unf_se) (new_bndr, new_rhs, env)
    936 924
                                 eta_rhs (idType new_bndr) new_arity old_unf
    
    937 925
     
    
    938 926
           ; let new_bndr_w_info = addLetBndrInfo new_bndr new_arity new_unfolding
    
    939
    -        -- See Note [In-scope set as a substitution]
    
    927
    +               -- See Note [In-scope set as a substitution]
    
    928
    +            occ_anald_rhs = maybeUnfoldingTemplate new_unfolding `orElse` eta_rhs
    
    929
    +               -- occ_anald_rhs: see Note [Use occ-anald RHS in postInlineUnconditionally]
    
    940 930
     
    
    931
    +      -- Try postInlineUnconditionally for (x = rhs)
    
    932
    +      -- If that succeeds we don't want to do tryCastWorkerWrapper
    
    941 933
           ; if postInlineUnconditionally env bind_cxt old_bndr new_bndr_w_info eta_rhs
    
    942
    -
    
    943
    -        then -- Inline and discard the binding
    
    944
    -             do  { tick (PostInlineUnconditionally old_bndr)
    
    945
    -                 ; let unf_rhs = maybeUnfoldingTemplate new_unfolding `orElse` eta_rhs
    
    946
    -                          -- See Note [Use occ-anald RHS in postInlineUnconditionally]
    
    947
    -                 ; simplTrace "PostInlineUnconditionally" (ppr new_bndr <+> ppr unf_rhs) $
    
    948
    -                   return ( emptyFloats env
    
    949
    -                          , extendIdSubst env old_bndr $
    
    950
    -                            DoneEx unf_rhs (idJoinPointHood new_bndr)) }
    
    934
    +        then post_inline_it (emptyFloats env) occ_anald_rhs
    
    935
    +        else
    
    936
    +
    
    937
    +   do { -- Try cast worker-wrapper
    
    938
    +        mb_cast_ww <- tryCastWorkerWrapper env bind_cxt new_bndr_w_info eta_rhs
    
    939
    +      ; case mb_cast_ww of
    
    940
    +          Nothing -> no_post_inline (emptyFloats env) new_bndr_w_info eta_rhs
    
    941
    +
    
    942
    +          Just (cast_let_flts, new_bndr, new_rhs)
    
    943
    +            -- Try postInlineUnconditionally for (new_bndr = new_rhs)
    
    944
    +            -- Almost always fires, because `new_rhs` is small, but we conservatively
    
    945
    +            -- use `postInlineUnconditionally` so that we check all the right things
    
    946
    +            | postInlineUnconditionally env bind_cxt old_bndr new_bndr new_rhs
    
    947
    +            -> post_inline_it cast_floats new_rhs
    
    948
    +                   -- new_rhs is (x |> co) so no need to occ-anal
    
    949
    +            | otherwise
    
    950
    +            -> no_post_inline cast_floats new_bndr new_rhs
    
    951
    +            where
    
    952
    +              cast_floats = emptyFloats env `addLetFloats` cast_let_flts
    
    953
    +    } }
    
    954
    +  where
    
    955
    +    no_post_inline floats new_bndr new_rhs
    
    956
    +      = do { let the_bind = NonRec new_bndr new_rhs
    
    957
    +                 floats'  = floats `extendFloats` the_bind
    
    958
    +                 env'     = env `setInScopeFromF` floats'
    
    959
    +           ; return (floats', env') }
    
    960
    +
    
    961
    +    post_inline_it floats rhs
    
    962
    +      = do  { simplTrace "PostInlineUnconditionally" (ppr old_bndr <+> ppr rhs) $
    
    963
    +              tick (PostInlineUnconditionally old_bndr)
    
    964
    +            ; let env' = env `setInScopeFromF` floats
    
    965
    +            ; return ( floats
    
    966
    +                     , extendIdSubst env' old_bndr $
    
    967
    +                       DoneEx rhs (idJoinPointHood old_bndr)) }
    
    951 968
                     -- Use the substitution to make quite, quite sure that the
    
    952 969
                     -- substitution will happen, since we are going to discard the binding
    
    953 970
     
    
    954
    -        else -- Keep the binding; do cast worker/wrapper
    
    955
    ---             simplTrace "completeBind" (vcat [ text "bndrs" <+> ppr old_bndr <+> ppr new_bndr
    
    956
    ---                                             , text "eta_rhs" <+> ppr eta_rhs ]) $
    
    957
    -             tryCastWorkerWrapper env bind_cxt old_bndr new_bndr_w_info eta_rhs }
    
    958
    -
    
    959 971
     addLetBndrInfo :: OutId -> ArityType -> Unfolding -> OutId
    
    960 972
     addLetBndrInfo new_bndr new_arity_type new_unf
    
    961 973
       = new_bndr `setIdInfo` info5
    
    ... ... @@ -3955,7 +3967,17 @@ mkDupableContWithDmds env dmds
    3955 3967
             ; (floats1, cont') <- mkDupableContWithDmds env cont_dmds cont
    
    3956 3968
             ; let env' = env `setInScopeFromF` floats1
    
    3957 3969
             ; (_, se', arg') <- simplLazyArg env' dup hole_ty Nothing se arg
    
    3958
    -        ; (let_floats2, arg'') <- makeTrivial env NotTopLevel dmd (fsLit "karg") arg'
    
    3970
    +
    
    3971
    +        -- Make the argument duplicable. Danger: if arg is small and we let-bind
    
    3972
    +        -- it, then postInlineUnconditionally will just inline it again, perhaps
    
    3973
    +        -- taking an extra Simplifier iteration (e.g. in test T21839c). So make
    
    3974
    +        -- a `let` only if `couldBeSmallEnoughToInline` says that it is big enough
    
    3975
    +        ; let uf_opts = seUnfoldingOpts env
    
    3976
    +        ; (let_floats2, arg'')
    
    3977
    +              <- if couldBeSmallEnoughToInline uf_opts (unfoldingUseThreshold uf_opts) arg'
    
    3978
    +                 then return (emptyLetFloats, arg')
    
    3979
    +                 else makeTrivial env NotTopLevel dmd (fsLit "karg") arg'
    
    3980
    +
    
    3959 3981
             ; let all_floats = floats1 `addLetFloats` let_floats2
    
    3960 3982
             ; return ( all_floats
    
    3961 3983
                      , ApplyToVal { sc_arg = arg''
    
    ... ... @@ -4592,7 +4614,8 @@ mkLetUnfolding :: SimplEnv -> TopLevelFlag -> UnfoldingSource
    4592 4614
                    -> InId -> Bool    -- True <=> this is a join point
    
    4593 4615
                    -> OutExpr -> SimplM Unfolding
    
    4594 4616
     mkLetUnfolding env top_lvl src id is_join new_rhs
    
    4595
    -  = return (mkUnfolding uf_opts src is_top_lvl is_bottoming is_join new_rhs Nothing)
    
    4617
    +  = -- Monadic to force those where-bindings
    
    4618
    +    return (mkUnfolding uf_opts src is_top_lvl is_bottoming is_join new_rhs Nothing)
    
    4596 4619
                 -- We make an  unfolding *even for loop-breakers*.
    
    4597 4620
                 -- Reason: (a) It might be useful to know that they are WHNF
    
    4598 4621
                 --         (b) In GHC.Iface.Tidy we currently assume that, if we want to
    

  • compiler/GHC/Core/Opt/Simplify/Utils.hs
    ... ... @@ -216,8 +216,6 @@ data SimplCont
    216 216
             CoreTickish     -- Tick tickish <hole>
    
    217 217
             SimplCont
    
    218 218
     
    
    219
    -type StaticEnv = SimplEnv       -- Just the static part is relevant
    
    220
    -
    
    221 219
     data FromWhat = FromLet | FromBeta Levity
    
    222 220
     
    
    223 221
     -- See Note [DupFlag invariants]
    
    ... ... @@ -723,7 +721,6 @@ which it is on the LHS of a rule (see updModeForRules), then don't
    723 721
     make use of the strictness info for the function.
    
    724 722
     -}
    
    725 723
     
    
    726
    -
    
    727 724
     {-
    
    728 725
     ************************************************************************
    
    729 726
     *                                                                      *
    
    ... ... @@ -1423,8 +1420,12 @@ preInlineUnconditionally for
    1423 1420
     Note [Top-level bottoming Ids]
    
    1424 1421
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1425 1422
     Don't inline top-level Ids that are bottoming, even if they are used just
    
    1426
    -once, because FloatOut has gone to some trouble to extract them out.
    
    1427
    -Inlining them won't make the program run faster!
    
    1423
    +once, because FloatOut has gone to some trouble to extract them out. e.g.
    
    1424
    +    report x y = error (..lots of stuff...)
    
    1425
    +    f x y z = if z then report x y else ...blah...
    
    1426
    +Here `f` might be small enough to inline; but if we put all the `report`
    
    1427
    +stuff inside it, it'll look to big.  In general we don't want to duplicate
    
    1428
    +all the error-reporting goop.
    
    1428 1429
     
    
    1429 1430
     Note [Do not inline CoVars unconditionally]
    
    1430 1431
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -1460,51 +1461,25 @@ preInlineUnconditionally env top_lvl bndr rhs rhs_env
    1460 1461
         extend_subst_with inl_rhs = extendIdSubst env bndr $! (mkContEx rhs_env inl_rhs)
    
    1461 1462
     
    
    1462 1463
         one_occ IAmDead = True -- Happens in ((\x.1) v)
    
    1463
    -    one_occ OneOcc{ occ_n_br   = 1
    
    1464
    -                  , occ_in_lam = NotInsideLam }   = isNotTopLevel top_lvl || early_phase
    
    1464
    +    one_occ OneOcc{ occ_n_br    = 1
    
    1465
    +                  , occ_in_lam  = NotInsideLam
    
    1466
    +                  , occ_int_cxt = int_cxt }
    
    1467
    +      =  isNotTopLevel top_lvl      -- Get rid of allocation
    
    1468
    +      || (int_cxt==IsInteresting)   -- Function is applied
    
    1469
    +   --   || (early_phase && not (isConLikeUnfolding unf))  -- See early_phase
    
    1465 1470
         one_occ OneOcc{ occ_n_br   = 1
    
    1466 1471
                       , occ_in_lam = IsInsideLam
    
    1467
    -                  , occ_int_cxt = IsInteresting } = canInlineInLam rhs
    
    1468
    -    one_occ _                                     = False
    
    1472
    +                  , occ_int_cxt = IsInteresting }
    
    1473
    +      = canInlineInLam rhs
    
    1474
    +    one_occ _
    
    1475
    +      = False
    
    1469 1476
     
    
    1470 1477
         pre_inline_unconditionally = sePreInline env
    
    1471 1478
         active = isActive (sePhase env) (inlinePragmaActivation inline_prag)
    
    1472 1479
                  -- See Note [pre/postInlineUnconditionally in gentle mode]
    
    1473 1480
         inline_prag = idInlinePragma bndr
    
    1474 1481
     
    
    1475
    --- Be very careful before inlining inside a lambda, because (a) we must not
    
    1476
    --- invalidate occurrence information, and (b) we want to avoid pushing a
    
    1477
    --- single allocation (here) into multiple allocations (inside lambda).
    
    1478
    --- Inlining a *function* with a single *saturated* call would be ok, mind you.
    
    1479
    ---      || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
    
    1480
    ---      where
    
    1481
    ---              is_cheap = exprIsCheap rhs
    
    1482
    ---              ok = is_cheap && int_cxt
    
    1483
    -
    
    1484
    -        --      int_cxt         The context isn't totally boring
    
    1485
    -        -- E.g. let f = \ab.BIG in \y. map f xs
    
    1486
    -        --      Don't want to substitute for f, because then we allocate
    
    1487
    -        --      its closure every time the \y is called
    
    1488
    -        -- But: let f = \ab.BIG in \y. map (f y) xs
    
    1489
    -        --      Now we do want to substitute for f, even though it's not
    
    1490
    -        --      saturated, because we're going to allocate a closure for
    
    1491
    -        --      (f y) every time round the loop anyhow.
    
    1492
    -
    
    1493
    -        -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
    
    1494
    -        -- so substituting rhs inside a lambda doesn't change the occ info.
    
    1495
    -        -- Sadly, not quite the same as exprIsHNF.
    
    1496
    -    canInlineInLam (Lit _)    = True
    
    1497
    -    canInlineInLam (Lam b e)  = isRuntimeVar b || canInlineInLam e
    
    1498
    -    canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
    
    1499
    -    canInlineInLam (Var v)    = case idOccInfo v of
    
    1500
    -                                  OneOcc { occ_in_lam = IsInsideLam } -> True
    
    1501
    -                                  ManyOccs {}                         -> True
    
    1502
    -                                  _                                   -> False
    
    1503
    -    canInlineInLam _          = False
    
    1504
    -      -- not ticks.  Counting ticks cannot be duplicated, and non-counting
    
    1505
    -      -- ticks around a Lam will disappear anyway.
    
    1506
    -
    
    1507
    -    early_phase = sePhase env /= FinalPhase
    
    1482
    +--    early_phase = sePhase env /= FinalPhase
    
    1508 1483
         -- If we don't have this early_phase test, consider
    
    1509 1484
         --      x = length [1,2,3]
    
    1510 1485
         -- The full laziness pass carefully floats all the cons cells to
    
    ... ... @@ -1532,6 +1507,52 @@ preInlineUnconditionally env top_lvl bndr rhs rhs_env
    1532 1507
         -- (Nor can we check for `exprIsExpandable rhs`, because that needs to look
    
    1533 1508
         -- at the non-existent unfolding for the `I# 2#` which is also floated out.)
    
    1534 1509
     
    
    1510
    +
    
    1511
    +-- Be very careful before inlining inside a lambda, because (a) we must not
    
    1512
    +-- invalidate occurrence information, and (b) we want to avoid pushing a
    
    1513
    +-- single allocation (here) into multiple allocations (inside lambda).
    
    1514
    +-- Inlining a *function* with a single *saturated* call would be ok, mind you.
    
    1515
    +--      || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
    
    1516
    +--      where
    
    1517
    +--              is_cheap = exprIsCheap rhs
    
    1518
    +--              ok = is_cheap && int_cxt
    
    1519
    +
    
    1520
    +        --      int_cxt         The context isn't totally boring
    
    1521
    +        -- E.g. let f = \ab.BIG in \y. map f xs
    
    1522
    +        --      Don't want to substitute for f, because then we allocate
    
    1523
    +        --      its closure every time the \y is called
    
    1524
    +        -- But: let f = \ab.BIG in \y. map (f y) xs
    
    1525
    +        --      Now we do want to substitute for f, even though it's not
    
    1526
    +        --      saturated, because we're going to allocate a closure for
    
    1527
    +        --      (f y) every time round the loop anyhow.
    
    1528
    +
    
    1529
    +        -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
    
    1530
    +        -- so substituting rhs inside a lambda doesn't change the occ info.
    
    1531
    +        -- Sadly, not quite the same as exprIsHNF.
    
    1532
    +canInlineInLam ::CoreExpr -> Bool
    
    1533
    +canInlineInLam e
    
    1534
    +  = go e
    
    1535
    +  where
    
    1536
    +    go (Lit _)    = True
    
    1537
    +    go (Lam b e)  = isRuntimeVar b || go e
    
    1538
    +    go (Cast e _) = go e
    
    1539
    +    go (Tick t e) = not (tickishIsCode t) && go e
    
    1540
    +    -- This matters only for:
    
    1541
    +    --     x = y            -- or y|>co
    
    1542
    +    --     f = \p. ..x..    -- One occurrence of x
    
    1543
    +    --     ..y..            -- Multiple other occurrences of y
    
    1544
    +    -- Then it is safe to inline x unconditionally
    
    1545
    +    -- For postInlineUncondionally we have already tested exprIsTrivial
    
    1546
    +    -- so this Var case never arises
    
    1547
    +    go (Var v)    = case idOccInfo v of
    
    1548
    +                       OneOcc { occ_in_lam = IsInsideLam } -> True
    
    1549
    +                       ManyOccs {}                         -> True
    
    1550
    +                       _                                   -> False
    
    1551
    +    go _          = False
    
    1552
    +      -- not ticks.  Counting ticks cannot be duplicated, and non-counting
    
    1553
    +      -- ticks around a Lam will disappear anyway.
    
    1554
    +
    
    1555
    +
    
    1535 1556
     {-
    
    1536 1557
     ************************************************************************
    
    1537 1558
     *                                                                      *
    
    ... ... @@ -1582,71 +1603,77 @@ postInlineUnconditionally
    1582 1603
     -- Reason: we don't want to inline single uses, or discard dead bindings,
    
    1583 1604
     --         for unlifted, side-effect-ful bindings
    
    1584 1605
     postInlineUnconditionally env bind_cxt old_bndr bndr rhs
    
    1585
    -  | not active                  = False
    
    1586
    -  | isWeakLoopBreaker occ_info  = False -- If it's a loop-breaker of any kind, don't inline
    
    1587
    -                                        -- because it might be referred to "earlier"
    
    1588
    -  | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
    
    1589
    -  | isTopLevel (bindContextLevel bind_cxt)
    
    1590
    -                                = False -- Note [Top level and postInlineUnconditionally]
    
    1591
    -  | exprIsTrivial rhs           = True
    
    1592
    -  | BC_Join {} <- bind_cxt      = False -- See point (1) of Note [Duplicating join points]
    
    1606
    +  | not active                   = False
    
    1607
    +  | isWeakLoopBreaker occ_info   = False -- If it's a loop-breaker of any kind, don't inline
    
    1608
    +                                         -- because it might be referred to "earlier"
    
    1609
    +  | isStableUnfolding unfolding  = False -- Note [Stable unfoldings and postInlineUnconditionally]
    
    1610
    +  | BC_Join {} <- bind_cxt       = exprIsTrivial rhs
    
    1611
    +                                        -- See point (DJ1) of Note [Duplicating join points]
    
    1593 1612
                                             --     in GHC.Core.Opt.Simplify.Iteration
    
    1613
    +  | is_top_lvl, isDeadEndId bndr = False -- Note [Top-level bottoming Ids]
    
    1594 1614
       | otherwise
    
    1595 1615
       = case occ_info of
    
    1596
    -      OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt, occ_n_br = n_br }
    
    1597
    -        -- See Note [Inline small things to avoid creating a thunk]
    
    1616
    +      IAmALoopBreaker {} -> False
    
    1617
    +      ManyOccs {} | is_top_lvl -> False  -- Note [Top level and postInlineUnconditionally]
    
    1618
    +                  | otherwise  -> exprIsTrivial rhs
    
    1598 1619
     
    
    1599
    -        | n_br >= 100 -> False  -- See #23627
    
    1620
    +      OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt, occ_n_br = n_br }
    
    1621
    +              | exprIsTrivial rhs -> True
    
    1622
    +              | otherwise         -> check_one_occ in_lam int_cxt n_br
    
    1600 1623
     
    
    1601
    -        | n_br == 1, NotInsideLam <- in_lam  -- One syntactic occurrence
    
    1602
    -        -> True                              -- See Note [Post-inline for single-use things]
    
    1624
    +      IAmDead -> True   -- This happens; for example, the case_bndr during case of
    
    1625
    +                        -- known constructor:  case (a,b) of x { (p,q) -> ... }
    
    1626
    +                        -- Here x isn't mentioned in the RHS, so we don't want to
    
    1627
    +                        -- create the (dead) let-binding  let x = (a,b) in ...
    
    1628
    +  where
    
    1629
    +    is_top_lvl  = isTopLevel (bindContextLevel bind_cxt)
    
    1630
    +    is_demanded = isStrUsedDmd (idDemandInfo bndr)
    
    1631
    +    occ_info    = idOccInfo old_bndr
    
    1632
    +    unfolding   = idUnfolding bndr
    
    1633
    +    arity       = idArity bndr
    
    1634
    +--    is_cheap    = isCheapUnfolding unfolding
    
    1635
    +    uf_opts     = seUnfoldingOpts env
    
    1636
    +    phase       = sePhase env
    
    1637
    +    active      = isActive phase (idInlineActivation bndr)
    
    1638
    +        -- See Note [pre/postInlineUnconditionally in gentle mode]
    
    1603 1639
     
    
    1640
    +    -- Check for code-size blow-up from inlining in multiple places
    
    1641
    +    code_dup_ok n_br
    
    1642
    +      | n_br == 1   = True  -- No duplication
    
    1643
    +      | n_br >= 100 = False -- See #23627
    
    1644
    +      | is_demanded = False -- Demanded => no allocation (it'll be a case expression
    
    1645
    +                            -- in the end) so inlining duplicates code but nothing more
    
    1646
    +      | otherwise   = smallEnoughToInline uf_opts unfolding
    
    1647
    +
    
    1648
    +    -- See Note [Post-inline for single-use things]
    
    1649
    +    check_one_occ NotInsideLam NotInteresting n_br = not is_top_lvl && code_dup_ok n_br
    
    1650
    +    check_one_occ NotInsideLam IsInteresting  n_br = code_dup_ok n_br
    
    1651
    +    check_one_occ IsInsideLam  NotInteresting _    = False
    
    1652
    +    check_one_occ IsInsideLam  IsInteresting  n_br = arity > 0 && code_dup_ok n_br
    
    1653
    +      -- IsInteresting: inlining inside a lambda only with good reason
    
    1654
    +      --    See the notes on int_cxt in preInlineUnconditionally
    
    1655
    +      -- arity>0: do not inline data strutures under lambdas, only functions
    
    1656
    +
    
    1657
    +---------------
    
    1658
    +-- A wrong bit of code, left here in case you are tempted to do this
    
    1604 1659
     --        | is_unlifted                        -- Unlifted binding, hence ok-for-spec
    
    1605 1660
     --        -> True                              -- hence cheap to inline probably just a primop
    
    1606
    ---                                             -- Not a big deal either way
    
    1607 1661
     -- No, this is wrong.  {v = p +# q; x = K v}.
    
    1608 1662
     -- Don't inline v; it'll just get floated out again. Stupid.
    
    1663
    +---------------
    
    1609 1664
     
    
    1610
    -        | is_demanded
    
    1611
    -        -> False                            -- No allocation (it'll be a case expression in the end)
    
    1612
    -                                            -- so inlining duplicates code but nothing more
    
    1613 1665
     
    
    1614
    -        | otherwise
    
    1615
    -        -> work_ok in_lam int_cxt && smallEnoughToInline uf_opts unfolding
    
    1616
    -              -- Multiple syntactic occurences; but lazy, and small enough to dup
    
    1617
    -              -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
    
    1618
    -
    
    1619
    -      IAmDead -> True   -- This happens; for example, the case_bndr during case of
    
    1620
    -                        -- known constructor:  case (a,b) of x { (p,q) -> ... }
    
    1621
    -                        -- Here x isn't mentioned in the RHS, so we don't want to
    
    1622
    -                        -- create the (dead) let-binding  let x = (a,b) in ...
    
    1623
    -
    
    1624
    -      _ -> False
    
    1625
    -
    
    1626
    -  where
    
    1627
    -    work_ok NotInsideLam _              = True
    
    1628
    -    work_ok IsInsideLam  IsInteresting  = isCheapUnfolding unfolding
    
    1629
    -    work_ok IsInsideLam  NotInteresting = False
    
    1630
    -      -- NotInsideLam: outside a lambda, we want to be reasonably aggressive
    
    1631
    -      -- about inlining into multiple branches of case
    
    1666
    +      -- NotInsideLam: outside a lambda, when not at top-level we want to be
    
    1667
    +      -- reasonably aggressive about inlining into multiple branches of case
    
    1632 1668
           -- e.g. let x = <non-value>
    
    1633 1669
           --      in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
    
    1634 1670
           -- Inlining can be a big win if C3 is the hot-spot, even if
    
    1635 1671
           -- the uses in C1, C2 are not 'interesting'
    
    1636 1672
           -- An example that gets worse if you add int_cxt here is 'clausify'
    
    1637 1673
     
    
    1638
    -      -- InsideLam: check for acceptable work duplication, using isCheapUnfoldign
    
    1639
    -      -- int_cxt to prevent us inlining inside a lambda without some
    
    1640
    -      -- good reason.  See the notes on int_cxt in preInlineUnconditionally
    
    1674
    +      -- InsideLam:
    
    1641 1675
     
    
    1642 1676
     --    is_unlifted = isUnliftedType (idType bndr)
    
    1643
    -    is_demanded = isStrUsedDmd (idDemandInfo bndr)
    
    1644
    -    occ_info    = idOccInfo old_bndr
    
    1645
    -    unfolding   = idUnfolding bndr
    
    1646
    -    uf_opts     = seUnfoldingOpts env
    
    1647
    -    phase       = sePhase env
    
    1648
    -    active      = isActive phase (idInlineActivation bndr)
    
    1649
    -        -- See Note [pre/postInlineUnconditionally in gentle mode]
    
    1650 1677
     
    
    1651 1678
     {- Note [Inline small things to avoid creating a thunk]
    
    1652 1679
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -1669,23 +1696,24 @@ where GHC.Driver.CmdLine.$wprocessArgs allocated hugely more.
    1669 1696
     Note [Post-inline for single-use things]
    
    1670 1697
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1671 1698
     If we have
    
    1672
    -
    
    1673 1699
        let x = rhs in ...x...
    
    1674
    -
    
    1675 1700
     and `x` is used exactly once, and not inside a lambda, then we will usually
    
    1676 1701
     preInlineUnconditinally. But we can still get this situation in
    
    1677 1702
     postInlineUnconditionally:
    
    1678
    -
    
    1679 1703
       case K rhs of K x -> ...x....
    
    1680
    -
    
    1681 1704
     Here we'll use `simplAuxBind` to bind `x` to (the already-simplified) `rhs`;
    
    1682 1705
     and `x` is used exactly once.  It's beneficial to inline right away; otherwise
    
    1683 1706
     we risk creating
    
    1684
    -
    
    1685 1707
        let x = rhs in ...x...
    
    1708
    +which will take another iteration of the Simplifier to eliminate.
    
    1686 1709
     
    
    1687
    -which will take another iteration of the Simplifier to eliminate.  We do this in
    
    1688
    -two places
    
    1710
    +A similar, but less frequent, case is
    
    1711
    +    let f = \x.blah in ...(\y. ...(f e)...) ...
    
    1712
    +Again `preInlineUnconditionally will usually inline `f`, but it can arise
    
    1713
    +via `simplAuxBind` if we have something like
    
    1714
    +    (\f \y. ...(f e)..) (\x.blah)
    
    1715
    +
    
    1716
    +We do unconditional post-inlining in two places:
    
    1689 1717
     
    
    1690 1718
     1. In the full `postInlineUnconditionally` look for the special case
    
    1691 1719
        of "one occurrence, not under a lambda", and inline unconditionally then.
    
    ... ... @@ -1714,24 +1742,20 @@ Alas!
    1714 1742
     
    
    1715 1743
     Note [Top level and postInlineUnconditionally]
    
    1716 1744
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1717
    -We don't do postInlineUnconditionally for top-level things (even for
    
    1718
    -ones that are trivial):
    
    1745
    +We must take care when considering postInlineUnconditionally for top-level things
    
    1719 1746
     
    
    1720
    -  * Doing so will inline top-level error expressions that have been
    
    1721
    -    carefully floated out by FloatOut.  More generally, it might
    
    1722
    -    replace static allocation with dynamic.
    
    1747
    +  * Don't inline top-level error expressions that have been carefully floated
    
    1748
    +    out by FloatOut.  See Note [Top-level bottoming Ids].
    
    1723 1749
     
    
    1724
    -  * Even for trivial expressions there's a problem.  Consider
    
    1750
    +  * Even for trivial expressions we need to take care: we must not
    
    1751
    +    postInlineUnconditionally a top-level ManyOccs binder, even if its
    
    1752
    +    RHS is trivial. Consider
    
    1725 1753
           {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
    
    1726 1754
           blah xs = reverse xs
    
    1727 1755
           ruggle = sort
    
    1728
    -    In one simplifier pass we might fire the rule, getting
    
    1756
    +    We must not postInlineUnconditionally `ruggle`, because in the same
    
    1757
    +    simplifier pass we might fire the rule, getting
    
    1729 1758
           blah xs = ruggle xs
    
    1730
    -    but in *that* simplifier pass we must not do postInlineUnconditionally
    
    1731
    -    on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
    
    1732
    -
    
    1733
    -    If the rhs is trivial it'll be inlined by callSiteInline, and then
    
    1734
    -    the binding will be dead and discarded by the next use of OccurAnal
    
    1735 1759
     
    
    1736 1760
       * There is less point, because the main goal is to get rid of local
    
    1737 1761
         bindings used in multiple case branches.
    

  • compiler/GHC/Core/Utils.hs
    ... ... @@ -397,10 +397,12 @@ mkTicks :: [CoreTickish] -> CoreExpr -> CoreExpr
    397 397
     mkTicks ticks expr = foldr mkTick expr ticks
    
    398 398
     
    
    399 399
     isSaturatedConApp :: CoreExpr -> Bool
    
    400
    +-- Also includes literals
    
    400 401
     isSaturatedConApp e = go e []
    
    401 402
       where go (App f a) as = go f (a:as)
    
    402 403
             go (Var fun) args
    
    403 404
                = isConLikeId fun && idArity fun == valArgCount args
    
    405
    +        go (Lit {})   _  = True
    
    404 406
             go (Cast f _) as = go f as
    
    405 407
             go _ _ = False
    
    406 408
     
    

  • libraries/ghc-internal/src/GHC/Internal/Base.hs
    ... ... @@ -1809,7 +1809,12 @@ build g = g (:) []
    1809 1809
     
    
    1810 1810
     augment :: forall a. (forall b. (a->b->b) -> b -> b) -> [a] -> [a]
    
    1811 1811
     {-# INLINE [1] augment #-}
    
    1812
    -augment g xs = g (:) xs
    
    1812
    +-- Give it one argument so that it inlines with one arg
    
    1813
    +-- But (crucially) the body is a lambda so that `g` is visibly applied
    
    1814
    +-- to two args, and hence we know that in a call
    
    1815
    +--      augment (\c n. blah)
    
    1816
    +-- both c and n are OneShot
    
    1817
    +augment g = \xs -> g (:) xs
    
    1813 1818
     
    
    1814 1819
     {-# RULES
    
    1815 1820
     "fold/build"    forall k z (g::forall b. (a->b->b) -> b -> b) .
    
    ... ... @@ -1975,7 +1980,7 @@ The rules for map work like this.
    1975 1980
     "++/literal_utf8" forall x. (++) (unpackCStringUtf8# x) = unpackAppendCStringUtf8# x #-}
    
    1976 1981
     
    
    1977 1982
     {-# RULES
    
    1978
    -"++"    [~1] forall xs ys. xs ++ ys = augment (\c n -> foldr c n xs) ys
    
    1983
    +"++"    [~1] forall xs. (++) xs = augment (\c n -> foldr c n xs)
    
    1979 1984
       #-}
    
    1980 1985
     
    
    1981 1986
     
    

  • libraries/ghc-internal/src/GHC/Internal/Float.hs
    ... ... @@ -5,6 +5,7 @@
    5 5
                , MagicHash
    
    6 6
                , UnboxedTuples
    
    7 7
                , UnliftedFFITypes
    
    8
    +           , TypeApplications
    
    8 9
       #-}
    
    9 10
     {-# LANGUAGE CApiFFI #-}
    
    10 11
     -- We believe we could deorphan this module, by moving lots of things
    
    ... ... @@ -1696,6 +1697,16 @@ showSignedFloat showPos p x
    1696 1697
            = showParen (p > 6) (showChar '-' . showPos (-x))
    
    1697 1698
        | otherwise = showPos x
    
    1698 1699
     
    
    1700
    +
    
    1701
    +-- Speicialise showSignedFloat for (a) the type and (b) the argument function
    
    1702
    +-- The particularly targets are the calls in `instance Show Float` and
    
    1703
    +--     `instance Show Double`
    
    1704
    +-- Specialising for both (a) and (b) is obviously more efficient; and if you
    
    1705
    +-- don't you find that the `x` argument is strict, but boxed, and that can cause
    
    1706
    +-- functions calling showSignedFloat to have box their argument.
    
    1707
    +{-# SPECIALISE showSignedFloat @Float  showFloat #-}
    
    1708
    +{-# SPECIALISE showSignedFloat @Double showFloat #-}
    
    1709
    +
    
    1699 1710
     {-
    
    1700 1711
     We need to prevent over/underflow of the exponent in encodeFloat when
    
    1701 1712
     called from scaleFloat, hence we clamp the scaling parameter.
    

  • testsuite/tests/simplCore/should_run/simplrun009.hs
    ... ... @@ -6,7 +6,10 @@
    6 6
     -- It produces a nested unfold that should look something
    
    7 7
     -- like the code below.  Note the 'lvl1_shW'.  It is BAD
    
    8 8
     -- if this is a lambda instead; you get a lot more allocation
    
    9
    --- See Note [Saving allocation] in GHC.Core.Opt.SetLevels
    
    9
    +--
    
    10
    +-- LATER (2025): But in the end it seems better NOT to float lambdas,
    
    11
    +-- unless they go to top level.
    
    12
    +-- See (SW2) in Note [Saving work] in GHC.Core.Opt.SetLevels
    
    10 13
     
    
    11 14
     
    
    12 15
     {-