Simon Peyton Jones pushed to branch wip/T23109a at Glasgow Haskell Compiler / GHC

Commits:

2 changed files:

Changes:

  • compiler/GHC/Core/Opt/SetLevels.hs
    ... ... @@ -482,14 +482,14 @@ Consider this:
    482 482
       f :: T Int -> blah
    
    483 483
       f x vs = case x of { MkT y ->
    
    484 484
                  let f vs = ...(case y of I# w -> e)...f..
    
    485
    -             in f vs
    
    485
    +             in f vs }
    
    486 486
     
    
    487 487
     Here we can float the (case y ...) out, because y is sure
    
    488 488
     to be evaluated, to give
    
    489 489
       f x vs = case x of { MkT y ->
    
    490
    -           case y of I# w ->
    
    490
    +           case y of { I# w ->
    
    491 491
                  let f vs = ...(e)...f..
    
    492
    -             in f vs
    
    492
    +             in f vs }}
    
    493 493
     
    
    494 494
     That saves unboxing it every time round the loop.  It's important in
    
    495 495
     some DPH stuff where we really want to avoid that repeated unboxing in
    
    ... ... @@ -614,7 +614,8 @@ lvlMFE env strict_ctxt e@(_, AnnCase {})
    614 614
       = lvlExpr env e     -- See Note [Case MFEs]
    
    615 615
     
    
    616 616
     lvlMFE env strict_ctxt ann_expr
    
    617
    -  | not float_me
    
    617
    +  |  notWorthFloating expr abs_vars
    
    618
    +  || not float_me
    
    618 619
       || floatTopLvlOnly env && not (isTopLvl dest_lvl)
    
    619 620
              -- Only floating to the top level is allowed.
    
    620 621
       || hasFreeJoin env fvs   -- If there is a free join, don't float
    
    ... ... @@ -623,9 +624,6 @@ lvlMFE env strict_ctxt ann_expr
    623 624
              -- We can't let-bind an expression if we don't know
    
    624 625
              -- how it will be represented at runtime.
    
    625 626
              -- See Note [Representation polymorphism invariants] in GHC.Core
    
    626
    -  || notWorthFloating expr abs_vars
    
    627
    -         -- Test notWorhtFloating last;
    
    628
    -         -- See Note [Large free-variable sets]
    
    629 627
       = -- Don't float it out
    
    630 628
         lvlExpr env ann_expr
    
    631 629
     
    
    ... ... @@ -676,12 +674,11 @@ lvlMFE env strict_ctxt ann_expr
    676 674
         is_function  = isFunction ann_expr
    
    677 675
         mb_bot_str   = exprBotStrictness_maybe expr
    
    678 676
                                -- See Note [Bottoming floats]
    
    679
    -                           -- esp Bottoming floats (2)
    
    677
    +                           -- esp Bottoming floats (BF2)
    
    680 678
         expr_ok_for_spec = exprOkForSpeculation expr
    
    681 679
         abs_vars = abstractVars dest_lvl env fvs
    
    682 680
         dest_lvl = destLevel env fvs fvs_ty is_function is_bot_lam
    
    683
    -               -- NB: is_bot_lam not is_bot; see (3) in
    
    684
    -               --     Note [Bottoming floats]
    
    681
    +               -- NB: is_bot_lam not is_bot; see (BF2) in Note [Bottoming floats]
    
    685 682
     
    
    686 683
         -- float_is_new_lam: the floated thing will be a new value lambda
    
    687 684
         -- replacing, say (g (x+4)) by (lvl x).  No work is saved, nor is
    
    ... ... @@ -698,20 +695,22 @@ lvlMFE env strict_ctxt ann_expr
    698 695
     
    
    699 696
             -- A decision to float entails let-binding this thing, and we only do
    
    700 697
             -- that if we'll escape a value lambda, or will go to the top level.
    
    698
    +        -- Never float trivial expressions;
    
    699
    +        --   notably, save_work might be true of a lone evaluated variable.
    
    701 700
         float_me = saves_work || saves_alloc || is_mk_static
    
    702 701
     
    
    703 702
         -- See Note [Saving work]
    
    703
    +    is_hnf = exprIsHNF expr
    
    704 704
         saves_work = escapes_value_lam        -- (a)
    
    705
    -                 && not (exprIsHNF expr)  -- (b)
    
    705
    +                 && not is_hnf            -- (b)
    
    706 706
                      && not float_is_new_lam  -- (c)
    
    707 707
         escapes_value_lam = dest_lvl `ltMajLvl` (le_ctxt_lvl env)
    
    708 708
     
    
    709
    -    -- See Note [Saving allocation] and Note [Floating to the top]
    
    710
    -    saves_alloc =  isTopLvl dest_lvl
    
    711
    -                && floatConsts env
    
    712
    -                && (   not strict_ctxt                     -- (a)
    
    713
    -                    || exprIsHNF expr                      -- (b)
    
    714
    -                    || (is_bot_lam && escapes_value_lam))  -- (c)
    
    709
    +    -- See Note [Floating to the top]
    
    710
    +    saves_alloc = isTopLvl dest_lvl
    
    711
    +               && (  (floatConsts env &&
    
    712
    +                       (not strict_ctxt || is_hnf))     -- (FT1) and (FT2)
    
    713
    +                  || (is_bot_lam && escapes_value_lam)) -- (FT3)
    
    715 714
     
    
    716 715
     hasFreeJoin :: LevelEnv -> DVarSet -> Bool
    
    717 716
     -- Has a free join point which is not being floated to top level.
    
    ... ... @@ -726,7 +725,7 @@ hasFreeJoin env fvs
    726 725
     The key idea in let-floating is to
    
    727 726
       * float a redex out of a (value) lambda
    
    728 727
     Doing so can save an unbounded amount of work.
    
    729
    -But see also Note [Saving allocation].
    
    728
    +But see also Note [Floating to the top].
    
    730 729
     
    
    731 730
     So we definitely float an expression out if
    
    732 731
     (a) It will escape a value lambda (escapes_value_lam)
    
    ... ... @@ -771,10 +770,12 @@ Wrinkles:
    771 770
           we have saved nothing: one pair will still be allocated for each
    
    772 771
           call of `f`.  Hence the (not float_is_new_lam) in saves_work.
    
    773 772
     
    
    774
    -Note [Saving allocation]
    
    775
    -~~~~~~~~~~~~~~~~~~~~~~~~
    
    773
    +Note [Floating to the top]
    
    774
    +~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    776 775
     Even if `saves_work` is false, we we may want to float even cheap/HNF
    
    777
    -expressions out of value lambdas, for several reasons:
    
    776
    +expressions out of value lambdas. Data suggests, however, that it is better
    
    777
    +/only/ to do so, /if/ they can go to top level. If the expression goes to top
    
    778
    +level we don't pay the cost of allocating cold-path thunks described in (SW2).
    
    778 779
     
    
    779 780
     * Doing so may save allocation. Consider
    
    780 781
             f = \x.  .. (\y.e) ...
    
    ... ... @@ -782,6 +783,11 @@ expressions out of value lambdas, for several reasons:
    782 783
       (assuming e does not mention x). An example where this really makes a
    
    783 784
       difference is simplrun009.
    
    784 785
     
    
    786
    +* In principle this would be true even if the (\y.e) didn't go to top level; but
    
    787
    +  in practice we only float a HNF if it goes all way to the top.  We don't pay
    
    788
    +  /any/ allocation cost for a top-level floated expression; it just becomes
    
    789
    +  static data.
    
    790
    +
    
    785 791
     * It may allow SpecContr to fire on functions. Consider
    
    786 792
             f = \x. ....(f (\y.e))....
    
    787 793
       After floating we get
    
    ... ... @@ -793,21 +799,7 @@ expressions out of value lambdas, for several reasons:
    793 799
       a big difference for string literals and bottoming expressions: see Note
    
    794 800
       [Floating to the top]
    
    795 801
     
    
    796
    -Data suggests, however, that it is better /only/ to float HNFS, /if/ they can go
    
    797
    -to top level. See (SW2) of Note [Saving work].  If the expression goes to top
    
    798
    -level we don't pay the cost of allocating cold-path thunks described in (SW2).
    
    799
    -
    
    800
    -Hence `isTopLvl dest_lvl` in `saves_alloc`.
    
    801
    -
    
    802
    -Note [Floating to the top]
    
    803
    -~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    804
    -Even though Note [Saving allocation] suggests that we should not, in
    
    805
    -general, float HNFs, the balance change if it goes to the top:
    
    806
    -
    
    807
    -* We don't pay an allocation cost for the floated expression; it
    
    808
    -  just becomes static data.
    
    809
    -
    
    810
    -* Floating string literal is valuable -- no point in duplicating the
    
    802
    +* Floating string literals is valuable -- no point in duplicating the
    
    811 803
       at each call site!
    
    812 804
     
    
    813 805
     * Floating bottoming expressions is valuable: they are always cold
    
    ... ... @@ -815,32 +807,32 @@ general, float HNFs, the balance change if it goes to the top:
    815 807
       can be quite big, inhibiting inlining. See Note [Bottoming floats]
    
    816 808
     
    
    817 809
     So we float an expression to the top if:
    
    818
    -  (a) the context is lazy (so we get allocation), or
    
    819
    -  (b) the expression is a HNF (so we get allocation), or
    
    820
    -  (c) the expression is bottoming and floating would escape a
    
    821
    -      value lambda (NB: if the expression itself is a lambda, (b)
    
    822
    -      will apply; so this case only catches bottoming thunks)
    
    810
    +  (FT1) the context is lazy (so we get allocation), or
    
    811
    +  (FT2) the expression is a HNF (so we get allocation), or
    
    812
    +  (FT3) the expression is bottoming and floating would escape a
    
    813
    +         value lambda (NB: if the expression itself is a lambda, (b)
    
    814
    +         will apply; so this case only catches bottoming thunks)
    
    823 815
     
    
    824 816
     Examples:
    
    825 817
     
    
    826
    -* (a) Strict.  Case scrutinee
    
    818
    +* (FT1) Strict.  Case scrutinee
    
    827 819
           f = case g True of ....
    
    828 820
       Don't float (g True) to top level; then we have the admin of a
    
    829 821
       top-level thunk to worry about, with zero gain.
    
    830 822
     
    
    831
    -* (a) Strict.  Case alternative
    
    823
    +* (FT1) Strict.  Case alternative
    
    832 824
           h = case y of
    
    833 825
                  True  -> g True
    
    834 826
                  False -> False
    
    835 827
       Don't float (g True) to the top level
    
    836 828
     
    
    837
    -* (b) HNF
    
    829
    +* (FT2) HNF
    
    838 830
           f = case y of
    
    839 831
                 True  -> p:q
    
    840 832
                 False -> blah
    
    841 833
       We may as well float the (p:q) so it becomes a static data structure.
    
    842 834
     
    
    843
    -* (c) Bottoming expressions; see also Note [Bottoming floats]
    
    835
    +* (FT3) Bottoming expressions; see also Note [Bottoming floats]
    
    844 836
           f x = case x of
    
    845 837
                   0 -> error <big thing>
    
    846 838
                   _ -> x+1
    
    ... ... @@ -853,7 +845,7 @@ Examples:
    853 845
       'foo' anyway.  So float bottoming things only if they escape
    
    854 846
       a lambda.
    
    855 847
     
    
    856
    -* Arguments
    
    848
    +* (FT4) Arguments
    
    857 849
          t = f (g True)
    
    858 850
       Prior to Apr 22 we didn't float (g True) to the top if f was strict.
    
    859 851
       But (a) this only affected CAFs, because if it escapes a value lambda
    
    ... ... @@ -868,28 +860,6 @@ early loses opportunities for RULES which (needless to say) are
    868 860
     important in some nofib programs (gcd is an example).  [SPJ note:
    
    869 861
     I think this is obsolete; the flag seems always on.]
    
    870 862
     
    
    871
    -Note [Large free-variable sets]
    
    872
    -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    873
    -In #24471 we had something like
    
    874
    -     x1 = I# 1
    
    875
    -     ...
    
    876
    -     x1000 = I# 1000
    
    877
    -     foo = f x1 (f x2 (f x3 ....))
    
    878
    -So every sub-expression in `foo` has lots and lots of free variables.  But
    
    879
    -none of these sub-expressions float anywhere; the entire float-out pass is a
    
    880
    -no-op.
    
    881
    -
    
    882
    -In lvlMFE, we want to find out quickly if the MFE is not-floatable; that is
    
    883
    -the common case.  In #24471 it turned out that we were testing `abs_vars` (a
    
    884
    -relatively complicated calculation that takes at least O(n-free-vars) time to
    
    885
    -compute) for every sub-expression.
    
    886
    -
    
    887
    -Better instead to test `float_me` early. That still involves looking at
    
    888
    -dest_lvl, which means looking at every free variable, but the constant factor
    
    889
    -is a lot better.
    
    890
    -
    
    891
    -ToDo: find a way to fix the bad asymptotic complexity.
    
    892
    -
    
    893 863
     Note [Floating join point bindings]
    
    894 864
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    895 865
     Mostly we don't float join points at all -- we want them to /stay/ join points.
    
    ... ... @@ -1053,30 +1023,36 @@ we'd like to float the call to error, to get
    1053 1023
     
    
    1054 1024
     But, as ever, we need to be careful:
    
    1055 1025
     
    
    1056
    -(1) We want to float a bottoming
    
    1026
    +(BF1) We want to float a bottoming
    
    1057 1027
         expression even if it has free variables:
    
    1058 1028
             f = \x. g (let v = h x in error ("urk" ++ v))
    
    1059 1029
         Then we'd like to abstract over 'x', and float the whole arg of g:
    
    1060 1030
             lvl = \x. let v = h x in error ("urk" ++ v)
    
    1061 1031
             f = \x. g (lvl x)
    
    1062
    -    To achieve this we pass is_bot to destLevel
    
    1063
    -
    
    1064
    -(2) We do not do this for lambdas that return
    
    1065
    -    bottom.  Instead we treat the /body/ of such a function specially,
    
    1066
    -    via point (1).  For example:
    
    1032
    +    To achieve this we pass `is_bot` to destLevel
    
    1033
    +
    
    1034
    +(BF2) We do the same for /lambdas/ that return bottom.
    
    1035
    +    Suppose the original lambda had /no/ free vars:
    
    1036
    +        f = \x. ....(\y z. error (y++z))...
    
    1037
    +    then we'd like to float that whole lambda
    
    1038
    +        lvl = \y z. error (y++z)
    
    1039
    +        f = \x. ....lvl....
    
    1040
    +    If we just floated its bottom-valued body, we might abstract the arguments in
    
    1041
    +    the "wrong" order and end up with this bad result
    
    1042
    +        lvl = \z y. error (y++z)
    
    1043
    +        f = \x. ....(\y z. lvl z y)....
    
    1044
    +
    
    1045
    +    If the lambda does have free vars, this will happen:
    
    1067 1046
             f = \x. ....(\y z. if x then error y else error z)....
    
    1068
    -    If we float the whole lambda thus
    
    1047
    +    We float the whole lambda thus
    
    1069 1048
             lvl = \x. \y z. if x then error y else error z
    
    1070 1049
             f = \x. ...(lvl x)...
    
    1071
    -    we may well end up eta-expanding that PAP to
    
    1050
    +    And we may well end up eta-expanding that PAP to
    
    1051
    +        lvl = \x. \y z. if b then error y else error z
    
    1072 1052
             f = \x. ...(\y z. lvl x y z)...
    
    1053
    +    so we get a (small) closure.  So be it.
    
    1073 1054
     
    
    1074
    -    ===>
    
    1075
    -        lvl = \x z y. if b then error y else error z
    
    1076
    -        f = \x. ...(\y z. lvl x z y)...
    
    1077
    -    (There is no guarantee that we'll choose the perfect argument order.)
    
    1078
    -
    
    1079
    -(3) If we have a /binding/ that returns bottom, we want to float it to top
    
    1055
    +(BF3) If we have a /binding/ that returns bottom, we want to float it to top
    
    1080 1056
         level, even if it has free vars (point (1)), and even it has lambdas.
    
    1081 1057
         Example:
    
    1082 1058
            ... let { v = \y. error (show x ++ show y) } in ...
    
    ... ... @@ -1092,7 +1068,6 @@ But, as ever, we need to be careful:
    1092 1068
         join points (#24768), and floating to the top would abstract over those join
    
    1093 1069
         points, which we should never do.
    
    1094 1070
     
    
    1095
    -
    
    1096 1071
     See Maessen's paper 1999 "Bottom extraction: factoring error handling out
    
    1097 1072
     of functional programs" (unpublished I think).
    
    1098 1073
     
    
    ... ... @@ -1135,7 +1110,6 @@ float the case (as advocated here) we won't float the (build ...y..)
    1135 1110
     either, so fusion will happen.  It can be a big effect, esp in some
    
    1136 1111
     artificial benchmarks (e.g. integer, queens), but there is no perfect
    
    1137 1112
     answer.
    
    1138
    -
    
    1139 1113
     -}
    
    1140 1114
     
    
    1141 1115
     annotateBotStr :: Id -> Arity -> Maybe (Arity, DmdSig, CprSig) -> Id
    
    ... ... @@ -1152,69 +1126,124 @@ annotateBotStr id n_extra mb_bot_str
    1152 1126
       = id
    
    1153 1127
     
    
    1154 1128
     notWorthFloating :: CoreExpr -> [Var] -> Bool
    
    1155
    --- Returns True if the expression would be replaced by
    
    1156
    --- something bigger than it is now.  For example:
    
    1157
    ---   abs_vars = tvars only:  return True if e is trivial,
    
    1158
    ---                           but False for anything bigger
    
    1159
    ---   abs_vars = [x] (an Id): return True for trivial, or an application (f x)
    
    1160
    ---                           but False for (f x x)
    
    1161
    ---
    
    1162
    --- One big goal is that floating should be idempotent.  Eg if
    
    1163
    --- we replace e with (lvl79 x y) and then run FloatOut again, don't want
    
    1164
    --- to replace (lvl79 x y) with (lvl83 x y)!
    
    1165
    -
    
    1129
    +--  See Note [notWorthFloating]
    
    1166 1130
     notWorthFloating e abs_vars
    
    1167
    -  = go e (count isId abs_vars)
    
    1131
    +  = go e 0
    
    1168 1132
       where
    
    1169
    -    go (Var {}) n               = n >= 0
    
    1170
    -    go (Lit lit) n              = assert (n==0) $
    
    1171
    -                                  litIsTrivial lit   -- Note [Floating literals]
    
    1172
    -    go (Type {}) _              = True
    
    1173
    -    go (Coercion {}) _          = True
    
    1133
    +    n_abs_vars = count isId abs_vars  -- See (NWF5)
    
    1134
    +
    
    1135
    +    go :: CoreExpr -> Int -> Bool
    
    1136
    +    -- (go e n) return True if (e x1 .. xn) is not worth floating
    
    1137
    +    -- where `e` has n trivial value arguments x1..xn
    
    1138
    +    -- See (NWF4)
    
    1139
    +    go (Lit lit) n         = assert (n==0) $
    
    1140
    +                             litIsTrivial lit   -- See (NWF1)
    
    1141
    +    go (Type {}) _         = True
    
    1142
    +    go (Tick t e) n        = not (tickishIsCode t) && go e n
    
    1143
    +    go (Cast e _) n        = n==0 || go e n     -- See (NWF3)
    
    1144
    +    go (Coercion {}) _     = True
    
    1174 1145
         go (App e arg) n
    
    1175
    -       -- See Note [Floating applications to coercions]
    
    1176
    -       | not (isRuntimeArg arg) = go e n
    
    1177
    -       | n==0                   = False
    
    1178
    -       | exprIsTrivial arg      = go e (n-1) -- NB: exprIsTrivial arg = go arg 0
    
    1179
    -       | otherwise              = False
    
    1180
    -    go (Tick t e) n             = not (tickishIsCode t) && go e n
    
    1181
    -    go (Cast e _)  n            = go e n
    
    1182
    -    go (Case e b _ as) n
    
    1146
    +       | Type {} <- arg    = go e n    -- Just types, not coercions (NWF2)
    
    1147
    +       | exprIsTrivial arg = go e (n+1)
    
    1148
    +       | otherwise         = False  -- (f non-triv) is worth floating
    
    1149
    +
    
    1150
    +    go (Case e b _ as) _
    
    1151
    +      -- Do not float the `case` part of trivial cases (NWF3)
    
    1152
    +      -- We'll have a look at the RHS when we get there
    
    1183 1153
           | null as
    
    1184
    -      = go e n     -- See Note [Empty case is trivial]
    
    1185
    -      | Just rhs <- isUnsafeEqualityCase e b as
    
    1186
    -      = go rhs n   -- See (U2) of Note [Implementing unsafeCoerce] in base:Unsafe.Coerce
    
    1187
    -    go _ _                      = False
    
    1154
    +      = True   -- See Note [Empty case is trivial]
    
    1155
    +      | Just {} <- isUnsafeEqualityCase e b as
    
    1156
    +      = True   -- See (U2) of Note [Implementing unsafeCoerce] in base:Unsafe.Coerce
    
    1157
    +      | otherwise
    
    1158
    +      = False
    
    1188 1159
     
    
    1189
    -{-
    
    1190
    -Note [Floating literals]
    
    1191
    -~~~~~~~~~~~~~~~~~~~~~~~~
    
    1192
    -It's important to float Integer literals, so that they get shared,
    
    1193
    -rather than being allocated every time round the loop.
    
    1194
    -Hence the litIsTrivial.
    
    1160
    +    go (Var _) n
    
    1161
    +      | n==0             = True   -- Naked variable
    
    1162
    +      | n <= n_abs_vars  = True   -- (f a b c) is not worth floating if
    
    1163
    +      | otherwise        = False  -- a,b,c are all abstracted; see (NWF5)
    
    1195 1164
     
    
    1196
    -Ditto literal strings (LitString), which we'd like to float to top
    
    1197
    -level, which is now possible.
    
    1165
    +    go _ _ = False  -- Let etc is worth floating
    
    1198 1166
     
    
    1199
    -Note [Floating applications to coercions]
    
    1200
    -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1201
    -We don’t float out variables applied only to type arguments, since the
    
    1202
    -extra binding would be pointless: type arguments are completely erased.
    
    1203
    -But *coercion* arguments aren’t (see Note [Coercion tokens] in
    
    1204
    -"GHC.CoreToStg" and Note [inlineBoringOk] in"GHC.Core.Unfold"),
    
    1205
    -so we still want to float out variables applied only to
    
    1206
    -coercion arguments.
    
    1167
    +{- Note [notWorthFloating]
    
    1168
    +~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1169
    +`notWorthFloating` returns True if the expression would be replaced by something
    
    1170
    +bigger than it is now.  One big goal is that floating should be idempotent.  Eg
    
    1171
    +if we replace e with (lvl79 x y) and then run FloatOut again, don't want to
    
    1172
    +replace (lvl79 x y) with (lvl83 x y)!
    
    1207 1173
     
    
    1174
    +For example:
    
    1175
    +  abs_vars = tvars only:  return True if e is trivial,
    
    1176
    +                          but False for anything bigger
    
    1177
    +  abs_vars = [x] (an Id): return True for trivial, or an application (f x)
    
    1178
    +                          but False for (f x x)
    
    1179
    +
    
    1180
    +(NWF1) It's important to float Integer literals, so that they get shared, rather
    
    1181
    +  than being allocated every time round the loop.  Hence the litIsTrivial.
    
    1182
    +
    
    1183
    +  Ditto literal strings (LitString), which we'd like to float to top
    
    1184
    +  level, which is now possible.
    
    1185
    +
    
    1186
    +(NWF2) We don’t float out variables applied only to type arguments, since the
    
    1187
    +  extra binding would be pointless: type arguments are completely erased.
    
    1188
    +  But *coercion* arguments aren’t (see Note [Coercion tokens] in
    
    1189
    +  "GHC.CoreToStg" and Note [inlineBoringOk] in"GHC.Core.Unfold"),
    
    1190
    +  so we still want to float out variables applied only to
    
    1191
    +  coercion arguments.
    
    1192
    +
    
    1193
    +(NWF3) Some expressions have trivial wrappers:
    
    1194
    +     - Casts (e |> co)
    
    1195
    +     - Unary-class applications:
    
    1196
    +          - Dictionary applications (MkC meth)
    
    1197
    +          - Class-op applictions    (op dict)
    
    1198
    +     - Case of empty alts
    
    1199
    +     - Unsafe-equality case
    
    1200
    +  In all these cases we say "not worth floating", and we do so /regardless/
    
    1201
    +  of the wrapped expression.  The SetLevels stuff may subsequently float the
    
    1202
    +  components of the expression.
    
    1203
    +
    
    1204
    +  Example:  is it worth floating (f x |> co)?  No!  If we did we'd get
    
    1205
    +     lvl = f x |> co
    
    1206
    +     ...lvl....
    
    1207
    +  Then we'd do cast worker/wrapper and end up with.
    
    1208
    +     lvl' = f x
    
    1209
    +     ...(lvl' |> co)...
    
    1210
    +  Silly!  Better not to float it in the first place.  If we say "no" here,
    
    1211
    +  we'll subsequently say "yes" for (f x) and get
    
    1212
    +     lvl = f x
    
    1213
    +     ....(lvl |> co)...
    
    1214
    +  which is what we want.  In short: don't float trivial wrappers.
    
    1215
    +
    
    1216
    +(NWF4) The only non-trivial expression that we say "not worth floating" for
    
    1217
    +  is an application
    
    1218
    +             f x y z
    
    1219
    +  where the number of value arguments is <= the number of abstracted Ids.
    
    1220
    +  This is what makes floating idempotent.  Hence counting the number of
    
    1221
    +  value arguments in `go`
    
    1222
    +
    
    1223
    +(NWF5) In #24471 we had something like
    
    1224
    +     x1 = I# 1
    
    1225
    +     ...
    
    1226
    +     x1000 = I# 1000
    
    1227
    +     foo = f x1 (f x2 (f x3 ....))
    
    1228
    +  So every sub-expression in `foo` has lots and lots of free variables.  But
    
    1229
    +  none of these sub-expressions float anywhere; the entire float-out pass is a
    
    1230
    +  no-op.
    
    1208 1231
     
    
    1209
    -************************************************************************
    
    1210
    -*                                                                      *
    
    1211
    -\subsection{Bindings}
    
    1212
    -*                                                                      *
    
    1213
    -************************************************************************
    
    1232
    +  So `notWorthFloating` tries to avoid evaluating `n_abs_vars`, in cases where
    
    1233
    +  it obviously /is/ worth floating.  (In #24471 it turned out that we were
    
    1234
    +  testing `abs_vars` (a relatively complicated calculation that takes at least
    
    1235
    +  O(n-free-vars) time to compute) for every sub-expression.)
    
    1214 1236
     
    
    1215
    -The binding stuff works for top level too.
    
    1237
    +  Hence testing `n_abs_vars only` at the very end.
    
    1216 1238
     -}
    
    1217 1239
     
    
    1240
    +{- *********************************************************************
    
    1241
    +*                                                                      *
    
    1242
    +                       Bindings
    
    1243
    +        This binding stuff works for top level too.
    
    1244
    +*                                                                      *
    
    1245
    +********************************************************************* -}
    
    1246
    +
    
    1218 1247
     lvlBind :: LevelEnv
    
    1219 1248
             -> CoreBindWithFVs
    
    1220 1249
             -> LvlM (LevelledBind, LevelEnv)
    
    ... ... @@ -1261,7 +1290,7 @@ lvlBind env (AnnNonRec bndr rhs)
    1261 1290
             -- is_bot_lam: looks like (\xy. bot), maybe zero lams
    
    1262 1291
             -- NB: not isBottomThunk!
    
    1263 1292
             -- NB: not is_join: don't send bottoming join points to the top.
    
    1264
    -        -- See Note [Bottoming floats] point (3)
    
    1293
    +        -- See Note [Bottoming floats] (BF3)
    
    1265 1294
     
    
    1266 1295
         is_top_bindable = exprIsTopLevelBindable deann_rhs bndr_ty
    
    1267 1296
         n_extra       = count isId abs_vars
    
    ... ... @@ -1552,9 +1581,8 @@ destLevel env fvs fvs_ty is_function is_bot
    1552 1581
                                   -- See Note [Floating join point bindings]
    
    1553 1582
       = tOP_LEVEL
    
    1554 1583
     
    
    1555
    -  | is_bot              -- Send bottoming bindings to the top
    
    1556
    -  = as_far_as_poss      -- regardless; see Note [Bottoming floats]
    
    1557
    -                        -- Esp Bottoming floats (1) and (3)
    
    1584
    +  | is_bot              -- Send bottoming bindings to the top regardless;
    
    1585
    +  = as_far_as_poss      -- see (BF1) and (BF2) in Note [Bottoming floats]
    
    1558 1586
     
    
    1559 1587
       | Just n_args <- floatLams env
    
    1560 1588
       , n_args > 0  -- n=0 case handled uniformly by the 'otherwise' case
    
    ... ... @@ -1568,8 +1596,13 @@ destLevel env fvs fvs_ty is_function is_bot
    1568 1596
         max_fv_id_level = maxFvLevel isId env fvs -- Max over Ids only; the
    
    1569 1597
                                                   -- tyvars will be abstracted
    
    1570 1598
     
    
    1599
    +    -- as_far_as_poss: destination level depends only on the free Ids (more
    
    1600
    +    -- precisely, free CoVars) of the /type/, not the free Ids of the /term/.
    
    1601
    +    -- Why worry about the free CoVars?  See Note [Floating and kind casts]
    
    1602
    +    --
    
    1603
    +    -- There may be free Ids in the term, but then we'll just
    
    1604
    +    -- lambda-abstract over them
    
    1571 1605
         as_far_as_poss = maxFvLevel' isId env fvs_ty
    
    1572
    -                     -- See Note [Floating and kind casts]
    
    1573 1606
     
    
    1574 1607
     {- Note [Floating and kind casts]
    
    1575 1608
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -1732,10 +1765,9 @@ maxFvLevel max_me env var_set
    1732 1765
         -- It's OK to use a non-deterministic fold here because maxIn commutes.
    
    1733 1766
     
    
    1734 1767
     maxFvLevel' :: (Var -> Bool) -> LevelEnv -> TyCoVarSet -> Level
    
    1735
    --- Same but for TyCoVarSet
    
    1768
    +-- Precisely the same as `maxFvLevel` but for TyCoVarSet rather than DVarSet
    
    1736 1769
     maxFvLevel' max_me env var_set
    
    1737 1770
       = nonDetStrictFoldUniqSet (maxIn max_me env) tOP_LEVEL var_set
    
    1738
    -    -- It's OK to use a non-deterministic fold here because maxIn commutes.
    
    1739 1771
     
    
    1740 1772
     maxIn :: (Var -> Bool) -> LevelEnv -> InVar -> Level -> Level
    
    1741 1773
     maxIn max_me (LE { le_lvl_env = lvl_env, le_env = id_env }) in_var lvl
    

  • compiler/GHC/Core/Opt/Simplify/Iteration.hs
    ... ... @@ -801,9 +801,9 @@ makeTrivial env top_lvl dmd occ_fs expr
    801 801
       = return (emptyLetFloats, expr)
    
    802 802
     
    
    803 803
       | not (bindingOk top_lvl expr expr_ty)  -- Cannot trivialise
    
    804
    -  = return (emptyLetFloats, expr)         --   See Note [Cannot trivialise]
    
    804
    +  = return (emptyLetFloats, expr)         -- See Note [Cannot trivialise]
    
    805 805
     
    
    806
    -  | otherwise -- 'expr' is not of form (Cast e co)
    
    806
    +  | otherwise
    
    807 807
       = do  { (floats, expr1) <- prepareRhs env top_lvl occ_fs expr
    
    808 808
             ; uniq <- getUniqueM
    
    809 809
             ; let name = mkSystemVarName uniq occ_fs