... |
... |
@@ -482,14 +482,14 @@ Consider this: |
482
|
482
|
f :: T Int -> blah
|
483
|
483
|
f x vs = case x of { MkT y ->
|
484
|
484
|
let f vs = ...(case y of I# w -> e)...f..
|
485
|
|
- in f vs
|
|
485
|
+ in f vs }
|
486
|
486
|
|
487
|
487
|
Here we can float the (case y ...) out, because y is sure
|
488
|
488
|
to be evaluated, to give
|
489
|
489
|
f x vs = case x of { MkT y ->
|
490
|
|
- case y of I# w ->
|
|
490
|
+ case y of { I# w ->
|
491
|
491
|
let f vs = ...(e)...f..
|
492
|
|
- in f vs
|
|
492
|
+ in f vs }}
|
493
|
493
|
|
494
|
494
|
That saves unboxing it every time round the loop. It's important in
|
495
|
495
|
some DPH stuff where we really want to avoid that repeated unboxing in
|
... |
... |
@@ -614,7 +614,8 @@ lvlMFE env strict_ctxt e@(_, AnnCase {}) |
614
|
614
|
= lvlExpr env e -- See Note [Case MFEs]
|
615
|
615
|
|
616
|
616
|
lvlMFE env strict_ctxt ann_expr
|
617
|
|
- | not float_me
|
|
617
|
+ | notWorthFloating expr abs_vars
|
|
618
|
+ || not float_me
|
618
|
619
|
|| floatTopLvlOnly env && not (isTopLvl dest_lvl)
|
619
|
620
|
-- Only floating to the top level is allowed.
|
620
|
621
|
|| hasFreeJoin env fvs -- If there is a free join, don't float
|
... |
... |
@@ -623,9 +624,6 @@ lvlMFE env strict_ctxt ann_expr |
623
|
624
|
-- We can't let-bind an expression if we don't know
|
624
|
625
|
-- how it will be represented at runtime.
|
625
|
626
|
-- See Note [Representation polymorphism invariants] in GHC.Core
|
626
|
|
- || notWorthFloating expr abs_vars
|
627
|
|
- -- Test notWorhtFloating last;
|
628
|
|
- -- See Note [Large free-variable sets]
|
629
|
627
|
= -- Don't float it out
|
630
|
628
|
lvlExpr env ann_expr
|
631
|
629
|
|
... |
... |
@@ -676,12 +674,11 @@ lvlMFE env strict_ctxt ann_expr |
676
|
674
|
is_function = isFunction ann_expr
|
677
|
675
|
mb_bot_str = exprBotStrictness_maybe expr
|
678
|
676
|
-- See Note [Bottoming floats]
|
679
|
|
- -- esp Bottoming floats (2)
|
|
677
|
+ -- esp Bottoming floats (BF2)
|
680
|
678
|
expr_ok_for_spec = exprOkForSpeculation expr
|
681
|
679
|
abs_vars = abstractVars dest_lvl env fvs
|
682
|
680
|
dest_lvl = destLevel env fvs fvs_ty is_function is_bot_lam
|
683
|
|
- -- NB: is_bot_lam not is_bot; see (3) in
|
684
|
|
- -- Note [Bottoming floats]
|
|
681
|
+ -- NB: is_bot_lam not is_bot; see (BF2) in Note [Bottoming floats]
|
685
|
682
|
|
686
|
683
|
-- float_is_new_lam: the floated thing will be a new value lambda
|
687
|
684
|
-- replacing, say (g (x+4)) by (lvl x). No work is saved, nor is
|
... |
... |
@@ -698,20 +695,22 @@ lvlMFE env strict_ctxt ann_expr |
698
|
695
|
|
699
|
696
|
-- A decision to float entails let-binding this thing, and we only do
|
700
|
697
|
-- that if we'll escape a value lambda, or will go to the top level.
|
|
698
|
+ -- Never float trivial expressions;
|
|
699
|
+ -- notably, save_work might be true of a lone evaluated variable.
|
701
|
700
|
float_me = saves_work || saves_alloc || is_mk_static
|
702
|
701
|
|
703
|
702
|
-- See Note [Saving work]
|
|
703
|
+ is_hnf = exprIsHNF expr
|
704
|
704
|
saves_work = escapes_value_lam -- (a)
|
705
|
|
- && not (exprIsHNF expr) -- (b)
|
|
705
|
+ && not is_hnf -- (b)
|
706
|
706
|
&& not float_is_new_lam -- (c)
|
707
|
707
|
escapes_value_lam = dest_lvl `ltMajLvl` (le_ctxt_lvl env)
|
708
|
708
|
|
709
|
|
- -- See Note [Saving allocation] and Note [Floating to the top]
|
710
|
|
- saves_alloc = isTopLvl dest_lvl
|
711
|
|
- && floatConsts env
|
712
|
|
- && ( not strict_ctxt -- (a)
|
713
|
|
- || exprIsHNF expr -- (b)
|
714
|
|
- || (is_bot_lam && escapes_value_lam)) -- (c)
|
|
709
|
+ -- See Note [Floating to the top]
|
|
710
|
+ saves_alloc = isTopLvl dest_lvl
|
|
711
|
+ && ( (floatConsts env &&
|
|
712
|
+ (not strict_ctxt || is_hnf)) -- (FT1) and (FT2)
|
|
713
|
+ || (is_bot_lam && escapes_value_lam)) -- (FT3)
|
715
|
714
|
|
716
|
715
|
hasFreeJoin :: LevelEnv -> DVarSet -> Bool
|
717
|
716
|
-- Has a free join point which is not being floated to top level.
|
... |
... |
@@ -726,7 +725,7 @@ hasFreeJoin env fvs |
726
|
725
|
The key idea in let-floating is to
|
727
|
726
|
* float a redex out of a (value) lambda
|
728
|
727
|
Doing so can save an unbounded amount of work.
|
729
|
|
-But see also Note [Saving allocation].
|
|
728
|
+But see also Note [Floating to the top].
|
730
|
729
|
|
731
|
730
|
So we definitely float an expression out if
|
732
|
731
|
(a) It will escape a value lambda (escapes_value_lam)
|
... |
... |
@@ -771,10 +770,12 @@ Wrinkles: |
771
|
770
|
we have saved nothing: one pair will still be allocated for each
|
772
|
771
|
call of `f`. Hence the (not float_is_new_lam) in saves_work.
|
773
|
772
|
|
774
|
|
-Note [Saving allocation]
|
775
|
|
-~~~~~~~~~~~~~~~~~~~~~~~~
|
|
773
|
+Note [Floating to the top]
|
|
774
|
+~~~~~~~~~~~~~~~~~~~~~~~~~~
|
776
|
775
|
Even if `saves_work` is false, we we may want to float even cheap/HNF
|
777
|
|
-expressions out of value lambdas, for several reasons:
|
|
776
|
+expressions out of value lambdas. Data suggests, however, that it is better
|
|
777
|
+/only/ to do so, /if/ they can go to top level. If the expression goes to top
|
|
778
|
+level we don't pay the cost of allocating cold-path thunks described in (SW2).
|
778
|
779
|
|
779
|
780
|
* Doing so may save allocation. Consider
|
780
|
781
|
f = \x. .. (\y.e) ...
|
... |
... |
@@ -782,6 +783,11 @@ expressions out of value lambdas, for several reasons: |
782
|
783
|
(assuming e does not mention x). An example where this really makes a
|
783
|
784
|
difference is simplrun009.
|
784
|
785
|
|
|
786
|
+* In principle this would be true even if the (\y.e) didn't go to top level; but
|
|
787
|
+ in practice we only float a HNF if it goes all way to the top. We don't pay
|
|
788
|
+ /any/ allocation cost for a top-level floated expression; it just becomes
|
|
789
|
+ static data.
|
|
790
|
+
|
785
|
791
|
* It may allow SpecContr to fire on functions. Consider
|
786
|
792
|
f = \x. ....(f (\y.e))....
|
787
|
793
|
After floating we get
|
... |
... |
@@ -793,21 +799,7 @@ expressions out of value lambdas, for several reasons: |
793
|
799
|
a big difference for string literals and bottoming expressions: see Note
|
794
|
800
|
[Floating to the top]
|
795
|
801
|
|
796
|
|
-Data suggests, however, that it is better /only/ to float HNFS, /if/ they can go
|
797
|
|
-to top level. See (SW2) of Note [Saving work]. If the expression goes to top
|
798
|
|
-level we don't pay the cost of allocating cold-path thunks described in (SW2).
|
799
|
|
-
|
800
|
|
-Hence `isTopLvl dest_lvl` in `saves_alloc`.
|
801
|
|
-
|
802
|
|
-Note [Floating to the top]
|
803
|
|
-~~~~~~~~~~~~~~~~~~~~~~~~~~
|
804
|
|
-Even though Note [Saving allocation] suggests that we should not, in
|
805
|
|
-general, float HNFs, the balance change if it goes to the top:
|
806
|
|
-
|
807
|
|
-* We don't pay an allocation cost for the floated expression; it
|
808
|
|
- just becomes static data.
|
809
|
|
-
|
810
|
|
-* Floating string literal is valuable -- no point in duplicating the
|
|
802
|
+* Floating string literals is valuable -- no point in duplicating the
|
811
|
803
|
at each call site!
|
812
|
804
|
|
813
|
805
|
* Floating bottoming expressions is valuable: they are always cold
|
... |
... |
@@ -815,32 +807,32 @@ general, float HNFs, the balance change if it goes to the top: |
815
|
807
|
can be quite big, inhibiting inlining. See Note [Bottoming floats]
|
816
|
808
|
|
817
|
809
|
So we float an expression to the top if:
|
818
|
|
- (a) the context is lazy (so we get allocation), or
|
819
|
|
- (b) the expression is a HNF (so we get allocation), or
|
820
|
|
- (c) the expression is bottoming and floating would escape a
|
821
|
|
- value lambda (NB: if the expression itself is a lambda, (b)
|
822
|
|
- will apply; so this case only catches bottoming thunks)
|
|
810
|
+ (FT1) the context is lazy (so we get allocation), or
|
|
811
|
+ (FT2) the expression is a HNF (so we get allocation), or
|
|
812
|
+ (FT3) the expression is bottoming and floating would escape a
|
|
813
|
+ value lambda (NB: if the expression itself is a lambda, (b)
|
|
814
|
+ will apply; so this case only catches bottoming thunks)
|
823
|
815
|
|
824
|
816
|
Examples:
|
825
|
817
|
|
826
|
|
-* (a) Strict. Case scrutinee
|
|
818
|
+* (FT1) Strict. Case scrutinee
|
827
|
819
|
f = case g True of ....
|
828
|
820
|
Don't float (g True) to top level; then we have the admin of a
|
829
|
821
|
top-level thunk to worry about, with zero gain.
|
830
|
822
|
|
831
|
|
-* (a) Strict. Case alternative
|
|
823
|
+* (FT1) Strict. Case alternative
|
832
|
824
|
h = case y of
|
833
|
825
|
True -> g True
|
834
|
826
|
False -> False
|
835
|
827
|
Don't float (g True) to the top level
|
836
|
828
|
|
837
|
|
-* (b) HNF
|
|
829
|
+* (FT2) HNF
|
838
|
830
|
f = case y of
|
839
|
831
|
True -> p:q
|
840
|
832
|
False -> blah
|
841
|
833
|
We may as well float the (p:q) so it becomes a static data structure.
|
842
|
834
|
|
843
|
|
-* (c) Bottoming expressions; see also Note [Bottoming floats]
|
|
835
|
+* (FT3) Bottoming expressions; see also Note [Bottoming floats]
|
844
|
836
|
f x = case x of
|
845
|
837
|
0 -> error <big thing>
|
846
|
838
|
_ -> x+1
|
... |
... |
@@ -853,7 +845,7 @@ Examples: |
853
|
845
|
'foo' anyway. So float bottoming things only if they escape
|
854
|
846
|
a lambda.
|
855
|
847
|
|
856
|
|
-* Arguments
|
|
848
|
+* (FT4) Arguments
|
857
|
849
|
t = f (g True)
|
858
|
850
|
Prior to Apr 22 we didn't float (g True) to the top if f was strict.
|
859
|
851
|
But (a) this only affected CAFs, because if it escapes a value lambda
|
... |
... |
@@ -868,28 +860,6 @@ early loses opportunities for RULES which (needless to say) are |
868
|
860
|
important in some nofib programs (gcd is an example). [SPJ note:
|
869
|
861
|
I think this is obsolete; the flag seems always on.]
|
870
|
862
|
|
871
|
|
-Note [Large free-variable sets]
|
872
|
|
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
873
|
|
-In #24471 we had something like
|
874
|
|
- x1 = I# 1
|
875
|
|
- ...
|
876
|
|
- x1000 = I# 1000
|
877
|
|
- foo = f x1 (f x2 (f x3 ....))
|
878
|
|
-So every sub-expression in `foo` has lots and lots of free variables. But
|
879
|
|
-none of these sub-expressions float anywhere; the entire float-out pass is a
|
880
|
|
-no-op.
|
881
|
|
-
|
882
|
|
-In lvlMFE, we want to find out quickly if the MFE is not-floatable; that is
|
883
|
|
-the common case. In #24471 it turned out that we were testing `abs_vars` (a
|
884
|
|
-relatively complicated calculation that takes at least O(n-free-vars) time to
|
885
|
|
-compute) for every sub-expression.
|
886
|
|
-
|
887
|
|
-Better instead to test `float_me` early. That still involves looking at
|
888
|
|
-dest_lvl, which means looking at every free variable, but the constant factor
|
889
|
|
-is a lot better.
|
890
|
|
-
|
891
|
|
-ToDo: find a way to fix the bad asymptotic complexity.
|
892
|
|
-
|
893
|
863
|
Note [Floating join point bindings]
|
894
|
864
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
895
|
865
|
Mostly we don't float join points at all -- we want them to /stay/ join points.
|
... |
... |
@@ -1053,30 +1023,36 @@ we'd like to float the call to error, to get |
1053
|
1023
|
|
1054
|
1024
|
But, as ever, we need to be careful:
|
1055
|
1025
|
|
1056
|
|
-(1) We want to float a bottoming
|
|
1026
|
+(BF1) We want to float a bottoming
|
1057
|
1027
|
expression even if it has free variables:
|
1058
|
1028
|
f = \x. g (let v = h x in error ("urk" ++ v))
|
1059
|
1029
|
Then we'd like to abstract over 'x', and float the whole arg of g:
|
1060
|
1030
|
lvl = \x. let v = h x in error ("urk" ++ v)
|
1061
|
1031
|
f = \x. g (lvl x)
|
1062
|
|
- To achieve this we pass is_bot to destLevel
|
1063
|
|
-
|
1064
|
|
-(2) We do not do this for lambdas that return
|
1065
|
|
- bottom. Instead we treat the /body/ of such a function specially,
|
1066
|
|
- via point (1). For example:
|
|
1032
|
+ To achieve this we pass `is_bot` to destLevel
|
|
1033
|
+
|
|
1034
|
+(BF2) We do the same for /lambdas/ that return bottom.
|
|
1035
|
+ Suppose the original lambda had /no/ free vars:
|
|
1036
|
+ f = \x. ....(\y z. error (y++z))...
|
|
1037
|
+ then we'd like to float that whole lambda
|
|
1038
|
+ lvl = \y z. error (y++z)
|
|
1039
|
+ f = \x. ....lvl....
|
|
1040
|
+ If we just floated its bottom-valued body, we might abstract the arguments in
|
|
1041
|
+ the "wrong" order and end up with this bad result
|
|
1042
|
+ lvl = \z y. error (y++z)
|
|
1043
|
+ f = \x. ....(\y z. lvl z y)....
|
|
1044
|
+
|
|
1045
|
+ If the lambda does have free vars, this will happen:
|
1067
|
1046
|
f = \x. ....(\y z. if x then error y else error z)....
|
1068
|
|
- If we float the whole lambda thus
|
|
1047
|
+ We float the whole lambda thus
|
1069
|
1048
|
lvl = \x. \y z. if x then error y else error z
|
1070
|
1049
|
f = \x. ...(lvl x)...
|
1071
|
|
- we may well end up eta-expanding that PAP to
|
|
1050
|
+ And we may well end up eta-expanding that PAP to
|
|
1051
|
+ lvl = \x. \y z. if b then error y else error z
|
1072
|
1052
|
f = \x. ...(\y z. lvl x y z)...
|
|
1053
|
+ so we get a (small) closure. So be it.
|
1073
|
1054
|
|
1074
|
|
- ===>
|
1075
|
|
- lvl = \x z y. if b then error y else error z
|
1076
|
|
- f = \x. ...(\y z. lvl x z y)...
|
1077
|
|
- (There is no guarantee that we'll choose the perfect argument order.)
|
1078
|
|
-
|
1079
|
|
-(3) If we have a /binding/ that returns bottom, we want to float it to top
|
|
1055
|
+(BF3) If we have a /binding/ that returns bottom, we want to float it to top
|
1080
|
1056
|
level, even if it has free vars (point (1)), and even it has lambdas.
|
1081
|
1057
|
Example:
|
1082
|
1058
|
... let { v = \y. error (show x ++ show y) } in ...
|
... |
... |
@@ -1092,7 +1068,6 @@ But, as ever, we need to be careful: |
1092
|
1068
|
join points (#24768), and floating to the top would abstract over those join
|
1093
|
1069
|
points, which we should never do.
|
1094
|
1070
|
|
1095
|
|
-
|
1096
|
1071
|
See Maessen's paper 1999 "Bottom extraction: factoring error handling out
|
1097
|
1072
|
of functional programs" (unpublished I think).
|
1098
|
1073
|
|
... |
... |
@@ -1135,7 +1110,6 @@ float the case (as advocated here) we won't float the (build ...y..) |
1135
|
1110
|
either, so fusion will happen. It can be a big effect, esp in some
|
1136
|
1111
|
artificial benchmarks (e.g. integer, queens), but there is no perfect
|
1137
|
1112
|
answer.
|
1138
|
|
-
|
1139
|
1113
|
-}
|
1140
|
1114
|
|
1141
|
1115
|
annotateBotStr :: Id -> Arity -> Maybe (Arity, DmdSig, CprSig) -> Id
|
... |
... |
@@ -1152,69 +1126,124 @@ annotateBotStr id n_extra mb_bot_str |
1152
|
1126
|
= id
|
1153
|
1127
|
|
1154
|
1128
|
notWorthFloating :: CoreExpr -> [Var] -> Bool
|
1155
|
|
--- Returns True if the expression would be replaced by
|
1156
|
|
--- something bigger than it is now. For example:
|
1157
|
|
--- abs_vars = tvars only: return True if e is trivial,
|
1158
|
|
--- but False for anything bigger
|
1159
|
|
--- abs_vars = [x] (an Id): return True for trivial, or an application (f x)
|
1160
|
|
--- but False for (f x x)
|
1161
|
|
---
|
1162
|
|
--- One big goal is that floating should be idempotent. Eg if
|
1163
|
|
--- we replace e with (lvl79 x y) and then run FloatOut again, don't want
|
1164
|
|
--- to replace (lvl79 x y) with (lvl83 x y)!
|
1165
|
|
-
|
|
1129
|
+-- See Note [notWorthFloating]
|
1166
|
1130
|
notWorthFloating e abs_vars
|
1167
|
|
- = go e (count isId abs_vars)
|
|
1131
|
+ = go e 0
|
1168
|
1132
|
where
|
1169
|
|
- go (Var {}) n = n >= 0
|
1170
|
|
- go (Lit lit) n = assert (n==0) $
|
1171
|
|
- litIsTrivial lit -- Note [Floating literals]
|
1172
|
|
- go (Type {}) _ = True
|
1173
|
|
- go (Coercion {}) _ = True
|
|
1133
|
+ n_abs_vars = count isId abs_vars -- See (NWF5)
|
|
1134
|
+
|
|
1135
|
+ go :: CoreExpr -> Int -> Bool
|
|
1136
|
+ -- (go e n) return True if (e x1 .. xn) is not worth floating
|
|
1137
|
+ -- where `e` has n trivial value arguments x1..xn
|
|
1138
|
+ -- See (NWF4)
|
|
1139
|
+ go (Lit lit) n = assert (n==0) $
|
|
1140
|
+ litIsTrivial lit -- See (NWF1)
|
|
1141
|
+ go (Type {}) _ = True
|
|
1142
|
+ go (Tick t e) n = not (tickishIsCode t) && go e n
|
|
1143
|
+ go (Cast e _) n = n==0 || go e n -- See (NWF3)
|
|
1144
|
+ go (Coercion {}) _ = True
|
1174
|
1145
|
go (App e arg) n
|
1175
|
|
- -- See Note [Floating applications to coercions]
|
1176
|
|
- | not (isRuntimeArg arg) = go e n
|
1177
|
|
- | n==0 = False
|
1178
|
|
- | exprIsTrivial arg = go e (n-1) -- NB: exprIsTrivial arg = go arg 0
|
1179
|
|
- | otherwise = False
|
1180
|
|
- go (Tick t e) n = not (tickishIsCode t) && go e n
|
1181
|
|
- go (Cast e _) n = go e n
|
1182
|
|
- go (Case e b _ as) n
|
|
1146
|
+ | Type {} <- arg = go e n -- Just types, not coercions (NWF2)
|
|
1147
|
+ | exprIsTrivial arg = go e (n+1)
|
|
1148
|
+ | otherwise = False -- (f non-triv) is worth floating
|
|
1149
|
+
|
|
1150
|
+ go (Case e b _ as) _
|
|
1151
|
+ -- Do not float the `case` part of trivial cases (NWF3)
|
|
1152
|
+ -- We'll have a look at the RHS when we get there
|
1183
|
1153
|
| null as
|
1184
|
|
- = go e n -- See Note [Empty case is trivial]
|
1185
|
|
- | Just rhs <- isUnsafeEqualityCase e b as
|
1186
|
|
- = go rhs n -- See (U2) of Note [Implementing unsafeCoerce] in base:Unsafe.Coerce
|
1187
|
|
- go _ _ = False
|
|
1154
|
+ = True -- See Note [Empty case is trivial]
|
|
1155
|
+ | Just {} <- isUnsafeEqualityCase e b as
|
|
1156
|
+ = True -- See (U2) of Note [Implementing unsafeCoerce] in base:Unsafe.Coerce
|
|
1157
|
+ | otherwise
|
|
1158
|
+ = False
|
1188
|
1159
|
|
1189
|
|
-{-
|
1190
|
|
-Note [Floating literals]
|
1191
|
|
-~~~~~~~~~~~~~~~~~~~~~~~~
|
1192
|
|
-It's important to float Integer literals, so that they get shared,
|
1193
|
|
-rather than being allocated every time round the loop.
|
1194
|
|
-Hence the litIsTrivial.
|
|
1160
|
+ go (Var _) n
|
|
1161
|
+ | n==0 = True -- Naked variable
|
|
1162
|
+ | n <= n_abs_vars = True -- (f a b c) is not worth floating if
|
|
1163
|
+ | otherwise = False -- a,b,c are all abstracted; see (NWF5)
|
1195
|
1164
|
|
1196
|
|
-Ditto literal strings (LitString), which we'd like to float to top
|
1197
|
|
-level, which is now possible.
|
|
1165
|
+ go _ _ = False -- Let etc is worth floating
|
1198
|
1166
|
|
1199
|
|
-Note [Floating applications to coercions]
|
1200
|
|
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
1201
|
|
-We don’t float out variables applied only to type arguments, since the
|
1202
|
|
-extra binding would be pointless: type arguments are completely erased.
|
1203
|
|
-But *coercion* arguments aren’t (see Note [Coercion tokens] in
|
1204
|
|
-"GHC.CoreToStg" and Note [inlineBoringOk] in"GHC.Core.Unfold"),
|
1205
|
|
-so we still want to float out variables applied only to
|
1206
|
|
-coercion arguments.
|
|
1167
|
+{- Note [notWorthFloating]
|
|
1168
|
+~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
1169
|
+`notWorthFloating` returns True if the expression would be replaced by something
|
|
1170
|
+bigger than it is now. One big goal is that floating should be idempotent. Eg
|
|
1171
|
+if we replace e with (lvl79 x y) and then run FloatOut again, don't want to
|
|
1172
|
+replace (lvl79 x y) with (lvl83 x y)!
|
1207
|
1173
|
|
|
1174
|
+For example:
|
|
1175
|
+ abs_vars = tvars only: return True if e is trivial,
|
|
1176
|
+ but False for anything bigger
|
|
1177
|
+ abs_vars = [x] (an Id): return True for trivial, or an application (f x)
|
|
1178
|
+ but False for (f x x)
|
|
1179
|
+
|
|
1180
|
+(NWF1) It's important to float Integer literals, so that they get shared, rather
|
|
1181
|
+ than being allocated every time round the loop. Hence the litIsTrivial.
|
|
1182
|
+
|
|
1183
|
+ Ditto literal strings (LitString), which we'd like to float to top
|
|
1184
|
+ level, which is now possible.
|
|
1185
|
+
|
|
1186
|
+(NWF2) We don’t float out variables applied only to type arguments, since the
|
|
1187
|
+ extra binding would be pointless: type arguments are completely erased.
|
|
1188
|
+ But *coercion* arguments aren’t (see Note [Coercion tokens] in
|
|
1189
|
+ "GHC.CoreToStg" and Note [inlineBoringOk] in"GHC.Core.Unfold"),
|
|
1190
|
+ so we still want to float out variables applied only to
|
|
1191
|
+ coercion arguments.
|
|
1192
|
+
|
|
1193
|
+(NWF3) Some expressions have trivial wrappers:
|
|
1194
|
+ - Casts (e |> co)
|
|
1195
|
+ - Unary-class applications:
|
|
1196
|
+ - Dictionary applications (MkC meth)
|
|
1197
|
+ - Class-op applictions (op dict)
|
|
1198
|
+ - Case of empty alts
|
|
1199
|
+ - Unsafe-equality case
|
|
1200
|
+ In all these cases we say "not worth floating", and we do so /regardless/
|
|
1201
|
+ of the wrapped expression. The SetLevels stuff may subsequently float the
|
|
1202
|
+ components of the expression.
|
|
1203
|
+
|
|
1204
|
+ Example: is it worth floating (f x |> co)? No! If we did we'd get
|
|
1205
|
+ lvl = f x |> co
|
|
1206
|
+ ...lvl....
|
|
1207
|
+ Then we'd do cast worker/wrapper and end up with.
|
|
1208
|
+ lvl' = f x
|
|
1209
|
+ ...(lvl' |> co)...
|
|
1210
|
+ Silly! Better not to float it in the first place. If we say "no" here,
|
|
1211
|
+ we'll subsequently say "yes" for (f x) and get
|
|
1212
|
+ lvl = f x
|
|
1213
|
+ ....(lvl |> co)...
|
|
1214
|
+ which is what we want. In short: don't float trivial wrappers.
|
|
1215
|
+
|
|
1216
|
+(NWF4) The only non-trivial expression that we say "not worth floating" for
|
|
1217
|
+ is an application
|
|
1218
|
+ f x y z
|
|
1219
|
+ where the number of value arguments is <= the number of abstracted Ids.
|
|
1220
|
+ This is what makes floating idempotent. Hence counting the number of
|
|
1221
|
+ value arguments in `go`
|
|
1222
|
+
|
|
1223
|
+(NWF5) In #24471 we had something like
|
|
1224
|
+ x1 = I# 1
|
|
1225
|
+ ...
|
|
1226
|
+ x1000 = I# 1000
|
|
1227
|
+ foo = f x1 (f x2 (f x3 ....))
|
|
1228
|
+ So every sub-expression in `foo` has lots and lots of free variables. But
|
|
1229
|
+ none of these sub-expressions float anywhere; the entire float-out pass is a
|
|
1230
|
+ no-op.
|
1208
|
1231
|
|
1209
|
|
-************************************************************************
|
1210
|
|
-* *
|
1211
|
|
-\subsection{Bindings}
|
1212
|
|
-* *
|
1213
|
|
-************************************************************************
|
|
1232
|
+ So `notWorthFloating` tries to avoid evaluating `n_abs_vars`, in cases where
|
|
1233
|
+ it obviously /is/ worth floating. (In #24471 it turned out that we were
|
|
1234
|
+ testing `abs_vars` (a relatively complicated calculation that takes at least
|
|
1235
|
+ O(n-free-vars) time to compute) for every sub-expression.)
|
1214
|
1236
|
|
1215
|
|
-The binding stuff works for top level too.
|
|
1237
|
+ Hence testing `n_abs_vars only` at the very end.
|
1216
|
1238
|
-}
|
1217
|
1239
|
|
|
1240
|
+{- *********************************************************************
|
|
1241
|
+* *
|
|
1242
|
+ Bindings
|
|
1243
|
+ This binding stuff works for top level too.
|
|
1244
|
+* *
|
|
1245
|
+********************************************************************* -}
|
|
1246
|
+
|
1218
|
1247
|
lvlBind :: LevelEnv
|
1219
|
1248
|
-> CoreBindWithFVs
|
1220
|
1249
|
-> LvlM (LevelledBind, LevelEnv)
|
... |
... |
@@ -1261,7 +1290,7 @@ lvlBind env (AnnNonRec bndr rhs) |
1261
|
1290
|
-- is_bot_lam: looks like (\xy. bot), maybe zero lams
|
1262
|
1291
|
-- NB: not isBottomThunk!
|
1263
|
1292
|
-- NB: not is_join: don't send bottoming join points to the top.
|
1264
|
|
- -- See Note [Bottoming floats] point (3)
|
|
1293
|
+ -- See Note [Bottoming floats] (BF3)
|
1265
|
1294
|
|
1266
|
1295
|
is_top_bindable = exprIsTopLevelBindable deann_rhs bndr_ty
|
1267
|
1296
|
n_extra = count isId abs_vars
|
... |
... |
@@ -1552,9 +1581,8 @@ destLevel env fvs fvs_ty is_function is_bot |
1552
|
1581
|
-- See Note [Floating join point bindings]
|
1553
|
1582
|
= tOP_LEVEL
|
1554
|
1583
|
|
1555
|
|
- | is_bot -- Send bottoming bindings to the top
|
1556
|
|
- = as_far_as_poss -- regardless; see Note [Bottoming floats]
|
1557
|
|
- -- Esp Bottoming floats (1) and (3)
|
|
1584
|
+ | is_bot -- Send bottoming bindings to the top regardless;
|
|
1585
|
+ = as_far_as_poss -- see (BF1) and (BF2) in Note [Bottoming floats]
|
1558
|
1586
|
|
1559
|
1587
|
| Just n_args <- floatLams env
|
1560
|
1588
|
, n_args > 0 -- n=0 case handled uniformly by the 'otherwise' case
|
... |
... |
@@ -1568,8 +1596,13 @@ destLevel env fvs fvs_ty is_function is_bot |
1568
|
1596
|
max_fv_id_level = maxFvLevel isId env fvs -- Max over Ids only; the
|
1569
|
1597
|
-- tyvars will be abstracted
|
1570
|
1598
|
|
|
1599
|
+ -- as_far_as_poss: destination level depends only on the free Ids (more
|
|
1600
|
+ -- precisely, free CoVars) of the /type/, not the free Ids of the /term/.
|
|
1601
|
+ -- Why worry about the free CoVars? See Note [Floating and kind casts]
|
|
1602
|
+ --
|
|
1603
|
+ -- There may be free Ids in the term, but then we'll just
|
|
1604
|
+ -- lambda-abstract over them
|
1571
|
1605
|
as_far_as_poss = maxFvLevel' isId env fvs_ty
|
1572
|
|
- -- See Note [Floating and kind casts]
|
1573
|
1606
|
|
1574
|
1607
|
{- Note [Floating and kind casts]
|
1575
|
1608
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
... |
... |
@@ -1732,10 +1765,9 @@ maxFvLevel max_me env var_set |
1732
|
1765
|
-- It's OK to use a non-deterministic fold here because maxIn commutes.
|
1733
|
1766
|
|
1734
|
1767
|
maxFvLevel' :: (Var -> Bool) -> LevelEnv -> TyCoVarSet -> Level
|
1735
|
|
--- Same but for TyCoVarSet
|
|
1768
|
+-- Precisely the same as `maxFvLevel` but for TyCoVarSet rather than DVarSet
|
1736
|
1769
|
maxFvLevel' max_me env var_set
|
1737
|
1770
|
= nonDetStrictFoldUniqSet (maxIn max_me env) tOP_LEVEL var_set
|
1738
|
|
- -- It's OK to use a non-deterministic fold here because maxIn commutes.
|
1739
|
1771
|
|
1740
|
1772
|
maxIn :: (Var -> Bool) -> LevelEnv -> InVar -> Level -> Level
|
1741
|
1773
|
maxIn max_me (LE { le_lvl_env = lvl_env, le_env = id_env }) in_var lvl
|