| ... |
... |
@@ -9,6 +9,8 @@ |
|
9
|
9
|
-- many /other/ arguments the function has. Inconsistent unboxing is very
|
|
10
|
10
|
-- bad for performance, so I increased the limit to allow it to unbox
|
|
11
|
11
|
-- consistently.
|
|
|
12
|
+-- AK: Seems we no longer unbox OccEnv now anyway so it might be redundant.
|
|
|
13
|
+
|
|
12
|
14
|
|
|
13
|
15
|
{-
|
|
14
|
16
|
(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
|
| ... |
... |
@@ -967,6 +969,12 @@ occAnalBind |
|
967
|
969
|
-> ([CoreBind] -> r -> r) -- How to combine the scope with new binds
|
|
968
|
970
|
-> WithUsageDetails r -- Of the whole let(rec)
|
|
969
|
971
|
|
|
|
972
|
+-- AK: While not allocating any less inlining occAnalBind turns calls to the
|
|
|
973
|
+-- passed functions into known calls with all the benefits that brings.
|
|
|
974
|
+-- On a version of T26425 with 6k alternatives this improved compile
|
|
|
975
|
+-- by 10-20% with -O.
|
|
|
976
|
+{-# INLINE occAnalBind #-}
|
|
|
977
|
+
|
|
970
|
978
|
occAnalBind env lvl ire (Rec pairs) thing_inside combine
|
|
971
|
979
|
= addInScopeList env (map fst pairs) $ \env ->
|
|
972
|
980
|
let WUD body_uds body' = thing_inside env
|
| ... |
... |
@@ -984,7 +992,7 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine |
|
984
|
992
|
= -- Analyse the RHS and /then/ the body
|
|
985
|
993
|
let -- Analyse the rhs first, generating rhs_uds
|
|
986
|
994
|
!(rhs_uds_s, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs
|
|
987
|
|
- rhs_uds = foldr1 orUDs rhs_uds_s -- NB: orUDs. See (W4) of
|
|
|
995
|
+ rhs_uds = foldl1' orUDs rhs_uds_s -- NB: orUDs. See (W4) of
|
|
988
|
996
|
-- Note [Occurrence analysis for join points]
|
|
989
|
997
|
|
|
990
|
998
|
-- Now analyse the body, adding the join point
|
| ... |
... |
@@ -1049,6 +1057,7 @@ occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs |
|
1049
|
1057
|
-- Match join arity O from mb_join_arity with manifest join arity M as
|
|
1050
|
1058
|
-- returned by of occAnalLamTail. It's totally OK for them to mismatch;
|
|
1051
|
1059
|
-- hence adjust the UDs from the RHS
|
|
|
1060
|
+
|
|
1052
|
1061
|
WUD adj_rhs_uds final_rhs = adjustNonRecRhs mb_join $
|
|
1053
|
1062
|
occAnalLamTail rhs_env rhs
|
|
1054
|
1063
|
final_bndr_with_rules
|
| ... |
... |
@@ -2054,6 +2063,18 @@ So The Plan is this: |
|
2054
|
2063
|
was a loop breaker last time round
|
|
2055
|
2064
|
|
|
2056
|
2065
|
Hence the is_lb field of NodeScore
|
|
|
2066
|
+
|
|
|
2067
|
+Note [Strictness in the occurrence analyser]
|
|
|
2068
|
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
2069
|
+By carefully making the occurrence analyser strict in some places, we can
|
|
|
2070
|
+dramatically reduce its memory residency. Among other things we:
|
|
|
2071
|
+* Evaluate the result of `tagLamBinder` and friends, so that the binder (or its
|
|
|
2072
|
+ OccInfo) does not retain the entire `UsageDetails`. Also use `strictMap` in `tagLamBinders`.
|
|
|
2073
|
+* In `combineUsageDetailsWith`, the fields of the data constructor are strict, and we use
|
|
|
2074
|
+ `strictPlusVarEnv` on the maps that are bound to be needed later on to avoid thunks being
|
|
|
2075
|
+ stored in the values.
|
|
|
2076
|
+
|
|
|
2077
|
+These measures reduced residency for test T26425 by a factor of at least 5x.
|
|
2057
|
2078
|
-}
|
|
2058
|
2079
|
|
|
2059
|
2080
|
{- *********************************************************************
|
| ... |
... |
@@ -2188,7 +2209,9 @@ occ_anal_lam_tail env expr@(Lam {}) |
|
2188
|
2209
|
go env rev_bndrs body
|
|
2189
|
2210
|
= addInScope env rev_bndrs $ \env ->
|
|
2190
|
2211
|
let !(WUD usage body') = occ_anal_lam_tail env body
|
|
2191
|
|
- wrap_lam body bndr = Lam (tagLamBinder usage bndr) body
|
|
|
2212
|
+ -- See Note [Strictness in the occurrence analyser]
|
|
|
2213
|
+ wrap_lam !body !bndr = let !bndr' = tagLamBinder usage bndr
|
|
|
2214
|
+ in Lam bndr' body
|
|
2192
|
2215
|
in WUD (usage `addLamCoVarOccs` rev_bndrs)
|
|
2193
|
2216
|
(foldl' wrap_lam body' rev_bndrs)
|
|
2194
|
2217
|
|
| ... |
... |
@@ -2541,7 +2564,8 @@ occAnal env (Case scrut bndr ty alts) |
|
2541
|
2564
|
let alt_env = addBndrSwap scrut' bndr $
|
|
2542
|
2565
|
setTailCtxt env -- Kill off OccRhs
|
|
2543
|
2566
|
WUD alts_usage alts' = do_alts alt_env alts
|
|
2544
|
|
- tagged_bndr = tagLamBinder alts_usage bndr
|
|
|
2567
|
+ !tagged_bndr = tagLamBinder alts_usage bndr
|
|
|
2568
|
+ -- See Note [Strictness in the occurrence analyser]
|
|
2545
|
2569
|
in WUD alts_usage (tagged_bndr, alts')
|
|
2546
|
2570
|
|
|
2547
|
2571
|
total_usage = markAllNonTail scrut_usage `andUDs` alts_usage
|
| ... |
... |
@@ -2559,11 +2583,13 @@ occAnal env (Case scrut bndr ty alts) |
|
2559
|
2583
|
do_alt !env (Alt con bndrs rhs)
|
|
2560
|
2584
|
= addInScopeList env bndrs $ \ env ->
|
|
2561
|
2585
|
let WUD rhs_usage rhs' = occAnal env rhs
|
|
2562
|
|
- tagged_bndrs = tagLamBinders rhs_usage bndrs
|
|
|
2586
|
+ !tagged_bndrs = tagLamBinders rhs_usage bndrs
|
|
|
2587
|
+ -- See Note [Strictness in the occurrence analyser]
|
|
2563
|
2588
|
in -- See Note [Binders in case alternatives]
|
|
2564
|
2589
|
WUD rhs_usage (Alt con tagged_bndrs rhs')
|
|
2565
|
2590
|
|
|
2566
|
2591
|
occAnal env (Let bind body)
|
|
|
2592
|
+ -- TODO: Would be nice to use a strict version of mkLets here
|
|
2567
|
2593
|
= occAnalBind env NotTopLevel noImpRuleEdges bind
|
|
2568
|
2594
|
(\env -> occAnal env body) mkLets
|
|
2569
|
2595
|
|
| ... |
... |
@@ -2644,10 +2670,12 @@ occAnalApp !env (Var fun, args, ticks) |
|
2644
|
2670
|
| fun `hasKey` runRWKey
|
|
2645
|
2671
|
, [t1, t2, arg] <- args
|
|
2646
|
2672
|
, WUD usage arg' <- adjustNonRecRhs (JoinPoint 1) $ occAnalLamTail env arg
|
|
2647
|
|
- = WUD usage (mkTicks ticks $ mkApps (Var fun) [t1, t2, arg'])
|
|
|
2673
|
+ = let app_out = mkTicks ticks $ mkApps (Var fun) [t1, t2, arg']
|
|
|
2674
|
+ in WUD usage app_out
|
|
2648
|
2675
|
|
|
2649
|
2676
|
occAnalApp env (Var fun_id, args, ticks)
|
|
2650
|
|
- = WUD all_uds (mkTicks ticks app')
|
|
|
2677
|
+ = let app_out = mkTicks ticks app'
|
|
|
2678
|
+ in WUD all_uds app_out
|
|
2651
|
2679
|
where
|
|
2652
|
2680
|
-- Lots of banged bindings: this is a very heavily bit of code,
|
|
2653
|
2681
|
-- so it pays not to make lots of thunks here, all of which
|
| ... |
... |
@@ -2692,8 +2720,9 @@ occAnalApp env (Var fun_id, args, ticks) |
|
2692
|
2720
|
-- See Note [Sources of one-shot information], bullet point A']
|
|
2693
|
2721
|
|
|
2694
|
2722
|
occAnalApp env (fun, args, ticks)
|
|
2695
|
|
- = WUD (markAllNonTail (fun_uds `andUDs` args_uds))
|
|
2696
|
|
- (mkTicks ticks app')
|
|
|
2723
|
+ = let app_out = mkTicks ticks app'
|
|
|
2724
|
+ in WUD (markAllNonTail (fun_uds `andUDs` args_uds)) app_out
|
|
|
2725
|
+
|
|
2697
|
2726
|
where
|
|
2698
|
2727
|
!(WUD args_uds app') = occAnalArgs env fun' args []
|
|
2699
|
2728
|
!(WUD fun_uds fun') = occAnal (addAppCtxt env args) fun
|
| ... |
... |
@@ -3650,8 +3679,8 @@ data WithTailUsageDetails a = WTUD !TailUsageDetails !a |
|
3650
|
3679
|
-------------------
|
|
3651
|
3680
|
-- UsageDetails API
|
|
3652
|
3681
|
|
|
3653
|
|
-andUDs, orUDs
|
|
3654
|
|
- :: UsageDetails -> UsageDetails -> UsageDetails
|
|
|
3682
|
+andUDs:: UsageDetails -> UsageDetails -> UsageDetails
|
|
|
3683
|
+orUDs :: UsageDetails -> UsageDetails -> UsageDetails
|
|
3655
|
3684
|
andUDs = combineUsageDetailsWith andLocalOcc
|
|
3656
|
3685
|
orUDs = combineUsageDetailsWith orLocalOcc
|
|
3657
|
3686
|
|
| ... |
... |
@@ -3766,10 +3795,13 @@ combineUsageDetailsWith plus_occ_info |
|
3766
|
3795
|
| isEmptyVarEnv env1 = uds2
|
|
3767
|
3796
|
| isEmptyVarEnv env2 = uds1
|
|
3768
|
3797
|
| otherwise
|
|
3769
|
|
- = UD { ud_env = plusVarEnv_C plus_occ_info env1 env2
|
|
3770
|
|
- , ud_z_many = plusVarEnv z_many1 z_many2
|
|
|
3798
|
+ -- See Note [Strictness in the occurrence analyser]
|
|
|
3799
|
+ -- Using strictPlusVarEnv here speeds up the test T26425 by about 10% by avoiding
|
|
|
3800
|
+ -- intermediate thunks.
|
|
|
3801
|
+ = UD { ud_env = strictPlusVarEnv_C plus_occ_info env1 env2
|
|
|
3802
|
+ , ud_z_many = strictPlusVarEnv z_many1 z_many2
|
|
3771
|
3803
|
, ud_z_in_lam = plusVarEnv z_in_lam1 z_in_lam2
|
|
3772
|
|
- , ud_z_tail = plusVarEnv z_tail1 z_tail2 }
|
|
|
3804
|
+ , ud_z_tail = strictPlusVarEnv z_tail1 z_tail2 }
|
|
3773
|
3805
|
|
|
3774
|
3806
|
lookupLetOccInfo :: UsageDetails -> Id -> OccInfo
|
|
3775
|
3807
|
-- Don't use locally-generated occ_info for exported (visible-elsewhere)
|
| ... |
... |
@@ -3847,7 +3879,8 @@ tagLamBinders :: UsageDetails -- Of scope |
|
3847
|
3879
|
-> [Id] -- Binders
|
|
3848
|
3880
|
-> [IdWithOccInfo] -- Tagged binders
|
|
3849
|
3881
|
tagLamBinders usage binders
|
|
3850
|
|
- = map (tagLamBinder usage) binders
|
|
|
3882
|
+ -- See Note [Strictness in the occurrence analyser]
|
|
|
3883
|
+ = strictMap (tagLamBinder usage) binders
|
|
3851
|
3884
|
|
|
3852
|
3885
|
tagLamBinder :: UsageDetails -- Of scope
|
|
3853
|
3886
|
-> Id -- Binder
|
| ... |
... |
@@ -3856,6 +3889,7 @@ tagLamBinder :: UsageDetails -- Of scope |
|
3856
|
3889
|
-- No-op on TyVars
|
|
3857
|
3890
|
-- A lambda binder never has an unfolding, so no need to look for that
|
|
3858
|
3891
|
tagLamBinder usage bndr
|
|
|
3892
|
+ -- See Note [Strictness in the occurrence analyser]
|
|
3859
|
3893
|
= setBinderOcc (markNonTail occ) bndr
|
|
3860
|
3894
|
-- markNonTail: don't try to make an argument into a join point
|
|
3861
|
3895
|
where
|