| ... |
... |
@@ -9,6 +9,8 @@ |
|
9
|
9
|
-- many /other/ arguments the function has. Inconsistent unboxing is very
|
|
10
|
10
|
-- bad for performance, so I increased the limit to allow it to unbox
|
|
11
|
11
|
-- consistently.
|
|
|
12
|
+-- AK: Seems we no longer unbox OccEnv now anyway so it might be redundant.
|
|
|
13
|
+
|
|
12
|
14
|
|
|
13
|
15
|
{-
|
|
14
|
16
|
(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
|
| ... |
... |
@@ -967,6 +969,11 @@ occAnalBind |
|
967
|
969
|
-> ([CoreBind] -> r -> r) -- How to combine the scope with new binds
|
|
968
|
970
|
-> WithUsageDetails r -- Of the whole let(rec)
|
|
969
|
971
|
|
|
|
972
|
+-- While not allocating any less inlining occAnalBind turns calls to the passed functions
|
|
|
973
|
+-- into known calls. One might assume this doesn't matter, but for let heavy
|
|
|
974
|
+-- code I observed speed ups as big as 10-20%!
|
|
|
975
|
+{-# INLINE occAnalBind #-}
|
|
|
976
|
+
|
|
970
|
977
|
occAnalBind env lvl ire (Rec pairs) thing_inside combine
|
|
971
|
978
|
= addInScopeList env (map fst pairs) $ \env ->
|
|
972
|
979
|
let WUD body_uds body' = thing_inside env
|
| ... |
... |
@@ -984,7 +991,7 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine |
|
984
|
991
|
= -- Analyse the RHS and /then/ the body
|
|
985
|
992
|
let -- Analyse the rhs first, generating rhs_uds
|
|
986
|
993
|
!(rhs_uds_s, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs
|
|
987
|
|
- rhs_uds = foldr1 orUDs rhs_uds_s -- NB: orUDs. See (W4) of
|
|
|
994
|
+ rhs_uds = foldl1' orUDs rhs_uds_s -- NB: orUDs. See (W4) of
|
|
988
|
995
|
-- Note [Occurrence analysis for join points]
|
|
989
|
996
|
|
|
990
|
997
|
-- Now analyse the body, adding the join point
|
| ... |
... |
@@ -1049,6 +1056,7 @@ occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs |
|
1049
|
1056
|
-- Match join arity O from mb_join_arity with manifest join arity M as
|
|
1050
|
1057
|
-- returned by of occAnalLamTail. It's totally OK for them to mismatch;
|
|
1051
|
1058
|
-- hence adjust the UDs from the RHS
|
|
|
1059
|
+
|
|
1052
|
1060
|
WUD adj_rhs_uds final_rhs = adjustNonRecRhs mb_join $
|
|
1053
|
1061
|
occAnalLamTail rhs_env rhs
|
|
1054
|
1062
|
final_bndr_with_rules
|
| ... |
... |
@@ -2188,7 +2196,8 @@ occ_anal_lam_tail env expr@(Lam {}) |
|
2188
|
2196
|
go env rev_bndrs body
|
|
2189
|
2197
|
= addInScope env rev_bndrs $ \env ->
|
|
2190
|
2198
|
let !(WUD usage body') = occ_anal_lam_tail env body
|
|
2191
|
|
- wrap_lam body bndr = Lam (tagLamBinder usage bndr) body
|
|
|
2199
|
+ wrap_lam !body !bndr = let !bndr' = tagLamBinder usage bndr
|
|
|
2200
|
+ in Lam bndr' body
|
|
2192
|
2201
|
in WUD (usage `addLamCoVarOccs` rev_bndrs)
|
|
2193
|
2202
|
(foldl' wrap_lam body' rev_bndrs)
|
|
2194
|
2203
|
|
| ... |
... |
@@ -2541,7 +2550,7 @@ occAnal env (Case scrut bndr ty alts) |
|
2541
|
2550
|
let alt_env = addBndrSwap scrut' bndr $
|
|
2542
|
2551
|
setTailCtxt env -- Kill off OccRhs
|
|
2543
|
2552
|
WUD alts_usage alts' = do_alts alt_env alts
|
|
2544
|
|
- tagged_bndr = tagLamBinder alts_usage bndr
|
|
|
2553
|
+ !tagged_bndr = tagLamBinder alts_usage bndr
|
|
2545
|
2554
|
in WUD alts_usage (tagged_bndr, alts')
|
|
2546
|
2555
|
|
|
2547
|
2556
|
total_usage = markAllNonTail scrut_usage `andUDs` alts_usage
|
| ... |
... |
@@ -2559,11 +2568,12 @@ occAnal env (Case scrut bndr ty alts) |
|
2559
|
2568
|
do_alt !env (Alt con bndrs rhs)
|
|
2560
|
2569
|
= addInScopeList env bndrs $ \ env ->
|
|
2561
|
2570
|
let WUD rhs_usage rhs' = occAnal env rhs
|
|
2562
|
|
- tagged_bndrs = tagLamBinders rhs_usage bndrs
|
|
|
2571
|
+ !tagged_bndrs = tagLamBinders rhs_usage bndrs
|
|
2563
|
2572
|
in -- See Note [Binders in case alternatives]
|
|
2564
|
2573
|
WUD rhs_usage (Alt con tagged_bndrs rhs')
|
|
2565
|
2574
|
|
|
2566
|
2575
|
occAnal env (Let bind body)
|
|
|
2576
|
+ -- TODO: Would be nice to use a strict version of mkLets here
|
|
2567
|
2577
|
= occAnalBind env NotTopLevel noImpRuleEdges bind
|
|
2568
|
2578
|
(\env -> occAnal env body) mkLets
|
|
2569
|
2579
|
|
| ... |
... |
@@ -2644,10 +2654,12 @@ occAnalApp !env (Var fun, args, ticks) |
|
2644
|
2654
|
| fun `hasKey` runRWKey
|
|
2645
|
2655
|
, [t1, t2, arg] <- args
|
|
2646
|
2656
|
, WUD usage arg' <- adjustNonRecRhs (JoinPoint 1) $ occAnalLamTail env arg
|
|
2647
|
|
- = WUD usage (mkTicks ticks $ mkApps (Var fun) [t1, t2, arg'])
|
|
|
2657
|
+ = let app_out = (mkTicks ticks $ mkApps (Var fun) [t1, t2, arg'])
|
|
|
2658
|
+ in WUD usage app_out
|
|
2648
|
2659
|
|
|
2649
|
2660
|
occAnalApp env (Var fun_id, args, ticks)
|
|
2650
|
|
- = WUD all_uds (mkTicks ticks app')
|
|
|
2661
|
+ = let app_out = (mkTicks ticks app')
|
|
|
2662
|
+ in WUD all_uds app_out
|
|
2651
|
2663
|
where
|
|
2652
|
2664
|
-- Lots of banged bindings: this is a very heavily bit of code,
|
|
2653
|
2665
|
-- so it pays not to make lots of thunks here, all of which
|
| ... |
... |
@@ -2692,8 +2704,9 @@ occAnalApp env (Var fun_id, args, ticks) |
|
2692
|
2704
|
-- See Note [Sources of one-shot information], bullet point A']
|
|
2693
|
2705
|
|
|
2694
|
2706
|
occAnalApp env (fun, args, ticks)
|
|
2695
|
|
- = WUD (markAllNonTail (fun_uds `andUDs` args_uds))
|
|
2696
|
|
- (mkTicks ticks app')
|
|
|
2707
|
+ = let app_out = (mkTicks ticks app')
|
|
|
2708
|
+ in WUD (markAllNonTail (fun_uds `andUDs` args_uds)) app_out
|
|
|
2709
|
+
|
|
2697
|
2710
|
where
|
|
2698
|
2711
|
!(WUD args_uds app') = occAnalArgs env fun' args []
|
|
2699
|
2712
|
!(WUD fun_uds fun') = occAnal (addAppCtxt env args) fun
|
| ... |
... |
@@ -3650,8 +3663,8 @@ data WithTailUsageDetails a = WTUD !TailUsageDetails !a |
|
3650
|
3663
|
-------------------
|
|
3651
|
3664
|
-- UsageDetails API
|
|
3652
|
3665
|
|
|
3653
|
|
-andUDs, orUDs
|
|
3654
|
|
- :: UsageDetails -> UsageDetails -> UsageDetails
|
|
|
3666
|
+andUDs:: UsageDetails -> UsageDetails -> UsageDetails
|
|
|
3667
|
+orUDs :: UsageDetails -> UsageDetails -> UsageDetails
|
|
3655
|
3668
|
andUDs = combineUsageDetailsWith andLocalOcc
|
|
3656
|
3669
|
orUDs = combineUsageDetailsWith orLocalOcc
|
|
3657
|
3670
|
|
| ... |
... |
@@ -3766,10 +3779,12 @@ combineUsageDetailsWith plus_occ_info |
|
3766
|
3779
|
| isEmptyVarEnv env1 = uds2
|
|
3767
|
3780
|
| isEmptyVarEnv env2 = uds1
|
|
3768
|
3781
|
| otherwise
|
|
3769
|
|
- = UD { ud_env = plusVarEnv_C plus_occ_info env1 env2
|
|
3770
|
|
- , ud_z_many = plusVarEnv z_many1 z_many2
|
|
|
3782
|
+ -- Using strictPlusVarEnv here speeds up the test T26425 by about 10% by avoiding
|
|
|
3783
|
+ -- intermediate thunks.
|
|
|
3784
|
+ = UD { ud_env = strictPlusVarEnv_C plus_occ_info env1 env2
|
|
|
3785
|
+ , ud_z_many = strictPlusVarEnv z_many1 z_many2
|
|
3771
|
3786
|
, ud_z_in_lam = plusVarEnv z_in_lam1 z_in_lam2
|
|
3772
|
|
- , ud_z_tail = plusVarEnv z_tail1 z_tail2 }
|
|
|
3787
|
+ , ud_z_tail = strictPlusVarEnv z_tail1 z_tail2 }
|
|
3773
|
3788
|
|
|
3774
|
3789
|
lookupLetOccInfo :: UsageDetails -> Id -> OccInfo
|
|
3775
|
3790
|
-- Don't use locally-generated occ_info for exported (visible-elsewhere)
|
| ... |
... |
@@ -3847,7 +3862,7 @@ tagLamBinders :: UsageDetails -- Of scope |
|
3847
|
3862
|
-> [Id] -- Binders
|
|
3848
|
3863
|
-> [IdWithOccInfo] -- Tagged binders
|
|
3849
|
3864
|
tagLamBinders usage binders
|
|
3850
|
|
- = map (tagLamBinder usage) binders
|
|
|
3865
|
+ = strictMap (tagLamBinder usage) binders
|
|
3851
|
3866
|
|
|
3852
|
3867
|
tagLamBinder :: UsageDetails -- Of scope
|
|
3853
|
3868
|
-> Id -- Binder
|