Andreas Klebinger pushed to branch wip/andreask/occ_anal_tuning at Glasgow Haskell Compiler / GHC

Commits:

3 changed files:

Changes:

  • compiler/GHC/Core/Opt/OccurAnal.hs
    ... ... @@ -9,6 +9,8 @@
    9 9
     -- many /other/ arguments the function has.  Inconsistent unboxing is very
    
    10 10
     -- bad for performance, so I increased the limit to allow it to unbox
    
    11 11
     -- consistently.
    
    12
    +-- AK: Seems we no longer unbox OccEnv now anyway so it might be redundant.
    
    13
    +
    
    12 14
     
    
    13 15
     {-
    
    14 16
     (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
    
    ... ... @@ -967,6 +969,12 @@ occAnalBind
    967 969
       -> ([CoreBind] -> r -> r)          -- How to combine the scope with new binds
    
    968 970
       -> WithUsageDetails r              -- Of the whole let(rec)
    
    969 971
     
    
    972
    +-- AK: While not allocating any less inlining occAnalBind turns calls to the
    
    973
    +-- passed functions into known calls with all the benefits that brings.
    
    974
    +-- On a version of T26425 with 6k alternatives this improved compile
    
    975
    +-- by 10-20% with -O.
    
    976
    +{-# INLINE occAnalBind #-}
    
    977
    +
    
    970 978
     occAnalBind env lvl ire (Rec pairs) thing_inside combine
    
    971 979
       = addInScopeList env (map fst pairs) $ \env ->
    
    972 980
         let WUD body_uds body'  = thing_inside env
    
    ... ... @@ -984,7 +992,7 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine
    984 992
       = -- Analyse the RHS and /then/ the body
    
    985 993
         let -- Analyse the rhs first, generating rhs_uds
    
    986 994
             !(rhs_uds_s, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs
    
    987
    -        rhs_uds = foldr1 orUDs rhs_uds_s   -- NB: orUDs.  See (W4) of
    
    995
    +        rhs_uds = foldl1' orUDs rhs_uds_s   -- NB: orUDs.  See (W4) of
    
    988 996
                                                -- Note [Occurrence analysis for join points]
    
    989 997
     
    
    990 998
             -- Now analyse the body, adding the join point
    
    ... ... @@ -1049,6 +1057,7 @@ occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs
    1049 1057
         -- Match join arity O from mb_join_arity with manifest join arity M as
    
    1050 1058
         -- returned by of occAnalLamTail. It's totally OK for them to mismatch;
    
    1051 1059
         -- hence adjust the UDs from the RHS
    
    1060
    +
    
    1052 1061
         WUD adj_rhs_uds final_rhs = adjustNonRecRhs mb_join $
    
    1053 1062
                                     occAnalLamTail rhs_env rhs
    
    1054 1063
         final_bndr_with_rules
    
    ... ... @@ -2054,6 +2063,18 @@ So The Plan is this:
    2054 2063
        was a loop breaker last time round
    
    2055 2064
     
    
    2056 2065
     Hence the is_lb field of NodeScore
    
    2066
    +
    
    2067
    +Note [Strictness in the occurrence analyser]
    
    2068
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    2069
    +By carefully making the occurrence analyser strict in some places, we can
    
    2070
    +dramatically reduce its memory residency. Among other things we:
    
    2071
    +* Evaluate the result of `tagLamBinder` and friends, so that the binder (or its
    
    2072
    +  OccInfo) does not retain the entire `UsageDetails`.  Also use `strictMap` in `tagLamBinders`.
    
    2073
    +* In `combineUsageDetailsWith`, the fields of the data constructor are strict, and we use
    
    2074
    +  `strictPlusVarEnv` on the maps that are bound to be needed later on to avoid thunks being
    
    2075
    +  stored in the values.
    
    2076
    +
    
    2077
    +These measures reduced residency for test T26425 by a factor of at least 5x.
    
    2057 2078
     -}
    
    2058 2079
     
    
    2059 2080
     {- *********************************************************************
    
    ... ... @@ -2188,7 +2209,9 @@ occ_anal_lam_tail env expr@(Lam {})
    2188 2209
         go env rev_bndrs body
    
    2189 2210
           = addInScope env rev_bndrs $ \env ->
    
    2190 2211
             let !(WUD usage body') = occ_anal_lam_tail env body
    
    2191
    -            wrap_lam body bndr = Lam (tagLamBinder usage bndr) body
    
    2212
    +            -- See Note [Strictness in the occurrence analyser]
    
    2213
    +            wrap_lam !body !bndr = let !bndr' = tagLamBinder usage bndr
    
    2214
    +                                   in Lam bndr' body
    
    2192 2215
             in WUD (usage `addLamCoVarOccs` rev_bndrs)
    
    2193 2216
                    (foldl' wrap_lam body' rev_bndrs)
    
    2194 2217
     
    
    ... ... @@ -2541,7 +2564,8 @@ occAnal env (Case scrut bndr ty alts)
    2541 2564
                let alt_env = addBndrSwap scrut' bndr $
    
    2542 2565
                              setTailCtxt env  -- Kill off OccRhs
    
    2543 2566
                    WUD alts_usage alts' = do_alts alt_env alts
    
    2544
    -               tagged_bndr = tagLamBinder alts_usage bndr
    
    2567
    +               !tagged_bndr = tagLamBinder alts_usage bndr
    
    2568
    +               -- See Note [Strictness in the occurrence analyser]
    
    2545 2569
                in WUD alts_usage (tagged_bndr, alts')
    
    2546 2570
     
    
    2547 2571
           total_usage = markAllNonTail scrut_usage `andUDs` alts_usage
    
    ... ... @@ -2559,11 +2583,13 @@ occAnal env (Case scrut bndr ty alts)
    2559 2583
         do_alt !env (Alt con bndrs rhs)
    
    2560 2584
           = addInScopeList env bndrs $ \ env ->
    
    2561 2585
             let WUD rhs_usage rhs' = occAnal env rhs
    
    2562
    -            tagged_bndrs = tagLamBinders rhs_usage bndrs
    
    2586
    +            !tagged_bndrs = tagLamBinders rhs_usage bndrs
    
    2587
    +                           -- See Note [Strictness in the occurrence analyser]
    
    2563 2588
             in                 -- See Note [Binders in case alternatives]
    
    2564 2589
             WUD rhs_usage (Alt con tagged_bndrs rhs')
    
    2565 2590
     
    
    2566 2591
     occAnal env (Let bind body)
    
    2592
    +  -- TODO: Would be nice to use a strict version of mkLets here
    
    2567 2593
       = occAnalBind env NotTopLevel noImpRuleEdges bind
    
    2568 2594
                     (\env -> occAnal env body) mkLets
    
    2569 2595
     
    
    ... ... @@ -2644,10 +2670,12 @@ occAnalApp !env (Var fun, args, ticks)
    2644 2670
       | fun `hasKey` runRWKey
    
    2645 2671
       , [t1, t2, arg]  <- args
    
    2646 2672
       , WUD usage arg' <- adjustNonRecRhs (JoinPoint 1) $ occAnalLamTail env arg
    
    2647
    -  = WUD usage (mkTicks ticks $ mkApps (Var fun) [t1, t2, arg'])
    
    2673
    +  = let app_out = mkTicks ticks $ mkApps (Var fun) [t1, t2, arg']
    
    2674
    +    in WUD usage app_out
    
    2648 2675
     
    
    2649 2676
     occAnalApp env (Var fun_id, args, ticks)
    
    2650
    -  = WUD all_uds (mkTicks ticks app')
    
    2677
    +  = let app_out = mkTicks ticks app'
    
    2678
    +    in WUD all_uds app_out
    
    2651 2679
       where
    
    2652 2680
         -- Lots of banged bindings: this is a very heavily bit of code,
    
    2653 2681
         -- so it pays not to make lots of thunks here, all of which
    
    ... ... @@ -2692,8 +2720,9 @@ occAnalApp env (Var fun_id, args, ticks)
    2692 2720
             -- See Note [Sources of one-shot information], bullet point A']
    
    2693 2721
     
    
    2694 2722
     occAnalApp env (fun, args, ticks)
    
    2695
    -  = WUD (markAllNonTail (fun_uds `andUDs` args_uds))
    
    2696
    -                     (mkTicks ticks app')
    
    2723
    +  = let app_out = mkTicks ticks app'
    
    2724
    +    in WUD (markAllNonTail (fun_uds `andUDs` args_uds)) app_out
    
    2725
    +
    
    2697 2726
       where
    
    2698 2727
         !(WUD args_uds app') = occAnalArgs env fun' args []
    
    2699 2728
         !(WUD fun_uds fun')  = occAnal (addAppCtxt env args) fun
    
    ... ... @@ -3650,8 +3679,8 @@ data WithTailUsageDetails a = WTUD !TailUsageDetails !a
    3650 3679
     -------------------
    
    3651 3680
     -- UsageDetails API
    
    3652 3681
     
    
    3653
    -andUDs, orUDs
    
    3654
    -        :: UsageDetails -> UsageDetails -> UsageDetails
    
    3682
    +andUDs:: UsageDetails -> UsageDetails -> UsageDetails
    
    3683
    +orUDs :: UsageDetails -> UsageDetails -> UsageDetails
    
    3655 3684
     andUDs = combineUsageDetailsWith andLocalOcc
    
    3656 3685
     orUDs  = combineUsageDetailsWith orLocalOcc
    
    3657 3686
     
    
    ... ... @@ -3766,10 +3795,13 @@ combineUsageDetailsWith plus_occ_info
    3766 3795
       | isEmptyVarEnv env1 = uds2
    
    3767 3796
       | isEmptyVarEnv env2 = uds1
    
    3768 3797
       | otherwise
    
    3769
    -  = UD { ud_env       = plusVarEnv_C plus_occ_info env1 env2
    
    3770
    -       , ud_z_many    = plusVarEnv z_many1   z_many2
    
    3798
    +  -- See Note [Strictness in the occurrence analyser]
    
    3799
    +  -- Using strictPlusVarEnv here speeds up the test T26425 by about 10% by avoiding
    
    3800
    +  -- intermediate thunks.
    
    3801
    +  = UD { ud_env       = strictPlusVarEnv_C plus_occ_info env1 env2
    
    3802
    +       , ud_z_many    = strictPlusVarEnv z_many1   z_many2
    
    3771 3803
            , ud_z_in_lam  = plusVarEnv z_in_lam1 z_in_lam2
    
    3772
    -       , ud_z_tail    = plusVarEnv z_tail1   z_tail2 }
    
    3804
    +       , ud_z_tail    = strictPlusVarEnv z_tail1   z_tail2 }
    
    3773 3805
     
    
    3774 3806
     lookupLetOccInfo :: UsageDetails -> Id -> OccInfo
    
    3775 3807
     -- Don't use locally-generated occ_info for exported (visible-elsewhere)
    
    ... ... @@ -3847,7 +3879,8 @@ tagLamBinders :: UsageDetails -- Of scope
    3847 3879
                   -> [Id]                -- Binders
    
    3848 3880
                   -> [IdWithOccInfo]     -- Tagged binders
    
    3849 3881
     tagLamBinders usage binders
    
    3850
    -  = map (tagLamBinder usage) binders
    
    3882
    +  -- See Note [Strictness in the occurrence analyser]
    
    3883
    +  = strictMap (tagLamBinder usage) binders
    
    3851 3884
     
    
    3852 3885
     tagLamBinder :: UsageDetails       -- Of scope
    
    3853 3886
                  -> Id                 -- Binder
    
    ... ... @@ -3856,6 +3889,7 @@ tagLamBinder :: UsageDetails -- Of scope
    3856 3889
     -- No-op on TyVars
    
    3857 3890
     -- A lambda binder never has an unfolding, so no need to look for that
    
    3858 3891
     tagLamBinder usage bndr
    
    3892
    +  -- See Note [Strictness in the occurrence analyser]
    
    3859 3893
       = setBinderOcc (markNonTail occ) bndr
    
    3860 3894
           -- markNonTail: don't try to make an argument into a join point
    
    3861 3895
       where
    

  • compiler/GHC/Types/Unique/FM.hs
    ... ... @@ -51,7 +51,9 @@ module GHC.Types.Unique.FM (
    51 51
             delListFromUFM,
    
    52 52
             delListFromUFM_Directly,
    
    53 53
             plusUFM,
    
    54
    +        strictPlusUFM,
    
    54 55
             plusUFM_C,
    
    56
    +        strictPlusUFM_C,
    
    55 57
             plusUFM_CD,
    
    56 58
             plusUFM_CD2,
    
    57 59
             mergeUFM,
    
    ... ... @@ -261,16 +263,24 @@ delListFromUFM_Directly = foldl' delFromUFM_Directly
    261 263
     delFromUFM_Directly :: UniqFM key elt -> Unique -> UniqFM key elt
    
    262 264
     delFromUFM_Directly (UFM m) u = UFM (M.delete (getKey u) m)
    
    263 265
     
    
    264
    --- Bindings in right argument shadow those in the left
    
    266
    +-- | Bindings in right argument shadow those in the left.
    
    267
    +--
    
    268
    +-- Unlike containers this union is right-biased for historic reasons.
    
    265 269
     plusUFM :: UniqFM key elt -> UniqFM key elt -> UniqFM key elt
    
    266
    --- M.union is left-biased, plusUFM should be right-biased.
    
    267 270
     plusUFM (UFM x) (UFM y) = UFM (M.union y x)
    
    268 271
          -- Note (M.union y x), with arguments flipped
    
    269 272
          -- M.union is left-biased, plusUFM should be right-biased.
    
    270 273
     
    
    274
    +-- | Right biased
    
    275
    +strictPlusUFM :: UniqFM key elt -> UniqFM key elt -> UniqFM key elt
    
    276
    +strictPlusUFM (UFM x) (UFM y) = UFM (MS.union y x)
    
    277
    +
    
    271 278
     plusUFM_C :: (elt -> elt -> elt) -> UniqFM key elt -> UniqFM key elt -> UniqFM key elt
    
    272 279
     plusUFM_C f (UFM x) (UFM y) = UFM (M.unionWith f x y)
    
    273 280
     
    
    281
    +strictPlusUFM_C :: (elt -> elt -> elt) -> UniqFM key elt -> UniqFM key elt -> UniqFM key elt
    
    282
    +strictPlusUFM_C f (UFM x) (UFM y) = UFM (MS.unionWith f x y)
    
    283
    +
    
    274 284
     -- | `plusUFM_CD f m1 d1 m2 d2` merges the maps using `f` as the
    
    275 285
     -- combinding function and `d1` resp. `d2` as the default value if
    
    276 286
     -- there is no entry in `m1` reps. `m2`. The domain is the union of
    

  • compiler/GHC/Types/Var/Env.hs
    ... ... @@ -12,7 +12,8 @@ module GHC.Types.Var.Env (
    12 12
             elemVarEnv, disjointVarEnv, anyVarEnv,
    
    13 13
             extendVarEnv, extendVarEnv_C, extendVarEnv_Acc,
    
    14 14
             extendVarEnvList,
    
    15
    -        plusVarEnv, plusVarEnv_C, plusVarEnv_CD, plusMaybeVarEnv_C,
    
    15
    +        strictPlusVarEnv, plusVarEnv, plusVarEnv_C, strictPlusVarEnv_C,
    
    16
    +        plusVarEnv_CD, plusMaybeVarEnv_C,
    
    16 17
             plusVarEnvList, alterVarEnv,
    
    17 18
             delVarEnvList, delVarEnv,
    
    18 19
             minusVarEnv,
    
    ... ... @@ -511,6 +512,7 @@ extendVarEnv :: VarEnv a -> Var -> a -> VarEnv a
    511 512
     extendVarEnv_C    :: (a->a->a) -> VarEnv a -> Var -> a -> VarEnv a
    
    512 513
     extendVarEnv_Acc  :: (a->b->b) -> (a->b) -> VarEnv b -> Var -> a -> VarEnv b
    
    513 514
     plusVarEnv        :: VarEnv a -> VarEnv a -> VarEnv a
    
    515
    +strictPlusVarEnv  :: VarEnv a -> VarEnv a -> VarEnv a
    
    514 516
     plusVarEnvList    :: [VarEnv a] -> VarEnv a
    
    515 517
     extendVarEnvList  :: VarEnv a -> [(Var, a)] -> VarEnv a
    
    516 518
     varEnvDomain      :: VarEnv elt -> UnVarSet
    
    ... ... @@ -522,6 +524,7 @@ delVarEnvList :: Foldable f => VarEnv a -> f Var -> VarEnv a
    522 524
     delVarEnv         :: VarEnv a -> Var -> VarEnv a
    
    523 525
     minusVarEnv       :: VarEnv a -> VarEnv b -> VarEnv a
    
    524 526
     plusVarEnv_C      :: (a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
    
    527
    +strictPlusVarEnv_C :: (a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
    
    525 528
     plusVarEnv_CD     :: (a -> a -> a) -> VarEnv a -> a -> VarEnv a -> a -> VarEnv a
    
    526 529
     plusMaybeVarEnv_C :: (a -> a -> Maybe a) -> VarEnv a -> VarEnv a -> VarEnv a
    
    527 530
     mapVarEnv         :: (a -> b) -> VarEnv a -> VarEnv b
    
    ... ... @@ -548,6 +551,7 @@ extendVarEnv_C = addToUFM_C
    548 551
     extendVarEnv_Acc = addToUFM_Acc
    
    549 552
     extendVarEnvList = addListToUFM
    
    550 553
     plusVarEnv_C     = plusUFM_C
    
    554
    +strictPlusVarEnv_C = strictPlusUFM_C
    
    551 555
     plusVarEnv_CD    = plusUFM_CD
    
    552 556
     plusMaybeVarEnv_C = plusMaybeUFM_C
    
    553 557
     delVarEnvList    = delListFromUFM
    
    ... ... @@ -556,6 +560,7 @@ delVarEnvList = delListFromUFM
    556 560
     delVarEnv        = delFromUFM
    
    557 561
     minusVarEnv      = minusUFM
    
    558 562
     plusVarEnv       = plusUFM
    
    563
    +strictPlusVarEnv = strictPlusUFM
    
    559 564
     plusVarEnvList   = plusUFMList
    
    560 565
     -- lookupVarEnv is very hot (in part due to being called by substTyVar),
    
    561 566
     -- if it's not inlined than the mere allocation of the Just constructor causes