Andreas Klebinger pushed to branch wip/andreask/occ_anal_tuning at Glasgow Haskell Compiler / GHC
Commits:
-
3f2d549e
by Andreas Klebinger at 2025-10-10T14:22:32+02:00
4 changed files:
- compiler/GHC/Core/Opt/OccurAnal.hs
- compiler/GHC/Data/Graph/UnVar.hs
- compiler/GHC/Types/Unique/FM.hs
- compiler/GHC/Types/Var/Env.hs
Changes:
| ... | ... | @@ -9,6 +9,8 @@ |
| 9 | 9 | -- many /other/ arguments the function has. Inconsistent unboxing is very
|
| 10 | 10 | -- bad for performance, so I increased the limit to allow it to unbox
|
| 11 | 11 | -- consistently.
|
| 12 | +-- AK: Seems we no longer unbox OccEnv now anyway so it might be redundant.
|
|
| 13 | + |
|
| 12 | 14 | |
| 13 | 15 | {-
|
| 14 | 16 | (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
|
| ... | ... | @@ -37,6 +39,7 @@ import GHC.Core.Utils ( exprIsTrivial, isDefaultAlt, isExpandableApp, |
| 37 | 39 | mkCastMCo, mkTicks )
|
| 38 | 40 | import GHC.Core.Opt.Arity ( joinRhsArity, isOneShotBndr )
|
| 39 | 41 | import GHC.Core.Coercion
|
| 42 | +import GHC.Core.Seq (seqExpr)
|
|
| 40 | 43 | import GHC.Core.Type
|
| 41 | 44 | import GHC.Core.TyCo.FVs ( tyCoVarsOfMCo )
|
| 42 | 45 | |
| ... | ... | @@ -984,7 +987,7 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine |
| 984 | 987 | = -- Analyse the RHS and /then/ the body
|
| 985 | 988 | let -- Analyse the rhs first, generating rhs_uds
|
| 986 | 989 | !(rhs_uds_s, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs
|
| 987 | - rhs_uds = foldr1 orUDs rhs_uds_s -- NB: orUDs. See (W4) of
|
|
| 990 | + rhs_uds = foldl1' orUDs rhs_uds_s -- NB: orUDs. See (W4) of
|
|
| 988 | 991 | -- Note [Occurrence analysis for join points]
|
| 989 | 992 | |
| 990 | 993 | -- Now analyse the body, adding the join point
|
| ... | ... | @@ -1049,6 +1052,7 @@ occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs |
| 1049 | 1052 | -- Match join arity O from mb_join_arity with manifest join arity M as
|
| 1050 | 1053 | -- returned by of occAnalLamTail. It's totally OK for them to mismatch;
|
| 1051 | 1054 | -- hence adjust the UDs from the RHS
|
| 1055 | + |
|
| 1052 | 1056 | WUD adj_rhs_uds final_rhs = adjustNonRecRhs mb_join $
|
| 1053 | 1057 | occAnalLamTail rhs_env rhs
|
| 1054 | 1058 | final_bndr_with_rules
|
| ... | ... | @@ -2188,7 +2192,8 @@ occ_anal_lam_tail env expr@(Lam {}) |
| 2188 | 2192 | go env rev_bndrs body
|
| 2189 | 2193 | = addInScope env rev_bndrs $ \env ->
|
| 2190 | 2194 | let !(WUD usage body') = occ_anal_lam_tail env body
|
| 2191 | - wrap_lam body bndr = Lam (tagLamBinder usage bndr) body
|
|
| 2195 | + wrap_lam !body !bndr = let !bndr' = (tagLamBinder usage bndr)
|
|
| 2196 | + in Lam bndr' body
|
|
| 2192 | 2197 | in WUD (usage `addLamCoVarOccs` rev_bndrs)
|
| 2193 | 2198 | (foldl' wrap_lam body' rev_bndrs)
|
| 2194 | 2199 | |
| ... | ... | @@ -2541,7 +2546,7 @@ occAnal env (Case scrut bndr ty alts) |
| 2541 | 2546 | let alt_env = addBndrSwap scrut' bndr $
|
| 2542 | 2547 | setTailCtxt env -- Kill off OccRhs
|
| 2543 | 2548 | WUD alts_usage alts' = do_alts alt_env alts
|
| 2544 | - tagged_bndr = tagLamBinder alts_usage bndr
|
|
| 2549 | + !tagged_bndr = tagLamBinder alts_usage bndr
|
|
| 2545 | 2550 | in WUD alts_usage (tagged_bndr, alts')
|
| 2546 | 2551 | |
| 2547 | 2552 | total_usage = markAllNonTail scrut_usage `andUDs` alts_usage
|
| ... | ... | @@ -2561,11 +2566,16 @@ occAnal env (Case scrut bndr ty alts) |
| 2561 | 2566 | let WUD rhs_usage rhs' = occAnal env rhs
|
| 2562 | 2567 | tagged_bndrs = tagLamBinders rhs_usage bndrs
|
| 2563 | 2568 | in -- See Note [Binders in case alternatives]
|
| 2564 | - WUD rhs_usage (Alt con tagged_bndrs rhs')
|
|
| 2569 | + seqList tagged_bndrs -- avoid retaining the occEnv
|
|
| 2570 | + $ WUD rhs_usage (Alt con tagged_bndrs rhs')
|
|
| 2565 | 2571 | |
| 2566 | 2572 | occAnal env (Let bind body)
|
| 2567 | 2573 | = occAnalBind env NotTopLevel noImpRuleEdges bind
|
| 2568 | - (\env -> occAnal env body) mkLets
|
|
| 2574 | + (\env -> occAnal env body)
|
|
| 2575 | + -- Without the seqs we construct a `Let` whos body is a
|
|
| 2576 | + -- a thunk retaining the whole OccEnv until forced by the simplifier.
|
|
| 2577 | + (\bndrs body -> (seqExpr body) `seq` (seqList bndrs) `seq` mkLets bndrs body)
|
|
| 2578 | + -- mkLets
|
|
| 2569 | 2579 | |
| 2570 | 2580 | occAnalArgs :: OccEnv -> CoreExpr -> [CoreExpr]
|
| 2571 | 2581 | -> [OneShots] -- Very commonly empty, notably prior to dmd anal
|
| ... | ... | @@ -2644,10 +2654,12 @@ occAnalApp !env (Var fun, args, ticks) |
| 2644 | 2654 | | fun `hasKey` runRWKey
|
| 2645 | 2655 | , [t1, t2, arg] <- args
|
| 2646 | 2656 | , WUD usage arg' <- adjustNonRecRhs (JoinPoint 1) $ occAnalLamTail env arg
|
| 2647 | - = WUD usage (mkTicks ticks $ mkApps (Var fun) [t1, t2, arg'])
|
|
| 2657 | + = let app_out = (mkTicks ticks $ mkApps (Var fun) [t1, t2, arg'])
|
|
| 2658 | + in seq (seqExpr app_out) $ WUD usage app_out
|
|
| 2648 | 2659 | |
| 2649 | 2660 | occAnalApp env (Var fun_id, args, ticks)
|
| 2650 | - = WUD all_uds (mkTicks ticks app')
|
|
| 2661 | + = let app_out = (mkTicks ticks app')
|
|
| 2662 | + in seqExpr app_out `seq` WUD all_uds app_out
|
|
| 2651 | 2663 | where
|
| 2652 | 2664 | -- Lots of banged bindings: this is a very heavily bit of code,
|
| 2653 | 2665 | -- so it pays not to make lots of thunks here, all of which
|
| ... | ... | @@ -2692,8 +2704,9 @@ occAnalApp env (Var fun_id, args, ticks) |
| 2692 | 2704 | -- See Note [Sources of one-shot information], bullet point A']
|
| 2693 | 2705 | |
| 2694 | 2706 | occAnalApp env (fun, args, ticks)
|
| 2695 | - = WUD (markAllNonTail (fun_uds `andUDs` args_uds))
|
|
| 2696 | - (mkTicks ticks app')
|
|
| 2707 | + = let app_out = (mkTicks ticks app')
|
|
| 2708 | + in seqExpr app_out `seq` WUD (markAllNonTail (fun_uds `andUDs` args_uds)) app_out
|
|
| 2709 | + |
|
| 2697 | 2710 | where
|
| 2698 | 2711 | !(WUD args_uds app') = occAnalArgs env fun' args []
|
| 2699 | 2712 | !(WUD fun_uds fun') = occAnal (addAppCtxt env args) fun
|
| ... | ... | @@ -3650,8 +3663,8 @@ data WithTailUsageDetails a = WTUD !TailUsageDetails !a |
| 3650 | 3663 | -------------------
|
| 3651 | 3664 | -- UsageDetails API
|
| 3652 | 3665 | |
| 3653 | -andUDs, orUDs
|
|
| 3654 | - :: UsageDetails -> UsageDetails -> UsageDetails
|
|
| 3666 | +andUDs:: UsageDetails -> UsageDetails -> UsageDetails
|
|
| 3667 | +orUDs :: UsageDetails -> UsageDetails -> UsageDetails
|
|
| 3655 | 3668 | andUDs = combineUsageDetailsWith andLocalOcc
|
| 3656 | 3669 | orUDs = combineUsageDetailsWith orLocalOcc
|
| 3657 | 3670 | |
| ... | ... | @@ -3759,17 +3772,18 @@ restrictFreeVars bndrs fvs = restrictUniqSetToUFM bndrs fvs |
| 3759 | 3772 | |
| 3760 | 3773 | combineUsageDetailsWith :: (LocalOcc -> LocalOcc -> LocalOcc)
|
| 3761 | 3774 | -> UsageDetails -> UsageDetails -> UsageDetails
|
| 3762 | -{-# INLINE combineUsageDetailsWith #-}
|
|
| 3763 | 3775 | combineUsageDetailsWith plus_occ_info
|
| 3764 | 3776 | uds1@(UD { ud_env = env1, ud_z_many = z_many1, ud_z_in_lam = z_in_lam1, ud_z_tail = z_tail1 })
|
| 3765 | 3777 | uds2@(UD { ud_env = env2, ud_z_many = z_many2, ud_z_in_lam = z_in_lam2, ud_z_tail = z_tail2 })
|
| 3766 | 3778 | | isEmptyVarEnv env1 = uds2
|
| 3767 | 3779 | | isEmptyVarEnv env2 = uds1
|
| 3768 | 3780 | | otherwise
|
| 3769 | - = UD { ud_env = plusVarEnv_C plus_occ_info env1 env2
|
|
| 3770 | - , ud_z_many = plusVarEnv z_many1 z_many2
|
|
| 3771 | - , ud_z_in_lam = plusVarEnv z_in_lam1 z_in_lam2
|
|
| 3772 | - , ud_z_tail = plusVarEnv z_tail1 z_tail2 }
|
|
| 3781 | + -- Using strictPlusVarEnv here speeds up the test T26425 by about 10% by avoiding
|
|
| 3782 | + -- intermediate thunks.
|
|
| 3783 | + = UD { ud_env = strictPlusVarEnv_C plus_occ_info env1 env2
|
|
| 3784 | + , ud_z_many = strictPlusVarEnv z_many1 z_many2
|
|
| 3785 | + , ud_z_in_lam = strictPlusVarEnv z_in_lam1 z_in_lam2
|
|
| 3786 | + , ud_z_tail = strictPlusVarEnv z_tail1 z_tail2 }
|
|
| 3773 | 3787 | |
| 3774 | 3788 | lookupLetOccInfo :: UsageDetails -> Id -> OccInfo
|
| 3775 | 3789 | -- Don't use locally-generated occ_info for exported (visible-elsewhere)
|
| ... | ... | @@ -17,8 +17,8 @@ equal to g, but twice as expensive and large. |
| 17 | 17 | module GHC.Data.Graph.UnVar
|
| 18 | 18 | ( UnVarSet
|
| 19 | 19 | , emptyUnVarSet, mkUnVarSet, unionUnVarSet, unionUnVarSets
|
| 20 | - , extendUnVarSet, extendUnVarSetList, delUnVarSet, delUnVarSetList
|
|
| 21 | - , elemUnVarSet, isEmptyUnVarSet
|
|
| 20 | + , extendUnVarSet, extendUnVarSet_Directly, extendUnVarSetList, delUnVarSet, delUnVarSetList
|
|
| 21 | + , elemUnVarSet, elemUnVarSet_Directly, isEmptyUnVarSet
|
|
| 22 | 22 | , UnVarGraph
|
| 23 | 23 | , emptyUnVarGraph
|
| 24 | 24 | , unionUnVarGraph, unionUnVarGraphs
|
| ... | ... | @@ -60,6 +60,9 @@ emptyUnVarSet = UnVarSet S.empty |
| 60 | 60 | elemUnVarSet :: Var -> UnVarSet -> Bool
|
| 61 | 61 | elemUnVarSet v (UnVarSet s) = k v `S.member` s
|
| 62 | 62 | |
| 63 | +{-# INLINE elemUnVarSet_Directly #-}
|
|
| 64 | +elemUnVarSet_Directly :: Uniquable key => key -> UnVarSet -> Bool
|
|
| 65 | +elemUnVarSet_Directly v (UnVarSet s) = (getKey $ getUnique v) `S.member` s
|
|
| 63 | 66 | |
| 64 | 67 | isEmptyUnVarSet :: UnVarSet -> Bool
|
| 65 | 68 | isEmptyUnVarSet (UnVarSet s) = S.null s
|
| ... | ... | @@ -82,6 +85,10 @@ mkUnVarSet vs = UnVarSet $ S.fromList $ map k vs |
| 82 | 85 | extendUnVarSet :: Var -> UnVarSet -> UnVarSet
|
| 83 | 86 | extendUnVarSet v (UnVarSet s) = UnVarSet $ S.insert (k v) s
|
| 84 | 87 | |
| 88 | +{-# INLINE extendUnVarSet_Directly #-}
|
|
| 89 | +extendUnVarSet_Directly :: Uniquable key => key -> UnVarSet -> UnVarSet
|
|
| 90 | +extendUnVarSet_Directly u (UnVarSet s) = UnVarSet $ S.insert (getKey $ getUnique u) s
|
|
| 91 | + |
|
| 85 | 92 | extendUnVarSetList :: [Var] -> UnVarSet -> UnVarSet
|
| 86 | 93 | extendUnVarSetList vs s = s `unionUnVarSet` mkUnVarSet vs
|
| 87 | 94 |
| ... | ... | @@ -51,7 +51,9 @@ module GHC.Types.Unique.FM ( |
| 51 | 51 | delListFromUFM,
|
| 52 | 52 | delListFromUFM_Directly,
|
| 53 | 53 | plusUFM,
|
| 54 | + strictPlusUFM,
|
|
| 54 | 55 | plusUFM_C,
|
| 56 | + strictPlusUFM_C,
|
|
| 55 | 57 | plusUFM_CD,
|
| 56 | 58 | plusUFM_CD2,
|
| 57 | 59 | mergeUFM,
|
| ... | ... | @@ -251,16 +253,24 @@ delListFromUFM_Directly = foldl' delFromUFM_Directly |
| 251 | 253 | delFromUFM_Directly :: UniqFM key elt -> Unique -> UniqFM key elt
|
| 252 | 254 | delFromUFM_Directly (UFM m) u = UFM (M.delete (getKey u) m)
|
| 253 | 255 | |
| 254 | --- Bindings in right argument shadow those in the left
|
|
| 256 | +-- | Bindings in right argument shadow those in the left.
|
|
| 257 | +--
|
|
| 258 | +-- Unlike containers this union is right-biased for historic reasons.
|
|
| 255 | 259 | plusUFM :: UniqFM key elt -> UniqFM key elt -> UniqFM key elt
|
| 256 | --- M.union is left-biased, plusUFM should be right-biased.
|
|
| 257 | 260 | plusUFM (UFM x) (UFM y) = UFM (M.union y x)
|
| 258 | 261 | -- Note (M.union y x), with arguments flipped
|
| 259 | 262 | -- M.union is left-biased, plusUFM should be right-biased.
|
| 260 | 263 | |
| 264 | +-- | Right biased
|
|
| 265 | +strictPlusUFM :: UniqFM key elt -> UniqFM key elt -> UniqFM key elt
|
|
| 266 | +strictPlusUFM (UFM x) (UFM y) = UFM (MS.union y x)
|
|
| 267 | + |
|
| 261 | 268 | plusUFM_C :: (elt -> elt -> elt) -> UniqFM key elt -> UniqFM key elt -> UniqFM key elt
|
| 262 | 269 | plusUFM_C f (UFM x) (UFM y) = UFM (M.unionWith f x y)
|
| 263 | 270 | |
| 271 | +strictPlusUFM_C :: (elt -> elt -> elt) -> UniqFM key elt -> UniqFM key elt -> UniqFM key elt
|
|
| 272 | +strictPlusUFM_C f (UFM x) (UFM y) = UFM (MS.unionWith f x y)
|
|
| 273 | + |
|
| 264 | 274 | -- | `plusUFM_CD f m1 d1 m2 d2` merges the maps using `f` as the
|
| 265 | 275 | -- combinding function and `d1` resp. `d2` as the default value if
|
| 266 | 276 | -- there is no entry in `m1` reps. `m2`. The domain is the union of
|
| ... | ... | @@ -12,7 +12,8 @@ module GHC.Types.Var.Env ( |
| 12 | 12 | elemVarEnv, disjointVarEnv, anyVarEnv,
|
| 13 | 13 | extendVarEnv, extendVarEnv_C, extendVarEnv_Acc,
|
| 14 | 14 | extendVarEnvList,
|
| 15 | - plusVarEnv, plusVarEnv_C, plusVarEnv_CD, plusMaybeVarEnv_C,
|
|
| 15 | + strictPlusVarEnv, plusVarEnv, plusVarEnv_C, strictPlusVarEnv_C,
|
|
| 16 | + plusVarEnv_CD, plusMaybeVarEnv_C,
|
|
| 16 | 17 | plusVarEnvList, alterVarEnv,
|
| 17 | 18 | delVarEnvList, delVarEnv,
|
| 18 | 19 | minusVarEnv,
|
| ... | ... | @@ -511,6 +512,7 @@ extendVarEnv :: VarEnv a -> Var -> a -> VarEnv a |
| 511 | 512 | extendVarEnv_C :: (a->a->a) -> VarEnv a -> Var -> a -> VarEnv a
|
| 512 | 513 | extendVarEnv_Acc :: (a->b->b) -> (a->b) -> VarEnv b -> Var -> a -> VarEnv b
|
| 513 | 514 | plusVarEnv :: VarEnv a -> VarEnv a -> VarEnv a
|
| 515 | +strictPlusVarEnv :: VarEnv a -> VarEnv a -> VarEnv a
|
|
| 514 | 516 | plusVarEnvList :: [VarEnv a] -> VarEnv a
|
| 515 | 517 | extendVarEnvList :: VarEnv a -> [(Var, a)] -> VarEnv a
|
| 516 | 518 | varEnvDomain :: VarEnv elt -> UnVarSet
|
| ... | ... | @@ -522,6 +524,7 @@ delVarEnvList :: Foldable f => VarEnv a -> f Var -> VarEnv a |
| 522 | 524 | delVarEnv :: VarEnv a -> Var -> VarEnv a
|
| 523 | 525 | minusVarEnv :: VarEnv a -> VarEnv b -> VarEnv a
|
| 524 | 526 | plusVarEnv_C :: (a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
|
| 527 | +strictPlusVarEnv_C :: (a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
|
|
| 525 | 528 | plusVarEnv_CD :: (a -> a -> a) -> VarEnv a -> a -> VarEnv a -> a -> VarEnv a
|
| 526 | 529 | plusMaybeVarEnv_C :: (a -> a -> Maybe a) -> VarEnv a -> VarEnv a -> VarEnv a
|
| 527 | 530 | mapVarEnv :: (a -> b) -> VarEnv a -> VarEnv b
|
| ... | ... | @@ -548,6 +551,7 @@ extendVarEnv_C = addToUFM_C |
| 548 | 551 | extendVarEnv_Acc = addToUFM_Acc
|
| 549 | 552 | extendVarEnvList = addListToUFM
|
| 550 | 553 | plusVarEnv_C = plusUFM_C
|
| 554 | +strictPlusVarEnv_C = strictPlusUFM_C
|
|
| 551 | 555 | plusVarEnv_CD = plusUFM_CD
|
| 552 | 556 | plusMaybeVarEnv_C = plusMaybeUFM_C
|
| 553 | 557 | delVarEnvList = delListFromUFM
|
| ... | ... | @@ -556,6 +560,7 @@ delVarEnvList = delListFromUFM |
| 556 | 560 | delVarEnv = delFromUFM
|
| 557 | 561 | minusVarEnv = minusUFM
|
| 558 | 562 | plusVarEnv = plusUFM
|
| 563 | +strictPlusVarEnv = strictPlusUFM
|
|
| 559 | 564 | plusVarEnvList = plusUFMList
|
| 560 | 565 | -- lookupVarEnv is very hot (in part due to being called by substTyVar),
|
| 561 | 566 | -- if it's not inlined than the mere allocation of the Just constructor causes
|