Andreas Klebinger pushed to branch wip/andreask/occ_anal_tuning at Glasgow Haskell Compiler / GHC Commits: 98918ebc by Andreas Klebinger at 2025-10-29T13:41:29+01:00 OccAnal: Be stricter for better compiler perf. In particular we are now stricter: * When combining usageDetails. * When computing binder info. In combineUsageDetails when combining the underlying adds we compute a new `LocalOcc` for each entry by combining the two existing ones. Rather than wait for those entries to be forced down the road we now force them immediately. Speeding up T26425 by about 10% with little effect on the common case. We also force binders we put into the Core AST everywhere now. Failure to do so risks leaking the occ env used to set the binders OccInfo. For T26425 compiler residency went down by a factor of ~10x. Compile time also improved by a factor of ~1.6. ------------------------- Metric Decrease: T18698a T26425 T9233 ------------------------- - - - - - 3 changed files: - compiler/GHC/Core/Opt/OccurAnal.hs - compiler/GHC/Types/Unique/FM.hs - compiler/GHC/Types/Var/Env.hs Changes: ===================================== compiler/GHC/Core/Opt/OccurAnal.hs ===================================== @@ -9,6 +9,8 @@ -- many /other/ arguments the function has. Inconsistent unboxing is very -- bad for performance, so I increased the limit to allow it to unbox -- consistently. +-- AK: Seems we no longer unbox OccEnv now anyway so it might be redundant. + {- (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 @@ -967,6 +969,11 @@ occAnalBind -> ([CoreBind] -> r -> r) -- How to combine the scope with new binds -> WithUsageDetails r -- Of the whole let(rec) +-- While not allocating any less inlining occAnalBind turns calls to the passed functions +-- into known calls. One might assume this doesn't matter, but for let heavy +-- code I observed speed ups as big as 10-20%! +{-# INLINE occAnalBind #-} + occAnalBind env lvl ire (Rec pairs) thing_inside combine = addInScopeList env (map fst pairs) $ \env -> let WUD body_uds body' = thing_inside env @@ -984,7 +991,7 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine = -- Analyse the RHS and /then/ the body let -- Analyse the rhs first, generating rhs_uds !(rhs_uds_s, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs - rhs_uds = foldr1 orUDs rhs_uds_s -- NB: orUDs. See (W4) of + rhs_uds = foldl1' orUDs rhs_uds_s -- NB: orUDs. See (W4) of -- Note [Occurrence analysis for join points] -- Now analyse the body, adding the join point @@ -1049,6 +1056,7 @@ occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs -- Match join arity O from mb_join_arity with manifest join arity M as -- returned by of occAnalLamTail. It's totally OK for them to mismatch; -- hence adjust the UDs from the RHS + WUD adj_rhs_uds final_rhs = adjustNonRecRhs mb_join $ occAnalLamTail rhs_env rhs final_bndr_with_rules @@ -2188,7 +2196,8 @@ occ_anal_lam_tail env expr@(Lam {}) go env rev_bndrs body = addInScope env rev_bndrs $ \env -> let !(WUD usage body') = occ_anal_lam_tail env body - wrap_lam body bndr = Lam (tagLamBinder usage bndr) body + wrap_lam !body !bndr = let !bndr' = tagLamBinder usage bndr + in Lam bndr' body in WUD (usage `addLamCoVarOccs` rev_bndrs) (foldl' wrap_lam body' rev_bndrs) @@ -2541,7 +2550,7 @@ occAnal env (Case scrut bndr ty alts) let alt_env = addBndrSwap scrut' bndr $ setTailCtxt env -- Kill off OccRhs WUD alts_usage alts' = do_alts alt_env alts - tagged_bndr = tagLamBinder alts_usage bndr + !tagged_bndr = tagLamBinder alts_usage bndr in WUD alts_usage (tagged_bndr, alts') total_usage = markAllNonTail scrut_usage `andUDs` alts_usage @@ -2559,11 +2568,12 @@ occAnal env (Case scrut bndr ty alts) do_alt !env (Alt con bndrs rhs) = addInScopeList env bndrs $ \ env -> let WUD rhs_usage rhs' = occAnal env rhs - tagged_bndrs = tagLamBinders rhs_usage bndrs + !tagged_bndrs = tagLamBinders rhs_usage bndrs in -- See Note [Binders in case alternatives] WUD rhs_usage (Alt con tagged_bndrs rhs') occAnal env (Let bind body) + -- TODO: Would be nice to use a strict version of mkLets here = occAnalBind env NotTopLevel noImpRuleEdges bind (\env -> occAnal env body) mkLets @@ -2644,10 +2654,12 @@ occAnalApp !env (Var fun, args, ticks) | fun `hasKey` runRWKey , [t1, t2, arg] <- args , WUD usage arg' <- adjustNonRecRhs (JoinPoint 1) $ occAnalLamTail env arg - = WUD usage (mkTicks ticks $ mkApps (Var fun) [t1, t2, arg']) + = let app_out = (mkTicks ticks $ mkApps (Var fun) [t1, t2, arg']) + in WUD usage app_out occAnalApp env (Var fun_id, args, ticks) - = WUD all_uds (mkTicks ticks app') + = let app_out = (mkTicks ticks app') + in WUD all_uds app_out where -- Lots of banged bindings: this is a very heavily bit of code, -- so it pays not to make lots of thunks here, all of which @@ -2692,8 +2704,9 @@ occAnalApp env (Var fun_id, args, ticks) -- See Note [Sources of one-shot information], bullet point A'] occAnalApp env (fun, args, ticks) - = WUD (markAllNonTail (fun_uds `andUDs` args_uds)) - (mkTicks ticks app') + = let app_out = (mkTicks ticks app') + in WUD (markAllNonTail (fun_uds `andUDs` args_uds)) app_out + where !(WUD args_uds app') = occAnalArgs env fun' args [] !(WUD fun_uds fun') = occAnal (addAppCtxt env args) fun @@ -3650,8 +3663,8 @@ data WithTailUsageDetails a = WTUD !TailUsageDetails !a ------------------- -- UsageDetails API -andUDs, orUDs - :: UsageDetails -> UsageDetails -> UsageDetails +andUDs:: UsageDetails -> UsageDetails -> UsageDetails +orUDs :: UsageDetails -> UsageDetails -> UsageDetails andUDs = combineUsageDetailsWith andLocalOcc orUDs = combineUsageDetailsWith orLocalOcc @@ -3766,10 +3779,12 @@ combineUsageDetailsWith plus_occ_info | isEmptyVarEnv env1 = uds2 | isEmptyVarEnv env2 = uds1 | otherwise - = UD { ud_env = plusVarEnv_C plus_occ_info env1 env2 - , ud_z_many = plusVarEnv z_many1 z_many2 + -- Using strictPlusVarEnv here speeds up the test T26425 by about 10% by avoiding + -- intermediate thunks. + = UD { ud_env = strictPlusVarEnv_C plus_occ_info env1 env2 + , ud_z_many = strictPlusVarEnv z_many1 z_many2 , ud_z_in_lam = plusVarEnv z_in_lam1 z_in_lam2 - , ud_z_tail = plusVarEnv z_tail1 z_tail2 } + , ud_z_tail = strictPlusVarEnv z_tail1 z_tail2 } lookupLetOccInfo :: UsageDetails -> Id -> OccInfo -- Don't use locally-generated occ_info for exported (visible-elsewhere) @@ -3847,7 +3862,7 @@ tagLamBinders :: UsageDetails -- Of scope -> [Id] -- Binders -> [IdWithOccInfo] -- Tagged binders tagLamBinders usage binders - = map (tagLamBinder usage) binders + = strictMap (tagLamBinder usage) binders tagLamBinder :: UsageDetails -- Of scope -> Id -- Binder ===================================== compiler/GHC/Types/Unique/FM.hs ===================================== @@ -51,7 +51,9 @@ module GHC.Types.Unique.FM ( delListFromUFM, delListFromUFM_Directly, plusUFM, + strictPlusUFM, plusUFM_C, + strictPlusUFM_C, plusUFM_CD, plusUFM_CD2, mergeUFM, @@ -261,16 +263,24 @@ delListFromUFM_Directly = foldl' delFromUFM_Directly delFromUFM_Directly :: UniqFM key elt -> Unique -> UniqFM key elt delFromUFM_Directly (UFM m) u = UFM (M.delete (getKey u) m) --- Bindings in right argument shadow those in the left +-- | Bindings in right argument shadow those in the left. +-- +-- Unlike containers this union is right-biased for historic reasons. plusUFM :: UniqFM key elt -> UniqFM key elt -> UniqFM key elt --- M.union is left-biased, plusUFM should be right-biased. plusUFM (UFM x) (UFM y) = UFM (M.union y x) -- Note (M.union y x), with arguments flipped -- M.union is left-biased, plusUFM should be right-biased. +-- | Right biased +strictPlusUFM :: UniqFM key elt -> UniqFM key elt -> UniqFM key elt +strictPlusUFM (UFM x) (UFM y) = UFM (MS.union y x) + plusUFM_C :: (elt -> elt -> elt) -> UniqFM key elt -> UniqFM key elt -> UniqFM key elt plusUFM_C f (UFM x) (UFM y) = UFM (M.unionWith f x y) +strictPlusUFM_C :: (elt -> elt -> elt) -> UniqFM key elt -> UniqFM key elt -> UniqFM key elt +strictPlusUFM_C f (UFM x) (UFM y) = UFM (MS.unionWith f x y) + -- | `plusUFM_CD f m1 d1 m2 d2` merges the maps using `f` as the -- combinding function and `d1` resp. `d2` as the default value if -- there is no entry in `m1` reps. `m2`. The domain is the union of ===================================== compiler/GHC/Types/Var/Env.hs ===================================== @@ -12,7 +12,8 @@ module GHC.Types.Var.Env ( elemVarEnv, disjointVarEnv, anyVarEnv, extendVarEnv, extendVarEnv_C, extendVarEnv_Acc, extendVarEnvList, - plusVarEnv, plusVarEnv_C, plusVarEnv_CD, plusMaybeVarEnv_C, + strictPlusVarEnv, plusVarEnv, plusVarEnv_C, strictPlusVarEnv_C, + plusVarEnv_CD, plusMaybeVarEnv_C, plusVarEnvList, alterVarEnv, delVarEnvList, delVarEnv, minusVarEnv, @@ -511,6 +512,7 @@ extendVarEnv :: VarEnv a -> Var -> a -> VarEnv a extendVarEnv_C :: (a->a->a) -> VarEnv a -> Var -> a -> VarEnv a extendVarEnv_Acc :: (a->b->b) -> (a->b) -> VarEnv b -> Var -> a -> VarEnv b plusVarEnv :: VarEnv a -> VarEnv a -> VarEnv a +strictPlusVarEnv :: VarEnv a -> VarEnv a -> VarEnv a plusVarEnvList :: [VarEnv a] -> VarEnv a extendVarEnvList :: VarEnv a -> [(Var, a)] -> VarEnv a varEnvDomain :: VarEnv elt -> UnVarSet @@ -522,6 +524,7 @@ delVarEnvList :: Foldable f => VarEnv a -> f Var -> VarEnv a delVarEnv :: VarEnv a -> Var -> VarEnv a minusVarEnv :: VarEnv a -> VarEnv b -> VarEnv a plusVarEnv_C :: (a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a +strictPlusVarEnv_C :: (a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a plusVarEnv_CD :: (a -> a -> a) -> VarEnv a -> a -> VarEnv a -> a -> VarEnv a plusMaybeVarEnv_C :: (a -> a -> Maybe a) -> VarEnv a -> VarEnv a -> VarEnv a mapVarEnv :: (a -> b) -> VarEnv a -> VarEnv b @@ -548,6 +551,7 @@ extendVarEnv_C = addToUFM_C extendVarEnv_Acc = addToUFM_Acc extendVarEnvList = addListToUFM plusVarEnv_C = plusUFM_C +strictPlusVarEnv_C = strictPlusUFM_C plusVarEnv_CD = plusUFM_CD plusMaybeVarEnv_C = plusMaybeUFM_C delVarEnvList = delListFromUFM @@ -556,6 +560,7 @@ delVarEnvList = delListFromUFM delVarEnv = delFromUFM minusVarEnv = minusUFM plusVarEnv = plusUFM +strictPlusVarEnv = strictPlusUFM plusVarEnvList = plusUFMList -- lookupVarEnv is very hot (in part due to being called by substTyVar), -- if it's not inlined than the mere allocation of the Just constructor causes View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/98918ebcf9a237d86d70cfe820eb7943... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/98918ebcf9a237d86d70cfe820eb7943... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Andreas Klebinger (@AndreasK)