
Sebastian Graf pushed to branch wip/T25944 at Glasgow Haskell Compiler / GHC Commits: 6425a1f0 by Sebastian Graf at 2025-04-26T14:35:17+02:00 CprAnal: Detect recursive newtypes (#25944) While `cprTransformDataConWork` handles recursive data con workers, it did not detect the case when a newtype is responsible for the recursion. This is now detected in the `Cast` case of `cprAnal`. The same reproducer made it clear that `isRecDataCon` lacked congruent handling for `AppTy` and `CastTy`, now fixed. Furthermore, the new repro case T25944 triggered this bug via an infinite loop in `cprFix`, caused by the infelicity in `isRecDataCon`. While it should be much less likely to trigger such an infinite loop now that `isRecDataCon` has been fixed, I made sure to abort the loop after 10 iterations and emitting a warning instead. Fixes #25944. - - - - - 5 changed files: - compiler/GHC/Core/Opt/CprAnal.hs - compiler/GHC/Core/Opt/WorkWrap/Utils.hs - + testsuite/tests/cpranal/sigs/T25944.hs - + testsuite/tests/cpranal/sigs/T25944.stderr - testsuite/tests/cpranal/sigs/all.T Changes: ===================================== compiler/GHC/Core/Opt/CprAnal.hs ===================================== @@ -1,3 +1,4 @@ +{-# LANGUAGE MultiWayIf #-} -- | Constructed Product Result analysis. Identifies functions that surely -- return heap-allocated records on every code path, so that we can eliminate @@ -22,12 +23,15 @@ import GHC.Types.Demand import GHC.Types.Cpr import GHC.Types.Unique.MemoFun +import GHC.Core import GHC.Core.FamInstEnv import GHC.Core.DataCon import GHC.Core.Type import GHC.Core.Utils -import GHC.Core +import GHC.Core.Coercion +import GHC.Core.Reduction import GHC.Core.Seq +import GHC.Core.TyCon import GHC.Core.Opt.WorkWrap.Utils import GHC.Data.Graph.UnVar -- for UnVarSet @@ -216,9 +220,13 @@ cprAnal' _ (Type ty) = (topCprType, Type ty) -- Doesn't happen, in fact cprAnal' _ (Coercion co) = (topCprType, Coercion co) cprAnal' env (Cast e co) - = (cpr_ty, Cast e' co) + = (cpr_ty', Cast e' co) where (cpr_ty, e') = cprAnal env e + cpr_ty' + | cpr_ty == topCprType = topCprType -- cheap case first + | isRecNewTyConApp env (coercionRKind co) = topCprType -- See Note [CPR for recursive data constructors] + | otherwise = cpr_ty cprAnal' env (Tick t e) = (cpr_ty, Tick t e') @@ -391,6 +399,18 @@ cprTransformDataConWork env con args mAX_CPR_SIZE :: Arity mAX_CPR_SIZE = 10 +isRecNewTyConApp :: AnalEnv -> Type -> Bool +isRecNewTyConApp env ty + --- | pprTrace "isRecNewTyConApp" (ppr ty) False = undefined + | Just (tc, tc_args) <- splitTyConApp_maybe ty = + if | Just (HetReduction (Reduction _ rhs) _) <- topReduceTyFamApp_maybe (ae_fam_envs env) tc tc_args + -> isRecNewTyConApp env rhs + | Just dc <- newTyConDataCon_maybe tc + -> ae_rec_dc env dc == DefinitelyRecursive + | otherwise + -> False + | otherwise = False + -- -- * Bindings -- @@ -414,12 +434,18 @@ cprFix orig_env orig_pairs | otherwise = orig_pairs init_env = extendSigEnvFromIds orig_env (map fst init_pairs) + -- If fixed-point iteration does not yield a result we use this instead + -- See Note [Safe abortion in the fixed-point iteration] + abort :: (AnalEnv, [(Id,CoreExpr)]) + abort = step (nonVirgin orig_env) [(setIdCprSig id topCprSig, rhs) | (id, rhs) <- orig_pairs ] + -- The fixed-point varies the idCprSig field of the binders and and their -- entries in the AnalEnv, and terminates if that annotation does not change -- any more. loop :: Int -> AnalEnv -> [(Id,CoreExpr)] -> (AnalEnv, [(Id,CoreExpr)]) loop n env pairs | found_fixpoint = (reset_env', pairs') + | n == 10 = pprTraceUserWarning (text "cprFix aborts. This is not terrible, but worth reporting a GHC issue." <+> ppr (map fst pairs)) $ abort | otherwise = loop (n+1) env' pairs' where -- In all but the first iteration, delete the virgin flag @@ -519,8 +545,9 @@ cprAnalBind env id rhs -- possibly trim thunk CPR info rhs_ty' -- See Note [CPR for thunks] - | stays_thunk = trimCprTy rhs_ty - | otherwise = rhs_ty + | rhs_ty == topCprType = topCprType -- cheap case first + | stays_thunk = trimCprTy rhs_ty + | otherwise = rhs_ty -- See Note [Arity trimming for CPR signatures] sig = mkCprSigForArity (idArity id) rhs_ty' -- See Note [OPAQUE pragma] @@ -639,7 +666,7 @@ data AnalEnv , ae_fam_envs :: FamInstEnvs -- ^ Needed when expanding type families and synonyms of product types. , ae_rec_dc :: DataCon -> IsRecDataConResult - -- ^ Memoised result of 'GHC.Core.Opt.WorkWrap.Utils.isRecDataCon' + -- ^ Memoised result of 'GHC.Core.Opt.WorkWrap.Utils.isRecDataType } instance Outputable AnalEnv where @@ -1042,10 +1069,11 @@ Eliminating the shared 'c' binding in the process. And then What can we do about it? - A. Don't CPR functions that return a *recursive data type* (the list in this - case). This is the solution we adopt. Rationale: the benefit of CPR on - recursive data structures is slight, because it only affects the outer layer - of a potentially massive data structure. + A. Don't give recursive data constructors or casts representing recursive newtype constructors + the CPR property (the list in this case). This is the solution we adopt. + Rationale: the benefit of CPR on recursive data structures is slight, + because it only affects the outer layer of a potentially massive data + structure. B. Don't CPR any *recursive function*. That would be quite conservative, as it would also affect e.g. the factorial function. C. Flat CPR only for recursive functions. This prevents the asymptotic @@ -1055,11 +1083,14 @@ What can we do about it? `c` in the second eqn of `replicateC`). But we'd need to know which paths were hot. We want such static branch frequency estimates in #20378. -We adopt solution (A) It is ad-hoc, but appears to work reasonably well. +We adopt solution (A). It is ad-hoc, but appears to work reasonably well. Deciding what a "recursive data constructor" is is quite tricky and ad-hoc, too: See Note [Detecting recursive data constructors]. We don't have to be perfect and can simply keep on unboxing if unsure. +(A) is implemented in `cprTransformDataConWork` for data types and in the +`Cast` case of `cprAnal` for newtypes. + Note [Detecting recursive data constructors] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ What qualifies as a "recursive data constructor" as per @@ -1075,12 +1106,15 @@ looks inside the following class of types, represented by `ty` (and responds types of its data constructors and check `tc_args` for recursion. C. If `ty = F tc_args`, `F` is a `FamTyCon` and we can reduce `F tc_args` to `rhs`, look into the `rhs` type. + D. If `ty = f a`, then look into `f` and `a` + E. If `ty = ty' |> co`, then look into `ty'` A few perhaps surprising points: 1. It deems any function type as non-recursive, because it's unlikely that a recursion through a function type builds up a recursive data structure. - 2. It doesn't look into kinds or coercion types because there's nothing to unbox. + 2. It doesn't look into kinds, literals or coercion types because we are + ultimately looking for value-level recursion. Same for promoted data constructors. 3. We don't care whether an AlgTyCon app `T tc_args` is fully saturated or not; we simply look at its definition/DataCons and its field tys and look for ===================================== compiler/GHC/Core/Opt/WorkWrap/Utils.hs ===================================== @@ -63,6 +63,7 @@ import Data.List ( unzip4 ) import GHC.Types.RepType import GHC.Unit.Types +import GHC.Core.TyCo.Rep {- ************************************************************************ @@ -1426,23 +1427,29 @@ isRecDataCon fam_envs fuel orig_dc | arg_ty <- map scaledThing (dataConRepArgTys dc) ] go_arg_ty :: IntWithInf -> TyConSet -> Type -> IsRecDataConResult - go_arg_ty fuel visited_tcs ty - --- | pprTrace "arg_ty" (ppr ty) False = undefined + go_arg_ty fuel visited_tcs ty = -- pprTrace "arg_ty" (ppr ty) $ + case coreFullView ty of + TyConApp tc tc_args -> go_tc_app fuel visited_tcs tc tc_args + -- See Note [Detecting recursive data constructors], points (B) and (C) - | Just (_tcv, ty') <- splitForAllTyCoVar_maybe ty - = go_arg_ty fuel visited_tcs ty' + ForAllTy _ ty' -> go_arg_ty fuel visited_tcs ty' -- See Note [Detecting recursive data constructors], point (A) - | Just (tc, tc_args) <- splitTyConApp_maybe ty - = go_tc_app fuel visited_tcs tc tc_args + CastTy ty' _ -> go_arg_ty fuel visited_tcs ty' - | otherwise - = NonRecursiveOrUnsure + AppTy f a -> go_arg_ty fuel visited_tcs f `combineIRDCR` go_arg_ty fuel visited_tcs a + -- See Note [Detecting recursive data constructors], point (D) + + FunTy{} -> NonRecursiveOrUnsure + -- See Note [Detecting recursive data constructors], point (1) + + -- (TyVarTy{} | LitTy{} | CastTy{}) + _ -> NonRecursiveOrUnsure go_tc_app :: IntWithInf -> TyConSet -> TyCon -> [Type] -> IsRecDataConResult go_tc_app fuel visited_tcs tc tc_args = case tyConDataCons_maybe tc of - --- | pprTrace "tc_app" (vcat [ppr tc, ppr tc_args]) False = undefined + ---_ | pprTrace "tc_app" (vcat [ppr tc, ppr tc_args]) False -> undefined _ | Just (HetReduction (Reduction _ rhs) _) <- topReduceTyFamApp_maybe fam_envs tc tc_args -- This is the only place where we look at tc_args, which might have -- See Note [Detecting recursive data constructors], point (C) and (5) ===================================== testsuite/tests/cpranal/sigs/T25944.hs ===================================== @@ -0,0 +1,114 @@ +{-# LANGUAGE UndecidableInstances, LambdaCase #-} + +-- | This file starts with a small reproducer for #25944 that is easy to debug +-- and then continues with a much larger MWE that is faithful to the original +-- issue. +module T25944 (foo, bar, popMinOneT, popMinOne) where + +import Data.Functor.Identity ( Identity(..) ) +import Data.Coerce + +data ListCons a b = Nil | a :- !b +newtype Fix f = Fix (f (Fix f)) -- Rec + +foo :: Fix (ListCons a) -> Fix (ListCons a) -> Fix (ListCons a) +foo a b = go a + where + -- The outer loop arranges it so that the base case `go as` of `go2` is + -- bottom on the first iteration of the loop. + go (Fix Nil) = Fix Nil + go (Fix (a :- as)) = Fix (a :- go2 b) + where + go2 (Fix Nil) = go as + go2 (Fix (b :- bs)) = Fix (b :- go2 bs) + +bar :: Int -> (Fix (ListCons Int), Int) +bar n = (foo (Fix Nil) (Fix Nil), n) -- should still have CPR property + +-- Now the actual reproducer from #25944: + +newtype ListT m a = ListT { runListT :: m (ListCons a (ListT m a)) } + +cons :: Applicative m => a -> ListT m a -> ListT m a +cons x xs = ListT (pure (x :- xs)) + +nil :: Applicative m => ListT m a +nil = ListT (pure Nil) + +instance Functor m => Functor (ListT m) where + fmap f (ListT m) = ListT (go <$> m) + where + go Nil = Nil + go (a :- m) = f a :- (f <$> m) + +foldListT :: ((ListCons a (ListT m a) -> c) -> m (ListCons a (ListT m a)) -> b) + -> (a -> b -> c) + -> c + -> ListT m a -> b +foldListT r c n = r h . runListT + where + h Nil = n + h (x :- ListT xs) = c x (r h xs) +{-# INLINE foldListT #-} + +mapListT :: forall a m b. Monad m => (a -> ListT m b -> ListT m b) -> ListT m b -> ListT m a -> ListT m b +mapListT = + foldListT + ((coerce :: + ((ListCons a (ListT m a) -> m (ListCons b (ListT m b))) -> m (ListCons a (ListT m a)) -> m (ListCons b (ListT m b))) -> + ((ListCons a (ListT m a) -> ListT m b) -> m (ListCons a (ListT m a)) -> ListT m b)) + (=<<)) +{-# INLINE mapListT #-} + +instance Monad m => Applicative (ListT m) where + pure x = cons x nil + {-# INLINE pure #-} + liftA2 f xs ys = mapListT (\x zs -> mapListT (cons . f x) zs ys) nil xs + {-# INLINE liftA2 #-} + +instance Monad m => Monad (ListT m) where + xs >>= f = mapListT (flip (mapListT cons) . f) nil xs + {-# INLINE (>>=) #-} + +infixr 5 :< +data Node w a b = Leaf a | !w :< b + deriving (Functor) + +bimapNode f g (Leaf x) = Leaf (f x) +bimapNode f g (x :< xs) = x :< g xs + +newtype HeapT w m a = HeapT { runHeapT :: ListT m (Node w a (HeapT w m a)) } + +-- | The 'Heap' type, specialised to the 'Identity' monad. +type Heap w = HeapT w Identity + +instance Functor m => Functor (HeapT w m) where + fmap f = HeapT . fmap (bimapNode f (fmap f)) . runHeapT + +instance Monad m => Applicative (HeapT w m) where + pure = HeapT . pure . Leaf + (<*>) = liftA2 id + +instance Monad m => Monad (HeapT w m) where + HeapT m >>= f = HeapT (m >>= g) + where + g (Leaf x) = runHeapT (f x) + g (w :< xs) = pure (w :< (xs >>= f)) + +popMinOneT :: forall w m a. (Monoid w, Monad m) => HeapT w m a -> m (Maybe ((a, w), HeapT w m a)) +popMinOneT = go mempty [] . runHeapT + where + go' :: w -> Maybe (w, HeapT w m a) -> m (Maybe ((a, w), HeapT w m a)) + go' a Nothing = pure Nothing + go' a (Just (w, HeapT xs)) = go (a <> w) [] xs + + go :: w -> [(w, HeapT w m a)] -> ListT m (Node w a (HeapT w m a)) -> m (Maybe ((a, w), HeapT w m a)) + go w a (ListT xs) = xs >>= \case + Nil -> go' w (undefined) + Leaf x :- xs -> pure (Just ((x, w), undefined >> HeapT (foldl (\ys (yw,y) -> ListT (pure ((yw :< y) :- ys))) xs a))) + (u :< x) :- xs -> go w ((u,x) : a) xs +{-# INLINE popMinOneT #-} + +popMinOne :: Monoid w => Heap w a -> Maybe ((a, w), Heap w a) +popMinOne = runIdentity . popMinOneT +{-# INLINE popMinOne #-} ===================================== testsuite/tests/cpranal/sigs/T25944.stderr ===================================== @@ -0,0 +1,17 @@ + +==================== Cpr signatures ==================== +T25944.$fApplicativeHeapT: +T25944.$fApplicativeListT: +T25944.$fFunctorHeapT: +T25944.$fFunctorListT: +T25944.$fFunctorNode: +T25944.$fMonadHeapT: +T25944.$fMonadListT: +T25944.bar: 1 +T25944.foo: +T25944.popMinOne: 2(1(1,)) +T25944.popMinOneT: +T25944.runHeapT: +T25944.runListT: + + ===================================== testsuite/tests/cpranal/sigs/all.T ===================================== @@ -12,3 +12,4 @@ test('T16040', normal, compile, ['']) test('T19232', normal, compile, ['']) test('T19398', normal, compile, ['']) test('T19822', normal, compile, ['']) +test('T25944', normal, compile, ['']) View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6425a1f063df92bc9a50870bb7e377a7... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6425a1f063df92bc9a50870bb7e377a7... You're receiving this email because of your account on gitlab.haskell.org.