Sebastian Graf pushed to branch wip/T25944 at Glasgow Haskell Compiler / GHC
Commits:
-
6425a1f0
by Sebastian Graf at 2025-04-26T14:35:17+02:00
5 changed files:
- compiler/GHC/Core/Opt/CprAnal.hs
- compiler/GHC/Core/Opt/WorkWrap/Utils.hs
- + testsuite/tests/cpranal/sigs/T25944.hs
- + testsuite/tests/cpranal/sigs/T25944.stderr
- testsuite/tests/cpranal/sigs/all.T
Changes:
1 | +{-# LANGUAGE MultiWayIf #-}
|
|
1 | 2 | |
2 | 3 | -- | Constructed Product Result analysis. Identifies functions that surely
|
3 | 4 | -- return heap-allocated records on every code path, so that we can eliminate
|
... | ... | @@ -22,12 +23,15 @@ import GHC.Types.Demand |
22 | 23 | import GHC.Types.Cpr
|
23 | 24 | import GHC.Types.Unique.MemoFun
|
24 | 25 | |
26 | +import GHC.Core
|
|
25 | 27 | import GHC.Core.FamInstEnv
|
26 | 28 | import GHC.Core.DataCon
|
27 | 29 | import GHC.Core.Type
|
28 | 30 | import GHC.Core.Utils
|
29 | -import GHC.Core
|
|
31 | +import GHC.Core.Coercion
|
|
32 | +import GHC.Core.Reduction
|
|
30 | 33 | import GHC.Core.Seq
|
34 | +import GHC.Core.TyCon
|
|
31 | 35 | import GHC.Core.Opt.WorkWrap.Utils
|
32 | 36 | |
33 | 37 | import GHC.Data.Graph.UnVar -- for UnVarSet
|
... | ... | @@ -216,9 +220,13 @@ cprAnal' _ (Type ty) = (topCprType, Type ty) -- Doesn't happen, in fact |
216 | 220 | cprAnal' _ (Coercion co) = (topCprType, Coercion co)
|
217 | 221 | |
218 | 222 | cprAnal' env (Cast e co)
|
219 | - = (cpr_ty, Cast e' co)
|
|
223 | + = (cpr_ty', Cast e' co)
|
|
220 | 224 | where
|
221 | 225 | (cpr_ty, e') = cprAnal env e
|
226 | + cpr_ty'
|
|
227 | + | cpr_ty == topCprType = topCprType -- cheap case first
|
|
228 | + | isRecNewTyConApp env (coercionRKind co) = topCprType -- See Note [CPR for recursive data constructors]
|
|
229 | + | otherwise = cpr_ty
|
|
222 | 230 | |
223 | 231 | cprAnal' env (Tick t e)
|
224 | 232 | = (cpr_ty, Tick t e')
|
... | ... | @@ -391,6 +399,18 @@ cprTransformDataConWork env con args |
391 | 399 | mAX_CPR_SIZE :: Arity
|
392 | 400 | mAX_CPR_SIZE = 10
|
393 | 401 | |
402 | +isRecNewTyConApp :: AnalEnv -> Type -> Bool
|
|
403 | +isRecNewTyConApp env ty
|
|
404 | + --- | pprTrace "isRecNewTyConApp" (ppr ty) False = undefined
|
|
405 | + | Just (tc, tc_args) <- splitTyConApp_maybe ty =
|
|
406 | + if | Just (HetReduction (Reduction _ rhs) _) <- topReduceTyFamApp_maybe (ae_fam_envs env) tc tc_args
|
|
407 | + -> isRecNewTyConApp env rhs
|
|
408 | + | Just dc <- newTyConDataCon_maybe tc
|
|
409 | + -> ae_rec_dc env dc == DefinitelyRecursive
|
|
410 | + | otherwise
|
|
411 | + -> False
|
|
412 | + | otherwise = False
|
|
413 | + |
|
394 | 414 | --
|
395 | 415 | -- * Bindings
|
396 | 416 | --
|
... | ... | @@ -414,12 +434,18 @@ cprFix orig_env orig_pairs |
414 | 434 | | otherwise = orig_pairs
|
415 | 435 | init_env = extendSigEnvFromIds orig_env (map fst init_pairs)
|
416 | 436 | |
437 | + -- If fixed-point iteration does not yield a result we use this instead
|
|
438 | + -- See Note [Safe abortion in the fixed-point iteration]
|
|
439 | + abort :: (AnalEnv, [(Id,CoreExpr)])
|
|
440 | + abort = step (nonVirgin orig_env) [(setIdCprSig id topCprSig, rhs) | (id, rhs) <- orig_pairs ]
|
|
441 | + |
|
417 | 442 | -- The fixed-point varies the idCprSig field of the binders and and their
|
418 | 443 | -- entries in the AnalEnv, and terminates if that annotation does not change
|
419 | 444 | -- any more.
|
420 | 445 | loop :: Int -> AnalEnv -> [(Id,CoreExpr)] -> (AnalEnv, [(Id,CoreExpr)])
|
421 | 446 | loop n env pairs
|
422 | 447 | | found_fixpoint = (reset_env', pairs')
|
448 | + | n == 10 = pprTraceUserWarning (text "cprFix aborts. This is not terrible, but worth reporting a GHC issue." <+> ppr (map fst pairs)) $ abort
|
|
423 | 449 | | otherwise = loop (n+1) env' pairs'
|
424 | 450 | where
|
425 | 451 | -- In all but the first iteration, delete the virgin flag
|
... | ... | @@ -519,8 +545,9 @@ cprAnalBind env id rhs |
519 | 545 | -- possibly trim thunk CPR info
|
520 | 546 | rhs_ty'
|
521 | 547 | -- See Note [CPR for thunks]
|
522 | - | stays_thunk = trimCprTy rhs_ty
|
|
523 | - | otherwise = rhs_ty
|
|
548 | + | rhs_ty == topCprType = topCprType -- cheap case first
|
|
549 | + | stays_thunk = trimCprTy rhs_ty
|
|
550 | + | otherwise = rhs_ty
|
|
524 | 551 | -- See Note [Arity trimming for CPR signatures]
|
525 | 552 | sig = mkCprSigForArity (idArity id) rhs_ty'
|
526 | 553 | -- See Note [OPAQUE pragma]
|
... | ... | @@ -639,7 +666,7 @@ data AnalEnv |
639 | 666 | , ae_fam_envs :: FamInstEnvs
|
640 | 667 | -- ^ Needed when expanding type families and synonyms of product types.
|
641 | 668 | , ae_rec_dc :: DataCon -> IsRecDataConResult
|
642 | - -- ^ Memoised result of 'GHC.Core.Opt.WorkWrap.Utils.isRecDataCon'
|
|
669 | + -- ^ Memoised result of 'GHC.Core.Opt.WorkWrap.Utils.isRecDataType
|
|
643 | 670 | }
|
644 | 671 | |
645 | 672 | instance Outputable AnalEnv where
|
... | ... | @@ -1042,10 +1069,11 @@ Eliminating the shared 'c' binding in the process. And then |
1042 | 1069 | |
1043 | 1070 | What can we do about it?
|
1044 | 1071 | |
1045 | - A. Don't CPR functions that return a *recursive data type* (the list in this
|
|
1046 | - case). This is the solution we adopt. Rationale: the benefit of CPR on
|
|
1047 | - recursive data structures is slight, because it only affects the outer layer
|
|
1048 | - of a potentially massive data structure.
|
|
1072 | + A. Don't give recursive data constructors or casts representing recursive newtype constructors
|
|
1073 | + the CPR property (the list in this case). This is the solution we adopt.
|
|
1074 | + Rationale: the benefit of CPR on recursive data structures is slight,
|
|
1075 | + because it only affects the outer layer of a potentially massive data
|
|
1076 | + structure.
|
|
1049 | 1077 | B. Don't CPR any *recursive function*. That would be quite conservative, as it
|
1050 | 1078 | would also affect e.g. the factorial function.
|
1051 | 1079 | C. Flat CPR only for recursive functions. This prevents the asymptotic
|
... | ... | @@ -1055,11 +1083,14 @@ What can we do about it? |
1055 | 1083 | `c` in the second eqn of `replicateC`). But we'd need to know which paths
|
1056 | 1084 | were hot. We want such static branch frequency estimates in #20378.
|
1057 | 1085 | |
1058 | -We adopt solution (A) It is ad-hoc, but appears to work reasonably well.
|
|
1086 | +We adopt solution (A). It is ad-hoc, but appears to work reasonably well.
|
|
1059 | 1087 | Deciding what a "recursive data constructor" is is quite tricky and ad-hoc, too:
|
1060 | 1088 | See Note [Detecting recursive data constructors]. We don't have to be perfect
|
1061 | 1089 | and can simply keep on unboxing if unsure.
|
1062 | 1090 | |
1091 | +(A) is implemented in `cprTransformDataConWork` for data types and in the
|
|
1092 | +`Cast` case of `cprAnal` for newtypes.
|
|
1093 | + |
|
1063 | 1094 | Note [Detecting recursive data constructors]
|
1064 | 1095 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
1065 | 1096 | What qualifies as a "recursive data constructor" as per
|
... | ... | @@ -1075,12 +1106,15 @@ looks inside the following class of types, represented by `ty` (and responds |
1075 | 1106 | types of its data constructors and check `tc_args` for recursion.
|
1076 | 1107 | C. If `ty = F tc_args`, `F` is a `FamTyCon` and we can reduce `F tc_args` to
|
1077 | 1108 | `rhs`, look into the `rhs` type.
|
1109 | + D. If `ty = f a`, then look into `f` and `a`
|
|
1110 | + E. If `ty = ty' |> co`, then look into `ty'`
|
|
1078 | 1111 | |
1079 | 1112 | A few perhaps surprising points:
|
1080 | 1113 | |
1081 | 1114 | 1. It deems any function type as non-recursive, because it's unlikely that
|
1082 | 1115 | a recursion through a function type builds up a recursive data structure.
|
1083 | - 2. It doesn't look into kinds or coercion types because there's nothing to unbox.
|
|
1116 | + 2. It doesn't look into kinds, literals or coercion types because we are
|
|
1117 | + ultimately looking for value-level recursion.
|
|
1084 | 1118 | Same for promoted data constructors.
|
1085 | 1119 | 3. We don't care whether an AlgTyCon app `T tc_args` is fully saturated or not;
|
1086 | 1120 | we simply look at its definition/DataCons and its field tys and look for
|
... | ... | @@ -63,6 +63,7 @@ import Data.List ( unzip4 ) |
63 | 63 | |
64 | 64 | import GHC.Types.RepType
|
65 | 65 | import GHC.Unit.Types
|
66 | +import GHC.Core.TyCo.Rep
|
|
66 | 67 | |
67 | 68 | {-
|
68 | 69 | ************************************************************************
|
... | ... | @@ -1426,23 +1427,29 @@ isRecDataCon fam_envs fuel orig_dc |
1426 | 1427 | | arg_ty <- map scaledThing (dataConRepArgTys dc) ]
|
1427 | 1428 | |
1428 | 1429 | go_arg_ty :: IntWithInf -> TyConSet -> Type -> IsRecDataConResult
|
1429 | - go_arg_ty fuel visited_tcs ty
|
|
1430 | - --- | pprTrace "arg_ty" (ppr ty) False = undefined
|
|
1430 | + go_arg_ty fuel visited_tcs ty = -- pprTrace "arg_ty" (ppr ty) $
|
|
1431 | + case coreFullView ty of
|
|
1432 | + TyConApp tc tc_args -> go_tc_app fuel visited_tcs tc tc_args
|
|
1433 | + -- See Note [Detecting recursive data constructors], points (B) and (C)
|
|
1431 | 1434 | |
1432 | - | Just (_tcv, ty') <- splitForAllTyCoVar_maybe ty
|
|
1433 | - = go_arg_ty fuel visited_tcs ty'
|
|
1435 | + ForAllTy _ ty' -> go_arg_ty fuel visited_tcs ty'
|
|
1434 | 1436 | -- See Note [Detecting recursive data constructors], point (A)
|
1435 | 1437 | |
1436 | - | Just (tc, tc_args) <- splitTyConApp_maybe ty
|
|
1437 | - = go_tc_app fuel visited_tcs tc tc_args
|
|
1438 | + CastTy ty' _ -> go_arg_ty fuel visited_tcs ty'
|
|
1438 | 1439 | |
1439 | - | otherwise
|
|
1440 | - = NonRecursiveOrUnsure
|
|
1440 | + AppTy f a -> go_arg_ty fuel visited_tcs f `combineIRDCR` go_arg_ty fuel visited_tcs a
|
|
1441 | + -- See Note [Detecting recursive data constructors], point (D)
|
|
1442 | + |
|
1443 | + FunTy{} -> NonRecursiveOrUnsure
|
|
1444 | + -- See Note [Detecting recursive data constructors], point (1)
|
|
1445 | + |
|
1446 | + -- (TyVarTy{} | LitTy{} | CastTy{})
|
|
1447 | + _ -> NonRecursiveOrUnsure
|
|
1441 | 1448 | |
1442 | 1449 | go_tc_app :: IntWithInf -> TyConSet -> TyCon -> [Type] -> IsRecDataConResult
|
1443 | 1450 | go_tc_app fuel visited_tcs tc tc_args =
|
1444 | 1451 | case tyConDataCons_maybe tc of
|
1445 | - --- | pprTrace "tc_app" (vcat [ppr tc, ppr tc_args]) False = undefined
|
|
1452 | + ---_ | pprTrace "tc_app" (vcat [ppr tc, ppr tc_args]) False -> undefined
|
|
1446 | 1453 | _ | Just (HetReduction (Reduction _ rhs) _) <- topReduceTyFamApp_maybe fam_envs tc tc_args
|
1447 | 1454 | -- This is the only place where we look at tc_args, which might have
|
1448 | 1455 | -- See Note [Detecting recursive data constructors], point (C) and (5)
|
1 | +{-# LANGUAGE UndecidableInstances, LambdaCase #-}
|
|
2 | + |
|
3 | +-- | This file starts with a small reproducer for #25944 that is easy to debug
|
|
4 | +-- and then continues with a much larger MWE that is faithful to the original
|
|
5 | +-- issue.
|
|
6 | +module T25944 (foo, bar, popMinOneT, popMinOne) where
|
|
7 | + |
|
8 | +import Data.Functor.Identity ( Identity(..) )
|
|
9 | +import Data.Coerce
|
|
10 | + |
|
11 | +data ListCons a b = Nil | a :- !b
|
|
12 | +newtype Fix f = Fix (f (Fix f)) -- Rec
|
|
13 | + |
|
14 | +foo :: Fix (ListCons a) -> Fix (ListCons a) -> Fix (ListCons a)
|
|
15 | +foo a b = go a
|
|
16 | + where
|
|
17 | + -- The outer loop arranges it so that the base case `go as` of `go2` is
|
|
18 | + -- bottom on the first iteration of the loop.
|
|
19 | + go (Fix Nil) = Fix Nil
|
|
20 | + go (Fix (a :- as)) = Fix (a :- go2 b)
|
|
21 | + where
|
|
22 | + go2 (Fix Nil) = go as
|
|
23 | + go2 (Fix (b :- bs)) = Fix (b :- go2 bs)
|
|
24 | + |
|
25 | +bar :: Int -> (Fix (ListCons Int), Int)
|
|
26 | +bar n = (foo (Fix Nil) (Fix Nil), n) -- should still have CPR property
|
|
27 | + |
|
28 | +-- Now the actual reproducer from #25944:
|
|
29 | + |
|
30 | +newtype ListT m a = ListT { runListT :: m (ListCons a (ListT m a)) }
|
|
31 | + |
|
32 | +cons :: Applicative m => a -> ListT m a -> ListT m a
|
|
33 | +cons x xs = ListT (pure (x :- xs))
|
|
34 | + |
|
35 | +nil :: Applicative m => ListT m a
|
|
36 | +nil = ListT (pure Nil)
|
|
37 | + |
|
38 | +instance Functor m => Functor (ListT m) where
|
|
39 | + fmap f (ListT m) = ListT (go <$> m)
|
|
40 | + where
|
|
41 | + go Nil = Nil
|
|
42 | + go (a :- m) = f a :- (f <$> m)
|
|
43 | + |
|
44 | +foldListT :: ((ListCons a (ListT m a) -> c) -> m (ListCons a (ListT m a)) -> b)
|
|
45 | + -> (a -> b -> c)
|
|
46 | + -> c
|
|
47 | + -> ListT m a -> b
|
|
48 | +foldListT r c n = r h . runListT
|
|
49 | + where
|
|
50 | + h Nil = n
|
|
51 | + h (x :- ListT xs) = c x (r h xs)
|
|
52 | +{-# INLINE foldListT #-}
|
|
53 | + |
|
54 | +mapListT :: forall a m b. Monad m => (a -> ListT m b -> ListT m b) -> ListT m b -> ListT m a -> ListT m b
|
|
55 | +mapListT =
|
|
56 | + foldListT
|
|
57 | + ((coerce ::
|
|
58 | + ((ListCons a (ListT m a) -> m (ListCons b (ListT m b))) -> m (ListCons a (ListT m a)) -> m (ListCons b (ListT m b))) ->
|
|
59 | + ((ListCons a (ListT m a) -> ListT m b) -> m (ListCons a (ListT m a)) -> ListT m b))
|
|
60 | + (=<<))
|
|
61 | +{-# INLINE mapListT #-}
|
|
62 | + |
|
63 | +instance Monad m => Applicative (ListT m) where
|
|
64 | + pure x = cons x nil
|
|
65 | + {-# INLINE pure #-}
|
|
66 | + liftA2 f xs ys = mapListT (\x zs -> mapListT (cons . f x) zs ys) nil xs
|
|
67 | + {-# INLINE liftA2 #-}
|
|
68 | + |
|
69 | +instance Monad m => Monad (ListT m) where
|
|
70 | + xs >>= f = mapListT (flip (mapListT cons) . f) nil xs
|
|
71 | + {-# INLINE (>>=) #-}
|
|
72 | + |
|
73 | +infixr 5 :<
|
|
74 | +data Node w a b = Leaf a | !w :< b
|
|
75 | + deriving (Functor)
|
|
76 | + |
|
77 | +bimapNode f g (Leaf x) = Leaf (f x)
|
|
78 | +bimapNode f g (x :< xs) = x :< g xs
|
|
79 | + |
|
80 | +newtype HeapT w m a = HeapT { runHeapT :: ListT m (Node w a (HeapT w m a)) }
|
|
81 | + |
|
82 | +-- | The 'Heap' type, specialised to the 'Identity' monad.
|
|
83 | +type Heap w = HeapT w Identity
|
|
84 | + |
|
85 | +instance Functor m => Functor (HeapT w m) where
|
|
86 | + fmap f = HeapT . fmap (bimapNode f (fmap f)) . runHeapT
|
|
87 | + |
|
88 | +instance Monad m => Applicative (HeapT w m) where
|
|
89 | + pure = HeapT . pure . Leaf
|
|
90 | + (<*>) = liftA2 id
|
|
91 | + |
|
92 | +instance Monad m => Monad (HeapT w m) where
|
|
93 | + HeapT m >>= f = HeapT (m >>= g)
|
|
94 | + where
|
|
95 | + g (Leaf x) = runHeapT (f x)
|
|
96 | + g (w :< xs) = pure (w :< (xs >>= f))
|
|
97 | + |
|
98 | +popMinOneT :: forall w m a. (Monoid w, Monad m) => HeapT w m a -> m (Maybe ((a, w), HeapT w m a))
|
|
99 | +popMinOneT = go mempty [] . runHeapT
|
|
100 | + where
|
|
101 | + go' :: w -> Maybe (w, HeapT w m a) -> m (Maybe ((a, w), HeapT w m a))
|
|
102 | + go' a Nothing = pure Nothing
|
|
103 | + go' a (Just (w, HeapT xs)) = go (a <> w) [] xs
|
|
104 | + |
|
105 | + go :: w -> [(w, HeapT w m a)] -> ListT m (Node w a (HeapT w m a)) -> m (Maybe ((a, w), HeapT w m a))
|
|
106 | + go w a (ListT xs) = xs >>= \case
|
|
107 | + Nil -> go' w (undefined)
|
|
108 | + Leaf x :- xs -> pure (Just ((x, w), undefined >> HeapT (foldl (\ys (yw,y) -> ListT (pure ((yw :< y) :- ys))) xs a)))
|
|
109 | + (u :< x) :- xs -> go w ((u,x) : a) xs
|
|
110 | +{-# INLINE popMinOneT #-}
|
|
111 | + |
|
112 | +popMinOne :: Monoid w => Heap w a -> Maybe ((a, w), Heap w a)
|
|
113 | +popMinOne = runIdentity . popMinOneT
|
|
114 | +{-# INLINE popMinOne #-} |
1 | + |
|
2 | +==================== Cpr signatures ====================
|
|
3 | +T25944.$fApplicativeHeapT:
|
|
4 | +T25944.$fApplicativeListT:
|
|
5 | +T25944.$fFunctorHeapT:
|
|
6 | +T25944.$fFunctorListT:
|
|
7 | +T25944.$fFunctorNode:
|
|
8 | +T25944.$fMonadHeapT:
|
|
9 | +T25944.$fMonadListT:
|
|
10 | +T25944.bar: 1
|
|
11 | +T25944.foo:
|
|
12 | +T25944.popMinOne: 2(1(1,))
|
|
13 | +T25944.popMinOneT:
|
|
14 | +T25944.runHeapT:
|
|
15 | +T25944.runListT:
|
|
16 | + |
|
17 | + |
... | ... | @@ -12,3 +12,4 @@ test('T16040', normal, compile, ['']) |
12 | 12 | test('T19232', normal, compile, [''])
|
13 | 13 | test('T19398', normal, compile, [''])
|
14 | 14 | test('T19822', normal, compile, [''])
|
15 | +test('T25944', normal, compile, ['']) |