Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC
Commits:
-
352d5462
by Marc Scholten at 2025-11-22T10:33:03-05:00
-
48a3ed57
by Simon Peyton Jones at 2025-11-25T15:33:54+00:00
-
951e5ed9
by Simon Peyton Jones at 2025-11-25T15:33:54+00:00
-
f1959dfc
by Simon Peyton Jones at 2025-11-26T11:58:07+00:00
-
8cd2d857
by Simon Hengel at 2025-11-27T06:29:33-05:00
-
03618af8
by Simon Hengel at 2025-11-27T06:29:34-05:00
-
0d1dcf6c
by Simon Peyton Jones at 2025-11-27T06:29:34-05:00
-
6ebe13e9
by Simon Peyton Jones at 2025-11-27T06:29:34-05:00
-
41d5abd8
by Matthew Pickering at 2025-11-27T06:29:35-05:00
-
38d7e185
by sheaf at 2025-11-27T06:29:50-05:00
26 changed files:
- compiler/GHC/Core/Make.hs
- compiler/GHC/Core/Opt/Arity.hs
- compiler/GHC/Core/Opt/OccurAnal.hs
- compiler/GHC/Core/SimpleOpt.hs
- compiler/GHC/Driver/DynFlags.hs
- compiler/GHC/Driver/Flags.hs
- compiler/GHC/HsToCore/Binds.hs
- compiler/GHC/HsToCore/Expr.hs
- compiler/GHC/HsToCore/Match.hs
- compiler/GHC/Tc/Gen/App.hs
- compiler/GHC/Types/Basic.hs
- compiler/GHC/Types/Unique/FM.hs
- compiler/GHC/Types/Unique/Set.hs
- compiler/GHC/Types/Var/Env.hs
- docs/users_guide/exts/rank_polymorphism.rst
- docs/users_guide/exts/type_families.rst
- docs/users_guide/using-optimisation.rst
- rts/eventlog/EventLog.c
- testsuite/tests/rts/all.T
- + testsuite/tests/simplCore/should_compile/T26588.hs
- + testsuite/tests/simplCore/should_compile/T26589.hs
- testsuite/tests/simplCore/should_compile/T8331.stderr
- testsuite/tests/simplCore/should_compile/all.T
- testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
- utils/check-exact/ExactPrint.hs
- utils/haddock/haddock-test/src/Test/Haddock.hs
Changes:
| ... | ... | @@ -151,37 +151,28 @@ mkCoreConWrapApps con args = mkCoreApps (Var (dataConWrapId con)) args |
| 151 | 151 | |
| 152 | 152 | -- | Construct an expression which represents the application of a number of
|
| 153 | 153 | -- expressions to another. The leftmost expression in the list is applied first
|
| 154 | -mkCoreApps :: CoreExpr -- ^ function
|
|
| 154 | +-- See Note [Assertion checking in mkCoreApp]
|
|
| 155 | +mkCoreApps :: CoreExpr -- ^ function
|
|
| 155 | 156 | -> [CoreExpr] -- ^ arguments
|
| 156 | 157 | -> CoreExpr
|
| 157 | -mkCoreApps fun args
|
|
| 158 | - = fst $
|
|
| 159 | - foldl' (mkCoreAppTyped doc_string) (fun, fun_ty) args
|
|
| 160 | - where
|
|
| 161 | - doc_string = ppr fun_ty $$ ppr fun $$ ppr args
|
|
| 162 | - fun_ty = exprType fun
|
|
| 158 | +mkCoreApps fun args = foldl' mkCoreApp fun args
|
|
| 163 | 159 | |
| 164 | 160 | -- | Construct an expression which represents the application of one expression
|
| 165 | 161 | -- to the other
|
| 166 | -mkCoreApp :: SDoc
|
|
| 167 | - -> CoreExpr -- ^ function
|
|
| 162 | +-- See Note [Assertion checking in mkCoreApp]
|
|
| 163 | +mkCoreApp :: CoreExpr -- ^ function
|
|
| 168 | 164 | -> CoreExpr -- ^ argument
|
| 169 | 165 | -> CoreExpr
|
| 170 | -mkCoreApp s fun arg
|
|
| 171 | - = fst $ mkCoreAppTyped s (fun, exprType fun) arg
|
|
| 172 | - |
|
| 173 | --- | Construct an expression which represents the application of one expression
|
|
| 174 | --- paired with its type to an argument. The result is paired with its type. This
|
|
| 175 | --- function is not exported and used in the definition of 'mkCoreApp' and
|
|
| 176 | --- 'mkCoreApps'.
|
|
| 177 | -mkCoreAppTyped :: SDoc -> (CoreExpr, Type) -> CoreExpr -> (CoreExpr, Type)
|
|
| 178 | -mkCoreAppTyped _ (fun, fun_ty) (Type ty)
|
|
| 179 | - = (App fun (Type ty), piResultTy fun_ty ty)
|
|
| 180 | -mkCoreAppTyped _ (fun, fun_ty) (Coercion co)
|
|
| 181 | - = (App fun (Coercion co), funResultTy fun_ty)
|
|
| 182 | -mkCoreAppTyped d (fun, fun_ty) arg
|
|
| 183 | - = assertPpr (isFunTy fun_ty) (ppr fun $$ ppr arg $$ d)
|
|
| 184 | - (App fun arg, funResultTy fun_ty)
|
|
| 166 | +mkCoreApp fun arg = App fun arg
|
|
| 167 | + |
|
| 168 | +{- Note [Assertion checking in mkCoreApp]
|
|
| 169 | +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
| 170 | +At one time we had an assertion to check that the function and argument type match up,
|
|
| 171 | +but that turned out to take 90% of all compile time (!) when compiling test
|
|
| 172 | +`unboxedsums/UbxSumUnpackedSize.hs`. The reason was an unboxed sum constructor with
|
|
| 173 | +hundreds of foralls. It's most straightforward just to remove the assert, and
|
|
| 174 | +rely on Lint to discover any mis-constructed terms.
|
|
| 175 | +-}
|
|
| 185 | 176 | |
| 186 | 177 | {- *********************************************************************
|
| 187 | 178 | * *
|
| ... | ... | @@ -2993,12 +2993,12 @@ pushCoValArg co |
| 2993 | 2993 | Pair tyL tyR = coercionKind co
|
| 2994 | 2994 | |
| 2995 | 2995 | pushCoercionIntoLambda
|
| 2996 | - :: HasDebugCallStack => Subst -> InVar -> InExpr -> OutCoercionR -> Maybe (OutVar, OutExpr)
|
|
| 2996 | + :: HasDebugCallStack => InScopeSet -> Var -> CoreExpr -> CoercionR -> Maybe (Var, CoreExpr)
|
|
| 2997 | 2997 | -- This implements the Push rule from the paper on coercions
|
| 2998 | 2998 | -- (\x. e) |> co
|
| 2999 | 2999 | -- ===>
|
| 3000 | 3000 | -- (\x'. e |> co')
|
| 3001 | -pushCoercionIntoLambda subst x e co
|
|
| 3001 | +pushCoercionIntoLambda in_scope x e co
|
|
| 3002 | 3002 | | assert (not (isTyVar x) && not (isCoVar x)) True
|
| 3003 | 3003 | , Pair s1s2 t1t2 <- coercionKind co
|
| 3004 | 3004 | , Just {} <- splitFunTy_maybe s1s2
|
| ... | ... | @@ -3011,9 +3011,9 @@ pushCoercionIntoLambda subst x e co |
| 3011 | 3011 | -- Should we optimize the coercions here?
|
| 3012 | 3012 | -- Otherwise they might not match too well
|
| 3013 | 3013 | x' = x `setIdType` t1 `setIdMult` w1
|
| 3014 | - in_scope' = substInScopeSet subst `extendInScopeSet` x'
|
|
| 3014 | + in_scope' = in_scope `extendInScopeSet` x'
|
|
| 3015 | 3015 | subst' =
|
| 3016 | - extendIdSubst (setInScope subst in_scope')
|
|
| 3016 | + extendIdSubst (setInScope emptySubst in_scope')
|
|
| 3017 | 3017 | x
|
| 3018 | 3018 | (mkCast (Var x') (mkSymCo co1))
|
| 3019 | 3019 | -- We substitute x' for x, except we need to preserve types.
|
| ... | ... | @@ -66,7 +66,6 @@ import GHC.Builtin.Names( runRWKey ) |
| 66 | 66 | import GHC.Unit.Module( Module )
|
| 67 | 67 | |
| 68 | 68 | import Data.List (mapAccumL)
|
| 69 | -import Data.List.NonEmpty (NonEmpty (..))
|
|
| 70 | 69 | |
| 71 | 70 | {-
|
| 72 | 71 | ************************************************************************
|
| ... | ... | @@ -660,18 +659,35 @@ through A, so it should have ManyOcc. Bear this case in mind! |
| 660 | 659 | * In occ_env, the new (occ_join_points :: IdEnv OccInfoEnv) maps
|
| 661 | 660 | each in-scope non-recursive join point, such as `j` above, to
|
| 662 | 661 | a "zeroed form" of its RHS's usage details. The "zeroed form"
|
| 662 | + * has only occ_nested_lets in its domain (see (W4) below)
|
|
| 663 | 663 | * deletes ManyOccs
|
| 664 | 664 | * maps a OneOcc to OneOcc{ occ_n_br = 0 }
|
| 665 | - In our example, occ_join_points will be extended with
|
|
| 665 | + In our example, assuming `v` is locally-let-bound, occ_join_points will
|
|
| 666 | + be extended with
|
|
| 666 | 667 | [j :-> [v :-> OneOcc{occ_n_br=0}]]
|
| 667 | - See addJoinPoint.
|
|
| 668 | + See `addJoinPoint` and (W4) below.
|
|
| 668 | 669 | |
| 669 | 670 | * At an occurrence of a join point, we do everything as normal, but add in the
|
| 670 | 671 | UsageDetails from the occ_join_points. See mkOneOcc.
|
| 671 | 672 | |
| 672 | -* Crucially, at the NonRec binding of the join point, in `occAnalBind`, we use
|
|
| 673 | - `orUDs`, not `andUDs` to combine the usage from the RHS with the usage from
|
|
| 674 | - the body.
|
|
| 673 | +* Crucially, at the NonRec binding of a join point `j`, in `occAnalBind`,
|
|
| 674 | + we use `combineJoinPointUDs`, not `andUDs` to combine the usage from the
|
|
| 675 | + RHS with the usage from the body. `combineJoinPointUDs` behaves like this:
|
|
| 676 | + |
|
| 677 | + * For all variables than `occ_nested_lets`, use `andUDs`, just like for
|
|
| 678 | + any normal let-binding.
|
|
| 679 | + |
|
| 680 | + * But for a variable `v` in `occ_nested_lets`, use `orUDs`:
|
|
| 681 | + - If `v` occurs `ManyOcc` in the join-point RHS, the variable won't be in
|
|
| 682 | + `occ_join_points`; but we'll get `ManyOcc` anyway.
|
|
| 683 | + - If `v` occurs `OneOcc` in the join-point RHS, the variable will be in
|
|
| 684 | + `occ_join_points` and we'll thereby get a `OneOcc{occ_n_br=0}` from
|
|
| 685 | + each of j's tail calls. We can `or` that with the `OncOcc{occ_n_br=n}`
|
|
| 686 | + from j's RHS.
|
|
| 687 | + |
|
| 688 | + The only reason for `occ_nested_lets` is to reduce the size of the info
|
|
| 689 | + duplicate at each tail call; see (W4). It would sound to put *all* variables
|
|
| 690 | + into `occ_nested_lets`.
|
|
| 675 | 691 | |
| 676 | 692 | Here are the consequences
|
| 677 | 693 | |
| ... | ... | @@ -682,13 +698,14 @@ Here are the consequences |
| 682 | 698 | There are two lexical occurrences of `v`!
|
| 683 | 699 | (NB: `orUDs` adds occ_n_br together, so occ_n_br=1 is impossible, too.)
|
| 684 | 700 | |
| 685 | -* In the tricky (P3) we'll get an `andUDs` of
|
|
| 686 | - * OneOcc{occ_n_br=0} from the occurrences of `j`)
|
|
| 701 | +* In the tricky (P3), when analysing `case (f v) of ...`, we'll get
|
|
| 702 | + an `andUDs` of
|
|
| 703 | + * OneOcc{occ_n_br=0} from the occurrences of `j`
|
|
| 687 | 704 | * OneOcc{occ_n_br=1} from the (f v)
|
| 688 | 705 | These are `andUDs` together in `addOccInfo`, and hence
|
| 689 | 706 | `v` gets ManyOccs, just as it should. Clever!
|
| 690 | 707 | |
| 691 | -There are a couple of tricky wrinkles
|
|
| 708 | +There are, of course, some tricky wrinkles
|
|
| 692 | 709 | |
| 693 | 710 | (W1) Consider this example which shadows `j`:
|
| 694 | 711 | join j = rhs in
|
| ... | ... | @@ -718,6 +735,8 @@ There are a couple of tricky wrinkles |
| 718 | 735 | * In `postprcess_uds`, we add the chucked-out join points to the
|
| 719 | 736 | returned UsageDetails, with `andUDs`.
|
| 720 | 737 | |
| 738 | +Wrinkles (W1) and (W2) are very similar to Note [Binder swap] (BS3).
|
|
| 739 | + |
|
| 721 | 740 | (W3) Consider this example, which shadows `j`, but this time in an argument
|
| 722 | 741 | join j = rhs
|
| 723 | 742 | in f (case x of { K j -> ...; ... })
|
| ... | ... | @@ -732,12 +751,36 @@ There are a couple of tricky wrinkles |
| 732 | 751 | NB: this is just about efficiency: it is always safe /not/ to zap the
|
| 733 | 752 | occ_join_points.
|
| 734 | 753 | |
| 735 | -(W4) What if the join point binding has a stable unfolding, or RULES?
|
|
| 736 | - They are just alternative right-hand sides, and at each call site we
|
|
| 737 | - will use only one of them. So again, we can use `orUDs` to combine
|
|
| 738 | - usage info from all these alternatives RHSs.
|
|
| 739 | - |
|
| 740 | -Wrinkles (W1) and (W2) are very similar to Note [Binder swap] (BS3).
|
|
| 754 | +(W4) Other things being equal, we want keep the OccInfoEnv stored in
|
|
| 755 | + `occ_join_points` as small as possible, because it is /duplicated/ at
|
|
| 756 | + /every occurrence/ of the join point. We really only want to include
|
|
| 757 | + OccInfo for
|
|
| 758 | + * Local, non-recursive let-bound Ids
|
|
| 759 | + * that occur just once in the RHS of the join point
|
|
| 760 | + particularly including
|
|
| 761 | + * thunks (that's the original point) and
|
|
| 762 | + * join points (so that the trick works recursively).
|
|
| 763 | + We call these the "tracked Ids of j".
|
|
| 764 | + |
|
| 765 | + Including lambda binders is pointless, and slows down the occurrence analyser.
|
|
| 766 | + |
|
| 767 | + e.g. \x. let y = x+1 in
|
|
| 768 | + join j v = ..x..y..(f z z)..
|
|
| 769 | + in ...
|
|
| 770 | + In the `occ_join_points` binding for `j`, we want to track `y`, but
|
|
| 771 | + not `x` (lambda bound) nor `z` (occurs many times).
|
|
| 772 | + |
|
| 773 | + To exploit this:
|
|
| 774 | + * `occ_nested_lets` tracks which Ids are
|
|
| 775 | + nested (not-top-level), non-recursive lets
|
|
| 776 | + * `addJoinPoint` only populates j's entry with occ-info for the "tracked Ids"
|
|
| 777 | + of `j`; that is, that are (a) in occ_nested_lets and (b) have OneOcc.
|
|
| 778 | + * `combineJoinPointUDs` uses
|
|
| 779 | + orLocalOcc for local-let Ids
|
|
| 780 | + andLocalOcc for non-local-let Ids
|
|
| 781 | + |
|
| 782 | + This fancy footwork can matter in extreme cases: it gave a 25% reduction in
|
|
| 783 | + total compiler allocation in #26425..
|
|
| 741 | 784 | |
| 742 | 785 | Note [Finding join points]
|
| 743 | 786 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| ... | ... | @@ -759,45 +802,45 @@ rest of 'OccInfo' until it goes on the binder. |
| 759 | 802 | |
| 760 | 803 | Note [Join arity prediction based on joinRhsArity]
|
| 761 | 804 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 762 | -In general, the join arity from tail occurrences of a join point (O) may be
|
|
| 763 | -higher or lower than the manifest join arity of the join body (M). E.g.,
|
|
| 805 | +In general, the join arity from tail occurrences of a join point (OAr) may be
|
|
| 806 | +higher or lower than the manifest join arity of the join body (MAr). E.g.,
|
|
| 764 | 807 | |
| 765 | - -- M > O:
|
|
| 766 | - let f x y = x + y -- M = 2
|
|
| 767 | - in if b then f 1 else f 2 -- O = 1
|
|
| 808 | + -- MAr > Oar:
|
|
| 809 | + let f x y = x + y -- MAr = 2
|
|
| 810 | + in if b then f 1 else f 2 -- OAr = 1
|
|
| 768 | 811 | ==> { Contify for join arity 1 }
|
| 769 | 812 | join f x = \y -> x + y
|
| 770 | 813 | in if b then jump f 1 else jump f 2
|
| 771 | 814 | |
| 772 | - -- M < O
|
|
| 773 | - let f = id -- M = 0
|
|
| 774 | - in if ... then f 12 else f 13 -- O = 1
|
|
| 815 | + -- MAr < Oar
|
|
| 816 | + let f = id -- MAr = 0
|
|
| 817 | + in if ... then f 12 else f 13 -- OAr = 1
|
|
| 775 | 818 | ==> { Contify for join arity 1, eta-expand f }
|
| 776 | 819 | join f x = id x
|
| 777 | 820 | in if b then jump f 12 else jump f 13
|
| 778 | 821 | |
| 779 | -But for *recursive* let, it is crucial that both arities match up, consider
|
|
| 822 | +But for *recursive* let, it is crucial MAr=OAr. Consider:
|
|
| 780 | 823 | |
| 781 | 824 | letrec f x y = if ... then f x else True
|
| 782 | 825 | in f 42
|
| 783 | 826 | |
| 784 | -Here, M=2 but O=1. If we settled for a joinrec arity of 1, the recursive jump
|
|
| 827 | +Here, MAr=2 but OAr=1. If we settled for a joinrec arity of 1, the recursive jump
|
|
| 785 | 828 | would not happen in a tail context! Contification is invalid here.
|
| 786 | -So indeed it is crucial to demand that M=O.
|
|
| 829 | +So indeed it is crucial to demand that MAr=OAr.
|
|
| 787 | 830 | |
| 788 | -(Side note: Actually, we could be more specific: Let O1 be the join arity of
|
|
| 789 | -occurrences from the letrec RHS and O2 the join arity from the let body. Then
|
|
| 790 | -we need M=O1 and M<=O2 and could simply eta-expand the RHS to match O2 later.
|
|
| 791 | -M=O is the specific case where we don't want to eta-expand. Neither the join
|
|
| 831 | +(Side note: Actually, we could be more specific: Let OAr1 be the join arity of
|
|
| 832 | +occurrences from the letrec RHS and OAr2 the join arity from the let body. Then
|
|
| 833 | +we need MAr=OAr1 and MAr<=OAr2 and could simply eta-expand the RHS to match OAr2 later.
|
|
| 834 | +MAr=OAr is the specific case where we don't want to eta-expand. Neither the join
|
|
| 792 | 835 | points paper nor GHC does this at the moment.)
|
| 793 | 836 | |
| 794 | 837 | We can capitalise on this observation and conclude that *if* f could become a
|
| 795 | -joinrec (without eta-expansion), it will have join arity M.
|
|
| 796 | -Now, M is just the result of 'joinRhsArity', a rather simple, local analysis.
|
|
| 838 | +joinrec (without eta-expansion), it will have join arity MAr.
|
|
| 839 | +Now, MAr is just the result of 'joinRhsArity', a rather simple, local analysis.
|
|
| 797 | 840 | It is also the join arity inside the 'TailUsageDetails' returned by
|
| 798 | 841 | 'occAnalLamTail', so we can predict join arity without doing any fixed-point
|
| 799 | 842 | iteration or really doing any deep traversal of let body or RHS at all.
|
| 800 | -We check for M in the 'adjustTailUsage' call inside 'tagRecBinders'.
|
|
| 843 | +We check for MAr in the 'adjustTailUsage' call inside 'tagRecBinders'.
|
|
| 801 | 844 | |
| 802 | 845 | All this is quite apparent if you look at the contification transformation in
|
| 803 | 846 | Fig. 5 of "Compiling without Continuations" (which does not account for
|
| ... | ... | @@ -807,14 +850,14 @@ eta-expansion at all, mind you). The letrec case looks like this |
| 807 | 850 | ... and a bunch of conditions establishing that f only occurs
|
| 808 | 851 | in app heads of join arity (len as + len xs) inside us and es ...
|
| 809 | 852 | |
| 810 | -The syntactic form `/\as.\xs. L[us]` forces M=O iff `f` occurs in `us`. However,
|
|
| 853 | +The syntactic form `/\as.\xs. L[us]` forces MAr=OAr iff `f` occurs in `us`. However,
|
|
| 811 | 854 | for non-recursive functions, this is the definition of contification from the
|
| 812 | 855 | paper:
|
| 813 | 856 | |
| 814 | 857 | let f = /\as.\xs.u in L[es] ... conditions ...
|
| 815 | 858 | |
| 816 | -Note that u could be a lambda itself, as we have seen. No relationship between M
|
|
| 817 | -and O to exploit here.
|
|
| 859 | +Note that u could be a lambda itself, as we have seen. No relationship between MAr
|
|
| 860 | +and OAr to exploit here.
|
|
| 818 | 861 | |
| 819 | 862 | Note [Join points and unfoldings/rules]
|
| 820 | 863 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| ... | ... | @@ -954,6 +997,22 @@ of both functions, serving as a specification: |
| 954 | 997 | Cyclic Recursive case: 'tagRecBinders'
|
| 955 | 998 | Acyclic Recursive case: 'adjustNonRecRhs'
|
| 956 | 999 | Non-recursive case: 'adjustNonRecRhs'
|
| 1000 | + |
|
| 1001 | +Note [Unfoldings and RULES]
|
|
| 1002 | +~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
| 1003 | +For let-bindings we treat (stable) unfoldings and RULES as "alternative right hand
|
|
| 1004 | +sides". That is, it's as if we had
|
|
| 1005 | + f = case <hiatus> of
|
|
| 1006 | + 1 -> <the-rhs>
|
|
| 1007 | + 2 -> <the-stable-unfolding>
|
|
| 1008 | + 3 -> <rhs of rule1>
|
|
| 1009 | + 4 -> <rhs of rule2>
|
|
| 1010 | +So we combine all these with `orUDs` (#26567). But actually it makes
|
|
| 1011 | +very little difference whether we use `andUDs` or `orUDs` because of
|
|
| 1012 | +Note [Occurrences in stable unfoldings and RULES]: occurrences in an unfolding
|
|
| 1013 | +or RULE are treated as ManyOcc anyway.
|
|
| 1014 | + |
|
| 1015 | +But NB that tail-call info is preserved so that we don't thereby lose join points.
|
|
| 957 | 1016 | -}
|
| 958 | 1017 | |
| 959 | 1018 | ------------------------------------------------------------------
|
| ... | ... | @@ -991,24 +1050,24 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine |
| 991 | 1050 | | mb_join@(JoinPoint {}) <- idJoinPointHood bndr
|
| 992 | 1051 | = -- Analyse the RHS and /then/ the body
|
| 993 | 1052 | let -- Analyse the rhs first, generating rhs_uds
|
| 994 | - !(rhs_uds_s, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs
|
|
| 995 | - rhs_uds = foldl1' orUDs rhs_uds_s -- NB: orUDs. See (W4) of
|
|
| 996 | - -- Note [Occurrence analysis for join points]
|
|
| 1053 | + !(rhs_uds, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs
|
|
| 997 | 1054 | |
| 998 | 1055 | -- Now analyse the body, adding the join point
|
| 999 | 1056 | -- into the environment with addJoinPoint
|
| 1000 | - !(WUD body_uds (occ, body)) = occAnalNonRecBody env bndr' $ \env ->
|
|
| 1057 | + env_body = addLocalLet env lvl bndr
|
|
| 1058 | + !(WUD body_uds (occ, body)) = occAnalNonRecBody env_body bndr' $ \env ->
|
|
| 1001 | 1059 | thing_inside (addJoinPoint env bndr' rhs_uds)
|
| 1002 | 1060 | in
|
| 1003 | 1061 | if isDeadOcc occ -- Drop dead code; see Note [Dead code]
|
| 1004 | 1062 | then WUD body_uds body
|
| 1005 | - else WUD (rhs_uds `orUDs` body_uds) -- Note `orUDs`
|
|
| 1063 | + else WUD (combineJoinPointUDs env rhs_uds body_uds) -- Note `orUDs`
|
|
| 1006 | 1064 | (combine [NonRec (fst (tagNonRecBinder lvl occ bndr')) rhs']
|
| 1007 | 1065 | body)
|
| 1008 | 1066 | |
| 1009 | 1067 | -- The normal case, including newly-discovered join points
|
| 1010 | 1068 | -- Analyse the body and /then/ the RHS
|
| 1011 | - | WUD body_uds (occ,body) <- occAnalNonRecBody env bndr thing_inside
|
|
| 1069 | + | let env_body = addLocalLet env lvl bndr
|
|
| 1070 | + , WUD body_uds (occ,body) <- occAnalNonRecBody env_body bndr thing_inside
|
|
| 1012 | 1071 | = if isDeadOcc occ -- Drop dead code; see Note [Dead code]
|
| 1013 | 1072 | then WUD body_uds body
|
| 1014 | 1073 | else let
|
| ... | ... | @@ -1017,8 +1076,8 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine |
| 1017 | 1076 | -- => join arity O of Note [Join arity prediction based on joinRhsArity]
|
| 1018 | 1077 | (tagged_bndr, mb_join) = tagNonRecBinder lvl occ bndr
|
| 1019 | 1078 | |
| 1020 | - !(rhs_uds_s, final_bndr, rhs') = occAnalNonRecRhs env lvl ire mb_join tagged_bndr rhs
|
|
| 1021 | - in WUD (foldr andUDs body_uds rhs_uds_s) -- Note `andUDs`
|
|
| 1079 | + !(rhs_uds, final_bndr, rhs') = occAnalNonRecRhs env lvl ire mb_join tagged_bndr rhs
|
|
| 1080 | + in WUD (rhs_uds `andUDs` body_uds) -- Note `andUDs`
|
|
| 1022 | 1081 | (combine [NonRec final_bndr rhs'] body)
|
| 1023 | 1082 | |
| 1024 | 1083 | -----------------
|
| ... | ... | @@ -1033,15 +1092,21 @@ occAnalNonRecBody env bndr thing_inside |
| 1033 | 1092 | |
| 1034 | 1093 | -----------------
|
| 1035 | 1094 | occAnalNonRecRhs :: OccEnv -> TopLevelFlag -> ImpRuleEdges
|
| 1036 | - -> JoinPointHood -> Id -> CoreExpr
|
|
| 1037 | - -> (NonEmpty UsageDetails, Id, CoreExpr)
|
|
| 1095 | + -> JoinPointHood -> Id -> CoreExpr
|
|
| 1096 | + -> (UsageDetails, Id, CoreExpr)
|
|
| 1038 | 1097 | occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs
|
| 1039 | 1098 | | null rules, null imp_rule_infos
|
| 1040 | 1099 | = -- Fast path for common case of no rules. This is only worth
|
| 1041 | 1100 | -- 0.1% perf on average, but it's also only a line or two of code
|
| 1042 | - ( adj_rhs_uds :| adj_unf_uds : [], final_bndr_no_rules, final_rhs )
|
|
| 1101 | + ( adj_rhs_uds `orUDs` adj_unf_uds
|
|
| 1102 | + , final_bndr_no_rules, final_rhs )
|
|
| 1103 | + |
|
| 1043 | 1104 | | otherwise
|
| 1044 | - = ( adj_rhs_uds :| adj_unf_uds : adj_rule_uds, final_bndr_with_rules, final_rhs )
|
|
| 1105 | + = ( foldl' orUDs (adj_rhs_uds `orUDs` adj_unf_uds) adj_rule_uds
|
|
| 1106 | + , final_bndr_with_rules, final_rhs )
|
|
| 1107 | + |
|
| 1108 | + -- orUDs: Combine the RHS, (stable) unfolding, and RULES with orUDs
|
|
| 1109 | + -- See Note [Unfoldings and RULES]
|
|
| 1045 | 1110 | where
|
| 1046 | 1111 | --------- Right hand side ---------
|
| 1047 | 1112 | -- For join points, set occ_encl to OccVanilla, via setTailCtxt. If we have
|
| ... | ... | @@ -1054,7 +1119,7 @@ occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs |
| 1054 | 1119 | rhs_ctxt = mkNonRecRhsCtxt lvl bndr unf
|
| 1055 | 1120 | |
| 1056 | 1121 | -- See Note [Join arity prediction based on joinRhsArity]
|
| 1057 | - -- Match join arity O from mb_join_arity with manifest join arity M as
|
|
| 1122 | + -- Match join arity OAr from mb_join_arity with manifest join arity MAr as
|
|
| 1058 | 1123 | -- returned by of occAnalLamTail. It's totally OK for them to mismatch;
|
| 1059 | 1124 | -- hence adjust the UDs from the RHS
|
| 1060 | 1125 | |
| ... | ... | @@ -1764,7 +1829,7 @@ makeNode !env imp_rule_edges bndr_set (bndr, rhs) |
| 1764 | 1829 | -- here because that is what we are setting!
|
| 1765 | 1830 | WTUD unf_tuds unf' = occAnalUnfolding rhs_env unf
|
| 1766 | 1831 | adj_unf_uds = adjustTailArity (JoinPoint rhs_ja) unf_tuds
|
| 1767 | - -- `rhs_ja` is `joinRhsArity rhs` and is the prediction for source M
|
|
| 1832 | + -- `rhs_ja` is `joinRhsArity rhs` and is the prediction for source MAr
|
|
| 1768 | 1833 | -- of Note [Join arity prediction based on joinRhsArity]
|
| 1769 | 1834 | |
| 1770 | 1835 | --------- IMP-RULES --------
|
| ... | ... | @@ -1775,7 +1840,7 @@ makeNode !env imp_rule_edges bndr_set (bndr, rhs) |
| 1775 | 1840 | |
| 1776 | 1841 | --------- All rules --------
|
| 1777 | 1842 | -- See Note [Join points and unfoldings/rules]
|
| 1778 | - -- `rhs_ja` is `joinRhsArity rhs'` and is the prediction for source M
|
|
| 1843 | + -- `rhs_ja` is `joinRhsArity rhs'` and is the prediction for source MAr
|
|
| 1779 | 1844 | -- of Note [Join arity prediction based on joinRhsArity]
|
| 1780 | 1845 | rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
|
| 1781 | 1846 | rules_w_uds = [ (r,l,adjustTailArity (JoinPoint rhs_ja) rhs_wuds)
|
| ... | ... | @@ -2177,7 +2242,9 @@ occAnalLamTail :: OccEnv -> CoreExpr -> WithTailUsageDetails CoreExpr |
| 2177 | 2242 | -- See Note [Adjusting right-hand sides]
|
| 2178 | 2243 | occAnalLamTail env expr
|
| 2179 | 2244 | = let !(WUD usage expr') = occ_anal_lam_tail env expr
|
| 2180 | - in WTUD (TUD (joinRhsArity expr) usage) expr'
|
|
| 2245 | + in WTUD (TUD (joinRhsArity expr') usage) expr'
|
|
| 2246 | + -- If expr looks like (\x. let dead = e in \y. blah), where `dead` is dead
|
|
| 2247 | + -- then joinRhsArity expr' might exceed joinRhsArity expr
|
|
| 2181 | 2248 | |
| 2182 | 2249 | occ_anal_lam_tail :: OccEnv -> CoreExpr -> WithUsageDetails CoreExpr
|
| 2183 | 2250 | -- Does not markInsideLam etc for the outmost batch of lambdas
|
| ... | ... | @@ -2281,7 +2348,7 @@ occAnalUnfolding !env unf |
| 2281 | 2348 | WTUD (TUD rhs_ja uds) rhs' = occAnalLamTail env rhs
|
| 2282 | 2349 | unf' = unf { uf_tmpl = rhs' }
|
| 2283 | 2350 | in WTUD (TUD rhs_ja (markAllMany uds)) unf'
|
| 2284 | - -- markAllMany: see Note [Occurrences in stable unfoldings]
|
|
| 2351 | + -- markAllMany: see Note [Occurrences in stable unfoldings and RULES]
|
|
| 2285 | 2352 | |
| 2286 | 2353 | | otherwise -> WTUD (TUD 0 emptyDetails) unf
|
| 2287 | 2354 | -- For non-Stable unfoldings we leave them undisturbed, but
|
| ... | ... | @@ -2319,12 +2386,13 @@ occAnalRule env rule@(Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs }) |
| 2319 | 2386 | -- Note [Rules are extra RHSs]
|
| 2320 | 2387 | -- Note [Rule dependency info]
|
| 2321 | 2388 | rhs_uds' = markAllMany rhs_uds
|
| 2389 | + -- markAllMany: Note [Occurrences in stable unfoldings and RULES]
|
|
| 2322 | 2390 | rhs_ja = length args -- See Note [Join points and unfoldings/rules]
|
| 2323 | 2391 | |
| 2324 | 2392 | occAnalRule _ other_rule = (other_rule, emptyDetails, TUD 0 emptyDetails)
|
| 2325 | 2393 | |
| 2326 | -{- Note [Occurrences in stable unfoldings]
|
|
| 2327 | -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
| 2394 | +{- Note [Occurrences in stable unfoldings and RULES]
|
|
| 2395 | +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
| 2328 | 2396 | Consider
|
| 2329 | 2397 | f p = BIG
|
| 2330 | 2398 | {-# INLINE g #-}
|
| ... | ... | @@ -2338,7 +2406,7 @@ preinlineUnconditionally here! |
| 2338 | 2406 | |
| 2339 | 2407 | The INLINE pragma says "inline exactly this RHS"; perhaps the
|
| 2340 | 2408 | programmer wants to expose that 'not', say. If we inline f that will make
|
| 2341 | -the Stable unfoldign big, and that wasn't what the programmer wanted.
|
|
| 2409 | +the Stable unfolding big, and that wasn't what the programmer wanted.
|
|
| 2342 | 2410 | |
| 2343 | 2411 | Another way to think about it: if we inlined g as-is into multiple
|
| 2344 | 2412 | call sites, now there's be multiple calls to f.
|
| ... | ... | @@ -2347,6 +2415,8 @@ Bottom line: treat all occurrences in a stable unfolding as "Many". |
| 2347 | 2415 | We still leave tail call information intact, though, as to not spoil
|
| 2348 | 2416 | potential join points.
|
| 2349 | 2417 | |
| 2418 | +The same goes for RULES.
|
|
| 2419 | + |
|
| 2350 | 2420 | Note [Unfoldings and rules]
|
| 2351 | 2421 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 2352 | 2422 | Generally unfoldings and rules are already occurrence-analysed, so we
|
| ... | ... | @@ -2598,7 +2668,7 @@ occAnalArgs :: OccEnv -> CoreExpr -> [CoreExpr] |
| 2598 | 2668 | -> WithUsageDetails CoreExpr
|
| 2599 | 2669 | -- The `fun` argument is just an accumulating parameter,
|
| 2600 | 2670 | -- the base for building the application we return
|
| 2601 | -occAnalArgs !env fun args !one_shots
|
|
| 2671 | +occAnalArgs env fun args one_shots
|
|
| 2602 | 2672 | = go emptyDetails fun args one_shots
|
| 2603 | 2673 | where
|
| 2604 | 2674 | env_args = setNonTailCtxt encl env
|
| ... | ... | @@ -2657,8 +2727,19 @@ Constructors are rather like lambdas in this way. |
| 2657 | 2727 | occAnalApp :: OccEnv
|
| 2658 | 2728 | -> (Expr CoreBndr, [Arg CoreBndr], [CoreTickish])
|
| 2659 | 2729 | -> WithUsageDetails (Expr CoreBndr)
|
| 2660 | --- Naked variables (not applied) end up here too
|
|
| 2661 | -occAnalApp !env (Var fun, args, ticks)
|
|
| 2730 | +occAnalApp !env (Var fun_id, [], ticks)
|
|
| 2731 | + = -- Naked variables (not applied) end up here too, and it's worth giving
|
|
| 2732 | + -- this common case special treatment, because there is so much less to do.
|
|
| 2733 | + -- This is just a specialised copy of the (Var fun_id) case below
|
|
| 2734 | + WUD fun_uds (mkTicks ticks fun')
|
|
| 2735 | + where
|
|
| 2736 | + !(fun', fun_id') = lookupBndrSwap env fun_id
|
|
| 2737 | + !fun_uds = mkOneOcc env fun_id' int_cxt 0
|
|
| 2738 | + !int_cxt = case occ_encl env of
|
|
| 2739 | + OccScrut -> IsInteresting
|
|
| 2740 | + _other -> NotInteresting
|
|
| 2741 | + |
|
| 2742 | +occAnalApp env (Var fun, args, ticks)
|
|
| 2662 | 2743 | -- Account for join arity of runRW# continuation
|
| 2663 | 2744 | -- See Note [Simplification of runRW#]
|
| 2664 | 2745 | --
|
| ... | ... | @@ -2863,7 +2944,11 @@ data OccEnv |
| 2863 | 2944 | -- Invariant: no Id maps to an empty OccInfoEnv
|
| 2864 | 2945 | -- See Note [Occurrence analysis for join points]
|
| 2865 | 2946 | , occ_join_points :: !JoinPointInfo
|
| 2866 | - }
|
|
| 2947 | + |
|
| 2948 | + , occ_nested_lets :: IdSet -- Non-top-level, non-rec-bound lets
|
|
| 2949 | + -- I tried making this field strict, but doing so increased
|
|
| 2950 | + -- compile-time allocation very slightly: 0.1% on average
|
|
| 2951 | + }
|
|
| 2867 | 2952 | |
| 2868 | 2953 | type JoinPointInfo = IdEnv OccInfoEnv
|
| 2869 | 2954 | |
| ... | ... | @@ -2914,7 +2999,8 @@ initOccEnv |
| 2914 | 2999 | |
| 2915 | 3000 | , occ_join_points = emptyVarEnv
|
| 2916 | 3001 | , occ_bs_env = emptyVarEnv
|
| 2917 | - , occ_bs_rng = emptyVarSet }
|
|
| 3002 | + , occ_bs_rng = emptyVarSet
|
|
| 3003 | + , occ_nested_lets = emptyVarSet }
|
|
| 2918 | 3004 | |
| 2919 | 3005 | noBinderSwaps :: OccEnv -> Bool
|
| 2920 | 3006 | noBinderSwaps (OccEnv { occ_bs_env = bs_env }) = isEmptyVarEnv bs_env
|
| ... | ... | @@ -3154,23 +3240,26 @@ postprocess_uds bndrs bad_joins uds |
| 3154 | 3240 | | uniq `elemVarEnvByKey` env = plusVarEnv_C andLocalOcc env join_env
|
| 3155 | 3241 | | otherwise = env
|
| 3156 | 3242 | |
| 3243 | +addLocalLet :: OccEnv -> TopLevelFlag -> Id -> OccEnv
|
|
| 3244 | +addLocalLet env@(OccEnv { occ_nested_lets = ids }) top_lvl id
|
|
| 3245 | + | isTopLevel top_lvl = env
|
|
| 3246 | + | otherwise = env { occ_nested_lets = ids `extendVarSet` id }
|
|
| 3247 | + |
|
| 3157 | 3248 | addJoinPoint :: OccEnv -> Id -> UsageDetails -> OccEnv
|
| 3158 | -addJoinPoint env bndr rhs_uds
|
|
| 3249 | +addJoinPoint env@(OccEnv { occ_join_points = join_points, occ_nested_lets = nested_lets })
|
|
| 3250 | + join_bndr (UD { ud_env = rhs_occs })
|
|
| 3159 | 3251 | | isEmptyVarEnv zeroed_form
|
| 3160 | 3252 | = env
|
| 3161 | 3253 | | otherwise
|
| 3162 | - = env { occ_join_points = extendVarEnv (occ_join_points env) bndr zeroed_form }
|
|
| 3254 | + = env { occ_join_points = extendVarEnv join_points join_bndr zeroed_form }
|
|
| 3163 | 3255 | where
|
| 3164 | - zeroed_form = mkZeroedForm rhs_uds
|
|
| 3256 | + zeroed_form = mapMaybeUniqSetToUFM do_one nested_lets
|
|
| 3257 | + -- See Note [Occurrence analysis for join points] for "zeroed form"
|
|
| 3165 | 3258 | |
| 3166 | -mkZeroedForm :: UsageDetails -> OccInfoEnv
|
|
| 3167 | --- See Note [Occurrence analysis for join points] for "zeroed form"
|
|
| 3168 | -mkZeroedForm (UD { ud_env = rhs_occs })
|
|
| 3169 | - = mapMaybeUFM do_one rhs_occs
|
|
| 3170 | - where
|
|
| 3171 | - do_one :: LocalOcc -> Maybe LocalOcc
|
|
| 3172 | - do_one (ManyOccL {}) = Nothing
|
|
| 3173 | - do_one occ@(OneOccL {}) = Just (occ { lo_n_br = 0 })
|
|
| 3259 | + do_one :: Var -> Maybe LocalOcc
|
|
| 3260 | + do_one bndr = case lookupVarEnv rhs_occs bndr of
|
|
| 3261 | + Just occ@(OneOccL {}) -> Just (occ { lo_n_br = 0 })
|
|
| 3262 | + _ -> Nothing
|
|
| 3174 | 3263 | |
| 3175 | 3264 | --------------------
|
| 3176 | 3265 | transClosureFV :: VarEnv VarSet -> VarEnv VarSet
|
| ... | ... | @@ -3628,7 +3717,14 @@ data LocalOcc -- See Note [LocalOcc] |
| 3628 | 3717 | -- Combining (AlwaysTailCalled 2) and (AlwaysTailCalled 3)
|
| 3629 | 3718 | -- gives NoTailCallInfo
|
| 3630 | 3719 | , lo_int_cxt :: !InterestingCxt }
|
| 3720 | + |
|
| 3631 | 3721 | | ManyOccL !TailCallInfo
|
| 3722 | + -- Why do we need TailCallInfo on ManyOccL?
|
|
| 3723 | + -- Answer 1: recursive bindings are entered many times:
|
|
| 3724 | + -- rec { j x = ...j x'... } in j y
|
|
| 3725 | + -- See the uses of `andUDs` in `tagRecBinders`
|
|
| 3726 | + -- Answer 2: occurrences in stable unfoldings are many-ified
|
|
| 3727 | + -- See Note [Occurrences in stable unfoldings and RULES]
|
|
| 3632 | 3728 | |
| 3633 | 3729 | instance Outputable LocalOcc where
|
| 3634 | 3730 | ppr (OneOccL { lo_n_br = n, lo_tail = tci })
|
| ... | ... | @@ -3651,10 +3747,13 @@ data UsageDetails |
| 3651 | 3747 | |
| 3652 | 3748 | instance Outputable UsageDetails where
|
| 3653 | 3749 | ppr ud@(UD { ud_env = env, ud_z_tail = z_tail })
|
| 3654 | - = text "UD" <+> (braces $ fsep $ punctuate comma $
|
|
| 3655 | - [ ppr uq <+> text ":->" <+> ppr (lookupOccInfoByUnique ud uq)
|
|
| 3656 | - | (uq, _) <- nonDetStrictFoldVarEnv_Directly do_one [] env ])
|
|
| 3657 | - $$ nest 2 (text "ud_z_tail" <+> ppr z_tail)
|
|
| 3750 | + = text "UD" <> (braces (vcat
|
|
| 3751 | + [ -- `final` shows the result of a proper lookupOccInfo, returning OccInfo
|
|
| 3752 | + -- after accounting for `ud_z_tail` etc.
|
|
| 3753 | + text "final =" <+> (fsep $ punctuate comma $
|
|
| 3754 | + [ ppr uq <+> text ":->" <+> ppr (lookupOccInfoByUnique ud uq)
|
|
| 3755 | + | (uq, _) <- nonDetStrictFoldVarEnv_Directly do_one [] env ])
|
|
| 3756 | + , text "ud_z_tail" <+> ppr z_tail ] ))
|
|
| 3658 | 3757 | where
|
| 3659 | 3758 | do_one :: Unique -> LocalOcc -> [(Unique,LocalOcc)] -> [(Unique,LocalOcc)]
|
| 3660 | 3759 | do_one uniq occ occs = (uniq, occ) : occs
|
| ... | ... | @@ -3663,7 +3762,7 @@ instance Outputable UsageDetails where |
| 3663 | 3762 | -- | TailUsageDetails captures the result of applying 'occAnalLamTail'
|
| 3664 | 3763 | -- to a function `\xyz.body`. The TailUsageDetails pairs together
|
| 3665 | 3764 | -- * the number of lambdas (including type lambdas: a JoinArity)
|
| 3666 | --- * UsageDetails for the `body` of the lambda, unadjusted by `adjustTailUsage`.
|
|
| 3765 | +-- * UsageDetails for the `body` of the lambda, /unadjusted/ by `adjustTailUsage`.
|
|
| 3667 | 3766 | -- If the binding turns out to be a join point with the indicated join
|
| 3668 | 3767 | -- arity, this unadjusted usage details is just what we need; otherwise we
|
| 3669 | 3768 | -- need to discard tail calls. That's what `adjustTailUsage` does.
|
| ... | ... | @@ -3681,8 +3780,17 @@ data WithTailUsageDetails a = WTUD !TailUsageDetails !a |
| 3681 | 3780 | |
| 3682 | 3781 | andUDs:: UsageDetails -> UsageDetails -> UsageDetails
|
| 3683 | 3782 | orUDs :: UsageDetails -> UsageDetails -> UsageDetails
|
| 3684 | -andUDs = combineUsageDetailsWith andLocalOcc
|
|
| 3685 | -orUDs = combineUsageDetailsWith orLocalOcc
|
|
| 3783 | +andUDs = combineUsageDetailsWith (\_uniq -> andLocalOcc)
|
|
| 3784 | +orUDs = combineUsageDetailsWith (\_uniq -> orLocalOcc)
|
|
| 3785 | + |
|
| 3786 | +combineJoinPointUDs :: OccEnv -> UsageDetails -> UsageDetails -> UsageDetails
|
|
| 3787 | +-- See (W4) in Note [Occurrence analysis for join points]
|
|
| 3788 | +combineJoinPointUDs (OccEnv { occ_nested_lets = nested_lets }) uds1 uds2
|
|
| 3789 | + = combineUsageDetailsWith combine uds1 uds2
|
|
| 3790 | + where
|
|
| 3791 | + combine uniq occ1 occ2
|
|
| 3792 | + | uniq `elemVarSetByKey` nested_lets = orLocalOcc occ1 occ2
|
|
| 3793 | + | otherwise = andLocalOcc occ1 occ2
|
|
| 3686 | 3794 | |
| 3687 | 3795 | mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails
|
| 3688 | 3796 | mkOneOcc !env id int_cxt arity
|
| ... | ... | @@ -3699,7 +3807,8 @@ mkOneOcc !env id int_cxt arity |
| 3699 | 3807 | = mkSimpleDetails (unitVarEnv id occ)
|
| 3700 | 3808 | |
| 3701 | 3809 | where
|
| 3702 | - occ = OneOccL { lo_n_br = 1, lo_int_cxt = int_cxt
|
|
| 3810 | + occ = OneOccL { lo_n_br = 1
|
|
| 3811 | + , lo_int_cxt = int_cxt
|
|
| 3703 | 3812 | , lo_tail = AlwaysTailCalled arity }
|
| 3704 | 3813 | |
| 3705 | 3814 | -- Add several occurrences, assumed not to be tail calls
|
| ... | ... | @@ -3786,7 +3895,7 @@ restrictFreeVars bndrs fvs = restrictUniqSetToUFM bndrs fvs |
| 3786 | 3895 | -------------------
|
| 3787 | 3896 | -- Auxiliary functions for UsageDetails implementation
|
| 3788 | 3897 | |
| 3789 | -combineUsageDetailsWith :: (LocalOcc -> LocalOcc -> LocalOcc)
|
|
| 3898 | +combineUsageDetailsWith :: (Unique -> LocalOcc -> LocalOcc -> LocalOcc)
|
|
| 3790 | 3899 | -> UsageDetails -> UsageDetails -> UsageDetails
|
| 3791 | 3900 | {-# INLINE combineUsageDetailsWith #-}
|
| 3792 | 3901 | combineUsageDetailsWith plus_occ_info
|
| ... | ... | @@ -3796,9 +3905,9 @@ combineUsageDetailsWith plus_occ_info |
| 3796 | 3905 | | isEmptyVarEnv env2 = uds1
|
| 3797 | 3906 | | otherwise
|
| 3798 | 3907 | -- See Note [Strictness in the occurrence analyser]
|
| 3799 | - -- Using strictPlusVarEnv here speeds up the test T26425 by about 10% by avoiding
|
|
| 3800 | - -- intermediate thunks.
|
|
| 3801 | - = UD { ud_env = strictPlusVarEnv_C plus_occ_info env1 env2
|
|
| 3908 | + -- Using strictPlusVarEnv here speeds up the test T26425
|
|
| 3909 | + -- by about 10% by avoiding intermediate thunks.
|
|
| 3910 | + = UD { ud_env = strictPlusVarEnv_C_Directly plus_occ_info env1 env2
|
|
| 3802 | 3911 | , ud_z_many = strictPlusVarEnv z_many1 z_many2
|
| 3803 | 3912 | , ud_z_in_lam = plusVarEnv z_in_lam1 z_in_lam2
|
| 3804 | 3913 | , ud_z_tail = strictPlusVarEnv z_tail1 z_tail2 }
|
| ... | ... | @@ -3842,8 +3951,6 @@ lookupOccInfoByUnique (UD { ud_env = env |
| 3842 | 3951 | | uniq `elemVarEnvByKey` z_tail = NoTailCallInfo
|
| 3843 | 3952 | | otherwise = ti
|
| 3844 | 3953 | |
| 3845 | - |
|
| 3846 | - |
|
| 3847 | 3954 | -------------------
|
| 3848 | 3955 | -- See Note [Adjusting right-hand sides]
|
| 3849 | 3956 | |
| ... | ... | @@ -3853,21 +3960,22 @@ adjustNonRecRhs :: JoinPointHood |
| 3853 | 3960 | -- ^ This function concentrates shared logic between occAnalNonRecBind and the
|
| 3854 | 3961 | -- AcyclicSCC case of occAnalRec.
|
| 3855 | 3962 | -- It returns the adjusted rhs UsageDetails combined with the body usage
|
| 3856 | -adjustNonRecRhs mb_join_arity rhs_wuds@(WTUD _ rhs)
|
|
| 3857 | - = WUD (adjustTailUsage mb_join_arity rhs_wuds) rhs
|
|
| 3858 | - |
|
| 3963 | +adjustNonRecRhs mb_join_arity (WTUD (TUD rhs_ja uds) rhs)
|
|
| 3964 | + = WUD (adjustTailUsage exact_join rhs uds) rhs
|
|
| 3965 | + where
|
|
| 3966 | + exact_join = mb_join_arity == JoinPoint rhs_ja
|
|
| 3859 | 3967 | |
| 3860 | -adjustTailUsage :: JoinPointHood
|
|
| 3861 | - -> WithTailUsageDetails CoreExpr -- Rhs usage, AFTER occAnalLamTail
|
|
| 3968 | +adjustTailUsage :: Bool -- True <=> Exactly-matching join point; don't do markNonTail
|
|
| 3969 | + -> CoreExpr -- Rhs usage, AFTER occAnalLamTail
|
|
| 3970 | + -> UsageDetails
|
|
| 3862 | 3971 | -> UsageDetails
|
| 3863 | -adjustTailUsage mb_join_arity (WTUD (TUD rhs_ja uds) rhs)
|
|
| 3972 | +adjustTailUsage exact_join rhs uds
|
|
| 3864 | 3973 | = -- c.f. occAnal (Lam {})
|
| 3865 | 3974 | markAllInsideLamIf (not one_shot) $
|
| 3866 | 3975 | markAllNonTailIf (not exact_join) $
|
| 3867 | 3976 | uds
|
| 3868 | 3977 | where
|
| 3869 | 3978 | one_shot = isOneShotFun rhs
|
| 3870 | - exact_join = mb_join_arity == JoinPoint rhs_ja
|
|
| 3871 | 3979 | |
| 3872 | 3980 | adjustTailArity :: JoinPointHood -> TailUsageDetails -> UsageDetails
|
| 3873 | 3981 | adjustTailArity mb_rhs_ja (TUD ja usage)
|
| ... | ... | @@ -3914,8 +4022,9 @@ tagNonRecBinder lvl occ bndr |
| 3914 | 4022 | tagRecBinders :: TopLevelFlag -- At top level?
|
| 3915 | 4023 | -> UsageDetails -- Of body of let ONLY
|
| 3916 | 4024 | -> [NodeDetails]
|
| 3917 | - -> WithUsageDetails -- Adjusted details for whole scope,
|
|
| 3918 | - -- with binders removed
|
|
| 4025 | + -> WithUsageDetails -- Adjusted details for whole scope
|
|
| 4026 | + -- still including the binders;
|
|
| 4027 | + -- (they are removed by `addInScope`)
|
|
| 3919 | 4028 | [IdWithOccInfo] -- Tagged binders
|
| 3920 | 4029 | -- Substantially more complicated than non-recursive case. Need to adjust RHS
|
| 3921 | 4030 | -- details *before* tagging binders (because the tags depend on the RHSes).
|
| ... | ... | @@ -3925,32 +4034,21 @@ tagRecBinders lvl body_uds details_s |
| 3925 | 4034 | |
| 3926 | 4035 | -- 1. See Note [Join arity prediction based on joinRhsArity]
|
| 3927 | 4036 | -- Determine possible join-point-hood of whole group, by testing for
|
| 3928 | - -- manifest join arity M.
|
|
| 3929 | - -- This (re-)asserts that makeNode had made tuds for that same arity M!
|
|
| 4037 | + -- manifest join arity MAr.
|
|
| 4038 | + -- This (re-)asserts that makeNode had made tuds for that same arity MAr!
|
|
| 3930 | 4039 | unadj_uds = foldr (andUDs . test_manifest_arity) body_uds details_s
|
| 3931 | - test_manifest_arity ND{nd_rhs = WTUD tuds rhs}
|
|
| 3932 | - = adjustTailArity (JoinPoint (joinRhsArity rhs)) tuds
|
|
| 4040 | + test_manifest_arity ND{nd_rhs = WTUD (TUD rhs_ja uds) rhs}
|
|
| 4041 | + = assertPpr (rhs_ja == joinRhsArity rhs) (ppr rhs_ja $$ ppr uds $$ ppr rhs) $
|
|
| 4042 | + uds
|
|
| 3933 | 4043 | |
| 4044 | + will_be_joins :: Bool
|
|
| 3934 | 4045 | will_be_joins = decideRecJoinPointHood lvl unadj_uds bndrs
|
| 3935 | 4046 | |
| 3936 | - mb_join_arity :: Id -> JoinPointHood
|
|
| 3937 | - -- mb_join_arity: See Note [Join arity prediction based on joinRhsArity]
|
|
| 3938 | - -- This is the source O
|
|
| 3939 | - mb_join_arity bndr
|
|
| 3940 | - -- Can't use willBeJoinId_maybe here because we haven't tagged
|
|
| 3941 | - -- the binder yet (the tag depends on these adjustments!)
|
|
| 3942 | - | will_be_joins
|
|
| 3943 | - , AlwaysTailCalled arity <- lookupTailCallInfo unadj_uds bndr
|
|
| 3944 | - = JoinPoint arity
|
|
| 3945 | - | otherwise
|
|
| 3946 | - = assert (not will_be_joins) -- Should be AlwaysTailCalled if
|
|
| 3947 | - NotJoinPoint -- we are making join points!
|
|
| 3948 | - |
|
| 3949 | 4047 | -- 2. Adjust usage details of each RHS, taking into account the
|
| 3950 | 4048 | -- join-point-hood decision
|
| 3951 | - rhs_udss' = [ adjustTailUsage (mb_join_arity bndr) rhs_wuds
|
|
| 4049 | + rhs_udss' = [ adjustTailUsage will_be_joins rhs rhs_uds
|
|
| 3952 | 4050 | -- Matching occAnalLamTail in makeNode
|
| 3953 | - | ND { nd_bndr = bndr, nd_rhs = rhs_wuds } <- details_s ]
|
|
| 4051 | + | ND { nd_rhs = WTUD (TUD _ rhs_uds) rhs } <- details_s ]
|
|
| 3954 | 4052 | |
| 3955 | 4053 | -- 3. Compute final usage details from adjusted RHS details
|
| 3956 | 4054 | adj_uds = foldr andUDs body_uds rhs_udss'
|
| ... | ... | @@ -3969,9 +4067,9 @@ setBinderOcc occ_info bndr |
| 3969 | 4067 | | otherwise = setIdOccInfo bndr occ_info
|
| 3970 | 4068 | |
| 3971 | 4069 | -- | Decide whether some bindings should be made into join points or not, based
|
| 3972 | --- on its occurrences. This is
|
|
| 4070 | +-- on its occurrences.
|
|
| 3973 | 4071 | -- Returns `False` if they can't be join points. Note that it's an
|
| 4072 | +-- all-or-nothing decision: if multiple binders are given, they are
|
|
| 3974 | 4073 | -- assumed to be mutually recursive.
|
| 3975 | 4074 | --
|
| 3976 | 4075 | -- It must, however, be a final decision. If we say `True` for 'f',
|
| ... | ... | @@ -393,12 +393,19 @@ simple_app env e0@(Lam {}) as0@(_:_) |
| 393 | 393 | = wrapLet mb_pr $ do_beta env'' body as
|
| 394 | 394 | where (env', b') = subst_opt_bndr env b
|
| 395 | 395 | |
| 396 | - do_beta env e@(Lam b body) as@(CastIt co:rest)
|
|
| 397 | - -- See Note [Desugaring unlifted newtypes]
|
|
| 396 | + -- See Note [Eliminate casts in function position]
|
|
| 397 | + do_beta env e@(Lam b _) as@(CastIt out_co:rest)
|
|
| 398 | 398 | | isNonCoVarId b
|
| 399 | - , Just (b', body') <- pushCoercionIntoLambda (soe_subst env) b body co
|
|
| 399 | + -- Optimise the inner lambda to make it an 'OutExpr', which makes it
|
|
| 400 | + -- possible to call 'pushCoercionIntoLambda' with the 'OutCoercion' 'co'.
|
|
| 401 | + -- This is kind of horrible, as for nested casted lambdas with a big body,
|
|
| 402 | + -- we will repeatedly optimise the body (once for each binder). However,
|
|
| 403 | + -- we need to do this to avoid mixing 'InExpr' and 'OutExpr', or two
|
|
| 404 | + -- 'InExpr' with different environments (getting this wrong caused #26588 & #26589.)
|
|
| 405 | + , Lam out_b out_body <- simple_app env e []
|
|
| 406 | + , Just (b', body') <- pushCoercionIntoLambda (soeInScope env) out_b out_body out_co
|
|
| 400 | 407 | = do_beta (soeZapSubst env) (Lam b' body') rest
|
| 401 | - -- soeZapSubst: pushCoercionIntoLambda applies the substitution
|
|
| 408 | + -- soeZapSubst: we've already optimised everything (the lambda and 'rest') by now.
|
|
| 402 | 409 | | otherwise
|
| 403 | 410 | = rebuild_app env (simple_opt_expr env e) as
|
| 404 | 411 | |
| ... | ... | @@ -511,7 +518,31 @@ TL;DR: To avoid the rest of the compiler pipeline seeing these bad lambas, we |
| 511 | 518 | rely on the simple optimiser to both inline the newtype unfolding and
|
| 512 | 519 | subsequently deal with the resulting lambdas (either beta-reducing them
|
| 513 | 520 | altogether or pushing coercions into them so that they satisfy the
|
| 514 | -representation-polymorphism invariants).
|
|
| 521 | +representation-polymorphism invariants). See Note [Eliminate casts in function position].
|
|
| 522 | + |
|
| 523 | +[Alternative approach] (GHC ticket #26608)
|
|
| 524 | + |
|
| 525 | + We could instead, in the typechecker, emit a special form (a new constructor
|
|
| 526 | + of XXExprGhcTc) for instantiations of representation-polymorphic unlifted
|
|
| 527 | + newtypes (whether applied to a value argument or not):
|
|
| 528 | + |
|
| 529 | + UnliftedNT :: DataCon -> [Type] -> Coercion -> XXExprGhcTc
|
|
| 530 | + |
|
| 531 | + where "UnliftedNT nt_con [ty1, ...] co" represents the expression:
|
|
| 532 | + |
|
| 533 | + ( nt_con @ty1 ... ) |> co
|
|
| 534 | + |
|
| 535 | + The desugarer would then turn these AST nodes into appropriate Core, doing
|
|
| 536 | + what the simple optimiser does today:
|
|
| 537 | + - inline the compulsory unfolding of the newtype constructor
|
|
| 538 | + - apply it to its type arguments and beta reduce
|
|
| 539 | + - push the coercion into the resulting lambda
|
|
| 540 | + |
|
| 541 | + This would have several advantages:
|
|
| 542 | + - the desugarer would never produce "invalid" Core that needs to be
|
|
| 543 | + tidied up by the simple optimiser,
|
|
| 544 | + - the ugly and inefficient implementation described in
|
|
| 545 | + Note [Eliminate casts in function position] could be removed.
|
|
| 515 | 546 | |
| 516 | 547 | Wrinkle [Unlifted newtypes with wrappers]
|
| 517 | 548 | |
| ... | ... | @@ -717,50 +748,49 @@ rhss here. |
| 717 | 748 | |
| 718 | 749 | Note [Eliminate casts in function position]
|
| 719 | 750 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 720 | -Consider the following program:
|
|
| 751 | +Due to the current implementation strategy for representation-polymorphic
|
|
| 752 | +unlifted newtypes, as described in Note [Desugaring unlifted newtypes], we rely
|
|
| 753 | +on the simple optimiser to push coercions into lambdas, such as in the following
|
|
| 754 | +example:
|
|
| 721 | 755 | |
| 722 | 756 | type R :: Type -> RuntimeRep
|
| 723 | - type family R a where { R Float = FloatRep; R Double = DoubleRep }
|
|
| 724 | - type F :: forall (a :: Type) -> TYPE (R a)
|
|
| 725 | - type family F a where { F Float = Float# ; F Double = Double# }
|
|
| 757 | + type family R a where { R Int = IntRep }
|
|
| 758 | + type F :: forall a -> TYPE (R a)
|
|
| 759 | + type family F a where { F Int = Int# }
|
|
| 726 | 760 | |
| 727 | - type N :: forall (a :: Type) -> TYPE (R a)
|
|
| 728 | 761 | newtype N a = MkN (F a)
|
| 729 | 762 | |
| 730 | -As MkN is a newtype, its unfolding is a lambda which wraps its argument
|
|
| 731 | -in a cast:
|
|
| 732 | - |
|
| 733 | - MkN :: forall (a :: Type). F a -> N a
|
|
| 734 | - MkN = /\a \(x::F a). x |> co_ax
|
|
| 735 | - -- recall that F a :: TYPE (R a)
|
|
| 736 | - |
|
| 737 | -This is a representation-polymorphic lambda, in which the binder has an unknown
|
|
| 738 | -representation (R a). We can't compile such a lambda on its own, but we can
|
|
| 739 | -compile instantiations, such as `MkN @Float` or `MkN @Double`.
|
|
| 763 | +Now, an instantiated occurrence of 'MkN', such as 'MkN @Int' (whether applied
|
|
| 764 | +to a value argument or not) will lead, after inlining the compulsory unfolding
|
|
| 765 | +of 'MkN', to a lambda fo the form:
|
|
| 740 | 766 | |
| 741 | -Our strategy to avoid running afoul of the representation-polymorphism
|
|
| 742 | -invariants of Note [Representation polymorphism invariants] in GHC.Core is thus:
|
|
| 767 | + ( \ ( x :: F Int ) -> body ) |> co
|
|
| 743 | 768 | |
| 744 | - 1. Give the newtype a compulsory unfolding (it has no binding, as we can't
|
|
| 745 | - define lambdas with representation-polymorphic value binders in source Haskell).
|
|
| 746 | - 2. Rely on the optimiser to beta-reduce away any representation-polymorphic
|
|
| 747 | - value binders.
|
|
| 769 | + where
|
|
| 770 | + co :: ( F Int -> res ) ~# ( Int# -> res )
|
|
| 748 | 771 | |
| 749 | -For example, consider the application
|
|
| 772 | +The problem is that we now have a lambda abstraction whose binder does not have a
|
|
| 773 | +fixed RuntimeRep in the sense of Note [Fixed RuntimeRep] in GHC.Tc.Utils.Concrete.
|
|
| 750 | 774 | |
| 751 | - MkN @Float 34.0#
|
|
| 775 | +However, if we use 'pushCoercionIntoLambda', we end up with:
|
|
| 752 | 776 | |
| 753 | -After inlining MkN we'll get
|
|
| 777 | + ( \ ( x' :: Int# ) -> body' )
|
|
| 754 | 778 | |
| 755 | - ((/\a \(x:F a). x |> co_ax) @Float) |> co 34#
|
|
| 779 | +which satisfies the representation-polymorphism invariants of
|
|
| 780 | +Note [Representation polymorphism invariants] in GHC.Core.
|
|
| 756 | 781 | |
| 757 | -where co :: (F Float -> N Float) ~ (Float# ~ N Float)
|
|
| 782 | +In conclusion:
|
|
| 758 | 783 | |
| 759 | -But to actually beta-reduce that lambda, we need to push the 'co'
|
|
| 760 | -inside the `\x` with pushCoercionIntoLambda. Hence the extra
|
|
| 761 | -equation for Cast-of-Lam in simple_app.
|
|
| 784 | + 1. The simple optimiser must push casts into lambdas.
|
|
| 785 | + 2. It must also deal with a situation such as (MkN @Int) |> co, where we first
|
|
| 786 | + inline the compulsory unfolding of N. This means the simple optimiser must
|
|
| 787 | + "peel off" the casts and optimise the inner expression first, to determine
|
|
| 788 | + whether it is a lambda abstraction or not.
|
|
| 762 | 789 | |
| 763 | -This is regrettably delicate.
|
|
| 790 | +This is regrettably delicate. If we could make sure the typechecker/desugarer
|
|
| 791 | +did not produce these bad lambdas in the first place (as described in
|
|
| 792 | +[Alternative approach] in Note [Desugaring unlifted newtypes]), we could
|
|
| 793 | +get rid of this ugly logic.
|
|
| 764 | 794 | |
| 765 | 795 | Note [Preserve join-binding arity]
|
| 766 | 796 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| ... | ... | @@ -1673,7 +1703,7 @@ exprIsLambda_maybe ise@(ISE in_scope_set _) (Cast casted_e co) |
| 1673 | 1703 | -- this implies that x is not in scope in gamma (makes this code simpler)
|
| 1674 | 1704 | , not (isTyVar x) && not (isCoVar x)
|
| 1675 | 1705 | , assert (not $ x `elemVarSet` tyCoVarsOfCo co) True
|
| 1676 | - , Just (x',e') <- pushCoercionIntoLambda (mkEmptySubst in_scope_set) x e co
|
|
| 1706 | + , Just (x',e') <- pushCoercionIntoLambda in_scope_set x e co
|
|
| 1677 | 1707 | , let res = Just (x',e',ts)
|
| 1678 | 1708 | = --pprTrace "exprIsLambda_maybe:Cast" (vcat [ppr casted_e,ppr co,ppr res)])
|
| 1679 | 1709 | res
|
| ... | ... | @@ -1268,6 +1268,7 @@ optLevelFlags -- see Note [Documenting optimisation flags] |
| 1268 | 1268 | , ([1,2], Opt_CfgBlocklayout) -- Experimental
|
| 1269 | 1269 | |
| 1270 | 1270 | , ([1,2], Opt_Specialise)
|
| 1271 | + , ([1,2], Opt_PolymorphicSpecialisation) -- Now on by default (#23559)
|
|
| 1271 | 1272 | , ([1,2], Opt_CrossModuleSpecialise)
|
| 1272 | 1273 | , ([1,2], Opt_InlineGenerics)
|
| 1273 | 1274 | , ([1,2], Opt_Strictness)
|
| ... | ... | @@ -909,6 +909,7 @@ optimisationFlags = EnumSet.fromList |
| 909 | 909 | , Opt_SpecialiseAggressively
|
| 910 | 910 | , Opt_CrossModuleSpecialise
|
| 911 | 911 | , Opt_StaticArgumentTransformation
|
| 912 | + , Opt_PolymorphicSpecialisation
|
|
| 912 | 913 | , Opt_CSE
|
| 913 | 914 | , Opt_StgCSE
|
| 914 | 915 | , Opt_StgLiftLams
|
| ... | ... | @@ -1620,7 +1620,7 @@ ds_hs_wrapper hs_wrap |
| 1620 | 1620 | do { x <- newSysLocalDs (mkScaled (subMultCoRKind w_co) t)
|
| 1621 | 1621 | ; go c1 $ \w1 ->
|
| 1622 | 1622 | go c2 $ \w2 ->
|
| 1623 | - let app f a = mkCoreApp (text "dsHsWrapper") f a
|
|
| 1623 | + let app f a = mkCoreApp f a
|
|
| 1624 | 1624 | arg = w1 (Var x)
|
| 1625 | 1625 | in k (\e -> (Lam x (w2 (app e arg)))) }
|
| 1626 | 1626 |
| ... | ... | @@ -877,8 +877,7 @@ dsHsConLike (PatSynCon ps) |
| 877 | 877 | | Just (builder_name, _, add_void) <- patSynBuilder ps
|
| 878 | 878 | = do { builder_id <- dsLookupGlobalId builder_name
|
| 879 | 879 | ; return (if add_void
|
| 880 | - then mkCoreApp (text "dsConLike" <+> ppr ps)
|
|
| 881 | - (Var builder_id) unboxedUnitExpr
|
|
| 880 | + then mkCoreApp (Var builder_id) unboxedUnitExpr
|
|
| 882 | 881 | else Var builder_id) }
|
| 883 | 882 | | otherwise
|
| 884 | 883 | = pprPanic "dsConLike" (ppr ps)
|
| ... | ... | @@ -301,7 +301,7 @@ matchView (var :| vars) ty eqns@(eqn1 :| _) |
| 301 | 301 | -- compile the view expressions
|
| 302 | 302 | ; viewExpr' <- dsExpr viewExpr
|
| 303 | 303 | ; return (mkViewMatchResult var'
|
| 304 | - (mkCoreApp (text "matchView") viewExpr' (Var var))
|
|
| 304 | + (mkCoreApp viewExpr' (Var var))
|
|
| 305 | 305 | match_result) }
|
| 306 | 306 | |
| 307 | 307 | -- decompose the first pattern and leave the rest alone
|
| ... | ... | @@ -749,13 +749,13 @@ tcInstFun do_ql inst_final (tc_fun, fun_ctxt) fun_sigma rn_args |
| 749 | 749 | go1 _pos acc fun_ty []
|
| 750 | 750 | | XExpr (ConLikeTc (RealDataCon dc)) <- tc_fun
|
| 751 | 751 | , isNewDataCon dc
|
| 752 | - , [Scaled _ arg_ty] <- dataConOrigArgTys dc
|
|
| 752 | + , [Scaled _ orig_arg_ty] <- dataConOrigArgTys dc
|
|
| 753 | 753 | , n_val_args == 0
|
| 754 | 754 | -- If we're dealing with an unsaturated representation-polymorphic
|
| 755 | 755 | -- UnliftedNewype, then perform a representation-polymorphism check.
|
| 756 | 756 | -- See Note [Representation-polymorphism checks for unsaturated unlifted newtypes]
|
| 757 | 757 | -- in GHC.Tc.Utils.Concrete.
|
| 758 | - , not $ typeHasFixedRuntimeRep arg_ty
|
|
| 758 | + , not $ typeHasFixedRuntimeRep orig_arg_ty
|
|
| 759 | 759 | = do { (wrap_co, arg_ty, res_ty) <-
|
| 760 | 760 | matchActualFunTy (FRRRepPolyUnliftedNewtype dc)
|
| 761 | 761 | (Just $ HsExprTcThing tc_fun)
|
| ... | ... | @@ -1333,7 +1333,7 @@ zapFragileOcc occ = zapOccTailCallInfo occ |
| 1333 | 1333 | |
| 1334 | 1334 | instance Outputable OccInfo where
|
| 1335 | 1335 | -- only used for debugging; never parsed. KSW 1999-07
|
| 1336 | - ppr (ManyOccs tails) = pprShortTailCallInfo tails
|
|
| 1336 | + ppr (ManyOccs tails) = text "Many" <> parens (pprShortTailCallInfo tails)
|
|
| 1337 | 1337 | ppr IAmDead = text "Dead"
|
| 1338 | 1338 | ppr (IAmALoopBreaker rule_only tails)
|
| 1339 | 1339 | = text "LoopBreaker" <> pp_ro <> pprShortTailCallInfo tails
|
| ... | ... | @@ -53,7 +53,7 @@ module GHC.Types.Unique.FM ( |
| 53 | 53 | plusUFM,
|
| 54 | 54 | strictPlusUFM,
|
| 55 | 55 | plusUFM_C,
|
| 56 | - strictPlusUFM_C,
|
|
| 56 | + strictPlusUFM_C, strictPlusUFM_C_Directly,
|
|
| 57 | 57 | plusUFM_CD,
|
| 58 | 58 | plusUFM_CD2,
|
| 59 | 59 | mergeUFM,
|
| ... | ... | @@ -281,6 +281,9 @@ plusUFM_C f (UFM x) (UFM y) = UFM (M.unionWith f x y) |
| 281 | 281 | strictPlusUFM_C :: (elt -> elt -> elt) -> UniqFM key elt -> UniqFM key elt -> UniqFM key elt
|
| 282 | 282 | strictPlusUFM_C f (UFM x) (UFM y) = UFM (MS.unionWith f x y)
|
| 283 | 283 | |
| 284 | +strictPlusUFM_C_Directly :: (Unique -> elt -> elt -> elt) -> UniqFM key elt -> UniqFM key elt -> UniqFM key elt
|
|
| 285 | +strictPlusUFM_C_Directly f (UFM x) (UFM y) = UFM (MS.unionWithKey (f . mkUniqueGrimily) x y)
|
|
| 286 | + |
|
| 284 | 287 | -- | `plusUFM_CD f m1 d1 m2 d2` merges the maps using `f` as the
|
| 285 | 288 | -- combinding function and `d1` resp. `d2` as the default value if
|
| 286 | 289 | -- there is no entry in `m1` reps. `m2`. The domain is the union of
|
| ... | ... | @@ -40,6 +40,7 @@ module GHC.Types.Unique.Set ( |
| 40 | 40 | lookupUniqSet_Directly,
|
| 41 | 41 | partitionUniqSet,
|
| 42 | 42 | mapUniqSet,
|
| 43 | + mapUniqSetToUFM, mapMaybeUniqSetToUFM,
|
|
| 43 | 44 | unsafeUFMToUniqSet,
|
| 44 | 45 | nonDetEltsUniqSet,
|
| 45 | 46 | nonDetKeysUniqSet,
|
| ... | ... | @@ -211,6 +212,14 @@ mapUniqSet f = mkUniqSet . map f . nonDetEltsUniqSet |
| 211 | 212 | mapMaybeUniqSet_sameUnique :: (a -> Maybe b) -> UniqSet a -> UniqSet b
|
| 212 | 213 | mapMaybeUniqSet_sameUnique f (UniqSet a) = UniqSet $ mapMaybeUFM_sameUnique f a
|
| 213 | 214 | |
| 215 | +mapUniqSetToUFM :: (a -> b) -> UniqSet a -> UniqFM a b
|
|
| 216 | +-- Same keys, new values
|
|
| 217 | +mapUniqSetToUFM f (UniqSet ufm) = mapUFM f ufm
|
|
| 218 | + |
|
| 219 | +mapMaybeUniqSetToUFM :: (a -> Maybe b) -> UniqSet a -> UniqFM a b
|
|
| 220 | +-- Same keys, new values
|
|
| 221 | +mapMaybeUniqSetToUFM f (UniqSet ufm) = mapMaybeUFM f ufm
|
|
| 222 | + |
|
| 214 | 223 | -- Two 'UniqSet's are considered equal if they contain the same
|
| 215 | 224 | -- uniques.
|
| 216 | 225 | instance Eq (UniqSet a) where
|
| ... | ... | @@ -12,7 +12,8 @@ module GHC.Types.Var.Env ( |
| 12 | 12 | elemVarEnv, disjointVarEnv, anyVarEnv,
|
| 13 | 13 | extendVarEnv, extendVarEnv_C, extendVarEnv_Acc,
|
| 14 | 14 | extendVarEnvList,
|
| 15 | - strictPlusVarEnv, plusVarEnv, plusVarEnv_C, strictPlusVarEnv_C,
|
|
| 15 | + strictPlusVarEnv, plusVarEnv, plusVarEnv_C,
|
|
| 16 | + strictPlusVarEnv_C, strictPlusVarEnv_C_Directly,
|
|
| 16 | 17 | plusVarEnv_CD, plusMaybeVarEnv_C,
|
| 17 | 18 | plusVarEnvList, alterVarEnv,
|
| 18 | 19 | delVarEnvList, delVarEnv,
|
| ... | ... | @@ -525,6 +526,7 @@ delVarEnv :: VarEnv a -> Var -> VarEnv a |
| 525 | 526 | minusVarEnv :: VarEnv a -> VarEnv b -> VarEnv a
|
| 526 | 527 | plusVarEnv_C :: (a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
|
| 527 | 528 | strictPlusVarEnv_C :: (a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
|
| 529 | +strictPlusVarEnv_C_Directly :: (Unique -> a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
|
|
| 528 | 530 | plusVarEnv_CD :: (a -> a -> a) -> VarEnv a -> a -> VarEnv a -> a -> VarEnv a
|
| 529 | 531 | plusMaybeVarEnv_C :: (a -> a -> Maybe a) -> VarEnv a -> VarEnv a -> VarEnv a
|
| 530 | 532 | mapVarEnv :: (a -> b) -> VarEnv a -> VarEnv b
|
| ... | ... | @@ -552,6 +554,7 @@ extendVarEnv_Acc = addToUFM_Acc |
| 552 | 554 | extendVarEnvList = addListToUFM
|
| 553 | 555 | plusVarEnv_C = plusUFM_C
|
| 554 | 556 | strictPlusVarEnv_C = strictPlusUFM_C
|
| 557 | +strictPlusVarEnv_C_Directly = strictPlusUFM_C_Directly
|
|
| 555 | 558 | plusVarEnv_CD = plusUFM_CD
|
| 556 | 559 | plusMaybeVarEnv_C = plusMaybeUFM_C
|
| 557 | 560 | delVarEnvList = delListFromUFM
|
| ... | ... | @@ -195,7 +195,7 @@ For example: :: |
| 195 | 195 | g3c :: Int -> forall x y. y -> x -> x
|
| 196 | 196 | |
| 197 | 197 | f4 :: (Int -> forall a. (Eq a, Show a) => a -> a) -> Bool
|
| 198 | - g4 :: Int -> forall x. (Show x, Eq x) => x -> x) -> Bool
|
|
| 198 | + g4 :: Int -> forall x. (Show x, Eq x) => x -> x
|
|
| 199 | 199 | |
| 200 | 200 | Then the application ``f3 g3a`` is well-typed, because ``g3a`` has a type that matches the type
|
| 201 | 201 | expected by ``f3``. But ``f3 g3b`` is not well typed, because the foralls are in different places.
|
| ... | ... | @@ -680,7 +680,7 @@ thus: :: |
| 680 | 680 | When doing so, we (optionally) may drop the "``family``" keyword.
|
| 681 | 681 | |
| 682 | 682 | The type parameters must all be type variables, of course, and some (but
|
| 683 | -not necessarily all) of then can be the class parameters. Each class
|
|
| 683 | +not necessarily all) of them can be the class parameters. Each class
|
|
| 684 | 684 | parameter may only be used at most once per associated type, but some
|
| 685 | 685 | may be omitted and they may be in an order other than in the class head.
|
| 686 | 686 | Hence, the following contrived example is admissible: ::
|
| ... | ... | @@ -1325,10 +1325,7 @@ as such you shouldn't need to set any of them explicitly. A flag |
| 1325 | 1325 | :reverse: -fno-polymorphic-specialisation
|
| 1326 | 1326 | :category:
|
| 1327 | 1327 | |
| 1328 | - :default: off
|
|
| 1329 | - |
|
| 1330 | - Warning, this feature is highly experimental and may lead to incorrect runtime
|
|
| 1331 | - results. Use at your own risk (:ghc-ticket:`23469`, :ghc-ticket:`23109`, :ghc-ticket:`21229`, :ghc-ticket:`23445`).
|
|
| 1328 | + :default: on
|
|
| 1332 | 1329 | |
| 1333 | 1330 | Enable specialisation of function calls to known dictionaries with free type variables.
|
| 1334 | 1331 | The created specialisation will abstract over the type variables free in the dictionary.
|
| ... | ... | @@ -491,13 +491,7 @@ endEventLogging(void) |
| 491 | 491 | |
| 492 | 492 | eventlog_enabled = false;
|
| 493 | 493 | |
| 494 | - // Flush all events remaining in the buffers.
|
|
| 495 | - //
|
|
| 496 | - // N.B. Don't flush if shutting down: this was done in
|
|
| 497 | - // finishCapEventLogging and the capabilities have already been freed.
|
|
| 498 | - if (getSchedState() != SCHED_SHUTTING_DOWN) {
|
|
| 499 | - flushEventLog(NULL);
|
|
| 500 | - }
|
|
| 494 | + flushEventLog(NULL);
|
|
| 501 | 495 | |
| 502 | 496 | ACQUIRE_LOCK(&eventBufMutex);
|
| 503 | 497 | |
| ... | ... | @@ -1626,15 +1620,24 @@ void flushEventLog(Capability **cap USED_IF_THREADS) |
| 1626 | 1620 | return;
|
| 1627 | 1621 | }
|
| 1628 | 1622 | |
| 1623 | + // N.B. Don't flush if shutting down: this was done in
|
|
| 1624 | + // finishCapEventLogging and the capabilities have already been freed.
|
|
| 1625 | + // This can also race against the shutdown if the flush is triggered by the
|
|
| 1626 | + // ticker thread. (#26573)
|
|
| 1627 | + if (getSchedState() == SCHED_SHUTTING_DOWN) {
|
|
| 1628 | + return;
|
|
| 1629 | + }
|
|
| 1630 | + |
|
| 1629 | 1631 | ACQUIRE_LOCK(&eventBufMutex);
|
| 1630 | 1632 | printAndClearEventBuf(&eventBuf);
|
| 1631 | 1633 | RELEASE_LOCK(&eventBufMutex);
|
| 1632 | 1634 | |
| 1633 | 1635 | #if defined(THREADED_RTS)
|
| 1634 | - Task *task = getMyTask();
|
|
| 1636 | + Task *task = newBoundTask();
|
|
| 1635 | 1637 | stopAllCapabilitiesWith(cap, task, SYNC_FLUSH_EVENT_LOG);
|
| 1636 | 1638 | flushAllCapsEventsBufs();
|
| 1637 | 1639 | releaseAllCapabilities(getNumCapabilities(), cap ? *cap : NULL, task);
|
| 1640 | + exitMyTask();
|
|
| 1638 | 1641 | #else
|
| 1639 | 1642 | flushLocalEventsBuf(getCapability(0));
|
| 1640 | 1643 | #endif
|
| ... | ... | @@ -2,6 +2,11 @@ test('testblockalloc', |
| 2 | 2 | [c_src, only_ways(['normal','threaded1']), extra_run_opts('+RTS -I0')],
|
| 3 | 3 | compile_and_run, [''])
|
| 4 | 4 | |
| 5 | +test('numeric_version_eventlog_flush',
|
|
| 6 | + [ignore_stdout, req_ghc_with_threaded_rts],
|
|
| 7 | + run_command,
|
|
| 8 | + ['{compiler} --numeric-version +RTS -l --eventlog-flush-interval=1 -RTS'])
|
|
| 9 | + |
|
| 5 | 10 | test('testmblockalloc',
|
| 6 | 11 | [c_src, only_ways(['normal','threaded1']), extra_run_opts('+RTS -I0 -xr0.125T'),
|
| 7 | 12 | when(arch('wasm32'), skip)], # MBlocks can't be freed on wasm32, see Note [Megablock allocator on wasm] in rts
|
| 1 | +module T26588 ( getOptionSettingFromText ) where
|
|
| 2 | + |
|
| 3 | +import Control.Applicative ( Const(..) )
|
|
| 4 | +import Data.Map (Map)
|
|
| 5 | +import qualified Data.Map.Strict as Map
|
|
| 6 | + |
|
| 7 | +------------------------------------------------------------------------
|
|
| 8 | +-- ConfigState
|
|
| 9 | + |
|
| 10 | +data ConfigLeaf
|
|
| 11 | +data ConfigTrie = ConfigTrie !(Maybe ConfigLeaf) !ConfigMap
|
|
| 12 | + |
|
| 13 | +type ConfigMap = Map Int ConfigTrie
|
|
| 14 | + |
|
| 15 | +freshLeaf :: [Int] -> ConfigLeaf -> ConfigTrie
|
|
| 16 | +freshLeaf [] l = ConfigTrie (Just l) mempty
|
|
| 17 | +freshLeaf (a:as) l = ConfigTrie Nothing (Map.singleton a (freshLeaf as l))
|
|
| 18 | + |
|
| 19 | +adjustConfigTrie :: Functor t => [Int] -> (Maybe ConfigLeaf -> t (Maybe ConfigLeaf)) -> Maybe (ConfigTrie) -> t (Maybe ConfigTrie)
|
|
| 20 | +adjustConfigTrie as f Nothing = fmap (freshLeaf as) <$> f Nothing
|
|
| 21 | +adjustConfigTrie (a:as) f (Just (ConfigTrie x m)) = Just . ConfigTrie x <$> adjustConfigMap a as f m
|
|
| 22 | +adjustConfigTrie [] f (Just (ConfigTrie x m)) = g <$> f x
|
|
| 23 | + where g Nothing | Map.null m = Nothing
|
|
| 24 | + g x' = Just (ConfigTrie x' m)
|
|
| 25 | + |
|
| 26 | +adjustConfigMap :: Functor t => Int -> [Int] -> (Maybe ConfigLeaf -> t (Maybe ConfigLeaf)) -> ConfigMap -> t ConfigMap
|
|
| 27 | +adjustConfigMap a as f = Map.alterF (adjustConfigTrie as f) a
|
|
| 28 | + |
|
| 29 | +getOptionSettingFromText :: Int -> [Int] -> ConfigMap -> IO ()
|
|
| 30 | +getOptionSettingFromText p ps = getConst . adjustConfigMap p ps f
|
|
| 31 | + where
|
|
| 32 | + f _ = Const (return ()) |
| 1 | +module T26589 ( executeTest ) where
|
|
| 2 | + |
|
| 3 | +-- base
|
|
| 4 | +import Data.Coerce ( coerce )
|
|
| 5 | +import Data.Foldable ( foldMap )
|
|
| 6 | + |
|
| 7 | +--------------------------------------------------------------------------------
|
|
| 8 | + |
|
| 9 | +newtype Traversal f = Traversal { getTraversal :: f () }
|
|
| 10 | + |
|
| 11 | +instance Applicative f => Semigroup (Traversal f) where
|
|
| 12 | + Traversal f1 <> Traversal f2 = Traversal $ f1 *> f2
|
|
| 13 | +instance Applicative f => Monoid (Traversal f) where
|
|
| 14 | + mempty = Traversal $ pure ()
|
|
| 15 | + |
|
| 16 | +newtype Seq a = Seq (FingerTree (Elem a))
|
|
| 17 | +newtype Elem a = Elem { getElem :: a }
|
|
| 18 | + |
|
| 19 | +data FingerTree a
|
|
| 20 | + = EmptyT
|
|
| 21 | + | Deep !a (FingerTree a) !a
|
|
| 22 | + |
|
| 23 | +executeTest :: Seq () -> IO ()
|
|
| 24 | +executeTest fins = destroyResources
|
|
| 25 | + where
|
|
| 26 | + destroyResources :: IO ()
|
|
| 27 | + destroyResources =
|
|
| 28 | + getTraversal $
|
|
| 29 | + flip foldMap1 fins $ \ _ ->
|
|
| 30 | + Traversal $ return ()
|
|
| 31 | + |
|
| 32 | +foldMap1 :: forall m a. Monoid m => (a -> m) -> Seq a -> m
|
|
| 33 | +foldMap1 = coerce (foldMap2 :: (Elem a -> m) -> FingerTree (Elem a) -> m)
|
|
| 34 | + |
|
| 35 | +foldMap2 :: Monoid m => (Elem a -> m) -> FingerTree (Elem a) -> m
|
|
| 36 | +foldMap2 _ EmptyT = mempty
|
|
| 37 | +foldMap2 f' (Deep pr' m' sf') = f' pr' <> foldMapTree f' m' <> f' sf'
|
|
| 38 | + where
|
|
| 39 | + foldMapTree :: Monoid m => (a -> m) -> FingerTree a -> m
|
|
| 40 | + foldMapTree _ EmptyT = mempty
|
|
| 41 | + foldMapTree f (Deep pr m sf) =
|
|
| 42 | + f pr <>
|
|
| 43 | + foldMapTree f m <>
|
|
| 44 | + f sf |
| 1 | 1 | |
| 2 | 2 | ==================== Tidy Core rules ====================
|
| 3 | +"SPEC $c*> @(ST s) @_"
|
|
| 4 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 5 | + $fApplicativeReaderT_$c*> @(ST s) @r $dApplicative
|
|
| 6 | + = ($fApplicativeReaderT2 @s @r)
|
|
| 7 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
|
|
| 8 | + <ReaderT r (ST s) a>_R
|
|
| 9 | + ->_R <ReaderT r (ST s) b>_R
|
|
| 10 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <b>_R)
|
|
| 11 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <b>_N)
|
|
| 12 | + :: Coercible
|
|
| 13 | + (forall a b.
|
|
| 14 | + ReaderT r (ST s) a -> ReaderT r (ST s) b -> r -> STRep s b)
|
|
| 15 | + (forall a b.
|
|
| 16 | + ReaderT r (ST s) a -> ReaderT r (ST s) b -> ReaderT r (ST s) b))
|
|
| 17 | +"SPEC $c<$ @(ST s) @_"
|
|
| 18 | + forall (@s) (@r) ($dFunctor :: Functor (ST s)).
|
|
| 19 | + $fFunctorReaderT_$c<$ @(ST s) @r $dFunctor
|
|
| 20 | + = ($fApplicativeReaderT6 @s @r)
|
|
| 21 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
|
|
| 22 | + <a>_R
|
|
| 23 | + ->_R <ReaderT r (ST s) b>_R
|
|
| 24 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
|
|
| 25 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
|
|
| 26 | + :: Coercible
|
|
| 27 | + (forall a b. a -> ReaderT r (ST s) b -> r -> STRep s a)
|
|
| 28 | + (forall a b. a -> ReaderT r (ST s) b -> ReaderT r (ST s) a))
|
|
| 29 | +"SPEC $c<* @(ST s) @_"
|
|
| 30 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 31 | + $fApplicativeReaderT_$c<* @(ST s) @r $dApplicative
|
|
| 32 | + = ($fApplicativeReaderT1 @s @r)
|
|
| 33 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
|
|
| 34 | + <ReaderT r (ST s) a>_R
|
|
| 35 | + ->_R <ReaderT r (ST s) b>_R
|
|
| 36 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
|
|
| 37 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
|
|
| 38 | + :: Coercible
|
|
| 39 | + (forall a b.
|
|
| 40 | + ReaderT r (ST s) a -> ReaderT r (ST s) b -> r -> STRep s a)
|
|
| 41 | + (forall a b.
|
|
| 42 | + ReaderT r (ST s) a -> ReaderT r (ST s) b -> ReaderT r (ST s) a))
|
|
| 43 | +"SPEC $c<*> @(ST s) @_"
|
|
| 44 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 45 | + $fApplicativeReaderT9 @(ST s) @r $dApplicative
|
|
| 46 | + = ($fApplicativeReaderT4 @s @r)
|
|
| 47 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
|
|
| 48 | + <ReaderT r (ST s) (a -> b)>_R
|
|
| 49 | + ->_R <ReaderT r (ST s) a>_R
|
|
| 50 | + ->_R <r>_R
|
|
| 51 | + ->_R Sym (N:ST <s>_N <b>_R)
|
|
| 52 | + :: Coercible
|
|
| 53 | + (forall a b.
|
|
| 54 | + ReaderT r (ST s) (a -> b) -> ReaderT r (ST s) a -> r -> STRep s b)
|
|
| 55 | + (forall a b.
|
|
| 56 | + ReaderT r (ST s) (a -> b) -> ReaderT r (ST s) a -> r -> ST s b))
|
|
| 57 | +"SPEC $c>> @(ST s) @_"
|
|
| 58 | + forall (@s) (@r) ($dMonad :: Monad (ST s)).
|
|
| 59 | + $fMonadReaderT1 @(ST s) @r $dMonad
|
|
| 60 | + = $fMonadAbstractIOSTReaderT_$s$c>> @s @r
|
|
| 61 | +"SPEC $c>>= @(ST s) @_"
|
|
| 62 | + forall (@s) (@r) ($dMonad :: Monad (ST s)).
|
|
| 63 | + $fMonadReaderT2 @(ST s) @r $dMonad
|
|
| 64 | + = ($fMonadAbstractIOSTReaderT2 @s @r)
|
|
| 65 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
|
|
| 66 | + <ReaderT r (ST s) a>_R
|
|
| 67 | + ->_R <a -> ReaderT r (ST s) b>_R
|
|
| 68 | + ->_R <r>_R
|
|
| 69 | + ->_R Sym (N:ST <s>_N <b>_R)
|
|
| 70 | + :: Coercible
|
|
| 71 | + (forall a b.
|
|
| 72 | + ReaderT r (ST s) a -> (a -> ReaderT r (ST s) b) -> r -> STRep s b)
|
|
| 73 | + (forall a b.
|
|
| 74 | + ReaderT r (ST s) a -> (a -> ReaderT r (ST s) b) -> r -> ST s b))
|
|
| 75 | +"SPEC $cfmap @(ST s) @_"
|
|
| 76 | + forall (@s) (@r) ($dFunctor :: Functor (ST s)).
|
|
| 77 | + $fFunctorReaderT_$cfmap @(ST s) @r $dFunctor
|
|
| 78 | + = ($fApplicativeReaderT7 @s @r)
|
|
| 79 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
|
|
| 80 | + <a -> b>_R
|
|
| 81 | + ->_R <ReaderT r (ST s) a>_R
|
|
| 82 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <b>_R)
|
|
| 83 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <b>_N)
|
|
| 84 | + :: Coercible
|
|
| 85 | + (forall a b. (a -> b) -> ReaderT r (ST s) a -> r -> STRep s b)
|
|
| 86 | + (forall a b. (a -> b) -> ReaderT r (ST s) a -> ReaderT r (ST s) b))
|
|
| 87 | +"SPEC $cliftA2 @(ST s) @_"
|
|
| 88 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 89 | + $fApplicativeReaderT_$cliftA2 @(ST s) @r $dApplicative
|
|
| 90 | + = ($fApplicativeReaderT3 @s @r)
|
|
| 91 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N) (c ::~ <*>_N).
|
|
| 92 | + <a -> b -> c>_R
|
|
| 93 | + ->_R <ReaderT r (ST s) a>_R
|
|
| 94 | + ->_R <ReaderT r (ST s) b>_R
|
|
| 95 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <c>_R)
|
|
| 96 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <c>_N)
|
|
| 97 | + :: Coercible
|
|
| 98 | + (forall a b c.
|
|
| 99 | + (a -> b -> c)
|
|
| 100 | + -> ReaderT r (ST s) a -> ReaderT r (ST s) b -> r -> STRep s c)
|
|
| 101 | + (forall a b c.
|
|
| 102 | + (a -> b -> c)
|
|
| 103 | + -> ReaderT r (ST s) a -> ReaderT r (ST s) b -> ReaderT r (ST s) c))
|
|
| 104 | +"SPEC $cp1Applicative @(ST s) @_"
|
|
| 105 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 106 | + $fApplicativeReaderT_$cp1Applicative @(ST s) @r $dApplicative
|
|
| 107 | + = $fApplicativeReaderT_$s$fFunctorReaderT @s @r
|
|
| 108 | +"SPEC $cp1Monad @(ST s) @_"
|
|
| 109 | + forall (@s) (@r) ($dMonad :: Monad (ST s)).
|
|
| 110 | + $fMonadReaderT_$cp1Monad @(ST s) @r $dMonad
|
|
| 111 | + = $fApplicativeReaderT_$s$fApplicativeReaderT @s @r
|
|
| 112 | +"SPEC $cpure @(ST s) @_"
|
|
| 113 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 114 | + $fApplicativeReaderT_$cpure @(ST s) @r $dApplicative
|
|
| 115 | + = ($fApplicativeReaderT5 @s @r)
|
|
| 116 | + `cast` (forall (a ::~ <*>_N).
|
|
| 117 | + <a>_R
|
|
| 118 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
|
|
| 119 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
|
|
| 120 | + :: Coercible
|
|
| 121 | + (forall a. a -> r -> STRep s a)
|
|
| 122 | + (forall a. a -> ReaderT r (ST s) a))
|
|
| 123 | +"SPEC $creturn @(ST s) @_"
|
|
| 124 | + forall (@s) (@r) ($dMonad :: Monad (ST s)).
|
|
| 125 | + $fMonadReaderT_$creturn @(ST s) @r $dMonad
|
|
| 126 | + = ($fApplicativeReaderT5 @s @r)
|
|
| 127 | + `cast` (forall (a ::~ <*>_N).
|
|
| 128 | + <a>_R
|
|
| 129 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
|
|
| 130 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
|
|
| 131 | + :: Coercible
|
|
| 132 | + (forall a. a -> r -> STRep s a)
|
|
| 133 | + (forall a. a -> ReaderT r (ST s) a))
|
|
| 134 | +"SPEC $fApplicativeReaderT @(ST s) @_"
|
|
| 135 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 136 | + $fApplicativeReaderT @(ST s) @r $dApplicative
|
|
| 137 | + = $fApplicativeReaderT_$s$fApplicativeReaderT @s @r
|
|
| 138 | +"SPEC $fFunctorReaderT @(ST s) @_"
|
|
| 139 | + forall (@s) (@r) ($dFunctor :: Functor (ST s)).
|
|
| 140 | + $fFunctorReaderT @(ST s) @r $dFunctor
|
|
| 141 | + = $fApplicativeReaderT_$s$fFunctorReaderT @s @r
|
|
| 142 | +"SPEC $fMonadReaderT @(ST s) @_"
|
|
| 143 | + forall (@s) (@r) ($dMonad :: Monad (ST s)).
|
|
| 144 | + $fMonadReaderT @(ST s) @r $dMonad
|
|
| 145 | + = $fMonadAbstractIOSTReaderT_$s$fMonadReaderT @s @r
|
|
| 3 | 146 | "USPEC useAbstractMonad @(ReaderT Int (ST s))"
|
| 4 | 147 | forall (@s)
|
| 5 | 148 | ($dMonadAbstractIOST :: MonadAbstractIOST (ReaderT Int (ST s))).
|
| ... | ... | @@ -544,6 +544,9 @@ test('T25883b', normal, compile_grep_core, ['']) |
| 544 | 544 | test('T25883c', normal, compile_grep_core, [''])
|
| 545 | 545 | test('T25883d', [extra_files(['T25883d_import.hs'])], multimod_compile_filter, ['T25883d', '-O -ddump-simpl -dno-typeable-binds -dsuppress-all -dsuppress-uniques', r'grep -e "y ="'])
|
| 546 | 546 | |
| 547 | +test('T26588', normal, compile, ['-package containers -O'])
|
|
| 548 | +test('T26589', normal, compile, ['-O'])
|
|
| 549 | + |
|
| 547 | 550 | test('T25976', [grep_errmsg('Dead Code')], compile, ['-O -ddump-simpl -dsuppress-uniques -dno-typeable-binds'])
|
| 548 | 551 | |
| 549 | 552 | test('T25965', normal, compile, ['-O'])
|
| ... | ... | @@ -133,6 +133,124 @@ data U_E1 = U_E1 {-# UNPACK #-} !E1 |
| 133 | 133 | {-# UNPACK #-} !Int8
|
| 134 | 134 | deriving (Show)
|
| 135 | 135 | |
| 136 | +{- In `data U_E`, the {-# UNPACK #-} !E1 gives rise to a pretty clumsy expression
|
|
| 137 | + for the wrapper for U_E1. Here is what it looks like when ther are only 16
|
|
| 138 | + data constructors in E1, and we have just
|
|
| 139 | + data U_E1 = U_E1 {-# UNPACK #-} !E1
|
|
| 140 | + Blimey!
|
|
| 141 | + |
|
| 142 | +Main.$WU_E1
|
|
| 143 | + = \ (conrep_t1N4 [Occ=Once1!] :: Main.E1) ->
|
|
| 144 | + case case conrep_t1N4 of {
|
|
| 145 | + Main.E1_1 ->
|
|
| 146 | + GHC.Internal.Types.(# _| | | | | | | | | | | | | | | #)
|
|
| 147 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 148 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 149 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 150 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 151 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 152 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 153 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 154 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 155 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 156 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 157 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 158 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 159 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 160 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 161 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 162 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 163 | + @(# #)
|
|
| 164 | + @(# #)
|
|
| 165 | + @(# #)
|
|
| 166 | + @(# #)
|
|
| 167 | + @(# #)
|
|
| 168 | + @(# #)
|
|
| 169 | + @(# #)
|
|
| 170 | + @(# #)
|
|
| 171 | + @(# #)
|
|
| 172 | + @(# #)
|
|
| 173 | + @(# #)
|
|
| 174 | + @(# #)
|
|
| 175 | + @(# #)
|
|
| 176 | + @(# #)
|
|
| 177 | + @(# #)
|
|
| 178 | + @(# #)
|
|
| 179 | + GHC.Internal.Types.(##);
|
|
| 180 | + Main.E1_2 ->
|
|
| 181 | + GHC.Internal.Types.(# |_| | | | | | | | | | | | | | #)
|
|
| 182 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 183 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 184 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 185 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 186 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 187 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 188 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 189 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 190 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 191 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 192 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 193 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 194 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 195 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 196 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 197 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 198 | + @(# #)
|
|
| 199 | + @(# #)
|
|
| 200 | + @(# #)
|
|
| 201 | + @(# #)
|
|
| 202 | + @(# #)
|
|
| 203 | + @(# #)
|
|
| 204 | + @(# #)
|
|
| 205 | + @(# #)
|
|
| 206 | + @(# #)
|
|
| 207 | + @(# #)
|
|
| 208 | + @(# #)
|
|
| 209 | + @(# #)
|
|
| 210 | + @(# #)
|
|
| 211 | + @(# #)
|
|
| 212 | + @(# #)
|
|
| 213 | + @(# #)
|
|
| 214 | + GHC.Internal.Types.(##);
|
|
| 215 | + Main.E1_3 ->
|
|
| 216 | + GHC.Internal.Types.(# | |_| | | | | | | | | | | | | #)
|
|
| 217 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 218 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 219 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 220 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 221 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 222 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 223 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 224 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 225 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 226 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 227 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 228 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 229 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 230 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 231 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 232 | + @GHC.Internal.Types.ZeroBitRep
|
|
| 233 | + @(# #)
|
|
| 234 | + @(# #)
|
|
| 235 | + @(# #)
|
|
| 236 | + @(# #)
|
|
| 237 | + @(# #)
|
|
| 238 | + @(# #)
|
|
| 239 | + @(# #)
|
|
| 240 | + @(# #)
|
|
| 241 | + @(# #)
|
|
| 242 | + @(# #)
|
|
| 243 | + @(# #)
|
|
| 244 | + @(# #)
|
|
| 245 | + @(# #)
|
|
| 246 | + @(# #)
|
|
| 247 | + @(# #)
|
|
| 248 | + @(# #)
|
|
| 249 | + GHC.Internal.Types.(##);
|
|
| 250 | + |
|
| 251 | + ... etc ....
|
|
| 252 | +-}
|
|
| 253 | + |
|
| 136 | 254 | data U_E2 = U_E2 {-# UNPACK #-} !E2
|
| 137 | 255 | {-# UNPACK #-} !Int8
|
| 138 | 256 | {-# UNPACK #-} !Int8
|
| ... | ... | @@ -19,6 +19,13 @@ |
| 19 | 19 | {-# LANGUAGE UndecidableInstances #-} -- For the (StmtLR GhcPs GhcPs (LocatedA (body GhcPs))) ExactPrint instance
|
| 20 | 20 | {-# OPTIONS_GHC -Wno-incomplete-uni-patterns -Wno-incomplete-record-updates #-}
|
| 21 | 21 | |
| 22 | +-- We switch off specialisation in this module. Otherwise we get lots of functions
|
|
| 23 | +-- specialised on lots of (GHC syntax tree) data types. Compilation time allocation
|
|
| 24 | +-- (at least with -fpolymorphic-specialisation; see !15058) blows up from 17G to 108G.
|
|
| 25 | +-- Bad! ExactPrint is not a performance-critical module so it's not worth taking the
|
|
| 26 | +-- largely-fruitless hit in compile time.
|
|
| 27 | +{-# OPTIONS_GHC -fno-specialise #-}
|
|
| 28 | + |
|
| 22 | 29 | module ExactPrint
|
| 23 | 30 | (
|
| 24 | 31 | ExactPrint(..)
|
| ... | ... | @@ -8,7 +8,6 @@ module Test.Haddock |
| 8 | 8 | ) where
|
| 9 | 9 | |
| 10 | 10 | import Control.Monad
|
| 11 | -import qualified Data.ByteString.Char8 as BS
|
|
| 12 | 11 | import qualified Data.Map.Strict as Map
|
| 13 | 12 | import Data.Foldable (for_)
|
| 14 | 13 | import Data.Maybe
|
| ... | ... | @@ -211,7 +210,7 @@ checkFile cfg file = do |
| 211 | 210 | ccfg = cfgCheckConfig cfg
|
| 212 | 211 | dcfg = cfgDirConfig cfg
|
| 213 | 212 | |
| 214 | --- We use ByteString here to ensure that no lazy I/O is performed.
|
|
| 213 | +-- We use readFile' here to ensure that no lazy I/O is performed.
|
|
| 215 | 214 | -- This way to ensure that the reference file isn't held open in
|
| 216 | 215 | -- case after `diffFile` (which is problematic if we need to rewrite
|
| 217 | 216 | -- the reference file in `maybeAcceptFile`)
|
| ... | ... | @@ -219,8 +218,8 @@ checkFile cfg file = do |
| 219 | 218 | -- | Read the reference artifact for a test
|
| 220 | 219 | readRef :: Config c -> FilePath -> IO (Maybe c)
|
| 221 | 220 | readRef cfg file =
|
| 222 | - ccfgRead ccfg . BS.unpack
|
|
| 223 | - <$> BS.readFile (refFile dcfg file)
|
|
| 221 | + ccfgRead ccfg
|
|
| 222 | + <$> readFile' (refFile dcfg file)
|
|
| 224 | 223 | where
|
| 225 | 224 | ccfg = cfgCheckConfig cfg
|
| 226 | 225 | dcfg = cfgDirConfig cfg
|
| ... | ... | @@ -228,8 +227,8 @@ readRef cfg file = |
| 228 | 227 | -- | Read (and clean) the test output artifact for a test
|
| 229 | 228 | readOut :: Config c -> (DirConfig -> FilePath) -> FilePath -> IO c
|
| 230 | 229 | readOut cfg dcfgDir file = do
|
| 231 | - res <- fmap (ccfgClean ccfg file) . ccfgRead ccfg . BS.unpack
|
|
| 232 | - <$> BS.readFile outFile
|
|
| 230 | + res <- fmap (ccfgClean ccfg file) . ccfgRead ccfg
|
|
| 231 | + <$> readFile' outFile
|
|
| 233 | 232 | case res of
|
| 234 | 233 | Just out -> return out
|
| 235 | 234 | Nothing -> error $ "Failed to parse output file: " ++ outFile
|