Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC

Commits:

26 changed files:

Changes:

  • compiler/GHC/Core/Make.hs
    ... ... @@ -151,37 +151,28 @@ mkCoreConWrapApps con args = mkCoreApps (Var (dataConWrapId con)) args
    151 151
     
    
    152 152
     -- | Construct an expression which represents the application of a number of
    
    153 153
     -- expressions to another. The leftmost expression in the list is applied first
    
    154
    -mkCoreApps :: CoreExpr -- ^ function
    
    154
    +-- See Note [Assertion checking in mkCoreApp]
    
    155
    +mkCoreApps :: CoreExpr   -- ^ function
    
    155 156
                -> [CoreExpr] -- ^ arguments
    
    156 157
                -> CoreExpr
    
    157
    -mkCoreApps fun args
    
    158
    -  = fst $
    
    159
    -    foldl' (mkCoreAppTyped doc_string) (fun, fun_ty) args
    
    160
    -  where
    
    161
    -    doc_string = ppr fun_ty $$ ppr fun $$ ppr args
    
    162
    -    fun_ty = exprType fun
    
    158
    +mkCoreApps fun args = foldl' mkCoreApp fun args
    
    163 159
     
    
    164 160
     -- | Construct an expression which represents the application of one expression
    
    165 161
     -- to the other
    
    166
    -mkCoreApp :: SDoc
    
    167
    -          -> CoreExpr -- ^ function
    
    162
    +-- See Note [Assertion checking in mkCoreApp]
    
    163
    +mkCoreApp :: CoreExpr -- ^ function
    
    168 164
               -> CoreExpr -- ^ argument
    
    169 165
               -> CoreExpr
    
    170
    -mkCoreApp s fun arg
    
    171
    -  = fst $ mkCoreAppTyped s (fun, exprType fun) arg
    
    172
    -
    
    173
    --- | Construct an expression which represents the application of one expression
    
    174
    --- paired with its type to an argument. The result is paired with its type. This
    
    175
    --- function is not exported and used in the definition of 'mkCoreApp' and
    
    176
    --- 'mkCoreApps'.
    
    177
    -mkCoreAppTyped :: SDoc -> (CoreExpr, Type) -> CoreExpr -> (CoreExpr, Type)
    
    178
    -mkCoreAppTyped _ (fun, fun_ty) (Type ty)
    
    179
    -  = (App fun (Type ty), piResultTy fun_ty ty)
    
    180
    -mkCoreAppTyped _ (fun, fun_ty) (Coercion co)
    
    181
    -  = (App fun (Coercion co), funResultTy fun_ty)
    
    182
    -mkCoreAppTyped d (fun, fun_ty) arg
    
    183
    -  = assertPpr (isFunTy fun_ty) (ppr fun $$ ppr arg $$ d)
    
    184
    -    (App fun arg, funResultTy fun_ty)
    
    166
    +mkCoreApp fun arg = App fun arg
    
    167
    +
    
    168
    +{- Note [Assertion checking in mkCoreApp]
    
    169
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    170
    +At one time we had an assertion to check that the function and argument type match up,
    
    171
    +but that turned out to take 90% of all compile time (!) when compiling test
    
    172
    +`unboxedsums/UbxSumUnpackedSize.hs`. The reason was an unboxed sum constructor with
    
    173
    +hundreds of foralls.   It's most straightforward just to remove the assert, and
    
    174
    +rely on Lint to discover any mis-constructed terms.
    
    175
    +-}
    
    185 176
     
    
    186 177
     {- *********************************************************************
    
    187 178
     *                                                                      *
    

  • compiler/GHC/Core/Opt/Arity.hs
    ... ... @@ -2993,12 +2993,12 @@ pushCoValArg co
    2993 2993
         Pair tyL tyR = coercionKind co
    
    2994 2994
     
    
    2995 2995
     pushCoercionIntoLambda
    
    2996
    -    :: HasDebugCallStack => Subst -> InVar -> InExpr -> OutCoercionR -> Maybe (OutVar, OutExpr)
    
    2996
    +    :: HasDebugCallStack => InScopeSet -> Var -> CoreExpr -> CoercionR -> Maybe (Var, CoreExpr)
    
    2997 2997
     -- This implements the Push rule from the paper on coercions
    
    2998 2998
     --    (\x. e) |> co
    
    2999 2999
     -- ===>
    
    3000 3000
     --    (\x'. e |> co')
    
    3001
    -pushCoercionIntoLambda subst x e co
    
    3001
    +pushCoercionIntoLambda in_scope x e co
    
    3002 3002
         | assert (not (isTyVar x) && not (isCoVar x)) True
    
    3003 3003
         , Pair s1s2 t1t2 <- coercionKind co
    
    3004 3004
         , Just {}              <- splitFunTy_maybe s1s2
    
    ... ... @@ -3011,9 +3011,9 @@ pushCoercionIntoLambda subst x e co
    3011 3011
               -- Should we optimize the coercions here?
    
    3012 3012
               -- Otherwise they might not match too well
    
    3013 3013
               x' = x `setIdType` t1 `setIdMult` w1
    
    3014
    -          in_scope' = substInScopeSet subst `extendInScopeSet` x'
    
    3014
    +          in_scope' = in_scope `extendInScopeSet` x'
    
    3015 3015
               subst' =
    
    3016
    -            extendIdSubst (setInScope subst in_scope')
    
    3016
    +            extendIdSubst (setInScope emptySubst in_scope')
    
    3017 3017
                   x
    
    3018 3018
                   (mkCast (Var x') (mkSymCo co1))
    
    3019 3019
                 -- We substitute x' for x, except we need to preserve types.
    

  • compiler/GHC/Core/Opt/OccurAnal.hs
    ... ... @@ -66,7 +66,6 @@ import GHC.Builtin.Names( runRWKey )
    66 66
     import GHC.Unit.Module( Module )
    
    67 67
     
    
    68 68
     import Data.List (mapAccumL)
    
    69
    -import Data.List.NonEmpty (NonEmpty (..))
    
    70 69
     
    
    71 70
     {-
    
    72 71
     ************************************************************************
    
    ... ... @@ -660,18 +659,35 @@ through A, so it should have ManyOcc. Bear this case in mind!
    660 659
     * In occ_env, the new (occ_join_points :: IdEnv OccInfoEnv) maps
    
    661 660
       each in-scope non-recursive join point, such as `j` above, to
    
    662 661
       a "zeroed form" of its RHS's usage details. The "zeroed form"
    
    662
    +    * has only occ_nested_lets in its domain  (see (W4) below)
    
    663 663
         * deletes ManyOccs
    
    664 664
         * maps a OneOcc to OneOcc{ occ_n_br = 0 }
    
    665
    -  In our example, occ_join_points will be extended with
    
    665
    +  In our example, assuming `v` is locally-let-bound, occ_join_points will
    
    666
    +  be extended with
    
    666 667
           [j :-> [v :-> OneOcc{occ_n_br=0}]]
    
    667
    -  See addJoinPoint.
    
    668
    +  See `addJoinPoint` and (W4) below.
    
    668 669
     
    
    669 670
     * At an occurrence of a join point, we do everything as normal, but add in the
    
    670 671
       UsageDetails from the occ_join_points.  See mkOneOcc.
    
    671 672
     
    
    672
    -* Crucially, at the NonRec binding of the join point, in `occAnalBind`, we use
    
    673
    -  `orUDs`, not `andUDs` to combine the usage from the RHS with the usage from
    
    674
    -  the body.
    
    673
    +* Crucially, at the NonRec binding of a join point `j`, in `occAnalBind`,
    
    674
    +  we use `combineJoinPointUDs`, not `andUDs` to combine the usage from the
    
    675
    +  RHS with the usage from the body.  `combineJoinPointUDs` behaves like this:
    
    676
    +
    
    677
    +   * For all variables than `occ_nested_lets`, use `andUDs`, just like for
    
    678
    +     any normal let-binding.
    
    679
    +
    
    680
    +   * But for a variable `v` in `occ_nested_lets`, use `orUDs`:
    
    681
    +     - If `v` occurs `ManyOcc` in the join-point RHS, the variable won't be in
    
    682
    +       `occ_join_points`; but we'll get `ManyOcc` anyway.
    
    683
    +     - If `v` occurs `OneOcc` in the join-point RHS, the variable will be in
    
    684
    +       `occ_join_points` and we'll thereby get a `OneOcc{occ_n_br=0}` from
    
    685
    +       each of j's tail calls.  We can `or` that with the `OncOcc{occ_n_br=n}`
    
    686
    +       from j's RHS.
    
    687
    +
    
    688
    +  The only reason for `occ_nested_lets` is to reduce the size of the info
    
    689
    +  duplicate at each tail call; see (W4). It would sound to put *all* variables
    
    690
    +  into `occ_nested_lets`.
    
    675 691
     
    
    676 692
     Here are the consequences
    
    677 693
     
    
    ... ... @@ -682,13 +698,14 @@ Here are the consequences
    682 698
       There are two lexical occurrences of `v`!
    
    683 699
       (NB: `orUDs` adds occ_n_br together, so occ_n_br=1 is impossible, too.)
    
    684 700
     
    
    685
    -* In the tricky (P3) we'll get an `andUDs` of
    
    686
    -    * OneOcc{occ_n_br=0} from the occurrences of `j`)
    
    701
    +* In the tricky (P3), when analysing `case (f v) of ...`, we'll get
    
    702
    +  an `andUDs` of
    
    703
    +    * OneOcc{occ_n_br=0} from the occurrences of `j`
    
    687 704
         * OneOcc{occ_n_br=1} from the (f v)
    
    688 705
       These are `andUDs` together in `addOccInfo`, and hence
    
    689 706
       `v` gets ManyOccs, just as it should.  Clever!
    
    690 707
     
    
    691
    -There are a couple of tricky wrinkles
    
    708
    +There are, of course, some tricky wrinkles
    
    692 709
     
    
    693 710
     (W1) Consider this example which shadows `j`:
    
    694 711
               join j = rhs in
    
    ... ... @@ -718,6 +735,8 @@ There are a couple of tricky wrinkles
    718 735
          * In `postprcess_uds`, we add the chucked-out join points to the
    
    719 736
            returned UsageDetails, with `andUDs`.
    
    720 737
     
    
    738
    +Wrinkles (W1) and (W2) are very similar to Note [Binder swap] (BS3).
    
    739
    +
    
    721 740
     (W3) Consider this example, which shadows `j`, but this time in an argument
    
    722 741
                   join j = rhs
    
    723 742
                   in f (case x of { K j -> ...; ... })
    
    ... ... @@ -732,12 +751,36 @@ There are a couple of tricky wrinkles
    732 751
          NB: this is just about efficiency: it is always safe /not/ to zap the
    
    733 752
          occ_join_points.
    
    734 753
     
    
    735
    -(W4) What if the join point binding has a stable unfolding, or RULES?
    
    736
    -     They are just alternative right-hand sides, and at each call site we
    
    737
    -     will use only one of them. So again, we can use `orUDs` to combine
    
    738
    -     usage info from all these alternatives RHSs.
    
    739
    -
    
    740
    -Wrinkles (W1) and (W2) are very similar to Note [Binder swap] (BS3).
    
    754
    +(W4) Other things being equal, we want keep the OccInfoEnv stored in
    
    755
    +  `occ_join_points` as small as possible, because it is /duplicated/ at
    
    756
    +  /every occurrence/ of the join point.  We really only want to include
    
    757
    +  OccInfo for
    
    758
    +       * Local, non-recursive let-bound Ids
    
    759
    +       * that occur just once in the RHS of the join point
    
    760
    +  particularly including
    
    761
    +       * thunks (that's the original point) and
    
    762
    +       * join points (so that the trick works recursively).
    
    763
    +  We call these the "tracked Ids of j".
    
    764
    +
    
    765
    +  Including lambda binders is pointless, and slows down the occurrence analyser.
    
    766
    +
    
    767
    +  e.g.    \x. let y = x+1 in
    
    768
    +              join j v = ..x..y..(f z z)..
    
    769
    +              in ...
    
    770
    +  In the `occ_join_points` binding for `j`, we want to track `y`, but
    
    771
    +  not `x` (lambda bound) nor `z` (occurs many times).
    
    772
    +
    
    773
    +  To exploit this:
    
    774
    +     * `occ_nested_lets` tracks which Ids are
    
    775
    +              nested (not-top-level), non-recursive lets
    
    776
    +     * `addJoinPoint` only populates j's entry with occ-info for the "tracked Ids"
    
    777
    +       of `j`; that is, that are (a) in occ_nested_lets and (b) have OneOcc.
    
    778
    +     * `combineJoinPointUDs` uses
    
    779
    +          orLocalOcc  for local-let Ids
    
    780
    +          andLocalOcc for non-local-let Ids
    
    781
    +
    
    782
    +  This fancy footwork can matter in extreme cases: it gave a 25% reduction in
    
    783
    +  total compiler allocation in #26425..
    
    741 784
     
    
    742 785
     Note [Finding join points]
    
    743 786
     ~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -759,45 +802,45 @@ rest of 'OccInfo' until it goes on the binder.
    759 802
     
    
    760 803
     Note [Join arity prediction based on joinRhsArity]
    
    761 804
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    762
    -In general, the join arity from tail occurrences of a join point (O) may be
    
    763
    -higher or lower than the manifest join arity of the join body (M). E.g.,
    
    805
    +In general, the join arity from tail occurrences of a join point (OAr) may be
    
    806
    +higher or lower than the manifest join arity of the join body (MAr). E.g.,
    
    764 807
     
    
    765
    -  -- M > O:
    
    766
    -  let f x y = x + y              -- M = 2
    
    767
    -  in if b then f 1 else f 2      -- O = 1
    
    808
    +  -- MAr > Oar:
    
    809
    +  let f x y = x + y              -- MAr = 2
    
    810
    +  in if b then f 1 else f 2      -- OAr = 1
    
    768 811
       ==> { Contify for join arity 1 }
    
    769 812
       join f x = \y -> x + y
    
    770 813
       in if b then jump f 1 else jump f 2
    
    771 814
     
    
    772
    -  -- M < O
    
    773
    -  let f = id                     -- M = 0
    
    774
    -  in if ... then f 12 else f 13  -- O = 1
    
    815
    +  -- MAr < Oar
    
    816
    +  let f = id                     -- MAr = 0
    
    817
    +  in if ... then f 12 else f 13  -- OAr = 1
    
    775 818
       ==> { Contify for join arity 1, eta-expand f }
    
    776 819
       join f x = id x
    
    777 820
       in if b then jump f 12 else jump f 13
    
    778 821
     
    
    779
    -But for *recursive* let, it is crucial that both arities match up, consider
    
    822
    +But for *recursive* let, it is crucial MAr=OAr.  Consider:
    
    780 823
     
    
    781 824
       letrec f x y = if ... then f x else True
    
    782 825
       in f 42
    
    783 826
     
    
    784
    -Here, M=2 but O=1. If we settled for a joinrec arity of 1, the recursive jump
    
    827
    +Here, MAr=2 but OAr=1. If we settled for a joinrec arity of 1, the recursive jump
    
    785 828
     would not happen in a tail context! Contification is invalid here.
    
    786
    -So indeed it is crucial to demand that M=O.
    
    829
    +So indeed it is crucial to demand that MAr=OAr.
    
    787 830
     
    
    788
    -(Side note: Actually, we could be more specific: Let O1 be the join arity of
    
    789
    -occurrences from the letrec RHS and O2 the join arity from the let body. Then
    
    790
    -we need M=O1 and M<=O2 and could simply eta-expand the RHS to match O2 later.
    
    791
    -M=O is the specific case where we don't want to eta-expand. Neither the join
    
    831
    +(Side note: Actually, we could be more specific: Let OAr1 be the join arity of
    
    832
    +occurrences from the letrec RHS and OAr2 the join arity from the let body. Then
    
    833
    +we need MAr=OAr1 and MAr<=OAr2 and could simply eta-expand the RHS to match OAr2 later.
    
    834
    +MAr=OAr is the specific case where we don't want to eta-expand. Neither the join
    
    792 835
     points paper nor GHC does this at the moment.)
    
    793 836
     
    
    794 837
     We can capitalise on this observation and conclude that *if* f could become a
    
    795
    -joinrec (without eta-expansion), it will have join arity M.
    
    796
    -Now, M is just the result of 'joinRhsArity', a rather simple, local analysis.
    
    838
    +joinrec (without eta-expansion), it will have join arity MAr.
    
    839
    +Now, MAr is just the result of 'joinRhsArity', a rather simple, local analysis.
    
    797 840
     It is also the join arity inside the 'TailUsageDetails' returned by
    
    798 841
     'occAnalLamTail', so we can predict join arity without doing any fixed-point
    
    799 842
     iteration or really doing any deep traversal of let body or RHS at all.
    
    800
    -We check for M in the 'adjustTailUsage' call inside 'tagRecBinders'.
    
    843
    +We check for MAr in the 'adjustTailUsage' call inside 'tagRecBinders'.
    
    801 844
     
    
    802 845
     All this is quite apparent if you look at the contification transformation in
    
    803 846
     Fig. 5 of "Compiling without Continuations" (which does not account for
    
    ... ... @@ -807,14 +850,14 @@ eta-expansion at all, mind you). The letrec case looks like this
    807 850
         ... and a bunch of conditions establishing that f only occurs
    
    808 851
             in app heads of join arity (len as + len xs) inside us and es ...
    
    809 852
     
    
    810
    -The syntactic form `/\as.\xs. L[us]` forces M=O iff `f` occurs in `us`. However,
    
    853
    +The syntactic form `/\as.\xs. L[us]` forces MAr=OAr iff `f` occurs in `us`. However,
    
    811 854
     for non-recursive functions, this is the definition of contification from the
    
    812 855
     paper:
    
    813 856
     
    
    814 857
       let f = /\as.\xs.u in L[es]     ... conditions ...
    
    815 858
     
    
    816
    -Note that u could be a lambda itself, as we have seen. No relationship between M
    
    817
    -and O to exploit here.
    
    859
    +Note that u could be a lambda itself, as we have seen. No relationship between MAr
    
    860
    +and OAr to exploit here.
    
    818 861
     
    
    819 862
     Note [Join points and unfoldings/rules]
    
    820 863
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -954,6 +997,22 @@ of both functions, serving as a specification:
    954 997
          Cyclic Recursive case:   'tagRecBinders'
    
    955 998
          Acyclic Recursive case:  'adjustNonRecRhs'
    
    956 999
          Non-recursive case:      'adjustNonRecRhs'
    
    1000
    +
    
    1001
    +Note [Unfoldings and RULES]
    
    1002
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1003
    +For let-bindings we treat (stable) unfoldings and RULES as "alternative right hand
    
    1004
    +sides".  That is, it's as if we had
    
    1005
    +  f = case <hiatus> of
    
    1006
    +         1 -> <the-rhs>
    
    1007
    +         2 -> <the-stable-unfolding>
    
    1008
    +         3 -> <rhs of rule1>
    
    1009
    +         4 -> <rhs of rule2>
    
    1010
    +So we combine all these with `orUDs` (#26567).  But actually it makes
    
    1011
    +very little difference whether we use `andUDs` or `orUDs` because of
    
    1012
    +Note [Occurrences in stable unfoldings and RULES]: occurrences in an unfolding
    
    1013
    +or RULE are treated as ManyOcc anyway.
    
    1014
    +
    
    1015
    +But NB that tail-call info is preserved so that we don't thereby lose join points.
    
    957 1016
     -}
    
    958 1017
     
    
    959 1018
     ------------------------------------------------------------------
    
    ... ... @@ -991,24 +1050,24 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine
    991 1050
       | mb_join@(JoinPoint {}) <- idJoinPointHood bndr
    
    992 1051
       = -- Analyse the RHS and /then/ the body
    
    993 1052
         let -- Analyse the rhs first, generating rhs_uds
    
    994
    -        !(rhs_uds_s, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs
    
    995
    -        rhs_uds = foldl1' orUDs rhs_uds_s   -- NB: orUDs.  See (W4) of
    
    996
    -                                           -- Note [Occurrence analysis for join points]
    
    1053
    +        !(rhs_uds, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs
    
    997 1054
     
    
    998 1055
             -- Now analyse the body, adding the join point
    
    999 1056
             -- into the environment with addJoinPoint
    
    1000
    -        !(WUD body_uds (occ, body)) = occAnalNonRecBody env bndr' $ \env ->
    
    1057
    +        env_body = addLocalLet env lvl bndr
    
    1058
    +        !(WUD body_uds (occ, body)) = occAnalNonRecBody env_body bndr' $ \env ->
    
    1001 1059
                                           thing_inside (addJoinPoint env bndr' rhs_uds)
    
    1002 1060
         in
    
    1003 1061
         if isDeadOcc occ     -- Drop dead code; see Note [Dead code]
    
    1004 1062
         then WUD body_uds body
    
    1005
    -    else WUD (rhs_uds `orUDs` body_uds)    -- Note `orUDs`
    
    1063
    +    else WUD (combineJoinPointUDs env rhs_uds body_uds)    -- Note `orUDs`
    
    1006 1064
                  (combine [NonRec (fst (tagNonRecBinder lvl occ bndr')) rhs']
    
    1007 1065
                           body)
    
    1008 1066
     
    
    1009 1067
       -- The normal case, including newly-discovered join points
    
    1010 1068
       -- Analyse the body and /then/ the RHS
    
    1011
    -  | WUD body_uds (occ,body) <- occAnalNonRecBody env bndr thing_inside
    
    1069
    +  | let env_body = addLocalLet env lvl bndr
    
    1070
    +  , WUD body_uds (occ,body) <- occAnalNonRecBody env_body bndr thing_inside
    
    1012 1071
       = if isDeadOcc occ   -- Drop dead code; see Note [Dead code]
    
    1013 1072
         then WUD body_uds body
    
    1014 1073
         else let
    
    ... ... @@ -1017,8 +1076,8 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine
    1017 1076
             -- => join arity O of Note [Join arity prediction based on joinRhsArity]
    
    1018 1077
             (tagged_bndr, mb_join) = tagNonRecBinder lvl occ bndr
    
    1019 1078
     
    
    1020
    -        !(rhs_uds_s, final_bndr, rhs') = occAnalNonRecRhs env lvl ire mb_join tagged_bndr rhs
    
    1021
    -    in WUD (foldr andUDs body_uds rhs_uds_s)      -- Note `andUDs`
    
    1079
    +        !(rhs_uds, final_bndr, rhs') = occAnalNonRecRhs env lvl ire mb_join tagged_bndr rhs
    
    1080
    +    in WUD (rhs_uds `andUDs` body_uds)      -- Note `andUDs`
    
    1022 1081
                (combine [NonRec final_bndr rhs'] body)
    
    1023 1082
     
    
    1024 1083
     -----------------
    
    ... ... @@ -1033,15 +1092,21 @@ occAnalNonRecBody env bndr thing_inside
    1033 1092
     
    
    1034 1093
     -----------------
    
    1035 1094
     occAnalNonRecRhs :: OccEnv -> TopLevelFlag -> ImpRuleEdges
    
    1036
    -                -> JoinPointHood -> Id -> CoreExpr
    
    1037
    -                 -> (NonEmpty UsageDetails, Id, CoreExpr)
    
    1095
    +                 -> JoinPointHood -> Id -> CoreExpr
    
    1096
    +                 -> (UsageDetails, Id, CoreExpr)
    
    1038 1097
     occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs
    
    1039 1098
       | null rules, null imp_rule_infos
    
    1040 1099
       =  -- Fast path for common case of no rules. This is only worth
    
    1041 1100
          -- 0.1% perf on average, but it's also only a line or two of code
    
    1042
    -    ( adj_rhs_uds :| adj_unf_uds : [], final_bndr_no_rules, final_rhs )
    
    1101
    +    ( adj_rhs_uds `orUDs` adj_unf_uds
    
    1102
    +    , final_bndr_no_rules, final_rhs )
    
    1103
    +
    
    1043 1104
       | otherwise
    
    1044
    -  = ( adj_rhs_uds :| adj_unf_uds : adj_rule_uds, final_bndr_with_rules, final_rhs )
    
    1105
    +  = ( foldl' orUDs (adj_rhs_uds `orUDs` adj_unf_uds) adj_rule_uds
    
    1106
    +    , final_bndr_with_rules, final_rhs )
    
    1107
    +
    
    1108
    +    -- orUDs: Combine the RHS, (stable) unfolding, and RULES with orUDs
    
    1109
    +    --        See Note [Unfoldings and RULES]
    
    1045 1110
       where
    
    1046 1111
         --------- Right hand side ---------
    
    1047 1112
         -- For join points, set occ_encl to OccVanilla, via setTailCtxt.  If we have
    
    ... ... @@ -1054,7 +1119,7 @@ occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs
    1054 1119
         rhs_ctxt = mkNonRecRhsCtxt lvl bndr unf
    
    1055 1120
     
    
    1056 1121
         -- See Note [Join arity prediction based on joinRhsArity]
    
    1057
    -    -- Match join arity O from mb_join_arity with manifest join arity M as
    
    1122
    +    -- Match join arity OAr from mb_join_arity with manifest join arity MAr as
    
    1058 1123
         -- returned by of occAnalLamTail. It's totally OK for them to mismatch;
    
    1059 1124
         -- hence adjust the UDs from the RHS
    
    1060 1125
     
    
    ... ... @@ -1764,7 +1829,7 @@ makeNode !env imp_rule_edges bndr_set (bndr, rhs)
    1764 1829
                                    -- here because that is what we are setting!
    
    1765 1830
         WTUD unf_tuds unf' = occAnalUnfolding rhs_env unf
    
    1766 1831
         adj_unf_uds = adjustTailArity (JoinPoint rhs_ja) unf_tuds
    
    1767
    -      -- `rhs_ja` is `joinRhsArity rhs` and is the prediction for source M
    
    1832
    +      -- `rhs_ja` is `joinRhsArity rhs` and is the prediction for source MAr
    
    1768 1833
           -- of Note [Join arity prediction based on joinRhsArity]
    
    1769 1834
     
    
    1770 1835
         --------- IMP-RULES --------
    
    ... ... @@ -1775,7 +1840,7 @@ makeNode !env imp_rule_edges bndr_set (bndr, rhs)
    1775 1840
     
    
    1776 1841
         --------- All rules --------
    
    1777 1842
         -- See Note [Join points and unfoldings/rules]
    
    1778
    -    -- `rhs_ja` is `joinRhsArity rhs'` and is the prediction for source M
    
    1843
    +    -- `rhs_ja` is `joinRhsArity rhs'` and is the prediction for source MAr
    
    1779 1844
         -- of Note [Join arity prediction based on joinRhsArity]
    
    1780 1845
         rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
    
    1781 1846
         rules_w_uds = [ (r,l,adjustTailArity (JoinPoint rhs_ja) rhs_wuds)
    
    ... ... @@ -2177,7 +2242,9 @@ occAnalLamTail :: OccEnv -> CoreExpr -> WithTailUsageDetails CoreExpr
    2177 2242
     -- See Note [Adjusting right-hand sides]
    
    2178 2243
     occAnalLamTail env expr
    
    2179 2244
       = let !(WUD usage expr') = occ_anal_lam_tail env expr
    
    2180
    -    in WTUD (TUD (joinRhsArity expr) usage) expr'
    
    2245
    +    in WTUD (TUD (joinRhsArity expr') usage) expr'
    
    2246
    +       -- If expr looks like (\x. let dead = e in \y. blah), where `dead` is dead
    
    2247
    +       -- then joinRhsArity expr' might exceed joinRhsArity expr
    
    2181 2248
     
    
    2182 2249
     occ_anal_lam_tail :: OccEnv -> CoreExpr -> WithUsageDetails CoreExpr
    
    2183 2250
     -- Does not markInsideLam etc for the outmost batch of lambdas
    
    ... ... @@ -2281,7 +2348,7 @@ occAnalUnfolding !env unf
    2281 2348
                   WTUD (TUD rhs_ja uds) rhs' = occAnalLamTail env rhs
    
    2282 2349
                   unf' = unf { uf_tmpl = rhs' }
    
    2283 2350
                 in WTUD (TUD rhs_ja (markAllMany uds)) unf'
    
    2284
    -              -- markAllMany: see Note [Occurrences in stable unfoldings]
    
    2351
    +              -- markAllMany: see Note [Occurrences in stable unfoldings and RULES]
    
    2285 2352
     
    
    2286 2353
             | otherwise -> WTUD (TUD 0 emptyDetails) unf
    
    2287 2354
                   -- For non-Stable unfoldings we leave them undisturbed, but
    
    ... ... @@ -2319,12 +2386,13 @@ occAnalRule env rule@(Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs })
    2319 2386
                               -- Note [Rules are extra RHSs]
    
    2320 2387
                               -- Note [Rule dependency info]
    
    2321 2388
         rhs_uds' = markAllMany rhs_uds
    
    2389
    +               -- markAllMany: Note [Occurrences in stable unfoldings and RULES]
    
    2322 2390
         rhs_ja = length args -- See Note [Join points and unfoldings/rules]
    
    2323 2391
     
    
    2324 2392
     occAnalRule _ other_rule = (other_rule, emptyDetails, TUD 0 emptyDetails)
    
    2325 2393
     
    
    2326
    -{- Note [Occurrences in stable unfoldings]
    
    2327
    -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    2394
    +{- Note [Occurrences in stable unfoldings and RULES]
    
    2395
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    2328 2396
     Consider
    
    2329 2397
         f p = BIG
    
    2330 2398
         {-# INLINE g #-}
    
    ... ... @@ -2338,7 +2406,7 @@ preinlineUnconditionally here!
    2338 2406
     
    
    2339 2407
     The INLINE pragma says "inline exactly this RHS"; perhaps the
    
    2340 2408
     programmer wants to expose that 'not', say. If we inline f that will make
    
    2341
    -the Stable unfoldign big, and that wasn't what the programmer wanted.
    
    2409
    +the Stable unfolding big, and that wasn't what the programmer wanted.
    
    2342 2410
     
    
    2343 2411
     Another way to think about it: if we inlined g as-is into multiple
    
    2344 2412
     call sites, now there's be multiple calls to f.
    
    ... ... @@ -2347,6 +2415,8 @@ Bottom line: treat all occurrences in a stable unfolding as "Many".
    2347 2415
     We still leave tail call information intact, though, as to not spoil
    
    2348 2416
     potential join points.
    
    2349 2417
     
    
    2418
    +The same goes for RULES.
    
    2419
    +
    
    2350 2420
     Note [Unfoldings and rules]
    
    2351 2421
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    2352 2422
     Generally unfoldings and rules are already occurrence-analysed, so we
    
    ... ... @@ -2598,7 +2668,7 @@ occAnalArgs :: OccEnv -> CoreExpr -> [CoreExpr]
    2598 2668
                 -> WithUsageDetails CoreExpr
    
    2599 2669
     -- The `fun` argument is just an accumulating parameter,
    
    2600 2670
     -- the base for building the application we return
    
    2601
    -occAnalArgs !env fun args !one_shots
    
    2671
    +occAnalArgs env fun args one_shots
    
    2602 2672
       = go emptyDetails fun args one_shots
    
    2603 2673
       where
    
    2604 2674
         env_args = setNonTailCtxt encl env
    
    ... ... @@ -2657,8 +2727,19 @@ Constructors are rather like lambdas in this way.
    2657 2727
     occAnalApp :: OccEnv
    
    2658 2728
                -> (Expr CoreBndr, [Arg CoreBndr], [CoreTickish])
    
    2659 2729
                -> WithUsageDetails (Expr CoreBndr)
    
    2660
    --- Naked variables (not applied) end up here too
    
    2661
    -occAnalApp !env (Var fun, args, ticks)
    
    2730
    +occAnalApp !env (Var fun_id, [], ticks)
    
    2731
    +  = -- Naked variables (not applied) end up here too, and it's worth giving
    
    2732
    +    -- this common case special treatment, because there is so much less to do.
    
    2733
    +    -- This is just a specialised copy of the (Var fun_id) case below
    
    2734
    +    WUD fun_uds (mkTicks ticks fun')
    
    2735
    +  where
    
    2736
    +    !(fun', fun_id')  = lookupBndrSwap env fun_id
    
    2737
    +    !fun_uds = mkOneOcc env fun_id' int_cxt 0
    
    2738
    +    !int_cxt = case occ_encl env of
    
    2739
    +                   OccScrut -> IsInteresting
    
    2740
    +                   _other   -> NotInteresting
    
    2741
    +
    
    2742
    +occAnalApp env (Var fun, args, ticks)
    
    2662 2743
       -- Account for join arity of runRW# continuation
    
    2663 2744
       -- See Note [Simplification of runRW#]
    
    2664 2745
       --
    
    ... ... @@ -2863,7 +2944,11 @@ data OccEnv
    2863 2944
                  -- Invariant: no Id maps to an empty OccInfoEnv
    
    2864 2945
                  -- See Note [Occurrence analysis for join points]
    
    2865 2946
                , occ_join_points :: !JoinPointInfo
    
    2866
    -    }
    
    2947
    +
    
    2948
    +           , occ_nested_lets :: IdSet    -- Non-top-level, non-rec-bound lets
    
    2949
    +                -- I tried making this field strict, but doing so increased
    
    2950
    +                -- compile-time allocation very slightly: 0.1% on average
    
    2951
    +           }
    
    2867 2952
     
    
    2868 2953
     type JoinPointInfo = IdEnv OccInfoEnv
    
    2869 2954
     
    
    ... ... @@ -2914,7 +2999,8 @@ initOccEnv
    2914 2999
     
    
    2915 3000
                , occ_join_points = emptyVarEnv
    
    2916 3001
                , occ_bs_env = emptyVarEnv
    
    2917
    -           , occ_bs_rng = emptyVarSet }
    
    3002
    +           , occ_bs_rng = emptyVarSet
    
    3003
    +           , occ_nested_lets = emptyVarSet }
    
    2918 3004
     
    
    2919 3005
     noBinderSwaps :: OccEnv -> Bool
    
    2920 3006
     noBinderSwaps (OccEnv { occ_bs_env = bs_env }) = isEmptyVarEnv bs_env
    
    ... ... @@ -3154,23 +3240,26 @@ postprocess_uds bndrs bad_joins uds
    3154 3240
           | uniq `elemVarEnvByKey` env = plusVarEnv_C andLocalOcc env join_env
    
    3155 3241
           | otherwise                  = env
    
    3156 3242
     
    
    3243
    +addLocalLet :: OccEnv -> TopLevelFlag -> Id -> OccEnv
    
    3244
    +addLocalLet env@(OccEnv { occ_nested_lets = ids }) top_lvl id
    
    3245
    +  | isTopLevel top_lvl = env
    
    3246
    +  | otherwise          = env { occ_nested_lets = ids `extendVarSet` id }
    
    3247
    +
    
    3157 3248
     addJoinPoint :: OccEnv -> Id -> UsageDetails -> OccEnv
    
    3158
    -addJoinPoint env bndr rhs_uds
    
    3249
    +addJoinPoint env@(OccEnv { occ_join_points = join_points, occ_nested_lets = nested_lets })
    
    3250
    +             join_bndr (UD { ud_env = rhs_occs })
    
    3159 3251
       | isEmptyVarEnv zeroed_form
    
    3160 3252
       = env
    
    3161 3253
       | otherwise
    
    3162
    -  = env { occ_join_points = extendVarEnv (occ_join_points env) bndr zeroed_form }
    
    3254
    +  = env { occ_join_points = extendVarEnv join_points join_bndr zeroed_form }
    
    3163 3255
       where
    
    3164
    -    zeroed_form = mkZeroedForm rhs_uds
    
    3256
    +    zeroed_form = mapMaybeUniqSetToUFM do_one nested_lets
    
    3257
    +     -- See Note [Occurrence analysis for join points] for "zeroed form"
    
    3165 3258
     
    
    3166
    -mkZeroedForm :: UsageDetails -> OccInfoEnv
    
    3167
    --- See Note [Occurrence analysis for join points] for "zeroed form"
    
    3168
    -mkZeroedForm (UD { ud_env = rhs_occs })
    
    3169
    -  = mapMaybeUFM do_one rhs_occs
    
    3170
    -  where
    
    3171
    -    do_one :: LocalOcc -> Maybe LocalOcc
    
    3172
    -    do_one (ManyOccL {})    = Nothing
    
    3173
    -    do_one occ@(OneOccL {}) = Just (occ { lo_n_br = 0 })
    
    3259
    +    do_one :: Var -> Maybe LocalOcc
    
    3260
    +    do_one bndr = case lookupVarEnv rhs_occs bndr of
    
    3261
    +                    Just occ@(OneOccL {}) -> Just (occ { lo_n_br = 0 })
    
    3262
    +                    _                     -> Nothing
    
    3174 3263
     
    
    3175 3264
     --------------------
    
    3176 3265
     transClosureFV :: VarEnv VarSet -> VarEnv VarSet
    
    ... ... @@ -3628,7 +3717,14 @@ data LocalOcc -- See Note [LocalOcc]
    3628 3717
                        -- Combining (AlwaysTailCalled 2) and (AlwaysTailCalled 3)
    
    3629 3718
                        -- gives NoTailCallInfo
    
    3630 3719
                   , lo_int_cxt :: !InterestingCxt }
    
    3720
    +
    
    3631 3721
         | ManyOccL !TailCallInfo
    
    3722
    +       -- Why do we need TailCallInfo on ManyOccL?
    
    3723
    +       -- Answer 1: recursive bindings are entered many times:
    
    3724
    +       --    rec { j x = ...j x'... } in j y
    
    3725
    +       -- See the uses of `andUDs` in `tagRecBinders`
    
    3726
    +       -- Answer 2: occurrences in stable unfoldings are many-ified
    
    3727
    +       --           See Note [Occurrences in stable unfoldings and RULES]
    
    3632 3728
     
    
    3633 3729
     instance Outputable LocalOcc where
    
    3634 3730
       ppr (OneOccL { lo_n_br = n, lo_tail = tci })
    
    ... ... @@ -3651,10 +3747,13 @@ data UsageDetails
    3651 3747
     
    
    3652 3748
     instance Outputable UsageDetails where
    
    3653 3749
       ppr ud@(UD { ud_env = env, ud_z_tail = z_tail })
    
    3654
    -    = text "UD" <+> (braces $ fsep $ punctuate comma $
    
    3655
    -      [ ppr uq <+> text ":->" <+> ppr (lookupOccInfoByUnique ud uq)
    
    3656
    -      | (uq, _) <- nonDetStrictFoldVarEnv_Directly do_one [] env ])
    
    3657
    -      $$ nest 2 (text "ud_z_tail" <+> ppr z_tail)
    
    3750
    +    = text "UD" <> (braces (vcat
    
    3751
    +         [ -- `final` shows the result of a proper lookupOccInfo, returning OccInfo
    
    3752
    +           --         after accounting for `ud_z_tail` etc.
    
    3753
    +           text "final =" <+> (fsep $ punctuate comma $
    
    3754
    +                 [ ppr uq <+> text ":->" <+> ppr (lookupOccInfoByUnique ud uq)
    
    3755
    +                 | (uq, _) <- nonDetStrictFoldVarEnv_Directly do_one [] env ])
    
    3756
    +         , text "ud_z_tail" <+> ppr z_tail ] ))
    
    3658 3757
         where
    
    3659 3758
           do_one :: Unique -> LocalOcc -> [(Unique,LocalOcc)] -> [(Unique,LocalOcc)]
    
    3660 3759
           do_one uniq occ occs = (uniq, occ) : occs
    
    ... ... @@ -3663,7 +3762,7 @@ instance Outputable UsageDetails where
    3663 3762
     -- | TailUsageDetails captures the result of applying 'occAnalLamTail'
    
    3664 3763
     --   to a function `\xyz.body`. The TailUsageDetails pairs together
    
    3665 3764
     --   * the number of lambdas (including type lambdas: a JoinArity)
    
    3666
    ---   * UsageDetails for the `body` of the lambda, unadjusted by `adjustTailUsage`.
    
    3765
    +--   * UsageDetails for the `body` of the lambda, /unadjusted/ by `adjustTailUsage`.
    
    3667 3766
     -- If the binding turns out to be a join point with the indicated join
    
    3668 3767
     -- arity, this unadjusted usage details is just what we need; otherwise we
    
    3669 3768
     -- need to discard tail calls. That's what `adjustTailUsage` does.
    
    ... ... @@ -3681,8 +3780,17 @@ data WithTailUsageDetails a = WTUD !TailUsageDetails !a
    3681 3780
     
    
    3682 3781
     andUDs:: UsageDetails -> UsageDetails -> UsageDetails
    
    3683 3782
     orUDs :: UsageDetails -> UsageDetails -> UsageDetails
    
    3684
    -andUDs = combineUsageDetailsWith andLocalOcc
    
    3685
    -orUDs  = combineUsageDetailsWith orLocalOcc
    
    3783
    +andUDs = combineUsageDetailsWith (\_uniq -> andLocalOcc)
    
    3784
    +orUDs  = combineUsageDetailsWith (\_uniq -> orLocalOcc)
    
    3785
    +
    
    3786
    +combineJoinPointUDs :: OccEnv -> UsageDetails -> UsageDetails -> UsageDetails
    
    3787
    +-- See (W4) in Note [Occurrence analysis for join points]
    
    3788
    +combineJoinPointUDs (OccEnv { occ_nested_lets = nested_lets }) uds1 uds2
    
    3789
    +  = combineUsageDetailsWith combine uds1 uds2
    
    3790
    +  where
    
    3791
    +    combine uniq occ1 occ2
    
    3792
    +      | uniq `elemVarSetByKey` nested_lets = orLocalOcc  occ1 occ2
    
    3793
    +      | otherwise                          = andLocalOcc occ1 occ2
    
    3686 3794
     
    
    3687 3795
     mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails
    
    3688 3796
     mkOneOcc !env id int_cxt arity
    
    ... ... @@ -3699,7 +3807,8 @@ mkOneOcc !env id int_cxt arity
    3699 3807
       = mkSimpleDetails (unitVarEnv id occ)
    
    3700 3808
     
    
    3701 3809
       where
    
    3702
    -    occ = OneOccL { lo_n_br = 1, lo_int_cxt = int_cxt
    
    3810
    +    occ = OneOccL { lo_n_br = 1
    
    3811
    +                  , lo_int_cxt = int_cxt
    
    3703 3812
                       , lo_tail = AlwaysTailCalled arity }
    
    3704 3813
     
    
    3705 3814
     -- Add several occurrences, assumed not to be tail calls
    
    ... ... @@ -3786,7 +3895,7 @@ restrictFreeVars bndrs fvs = restrictUniqSetToUFM bndrs fvs
    3786 3895
     -------------------
    
    3787 3896
     -- Auxiliary functions for UsageDetails implementation
    
    3788 3897
     
    
    3789
    -combineUsageDetailsWith :: (LocalOcc -> LocalOcc -> LocalOcc)
    
    3898
    +combineUsageDetailsWith :: (Unique -> LocalOcc -> LocalOcc -> LocalOcc)
    
    3790 3899
                             -> UsageDetails -> UsageDetails -> UsageDetails
    
    3791 3900
     {-# INLINE combineUsageDetailsWith #-}
    
    3792 3901
     combineUsageDetailsWith plus_occ_info
    
    ... ... @@ -3796,9 +3905,9 @@ combineUsageDetailsWith plus_occ_info
    3796 3905
       | isEmptyVarEnv env2 = uds1
    
    3797 3906
       | otherwise
    
    3798 3907
       -- See Note [Strictness in the occurrence analyser]
    
    3799
    -  -- Using strictPlusVarEnv here speeds up the test T26425 by about 10% by avoiding
    
    3800
    -  -- intermediate thunks.
    
    3801
    -  = UD { ud_env       = strictPlusVarEnv_C plus_occ_info env1 env2
    
    3908
    +  -- Using strictPlusVarEnv here speeds up the test T26425
    
    3909
    +  -- by about 10% by avoiding intermediate thunks.
    
    3910
    +  = UD { ud_env       = strictPlusVarEnv_C_Directly plus_occ_info env1 env2
    
    3802 3911
            , ud_z_many    = strictPlusVarEnv z_many1   z_many2
    
    3803 3912
            , ud_z_in_lam  = plusVarEnv z_in_lam1 z_in_lam2
    
    3804 3913
            , ud_z_tail    = strictPlusVarEnv z_tail1   z_tail2 }
    
    ... ... @@ -3842,8 +3951,6 @@ lookupOccInfoByUnique (UD { ud_env = env
    3842 3951
             | uniq `elemVarEnvByKey` z_tail = NoTailCallInfo
    
    3843 3952
             | otherwise                     = ti
    
    3844 3953
     
    
    3845
    -
    
    3846
    -
    
    3847 3954
     -------------------
    
    3848 3955
     -- See Note [Adjusting right-hand sides]
    
    3849 3956
     
    
    ... ... @@ -3853,21 +3960,22 @@ adjustNonRecRhs :: JoinPointHood
    3853 3960
     -- ^ This function concentrates shared logic between occAnalNonRecBind and the
    
    3854 3961
     -- AcyclicSCC case of occAnalRec.
    
    3855 3962
     -- It returns the adjusted rhs UsageDetails combined with the body usage
    
    3856
    -adjustNonRecRhs mb_join_arity rhs_wuds@(WTUD _ rhs)
    
    3857
    -  = WUD (adjustTailUsage mb_join_arity rhs_wuds) rhs
    
    3858
    -
    
    3963
    +adjustNonRecRhs mb_join_arity (WTUD (TUD rhs_ja uds) rhs)
    
    3964
    +  = WUD (adjustTailUsage exact_join rhs uds) rhs
    
    3965
    +  where
    
    3966
    +    exact_join = mb_join_arity == JoinPoint rhs_ja
    
    3859 3967
     
    
    3860
    -adjustTailUsage :: JoinPointHood
    
    3861
    -                -> WithTailUsageDetails CoreExpr    -- Rhs usage, AFTER occAnalLamTail
    
    3968
    +adjustTailUsage :: Bool        -- True <=> Exactly-matching join point; don't do markNonTail
    
    3969
    +                -> CoreExpr    -- Rhs usage, AFTER occAnalLamTail
    
    3970
    +                -> UsageDetails
    
    3862 3971
                     -> UsageDetails
    
    3863
    -adjustTailUsage mb_join_arity (WTUD (TUD rhs_ja uds) rhs)
    
    3972
    +adjustTailUsage exact_join rhs uds
    
    3864 3973
       = -- c.f. occAnal (Lam {})
    
    3865 3974
         markAllInsideLamIf (not one_shot) $
    
    3866 3975
         markAllNonTailIf (not exact_join) $
    
    3867 3976
         uds
    
    3868 3977
       where
    
    3869 3978
         one_shot   = isOneShotFun rhs
    
    3870
    -    exact_join = mb_join_arity == JoinPoint rhs_ja
    
    3871 3979
     
    
    3872 3980
     adjustTailArity :: JoinPointHood -> TailUsageDetails -> UsageDetails
    
    3873 3981
     adjustTailArity mb_rhs_ja (TUD ja usage)
    
    ... ... @@ -3914,8 +4022,9 @@ tagNonRecBinder lvl occ bndr
    3914 4022
     tagRecBinders :: TopLevelFlag           -- At top level?
    
    3915 4023
                   -> UsageDetails           -- Of body of let ONLY
    
    3916 4024
                   -> [NodeDetails]
    
    3917
    -              -> WithUsageDetails       -- Adjusted details for whole scope,
    
    3918
    -                                        -- with binders removed
    
    4025
    +              -> WithUsageDetails       -- Adjusted details for whole scope
    
    4026
    +                                        -- still including the binders;
    
    4027
    +                                        -- (they are removed by `addInScope`)
    
    3919 4028
                       [IdWithOccInfo]       -- Tagged binders
    
    3920 4029
     -- Substantially more complicated than non-recursive case. Need to adjust RHS
    
    3921 4030
     -- details *before* tagging binders (because the tags depend on the RHSes).
    
    ... ... @@ -3925,32 +4034,21 @@ tagRecBinders lvl body_uds details_s
    3925 4034
     
    
    3926 4035
          -- 1. See Note [Join arity prediction based on joinRhsArity]
    
    3927 4036
          --    Determine possible join-point-hood of whole group, by testing for
    
    3928
    -     --    manifest join arity M.
    
    3929
    -     --    This (re-)asserts that makeNode had made tuds for that same arity M!
    
    4037
    +     --    manifest join arity MAr.
    
    4038
    +     --    This (re-)asserts that makeNode had made tuds for that same arity MAr!
    
    3930 4039
          unadj_uds = foldr (andUDs . test_manifest_arity) body_uds details_s
    
    3931
    -     test_manifest_arity ND{nd_rhs = WTUD tuds rhs}
    
    3932
    -       = adjustTailArity (JoinPoint (joinRhsArity rhs)) tuds
    
    4040
    +     test_manifest_arity ND{nd_rhs = WTUD (TUD rhs_ja uds) rhs}
    
    4041
    +       = assertPpr (rhs_ja == joinRhsArity rhs) (ppr rhs_ja $$ ppr uds $$ ppr rhs) $
    
    4042
    +         uds
    
    3933 4043
     
    
    4044
    +     will_be_joins :: Bool
    
    3934 4045
          will_be_joins = decideRecJoinPointHood lvl unadj_uds bndrs
    
    3935 4046
     
    
    3936
    -     mb_join_arity :: Id -> JoinPointHood
    
    3937
    -     -- mb_join_arity: See Note [Join arity prediction based on joinRhsArity]
    
    3938
    -     -- This is the source O
    
    3939
    -     mb_join_arity bndr
    
    3940
    -         -- Can't use willBeJoinId_maybe here because we haven't tagged
    
    3941
    -         -- the binder yet (the tag depends on these adjustments!)
    
    3942
    -       | will_be_joins
    
    3943
    -       , AlwaysTailCalled arity <- lookupTailCallInfo unadj_uds bndr
    
    3944
    -       = JoinPoint arity
    
    3945
    -       | otherwise
    
    3946
    -       = assert (not will_be_joins) -- Should be AlwaysTailCalled if
    
    3947
    -         NotJoinPoint               -- we are making join points!
    
    3948
    -
    
    3949 4047
          -- 2. Adjust usage details of each RHS, taking into account the
    
    3950 4048
          --    join-point-hood decision
    
    3951
    -     rhs_udss' = [ adjustTailUsage (mb_join_arity bndr) rhs_wuds
    
    4049
    +     rhs_udss' = [ adjustTailUsage will_be_joins rhs rhs_uds
    
    3952 4050
                          -- Matching occAnalLamTail in makeNode
    
    3953
    -                 | ND { nd_bndr = bndr, nd_rhs = rhs_wuds } <- details_s ]
    
    4051
    +                 | ND { nd_rhs = WTUD (TUD _ rhs_uds) rhs } <- details_s ]
    
    3954 4052
     
    
    3955 4053
          -- 3. Compute final usage details from adjusted RHS details
    
    3956 4054
          adj_uds = foldr andUDs body_uds rhs_udss'
    
    ... ... @@ -3969,9 +4067,9 @@ setBinderOcc occ_info bndr
    3969 4067
       | otherwise                  = setIdOccInfo bndr occ_info
    
    3970 4068
     
    
    3971 4069
     -- | Decide whether some bindings should be made into join points or not, based
    
    3972
    --- on its occurrences. This is
    
    4070
    +-- on its occurrences.
    
    3973 4071
     -- Returns `False` if they can't be join points. Note that it's an
    
    4072
    +-- all-or-nothing decision: if multiple binders are given, they are
    
    3974 4073
     -- assumed to be mutually recursive.
    
    3975 4074
     --
    
    3976 4075
     -- It must, however, be a final decision. If we say `True` for 'f',
    

  • compiler/GHC/Core/SimpleOpt.hs
    ... ... @@ -393,12 +393,19 @@ simple_app env e0@(Lam {}) as0@(_:_)
    393 393
           = wrapLet mb_pr $ do_beta env'' body as
    
    394 394
           where (env', b') = subst_opt_bndr env b
    
    395 395
     
    
    396
    -    do_beta env e@(Lam b body) as@(CastIt co:rest)
    
    397
    -      -- See Note [Desugaring unlifted newtypes]
    
    396
    +    -- See Note [Eliminate casts in function position]
    
    397
    +    do_beta env e@(Lam b _) as@(CastIt out_co:rest)
    
    398 398
           | isNonCoVarId b
    
    399
    -      , Just (b', body') <- pushCoercionIntoLambda (soe_subst env) b body co
    
    399
    +      -- Optimise the inner lambda to make it an 'OutExpr', which makes it
    
    400
    +      -- possible to call 'pushCoercionIntoLambda' with the 'OutCoercion' 'co'.
    
    401
    +      -- This is kind of horrible, as for nested casted lambdas with a big body,
    
    402
    +      -- we will repeatedly optimise the body (once for each binder). However,
    
    403
    +      -- we need to do this to avoid mixing 'InExpr' and 'OutExpr', or two
    
    404
    +      -- 'InExpr' with different environments (getting this wrong caused #26588 & #26589.)
    
    405
    +      , Lam out_b out_body <- simple_app env e []
    
    406
    +      , Just (b', body') <- pushCoercionIntoLambda (soeInScope env) out_b out_body out_co
    
    400 407
           = do_beta (soeZapSubst env) (Lam b' body') rest
    
    401
    -        -- soeZapSubst: pushCoercionIntoLambda applies the substitution
    
    408
    +        -- soeZapSubst: we've already optimised everything (the lambda and 'rest') by now.
    
    402 409
           | otherwise
    
    403 410
           = rebuild_app env (simple_opt_expr env e) as
    
    404 411
     
    
    ... ... @@ -511,7 +518,31 @@ TL;DR: To avoid the rest of the compiler pipeline seeing these bad lambas, we
    511 518
     rely on the simple optimiser to both inline the newtype unfolding and
    
    512 519
     subsequently deal with the resulting lambdas (either beta-reducing them
    
    513 520
     altogether or pushing coercions into them so that they satisfy the
    
    514
    -representation-polymorphism invariants).
    
    521
    +representation-polymorphism invariants). See Note [Eliminate casts in function position].
    
    522
    +
    
    523
    +[Alternative approach] (GHC ticket #26608)
    
    524
    +
    
    525
    +  We could instead, in the typechecker, emit a special form (a new constructor
    
    526
    +  of XXExprGhcTc) for instantiations of representation-polymorphic unlifted
    
    527
    +  newtypes (whether applied to a value argument or not):
    
    528
    +
    
    529
    +    UnliftedNT :: DataCon -> [Type] -> Coercion -> XXExprGhcTc
    
    530
    +
    
    531
    +  where "UnliftedNT nt_con [ty1, ...] co" represents the expression:
    
    532
    +
    
    533
    +    ( nt_con @ty1 ... ) |> co
    
    534
    +
    
    535
    +  The desugarer would then turn these AST nodes into appropriate Core, doing
    
    536
    +  what the simple optimiser does today:
    
    537
    +    - inline the compulsory unfolding of the newtype constructor
    
    538
    +    - apply it to its type arguments and beta reduce
    
    539
    +    - push the coercion into the resulting lambda
    
    540
    +
    
    541
    +  This would have several advantages:
    
    542
    +    - the desugarer would never produce "invalid" Core that needs to be
    
    543
    +      tidied up by the simple optimiser,
    
    544
    +    - the ugly and inefficient implementation described in
    
    545
    +      Note [Eliminate casts in function position] could be removed.
    
    515 546
     
    
    516 547
     Wrinkle [Unlifted newtypes with wrappers]
    
    517 548
     
    
    ... ... @@ -717,50 +748,49 @@ rhss here.
    717 748
     
    
    718 749
     Note [Eliminate casts in function position]
    
    719 750
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    720
    -Consider the following program:
    
    751
    +Due to the current implementation strategy for representation-polymorphic
    
    752
    +unlifted newtypes, as described in Note [Desugaring unlifted newtypes], we rely
    
    753
    +on the simple optimiser to push coercions into lambdas, such as in the following
    
    754
    +example:
    
    721 755
     
    
    722 756
       type R :: Type -> RuntimeRep
    
    723
    -  type family R a where { R Float = FloatRep; R Double = DoubleRep }
    
    724
    -  type F :: forall (a :: Type) -> TYPE (R a)
    
    725
    -  type family F a where { F Float = Float#  ; F Double = Double# }
    
    757
    +  type family R a where { R Int = IntRep }
    
    758
    +  type F :: forall a -> TYPE (R a)
    
    759
    +  type family F a where { F Int = Int# }
    
    726 760
     
    
    727
    -  type N :: forall (a :: Type) -> TYPE (R a)
    
    728 761
       newtype N a = MkN (F a)
    
    729 762
     
    
    730
    -As MkN is a newtype, its unfolding is a lambda which wraps its argument
    
    731
    -in a cast:
    
    732
    -
    
    733
    -  MkN :: forall (a :: Type). F a -> N a
    
    734
    -  MkN = /\a \(x::F a). x |> co_ax
    
    735
    -    -- recall that F a :: TYPE (R a)
    
    736
    -
    
    737
    -This is a representation-polymorphic lambda, in which the binder has an unknown
    
    738
    -representation (R a). We can't compile such a lambda on its own, but we can
    
    739
    -compile instantiations, such as `MkN @Float` or `MkN @Double`.
    
    763
    +Now, an instantiated occurrence of 'MkN', such as 'MkN @Int' (whether applied
    
    764
    +to a value argument or not) will lead, after inlining the compulsory unfolding
    
    765
    +of 'MkN', to a lambda fo the form:
    
    740 766
     
    
    741
    -Our strategy to avoid running afoul of the representation-polymorphism
    
    742
    -invariants of Note [Representation polymorphism invariants] in GHC.Core is thus:
    
    767
    +  ( \ ( x :: F Int ) -> body ) |> co
    
    743 768
     
    
    744
    -  1. Give the newtype a compulsory unfolding (it has no binding, as we can't
    
    745
    -     define lambdas with representation-polymorphic value binders in source Haskell).
    
    746
    -  2. Rely on the optimiser to beta-reduce away any representation-polymorphic
    
    747
    -     value binders.
    
    769
    +    where
    
    770
    +      co :: ( F Int -> res ) ~# ( Int# -> res )
    
    748 771
     
    
    749
    -For example, consider the application
    
    772
    +The problem is that we now have a lambda abstraction whose binder does not have a
    
    773
    +fixed RuntimeRep in the sense of Note [Fixed RuntimeRep] in GHC.Tc.Utils.Concrete.
    
    750 774
     
    
    751
    -    MkN @Float 34.0#
    
    775
    +However, if we use 'pushCoercionIntoLambda', we end up with:
    
    752 776
     
    
    753
    -After inlining MkN we'll get
    
    777
    +  ( \ ( x' :: Int# ) -> body' )
    
    754 778
     
    
    755
    -   ((/\a \(x:F a). x |> co_ax) @Float) |> co 34#
    
    779
    +which satisfies the representation-polymorphism invariants of
    
    780
    +Note [Representation polymorphism invariants] in GHC.Core.
    
    756 781
     
    
    757
    -where co :: (F Float -> N Float) ~ (Float# ~ N Float)
    
    782
    +In conclusion:
    
    758 783
     
    
    759
    -But to actually beta-reduce that lambda, we need to push the 'co'
    
    760
    -inside the `\x` with pushCoercionIntoLambda.  Hence the extra
    
    761
    -equation for Cast-of-Lam in simple_app.
    
    784
    +  1. The simple optimiser must push casts into lambdas.
    
    785
    +  2. It must also deal with a situation such as (MkN @Int) |> co, where we first
    
    786
    +     inline the compulsory unfolding of N. This means the simple optimiser must
    
    787
    +     "peel off" the casts and optimise the inner expression first, to determine
    
    788
    +     whether it is a lambda abstraction or not.
    
    762 789
     
    
    763
    -This is regrettably delicate.
    
    790
    +This is regrettably delicate. If we could make sure the typechecker/desugarer
    
    791
    +did not produce these bad lambdas in the first place (as described in
    
    792
    +[Alternative approach] in Note [Desugaring unlifted newtypes]), we could
    
    793
    +get rid of this ugly logic.
    
    764 794
     
    
    765 795
     Note [Preserve join-binding arity]
    
    766 796
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -1673,7 +1703,7 @@ exprIsLambda_maybe ise@(ISE in_scope_set _) (Cast casted_e co)
    1673 1703
         -- this implies that x is not in scope in gamma (makes this code simpler)
    
    1674 1704
         , not (isTyVar x) && not (isCoVar x)
    
    1675 1705
         , assert (not $ x `elemVarSet` tyCoVarsOfCo co) True
    
    1676
    -    , Just (x',e') <- pushCoercionIntoLambda (mkEmptySubst in_scope_set) x e co
    
    1706
    +    , Just (x',e') <- pushCoercionIntoLambda in_scope_set x e co
    
    1677 1707
         , let res = Just (x',e',ts)
    
    1678 1708
         = --pprTrace "exprIsLambda_maybe:Cast" (vcat [ppr casted_e,ppr co,ppr res)])
    
    1679 1709
           res
    

  • compiler/GHC/Driver/DynFlags.hs
    ... ... @@ -1268,6 +1268,7 @@ optLevelFlags -- see Note [Documenting optimisation flags]
    1268 1268
         , ([1,2],   Opt_CfgBlocklayout)      -- Experimental
    
    1269 1269
     
    
    1270 1270
         , ([1,2],   Opt_Specialise)
    
    1271
    +    , ([1,2],   Opt_PolymorphicSpecialisation)  -- Now on by default (#23559)
    
    1271 1272
         , ([1,2],   Opt_CrossModuleSpecialise)
    
    1272 1273
         , ([1,2],   Opt_InlineGenerics)
    
    1273 1274
         , ([1,2],   Opt_Strictness)
    

  • compiler/GHC/Driver/Flags.hs
    ... ... @@ -909,6 +909,7 @@ optimisationFlags = EnumSet.fromList
    909 909
        , Opt_SpecialiseAggressively
    
    910 910
        , Opt_CrossModuleSpecialise
    
    911 911
        , Opt_StaticArgumentTransformation
    
    912
    +   , Opt_PolymorphicSpecialisation
    
    912 913
        , Opt_CSE
    
    913 914
        , Opt_StgCSE
    
    914 915
        , Opt_StgLiftLams
    

  • compiler/GHC/HsToCore/Binds.hs
    ... ... @@ -1620,7 +1620,7 @@ ds_hs_wrapper hs_wrap
    1620 1620
                                   do { x <- newSysLocalDs (mkScaled (subMultCoRKind w_co) t)
    
    1621 1621
                                      ; go c1 $ \w1 ->
    
    1622 1622
                                        go c2 $ \w2 ->
    
    1623
    -                                   let app f a = mkCoreApp (text "dsHsWrapper") f a
    
    1623
    +                                   let app f a = mkCoreApp f a
    
    1624 1624
                                            arg     = w1 (Var x)
    
    1625 1625
                                        in k (\e -> (Lam x (w2 (app e arg)))) }
    
    1626 1626
     
    

  • compiler/GHC/HsToCore/Expr.hs
    ... ... @@ -877,8 +877,7 @@ dsHsConLike (PatSynCon ps)
    877 877
       | Just (builder_name, _, add_void) <- patSynBuilder ps
    
    878 878
       = do { builder_id <- dsLookupGlobalId builder_name
    
    879 879
            ; return (if add_void
    
    880
    -                 then mkCoreApp (text "dsConLike" <+> ppr ps)
    
    881
    -                                (Var builder_id) unboxedUnitExpr
    
    880
    +                 then mkCoreApp (Var builder_id) unboxedUnitExpr
    
    882 881
                      else Var builder_id) }
    
    883 882
       | otherwise
    
    884 883
       = pprPanic "dsConLike" (ppr ps)
    

  • compiler/GHC/HsToCore/Match.hs
    ... ... @@ -301,7 +301,7 @@ matchView (var :| vars) ty eqns@(eqn1 :| _)
    301 301
              -- compile the view expressions
    
    302 302
             ; viewExpr' <- dsExpr viewExpr
    
    303 303
             ; return (mkViewMatchResult var'
    
    304
    -                    (mkCoreApp (text "matchView") viewExpr' (Var var))
    
    304
    +                    (mkCoreApp viewExpr' (Var var))
    
    305 305
                         match_result) }
    
    306 306
     
    
    307 307
     -- decompose the first pattern and leave the rest alone
    

  • compiler/GHC/Tc/Gen/App.hs
    ... ... @@ -749,13 +749,13 @@ tcInstFun do_ql inst_final (tc_fun, fun_ctxt) fun_sigma rn_args
    749 749
         go1 _pos acc fun_ty []
    
    750 750
            | XExpr (ConLikeTc (RealDataCon dc)) <- tc_fun
    
    751 751
            , isNewDataCon dc
    
    752
    -       , [Scaled _ arg_ty] <- dataConOrigArgTys dc
    
    752
    +       , [Scaled _ orig_arg_ty] <- dataConOrigArgTys dc
    
    753 753
            , n_val_args == 0
    
    754 754
            -- If we're dealing with an unsaturated representation-polymorphic
    
    755 755
            -- UnliftedNewype, then perform a representation-polymorphism check.
    
    756 756
            -- See Note [Representation-polymorphism checks for unsaturated unlifted newtypes]
    
    757 757
            -- in GHC.Tc.Utils.Concrete.
    
    758
    -       , not $ typeHasFixedRuntimeRep arg_ty
    
    758
    +       , not $ typeHasFixedRuntimeRep orig_arg_ty
    
    759 759
            = do { (wrap_co, arg_ty, res_ty) <-
    
    760 760
                       matchActualFunTy (FRRRepPolyUnliftedNewtype dc)
    
    761 761
                         (Just $ HsExprTcThing tc_fun)
    

  • compiler/GHC/Types/Basic.hs
    ... ... @@ -1333,7 +1333,7 @@ zapFragileOcc occ = zapOccTailCallInfo occ
    1333 1333
     
    
    1334 1334
     instance Outputable OccInfo where
    
    1335 1335
       -- only used for debugging; never parsed.  KSW 1999-07
    
    1336
    -  ppr (ManyOccs tails)     = pprShortTailCallInfo tails
    
    1336
    +  ppr (ManyOccs tails)     = text "Many" <> parens (pprShortTailCallInfo tails)
    
    1337 1337
       ppr IAmDead              = text "Dead"
    
    1338 1338
       ppr (IAmALoopBreaker rule_only tails)
    
    1339 1339
             = text "LoopBreaker" <> pp_ro <> pprShortTailCallInfo tails
    

  • compiler/GHC/Types/Unique/FM.hs
    ... ... @@ -53,7 +53,7 @@ module GHC.Types.Unique.FM (
    53 53
             plusUFM,
    
    54 54
             strictPlusUFM,
    
    55 55
             plusUFM_C,
    
    56
    -        strictPlusUFM_C,
    
    56
    +        strictPlusUFM_C, strictPlusUFM_C_Directly,
    
    57 57
             plusUFM_CD,
    
    58 58
             plusUFM_CD2,
    
    59 59
             mergeUFM,
    
    ... ... @@ -281,6 +281,9 @@ plusUFM_C f (UFM x) (UFM y) = UFM (M.unionWith f x y)
    281 281
     strictPlusUFM_C :: (elt -> elt -> elt) -> UniqFM key elt -> UniqFM key elt -> UniqFM key elt
    
    282 282
     strictPlusUFM_C f (UFM x) (UFM y) = UFM (MS.unionWith f x y)
    
    283 283
     
    
    284
    +strictPlusUFM_C_Directly :: (Unique -> elt -> elt -> elt) -> UniqFM key elt -> UniqFM key elt -> UniqFM key elt
    
    285
    +strictPlusUFM_C_Directly f (UFM x) (UFM y) = UFM (MS.unionWithKey (f . mkUniqueGrimily) x y)
    
    286
    +
    
    284 287
     -- | `plusUFM_CD f m1 d1 m2 d2` merges the maps using `f` as the
    
    285 288
     -- combinding function and `d1` resp. `d2` as the default value if
    
    286 289
     -- there is no entry in `m1` reps. `m2`. The domain is the union of
    

  • compiler/GHC/Types/Unique/Set.hs
    ... ... @@ -40,6 +40,7 @@ module GHC.Types.Unique.Set (
    40 40
             lookupUniqSet_Directly,
    
    41 41
             partitionUniqSet,
    
    42 42
             mapUniqSet,
    
    43
    +        mapUniqSetToUFM, mapMaybeUniqSetToUFM,
    
    43 44
             unsafeUFMToUniqSet,
    
    44 45
             nonDetEltsUniqSet,
    
    45 46
             nonDetKeysUniqSet,
    
    ... ... @@ -211,6 +212,14 @@ mapUniqSet f = mkUniqSet . map f . nonDetEltsUniqSet
    211 212
     mapMaybeUniqSet_sameUnique :: (a -> Maybe b) -> UniqSet a -> UniqSet b
    
    212 213
     mapMaybeUniqSet_sameUnique f (UniqSet a) = UniqSet $ mapMaybeUFM_sameUnique f a
    
    213 214
     
    
    215
    +mapUniqSetToUFM :: (a -> b) -> UniqSet a -> UniqFM a b
    
    216
    +-- Same keys, new values
    
    217
    +mapUniqSetToUFM f (UniqSet ufm) = mapUFM f ufm
    
    218
    +
    
    219
    +mapMaybeUniqSetToUFM :: (a -> Maybe b) -> UniqSet a -> UniqFM a b
    
    220
    +-- Same keys, new values
    
    221
    +mapMaybeUniqSetToUFM f (UniqSet ufm) = mapMaybeUFM f ufm
    
    222
    +
    
    214 223
     -- Two 'UniqSet's are considered equal if they contain the same
    
    215 224
     -- uniques.
    
    216 225
     instance Eq (UniqSet a) where
    

  • compiler/GHC/Types/Var/Env.hs
    ... ... @@ -12,7 +12,8 @@ module GHC.Types.Var.Env (
    12 12
             elemVarEnv, disjointVarEnv, anyVarEnv,
    
    13 13
             extendVarEnv, extendVarEnv_C, extendVarEnv_Acc,
    
    14 14
             extendVarEnvList,
    
    15
    -        strictPlusVarEnv, plusVarEnv, plusVarEnv_C, strictPlusVarEnv_C,
    
    15
    +        strictPlusVarEnv, plusVarEnv, plusVarEnv_C,
    
    16
    +        strictPlusVarEnv_C, strictPlusVarEnv_C_Directly,
    
    16 17
             plusVarEnv_CD, plusMaybeVarEnv_C,
    
    17 18
             plusVarEnvList, alterVarEnv,
    
    18 19
             delVarEnvList, delVarEnv,
    
    ... ... @@ -525,6 +526,7 @@ delVarEnv :: VarEnv a -> Var -> VarEnv a
    525 526
     minusVarEnv       :: VarEnv a -> VarEnv b -> VarEnv a
    
    526 527
     plusVarEnv_C      :: (a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
    
    527 528
     strictPlusVarEnv_C :: (a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
    
    529
    +strictPlusVarEnv_C_Directly :: (Unique -> a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
    
    528 530
     plusVarEnv_CD     :: (a -> a -> a) -> VarEnv a -> a -> VarEnv a -> a -> VarEnv a
    
    529 531
     plusMaybeVarEnv_C :: (a -> a -> Maybe a) -> VarEnv a -> VarEnv a -> VarEnv a
    
    530 532
     mapVarEnv         :: (a -> b) -> VarEnv a -> VarEnv b
    
    ... ... @@ -552,6 +554,7 @@ extendVarEnv_Acc = addToUFM_Acc
    552 554
     extendVarEnvList = addListToUFM
    
    553 555
     plusVarEnv_C     = plusUFM_C
    
    554 556
     strictPlusVarEnv_C = strictPlusUFM_C
    
    557
    +strictPlusVarEnv_C_Directly = strictPlusUFM_C_Directly
    
    555 558
     plusVarEnv_CD    = plusUFM_CD
    
    556 559
     plusMaybeVarEnv_C = plusMaybeUFM_C
    
    557 560
     delVarEnvList    = delListFromUFM
    

  • docs/users_guide/exts/rank_polymorphism.rst
    ... ... @@ -195,7 +195,7 @@ For example: ::
    195 195
       g3c :: Int -> forall x y. y -> x -> x
    
    196 196
     
    
    197 197
       f4 :: (Int -> forall a. (Eq a, Show a) => a -> a) -> Bool
    
    198
    -  g4 ::  Int -> forall x. (Show x, Eq x) => x -> x) -> Bool
    
    198
    +  g4 ::  Int -> forall x. (Show x, Eq x) => x -> x
    
    199 199
     
    
    200 200
     Then the application ``f3 g3a`` is well-typed, because ``g3a`` has a type that matches the type
    
    201 201
     expected by ``f3``.  But ``f3 g3b`` is not well typed, because the foralls are in different places.
    

  • docs/users_guide/exts/type_families.rst
    ... ... @@ -680,7 +680,7 @@ thus: ::
    680 680
     When doing so, we (optionally) may drop the "``family``" keyword.
    
    681 681
     
    
    682 682
     The type parameters must all be type variables, of course, and some (but
    
    683
    -not necessarily all) of then can be the class parameters. Each class
    
    683
    +not necessarily all) of them can be the class parameters. Each class
    
    684 684
     parameter may only be used at most once per associated type, but some
    
    685 685
     may be omitted and they may be in an order other than in the class head.
    
    686 686
     Hence, the following contrived example is admissible: ::
    

  • docs/users_guide/using-optimisation.rst
    ... ... @@ -1325,10 +1325,7 @@ as such you shouldn't need to set any of them explicitly. A flag
    1325 1325
         :reverse: -fno-polymorphic-specialisation
    
    1326 1326
         :category:
    
    1327 1327
     
    
    1328
    -    :default: off
    
    1329
    -
    
    1330
    -    Warning, this feature is highly experimental and may lead to incorrect runtime
    
    1331
    -    results. Use at your own risk (:ghc-ticket:`23469`, :ghc-ticket:`23109`, :ghc-ticket:`21229`, :ghc-ticket:`23445`).
    
    1328
    +    :default: on
    
    1332 1329
     
    
    1333 1330
         Enable specialisation of function calls to known dictionaries with free type variables.
    
    1334 1331
         The created specialisation will abstract over the type variables free in the dictionary.
    

  • rts/eventlog/EventLog.c
    ... ... @@ -491,13 +491,7 @@ endEventLogging(void)
    491 491
     
    
    492 492
         eventlog_enabled = false;
    
    493 493
     
    
    494
    -    // Flush all events remaining in the buffers.
    
    495
    -    //
    
    496
    -    // N.B. Don't flush if shutting down: this was done in
    
    497
    -    // finishCapEventLogging and the capabilities have already been freed.
    
    498
    -    if (getSchedState() != SCHED_SHUTTING_DOWN) {
    
    499
    -        flushEventLog(NULL);
    
    500
    -    }
    
    494
    +    flushEventLog(NULL);
    
    501 495
     
    
    502 496
         ACQUIRE_LOCK(&eventBufMutex);
    
    503 497
     
    
    ... ... @@ -1626,15 +1620,24 @@ void flushEventLog(Capability **cap USED_IF_THREADS)
    1626 1620
             return;
    
    1627 1621
         }
    
    1628 1622
     
    
    1623
    +    // N.B. Don't flush if shutting down: this was done in
    
    1624
    +    // finishCapEventLogging and the capabilities have already been freed.
    
    1625
    +    // This can also race against the shutdown if the flush is triggered by the
    
    1626
    +    // ticker thread. (#26573)
    
    1627
    +    if (getSchedState() == SCHED_SHUTTING_DOWN) {
    
    1628
    +      return;
    
    1629
    +    }
    
    1630
    +
    
    1629 1631
         ACQUIRE_LOCK(&eventBufMutex);
    
    1630 1632
         printAndClearEventBuf(&eventBuf);
    
    1631 1633
         RELEASE_LOCK(&eventBufMutex);
    
    1632 1634
     
    
    1633 1635
     #if defined(THREADED_RTS)
    
    1634
    -    Task *task = getMyTask();
    
    1636
    +    Task *task = newBoundTask();
    
    1635 1637
         stopAllCapabilitiesWith(cap, task, SYNC_FLUSH_EVENT_LOG);
    
    1636 1638
         flushAllCapsEventsBufs();
    
    1637 1639
         releaseAllCapabilities(getNumCapabilities(), cap ? *cap : NULL, task);
    
    1640
    +    exitMyTask();
    
    1638 1641
     #else
    
    1639 1642
         flushLocalEventsBuf(getCapability(0));
    
    1640 1643
     #endif
    

  • testsuite/tests/rts/all.T
    ... ... @@ -2,6 +2,11 @@ test('testblockalloc',
    2 2
          [c_src, only_ways(['normal','threaded1']), extra_run_opts('+RTS -I0')],
    
    3 3
          compile_and_run, [''])
    
    4 4
     
    
    5
    +test('numeric_version_eventlog_flush',
    
    6
    +     [ignore_stdout, req_ghc_with_threaded_rts],
    
    7
    +     run_command,
    
    8
    +     ['{compiler} --numeric-version +RTS -l --eventlog-flush-interval=1 -RTS'])
    
    9
    +
    
    5 10
     test('testmblockalloc',
    
    6 11
          [c_src, only_ways(['normal','threaded1']), extra_run_opts('+RTS -I0 -xr0.125T'),
    
    7 12
           when(arch('wasm32'), skip)], # MBlocks can't be freed on wasm32, see Note [Megablock allocator on wasm] in rts
    

  • testsuite/tests/simplCore/should_compile/T26588.hs
    1
    +module T26588 ( getOptionSettingFromText ) where
    
    2
    +
    
    3
    +import           Control.Applicative ( Const(..) )
    
    4
    +import           Data.Map (Map)
    
    5
    +import qualified Data.Map.Strict as Map
    
    6
    +
    
    7
    +------------------------------------------------------------------------
    
    8
    +-- ConfigState
    
    9
    +
    
    10
    +data ConfigLeaf
    
    11
    +data ConfigTrie = ConfigTrie !(Maybe ConfigLeaf) !ConfigMap
    
    12
    +
    
    13
    +type ConfigMap = Map Int ConfigTrie
    
    14
    +
    
    15
    +freshLeaf :: [Int] -> ConfigLeaf -> ConfigTrie
    
    16
    +freshLeaf [] l     = ConfigTrie (Just l) mempty
    
    17
    +freshLeaf (a:as) l = ConfigTrie Nothing (Map.singleton a (freshLeaf as l))
    
    18
    +
    
    19
    +adjustConfigTrie :: Functor t => [Int] -> (Maybe ConfigLeaf -> t (Maybe ConfigLeaf)) -> Maybe (ConfigTrie) -> t (Maybe ConfigTrie)
    
    20
    +adjustConfigTrie     as f Nothing                 = fmap (freshLeaf as) <$> f Nothing
    
    21
    +adjustConfigTrie (a:as) f (Just (ConfigTrie x m)) = Just . ConfigTrie x <$> adjustConfigMap a as f m
    
    22
    +adjustConfigTrie     [] f (Just (ConfigTrie x m)) = g <$> f x
    
    23
    +  where g Nothing | Map.null m = Nothing
    
    24
    +        g x' = Just (ConfigTrie x' m)
    
    25
    +
    
    26
    +adjustConfigMap :: Functor t => Int -> [Int] -> (Maybe ConfigLeaf -> t (Maybe ConfigLeaf)) -> ConfigMap -> t ConfigMap
    
    27
    +adjustConfigMap a as f = Map.alterF (adjustConfigTrie as f) a
    
    28
    +
    
    29
    +getOptionSettingFromText :: Int -> [Int] -> ConfigMap -> IO ()
    
    30
    +getOptionSettingFromText p ps = getConst . adjustConfigMap p ps f
    
    31
    +  where
    
    32
    +    f _ = Const (return ())

  • testsuite/tests/simplCore/should_compile/T26589.hs
    1
    +module T26589 ( executeTest ) where
    
    2
    +
    
    3
    +-- base
    
    4
    +import Data.Coerce ( coerce )
    
    5
    +import Data.Foldable ( foldMap )
    
    6
    +
    
    7
    +--------------------------------------------------------------------------------
    
    8
    +
    
    9
    +newtype Traversal f = Traversal { getTraversal :: f () }
    
    10
    +
    
    11
    +instance Applicative f => Semigroup (Traversal f) where
    
    12
    +  Traversal f1 <> Traversal f2 = Traversal $ f1 *> f2
    
    13
    +instance Applicative f => Monoid (Traversal f) where
    
    14
    +  mempty = Traversal $ pure ()
    
    15
    +
    
    16
    +newtype Seq a = Seq (FingerTree (Elem a))
    
    17
    +newtype Elem a = Elem { getElem :: a }
    
    18
    +
    
    19
    +data FingerTree a
    
    20
    +    = EmptyT
    
    21
    +    | Deep !a (FingerTree a) !a
    
    22
    +
    
    23
    +executeTest :: Seq () -> IO ()
    
    24
    +executeTest fins = destroyResources
    
    25
    +  where
    
    26
    +    destroyResources :: IO ()
    
    27
    +    destroyResources =
    
    28
    +      getTraversal $
    
    29
    +        flip foldMap1 fins $ \ _ ->
    
    30
    +          Traversal $ return ()
    
    31
    +
    
    32
    +foldMap1 :: forall m a. Monoid m => (a -> m) -> Seq a -> m
    
    33
    +foldMap1 = coerce (foldMap2 :: (Elem a -> m) -> FingerTree (Elem a) -> m)
    
    34
    +
    
    35
    +foldMap2 :: Monoid m => (Elem a -> m) -> FingerTree (Elem a) -> m
    
    36
    +foldMap2 _ EmptyT = mempty
    
    37
    +foldMap2 f' (Deep pr' m' sf') = f' pr' <> foldMapTree f' m' <> f' sf'
    
    38
    +      where
    
    39
    +        foldMapTree :: Monoid m => (a -> m) -> FingerTree a -> m
    
    40
    +        foldMapTree _ EmptyT = mempty
    
    41
    +        foldMapTree f (Deep pr m sf) =
    
    42
    +            f pr <>
    
    43
    +            foldMapTree f m <>
    
    44
    +            f sf

  • testsuite/tests/simplCore/should_compile/T8331.stderr
    1 1
     
    
    2 2
     ==================== Tidy Core rules ====================
    
    3
    +"SPEC $c*> @(ST s) @_"
    
    4
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    5
    +      $fApplicativeReaderT_$c*> @(ST s) @r $dApplicative
    
    6
    +      = ($fApplicativeReaderT2 @s @r)
    
    7
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
    
    8
    +                <ReaderT r (ST s) a>_R
    
    9
    +                ->_R <ReaderT r (ST s) b>_R
    
    10
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <b>_R)
    
    11
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <b>_N)
    
    12
    +                :: Coercible
    
    13
    +                     (forall a b.
    
    14
    +                      ReaderT r (ST s) a -> ReaderT r (ST s) b -> r -> STRep s b)
    
    15
    +                     (forall a b.
    
    16
    +                      ReaderT r (ST s) a -> ReaderT r (ST s) b -> ReaderT r (ST s) b))
    
    17
    +"SPEC $c<$ @(ST s) @_"
    
    18
    +    forall (@s) (@r) ($dFunctor :: Functor (ST s)).
    
    19
    +      $fFunctorReaderT_$c<$ @(ST s) @r $dFunctor
    
    20
    +      = ($fApplicativeReaderT6 @s @r)
    
    21
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
    
    22
    +                <a>_R
    
    23
    +                ->_R <ReaderT r (ST s) b>_R
    
    24
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
    
    25
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
    
    26
    +                :: Coercible
    
    27
    +                     (forall a b. a -> ReaderT r (ST s) b -> r -> STRep s a)
    
    28
    +                     (forall a b. a -> ReaderT r (ST s) b -> ReaderT r (ST s) a))
    
    29
    +"SPEC $c<* @(ST s) @_"
    
    30
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    31
    +      $fApplicativeReaderT_$c<* @(ST s) @r $dApplicative
    
    32
    +      = ($fApplicativeReaderT1 @s @r)
    
    33
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
    
    34
    +                <ReaderT r (ST s) a>_R
    
    35
    +                ->_R <ReaderT r (ST s) b>_R
    
    36
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
    
    37
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
    
    38
    +                :: Coercible
    
    39
    +                     (forall a b.
    
    40
    +                      ReaderT r (ST s) a -> ReaderT r (ST s) b -> r -> STRep s a)
    
    41
    +                     (forall a b.
    
    42
    +                      ReaderT r (ST s) a -> ReaderT r (ST s) b -> ReaderT r (ST s) a))
    
    43
    +"SPEC $c<*> @(ST s) @_"
    
    44
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    45
    +      $fApplicativeReaderT9 @(ST s) @r $dApplicative
    
    46
    +      = ($fApplicativeReaderT4 @s @r)
    
    47
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
    
    48
    +                <ReaderT r (ST s) (a -> b)>_R
    
    49
    +                ->_R <ReaderT r (ST s) a>_R
    
    50
    +                ->_R <r>_R
    
    51
    +                ->_R Sym (N:ST <s>_N <b>_R)
    
    52
    +                :: Coercible
    
    53
    +                     (forall a b.
    
    54
    +                      ReaderT r (ST s) (a -> b) -> ReaderT r (ST s) a -> r -> STRep s b)
    
    55
    +                     (forall a b.
    
    56
    +                      ReaderT r (ST s) (a -> b) -> ReaderT r (ST s) a -> r -> ST s b))
    
    57
    +"SPEC $c>> @(ST s) @_"
    
    58
    +    forall (@s) (@r) ($dMonad :: Monad (ST s)).
    
    59
    +      $fMonadReaderT1 @(ST s) @r $dMonad
    
    60
    +      = $fMonadAbstractIOSTReaderT_$s$c>> @s @r
    
    61
    +"SPEC $c>>= @(ST s) @_"
    
    62
    +    forall (@s) (@r) ($dMonad :: Monad (ST s)).
    
    63
    +      $fMonadReaderT2 @(ST s) @r $dMonad
    
    64
    +      = ($fMonadAbstractIOSTReaderT2 @s @r)
    
    65
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
    
    66
    +                <ReaderT r (ST s) a>_R
    
    67
    +                ->_R <a -> ReaderT r (ST s) b>_R
    
    68
    +                ->_R <r>_R
    
    69
    +                ->_R Sym (N:ST <s>_N <b>_R)
    
    70
    +                :: Coercible
    
    71
    +                     (forall a b.
    
    72
    +                      ReaderT r (ST s) a -> (a -> ReaderT r (ST s) b) -> r -> STRep s b)
    
    73
    +                     (forall a b.
    
    74
    +                      ReaderT r (ST s) a -> (a -> ReaderT r (ST s) b) -> r -> ST s b))
    
    75
    +"SPEC $cfmap @(ST s) @_"
    
    76
    +    forall (@s) (@r) ($dFunctor :: Functor (ST s)).
    
    77
    +      $fFunctorReaderT_$cfmap @(ST s) @r $dFunctor
    
    78
    +      = ($fApplicativeReaderT7 @s @r)
    
    79
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
    
    80
    +                <a -> b>_R
    
    81
    +                ->_R <ReaderT r (ST s) a>_R
    
    82
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <b>_R)
    
    83
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <b>_N)
    
    84
    +                :: Coercible
    
    85
    +                     (forall a b. (a -> b) -> ReaderT r (ST s) a -> r -> STRep s b)
    
    86
    +                     (forall a b. (a -> b) -> ReaderT r (ST s) a -> ReaderT r (ST s) b))
    
    87
    +"SPEC $cliftA2 @(ST s) @_"
    
    88
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    89
    +      $fApplicativeReaderT_$cliftA2 @(ST s) @r $dApplicative
    
    90
    +      = ($fApplicativeReaderT3 @s @r)
    
    91
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N) (c ::~ <*>_N).
    
    92
    +                <a -> b -> c>_R
    
    93
    +                ->_R <ReaderT r (ST s) a>_R
    
    94
    +                ->_R <ReaderT r (ST s) b>_R
    
    95
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <c>_R)
    
    96
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <c>_N)
    
    97
    +                :: Coercible
    
    98
    +                     (forall a b c.
    
    99
    +                      (a -> b -> c)
    
    100
    +                      -> ReaderT r (ST s) a -> ReaderT r (ST s) b -> r -> STRep s c)
    
    101
    +                     (forall a b c.
    
    102
    +                      (a -> b -> c)
    
    103
    +                      -> ReaderT r (ST s) a -> ReaderT r (ST s) b -> ReaderT r (ST s) c))
    
    104
    +"SPEC $cp1Applicative @(ST s) @_"
    
    105
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    106
    +      $fApplicativeReaderT_$cp1Applicative @(ST s) @r $dApplicative
    
    107
    +      = $fApplicativeReaderT_$s$fFunctorReaderT @s @r
    
    108
    +"SPEC $cp1Monad @(ST s) @_"
    
    109
    +    forall (@s) (@r) ($dMonad :: Monad (ST s)).
    
    110
    +      $fMonadReaderT_$cp1Monad @(ST s) @r $dMonad
    
    111
    +      = $fApplicativeReaderT_$s$fApplicativeReaderT @s @r
    
    112
    +"SPEC $cpure @(ST s) @_"
    
    113
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    114
    +      $fApplicativeReaderT_$cpure @(ST s) @r $dApplicative
    
    115
    +      = ($fApplicativeReaderT5 @s @r)
    
    116
    +        `cast` (forall (a ::~ <*>_N).
    
    117
    +                <a>_R
    
    118
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
    
    119
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
    
    120
    +                :: Coercible
    
    121
    +                     (forall a. a -> r -> STRep s a)
    
    122
    +                     (forall a. a -> ReaderT r (ST s) a))
    
    123
    +"SPEC $creturn @(ST s) @_"
    
    124
    +    forall (@s) (@r) ($dMonad :: Monad (ST s)).
    
    125
    +      $fMonadReaderT_$creturn @(ST s) @r $dMonad
    
    126
    +      = ($fApplicativeReaderT5 @s @r)
    
    127
    +        `cast` (forall (a ::~ <*>_N).
    
    128
    +                <a>_R
    
    129
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
    
    130
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
    
    131
    +                :: Coercible
    
    132
    +                     (forall a. a -> r -> STRep s a)
    
    133
    +                     (forall a. a -> ReaderT r (ST s) a))
    
    134
    +"SPEC $fApplicativeReaderT @(ST s) @_"
    
    135
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    136
    +      $fApplicativeReaderT @(ST s) @r $dApplicative
    
    137
    +      = $fApplicativeReaderT_$s$fApplicativeReaderT @s @r
    
    138
    +"SPEC $fFunctorReaderT @(ST s) @_"
    
    139
    +    forall (@s) (@r) ($dFunctor :: Functor (ST s)).
    
    140
    +      $fFunctorReaderT @(ST s) @r $dFunctor
    
    141
    +      = $fApplicativeReaderT_$s$fFunctorReaderT @s @r
    
    142
    +"SPEC $fMonadReaderT @(ST s) @_"
    
    143
    +    forall (@s) (@r) ($dMonad :: Monad (ST s)).
    
    144
    +      $fMonadReaderT @(ST s) @r $dMonad
    
    145
    +      = $fMonadAbstractIOSTReaderT_$s$fMonadReaderT @s @r
    
    3 146
     "USPEC useAbstractMonad @(ReaderT Int (ST s))"
    
    4 147
         forall (@s)
    
    5 148
                ($dMonadAbstractIOST :: MonadAbstractIOST (ReaderT Int (ST s))).
    

  • testsuite/tests/simplCore/should_compile/all.T
    ... ... @@ -544,6 +544,9 @@ test('T25883b', normal, compile_grep_core, [''])
    544 544
     test('T25883c', normal, compile_grep_core, [''])
    
    545 545
     test('T25883d', [extra_files(['T25883d_import.hs'])], multimod_compile_filter, ['T25883d', '-O -ddump-simpl -dno-typeable-binds -dsuppress-all -dsuppress-uniques', r'grep -e "y ="'])
    
    546 546
     
    
    547
    +test('T26588', normal, compile, ['-package containers -O'])
    
    548
    +test('T26589', normal, compile, ['-O'])
    
    549
    +
    
    547 550
     test('T25976', [grep_errmsg('Dead Code')], compile, ['-O -ddump-simpl -dsuppress-uniques -dno-typeable-binds'])
    
    548 551
     
    
    549 552
     test('T25965', normal, compile, ['-O'])
    

  • testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
    ... ... @@ -133,6 +133,124 @@ data U_E1 = U_E1 {-# UNPACK #-} !E1
    133 133
                      {-# UNPACK #-} !Int8
    
    134 134
         deriving (Show)
    
    135 135
     
    
    136
    +{- In `data U_E`, the {-# UNPACK #-} !E1 gives rise to a pretty clumsy expression
    
    137
    +   for the wrapper for U_E1. Here is what it looks like when ther are only 16
    
    138
    +   data constructors in E1, and we have just
    
    139
    +       data U_E1 = U_E1 {-# UNPACK #-} !E1
    
    140
    +   Blimey!
    
    141
    +
    
    142
    +Main.$WU_E1
    
    143
    +  = \ (conrep_t1N4 [Occ=Once1!] :: Main.E1) ->
    
    144
    +      case case conrep_t1N4 of {
    
    145
    +             Main.E1_1 ->
    
    146
    +               GHC.Internal.Types.(# _| | | | | | | | | | | | | | | #)
    
    147
    +                 @GHC.Internal.Types.ZeroBitRep
    
    148
    +                 @GHC.Internal.Types.ZeroBitRep
    
    149
    +                 @GHC.Internal.Types.ZeroBitRep
    
    150
    +                 @GHC.Internal.Types.ZeroBitRep
    
    151
    +                 @GHC.Internal.Types.ZeroBitRep
    
    152
    +                 @GHC.Internal.Types.ZeroBitRep
    
    153
    +                 @GHC.Internal.Types.ZeroBitRep
    
    154
    +                 @GHC.Internal.Types.ZeroBitRep
    
    155
    +                 @GHC.Internal.Types.ZeroBitRep
    
    156
    +                 @GHC.Internal.Types.ZeroBitRep
    
    157
    +                 @GHC.Internal.Types.ZeroBitRep
    
    158
    +                 @GHC.Internal.Types.ZeroBitRep
    
    159
    +                 @GHC.Internal.Types.ZeroBitRep
    
    160
    +                 @GHC.Internal.Types.ZeroBitRep
    
    161
    +                 @GHC.Internal.Types.ZeroBitRep
    
    162
    +                 @GHC.Internal.Types.ZeroBitRep
    
    163
    +                 @(# #)
    
    164
    +                 @(# #)
    
    165
    +                 @(# #)
    
    166
    +                 @(# #)
    
    167
    +                 @(# #)
    
    168
    +                 @(# #)
    
    169
    +                 @(# #)
    
    170
    +                 @(# #)
    
    171
    +                 @(# #)
    
    172
    +                 @(# #)
    
    173
    +                 @(# #)
    
    174
    +                 @(# #)
    
    175
    +                 @(# #)
    
    176
    +                 @(# #)
    
    177
    +                 @(# #)
    
    178
    +                 @(# #)
    
    179
    +                 GHC.Internal.Types.(##);
    
    180
    +             Main.E1_2 ->
    
    181
    +               GHC.Internal.Types.(# |_| | | | | | | | | | | | | | #)
    
    182
    +                 @GHC.Internal.Types.ZeroBitRep
    
    183
    +                 @GHC.Internal.Types.ZeroBitRep
    
    184
    +                 @GHC.Internal.Types.ZeroBitRep
    
    185
    +                 @GHC.Internal.Types.ZeroBitRep
    
    186
    +                 @GHC.Internal.Types.ZeroBitRep
    
    187
    +                 @GHC.Internal.Types.ZeroBitRep
    
    188
    +                 @GHC.Internal.Types.ZeroBitRep
    
    189
    +                 @GHC.Internal.Types.ZeroBitRep
    
    190
    +                 @GHC.Internal.Types.ZeroBitRep
    
    191
    +                 @GHC.Internal.Types.ZeroBitRep
    
    192
    +                 @GHC.Internal.Types.ZeroBitRep
    
    193
    +                 @GHC.Internal.Types.ZeroBitRep
    
    194
    +                 @GHC.Internal.Types.ZeroBitRep
    
    195
    +                 @GHC.Internal.Types.ZeroBitRep
    
    196
    +                 @GHC.Internal.Types.ZeroBitRep
    
    197
    +                 @GHC.Internal.Types.ZeroBitRep
    
    198
    +                 @(# #)
    
    199
    +                 @(# #)
    
    200
    +                 @(# #)
    
    201
    +                 @(# #)
    
    202
    +                 @(# #)
    
    203
    +                 @(# #)
    
    204
    +                 @(# #)
    
    205
    +                 @(# #)
    
    206
    +                 @(# #)
    
    207
    +                 @(# #)
    
    208
    +                 @(# #)
    
    209
    +                 @(# #)
    
    210
    +                 @(# #)
    
    211
    +                 @(# #)
    
    212
    +                 @(# #)
    
    213
    +                 @(# #)
    
    214
    +                 GHC.Internal.Types.(##);
    
    215
    +             Main.E1_3 ->
    
    216
    +               GHC.Internal.Types.(# | |_| | | | | | | | | | | | | #)
    
    217
    +                 @GHC.Internal.Types.ZeroBitRep
    
    218
    +                 @GHC.Internal.Types.ZeroBitRep
    
    219
    +                 @GHC.Internal.Types.ZeroBitRep
    
    220
    +                 @GHC.Internal.Types.ZeroBitRep
    
    221
    +                 @GHC.Internal.Types.ZeroBitRep
    
    222
    +                 @GHC.Internal.Types.ZeroBitRep
    
    223
    +                 @GHC.Internal.Types.ZeroBitRep
    
    224
    +                 @GHC.Internal.Types.ZeroBitRep
    
    225
    +                 @GHC.Internal.Types.ZeroBitRep
    
    226
    +                 @GHC.Internal.Types.ZeroBitRep
    
    227
    +                 @GHC.Internal.Types.ZeroBitRep
    
    228
    +                 @GHC.Internal.Types.ZeroBitRep
    
    229
    +                 @GHC.Internal.Types.ZeroBitRep
    
    230
    +                 @GHC.Internal.Types.ZeroBitRep
    
    231
    +                 @GHC.Internal.Types.ZeroBitRep
    
    232
    +                 @GHC.Internal.Types.ZeroBitRep
    
    233
    +                 @(# #)
    
    234
    +                 @(# #)
    
    235
    +                 @(# #)
    
    236
    +                 @(# #)
    
    237
    +                 @(# #)
    
    238
    +                 @(# #)
    
    239
    +                 @(# #)
    
    240
    +                 @(# #)
    
    241
    +                 @(# #)
    
    242
    +                 @(# #)
    
    243
    +                 @(# #)
    
    244
    +                 @(# #)
    
    245
    +                 @(# #)
    
    246
    +                 @(# #)
    
    247
    +                 @(# #)
    
    248
    +                 @(# #)
    
    249
    +                 GHC.Internal.Types.(##);
    
    250
    +
    
    251
    +       ... etc ....
    
    252
    +-}
    
    253
    +
    
    136 254
     data U_E2 = U_E2 {-# UNPACK #-} !E2
    
    137 255
                      {-# UNPACK #-} !Int8
    
    138 256
                      {-# UNPACK #-} !Int8
    

  • utils/check-exact/ExactPrint.hs
    ... ... @@ -19,6 +19,13 @@
    19 19
     {-# LANGUAGE UndecidableInstances  #-} -- For the (StmtLR GhcPs GhcPs (LocatedA (body GhcPs))) ExactPrint instance
    
    20 20
     {-# OPTIONS_GHC -Wno-incomplete-uni-patterns -Wno-incomplete-record-updates #-}
    
    21 21
     
    
    22
    +-- We switch off specialisation in this module. Otherwise we get lots of functions
    
    23
    +-- specialised on lots of (GHC syntax tree) data types.  Compilation time allocation
    
    24
    +-- (at least with -fpolymorphic-specialisation; see !15058) blows up from 17G to 108G.
    
    25
    +-- Bad! ExactPrint is not a performance-critical module so it's not worth taking the
    
    26
    +-- largely-fruitless hit in compile time.
    
    27
    +{-# OPTIONS_GHC -fno-specialise #-}
    
    28
    +
    
    22 29
     module ExactPrint
    
    23 30
       (
    
    24 31
         ExactPrint(..)
    

  • utils/haddock/haddock-test/src/Test/Haddock.hs
    ... ... @@ -8,7 +8,6 @@ module Test.Haddock
    8 8
       ) where
    
    9 9
     
    
    10 10
     import Control.Monad
    
    11
    -import qualified Data.ByteString.Char8 as BS
    
    12 11
     import qualified Data.Map.Strict as Map
    
    13 12
     import Data.Foldable (for_)
    
    14 13
     import Data.Maybe
    
    ... ... @@ -211,7 +210,7 @@ checkFile cfg file = do
    211 210
         ccfg = cfgCheckConfig cfg
    
    212 211
         dcfg = cfgDirConfig cfg
    
    213 212
     
    
    214
    --- We use ByteString here to ensure that no lazy I/O is performed.
    
    213
    +-- We use readFile' here to ensure that no lazy I/O is performed.
    
    215 214
     -- This way to ensure that the reference file isn't held open in
    
    216 215
     -- case after `diffFile` (which is problematic if we need to rewrite
    
    217 216
     -- the reference file in `maybeAcceptFile`)
    
    ... ... @@ -219,8 +218,8 @@ checkFile cfg file = do
    219 218
     -- | Read the reference artifact for a test
    
    220 219
     readRef :: Config c -> FilePath -> IO (Maybe c)
    
    221 220
     readRef cfg file =
    
    222
    -  ccfgRead ccfg . BS.unpack
    
    223
    -    <$> BS.readFile (refFile dcfg file)
    
    221
    +  ccfgRead ccfg
    
    222
    +    <$> readFile' (refFile dcfg file)
    
    224 223
       where
    
    225 224
         ccfg = cfgCheckConfig cfg
    
    226 225
         dcfg = cfgDirConfig cfg
    
    ... ... @@ -228,8 +227,8 @@ readRef cfg file =
    228 227
     -- | Read (and clean) the test output artifact for a test
    
    229 228
     readOut :: Config c -> (DirConfig -> FilePath) -> FilePath -> IO c
    
    230 229
     readOut cfg dcfgDir file = do
    
    231
    -  res <- fmap (ccfgClean ccfg file) . ccfgRead ccfg . BS.unpack
    
    232
    -    <$> BS.readFile outFile
    
    230
    +  res <- fmap (ccfgClean ccfg file) . ccfgRead ccfg
    
    231
    +    <$> readFile' outFile
    
    233 232
       case res of
    
    234 233
         Just out -> return out
    
    235 234
         Nothing -> error $ "Failed to parse output file: " ++ outFile