Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC

Commits:

10 changed files:

Changes:

  • compiler/GHC/Core/Make.hs
    ... ... @@ -151,37 +151,28 @@ mkCoreConWrapApps con args = mkCoreApps (Var (dataConWrapId con)) args
    151 151
     
    
    152 152
     -- | Construct an expression which represents the application of a number of
    
    153 153
     -- expressions to another. The leftmost expression in the list is applied first
    
    154
    -mkCoreApps :: CoreExpr -- ^ function
    
    154
    +-- See Note [Assertion checking in mkCoreApp]
    
    155
    +mkCoreApps :: CoreExpr   -- ^ function
    
    155 156
                -> [CoreExpr] -- ^ arguments
    
    156 157
                -> CoreExpr
    
    157
    -mkCoreApps fun args
    
    158
    -  = fst $
    
    159
    -    foldl' (mkCoreAppTyped doc_string) (fun, fun_ty) args
    
    160
    -  where
    
    161
    -    doc_string = ppr fun_ty $$ ppr fun $$ ppr args
    
    162
    -    fun_ty = exprType fun
    
    158
    +mkCoreApps fun args = foldl' mkCoreApp fun args
    
    163 159
     
    
    164 160
     -- | Construct an expression which represents the application of one expression
    
    165 161
     -- to the other
    
    166
    -mkCoreApp :: SDoc
    
    167
    -          -> CoreExpr -- ^ function
    
    162
    +-- See Note [Assertion checking in mkCoreApp]
    
    163
    +mkCoreApp :: CoreExpr -- ^ function
    
    168 164
               -> CoreExpr -- ^ argument
    
    169 165
               -> CoreExpr
    
    170
    -mkCoreApp s fun arg
    
    171
    -  = fst $ mkCoreAppTyped s (fun, exprType fun) arg
    
    172
    -
    
    173
    --- | Construct an expression which represents the application of one expression
    
    174
    --- paired with its type to an argument. The result is paired with its type. This
    
    175
    --- function is not exported and used in the definition of 'mkCoreApp' and
    
    176
    --- 'mkCoreApps'.
    
    177
    -mkCoreAppTyped :: SDoc -> (CoreExpr, Type) -> CoreExpr -> (CoreExpr, Type)
    
    178
    -mkCoreAppTyped _ (fun, fun_ty) (Type ty)
    
    179
    -  = (App fun (Type ty), piResultTy fun_ty ty)
    
    180
    -mkCoreAppTyped _ (fun, fun_ty) (Coercion co)
    
    181
    -  = (App fun (Coercion co), funResultTy fun_ty)
    
    182
    -mkCoreAppTyped d (fun, fun_ty) arg
    
    183
    -  = assertPpr (isFunTy fun_ty) (ppr fun $$ ppr arg $$ d)
    
    184
    -    (App fun arg, funResultTy fun_ty)
    
    166
    +mkCoreApp fun arg = App fun arg
    
    167
    +
    
    168
    +{- Note [Assertion checking in mkCoreApp]
    
    169
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    170
    +At one time we had an assertion to check that the function and argument type match up,
    
    171
    +but that turned out to take 90% of all compile time (!) when compiling test
    
    172
    +`unboxedsums/UbxSumUnpackedSize.hs`. The reason was an unboxed sum constructor with
    
    173
    +hundreds of foralls.   It's most straightforward just to remove the assert, and
    
    174
    +rely on Lint to discover any mis-constructed terms.
    
    175
    +-}
    
    185 176
     
    
    186 177
     {- *********************************************************************
    
    187 178
     *                                                                      *
    

  • compiler/GHC/Core/Opt/OccurAnal.hs
    ... ... @@ -66,7 +66,6 @@ import GHC.Builtin.Names( runRWKey )
    66 66
     import GHC.Unit.Module( Module )
    
    67 67
     
    
    68 68
     import Data.List (mapAccumL)
    
    69
    -import Data.List.NonEmpty (NonEmpty (..))
    
    70 69
     
    
    71 70
     {-
    
    72 71
     ************************************************************************
    
    ... ... @@ -660,18 +659,35 @@ through A, so it should have ManyOcc. Bear this case in mind!
    660 659
     * In occ_env, the new (occ_join_points :: IdEnv OccInfoEnv) maps
    
    661 660
       each in-scope non-recursive join point, such as `j` above, to
    
    662 661
       a "zeroed form" of its RHS's usage details. The "zeroed form"
    
    662
    +    * has only occ_nested_lets in its domain  (see (W4) below)
    
    663 663
         * deletes ManyOccs
    
    664 664
         * maps a OneOcc to OneOcc{ occ_n_br = 0 }
    
    665
    -  In our example, occ_join_points will be extended with
    
    665
    +  In our example, assuming `v` is locally-let-bound, occ_join_points will
    
    666
    +  be extended with
    
    666 667
           [j :-> [v :-> OneOcc{occ_n_br=0}]]
    
    667
    -  See addJoinPoint.
    
    668
    +  See `addJoinPoint` and (W4) below.
    
    668 669
     
    
    669 670
     * At an occurrence of a join point, we do everything as normal, but add in the
    
    670 671
       UsageDetails from the occ_join_points.  See mkOneOcc.
    
    671 672
     
    
    672
    -* Crucially, at the NonRec binding of the join point, in `occAnalBind`, we use
    
    673
    -  `orUDs`, not `andUDs` to combine the usage from the RHS with the usage from
    
    674
    -  the body.
    
    673
    +* Crucially, at the NonRec binding of a join point `j`, in `occAnalBind`,
    
    674
    +  we use `combineJoinPointUDs`, not `andUDs` to combine the usage from the
    
    675
    +  RHS with the usage from the body.  `combineJoinPointUDs` behaves like this:
    
    676
    +
    
    677
    +   * For all variables than `occ_nested_lets`, use `andUDs`, just like for
    
    678
    +     any normal let-binding.
    
    679
    +
    
    680
    +   * But for a variable `v` in `occ_nested_lets`, use `orUDs`:
    
    681
    +     - If `v` occurs `ManyOcc` in the join-point RHS, the variable won't be in
    
    682
    +       `occ_join_points`; but we'll get `ManyOcc` anyway.
    
    683
    +     - If `v` occurs `OneOcc` in the join-point RHS, the variable will be in
    
    684
    +       `occ_join_points` and we'll thereby get a `OneOcc{occ_n_br=0}` from
    
    685
    +       each of j's tail calls.  We can `or` that with the `OncOcc{occ_n_br=n}`
    
    686
    +       from j's RHS.
    
    687
    +
    
    688
    +  The only reason for `occ_nested_lets` is to reduce the size of the info
    
    689
    +  duplicate at each tail call; see (W4). It would sound to put *all* variables
    
    690
    +  into `occ_nested_lets`.
    
    675 691
     
    
    676 692
     Here are the consequences
    
    677 693
     
    
    ... ... @@ -682,13 +698,14 @@ Here are the consequences
    682 698
       There are two lexical occurrences of `v`!
    
    683 699
       (NB: `orUDs` adds occ_n_br together, so occ_n_br=1 is impossible, too.)
    
    684 700
     
    
    685
    -* In the tricky (P3) we'll get an `andUDs` of
    
    686
    -    * OneOcc{occ_n_br=0} from the occurrences of `j`)
    
    701
    +* In the tricky (P3), when analysing `case (f v) of ...`, we'll get
    
    702
    +  an `andUDs` of
    
    703
    +    * OneOcc{occ_n_br=0} from the occurrences of `j`
    
    687 704
         * OneOcc{occ_n_br=1} from the (f v)
    
    688 705
       These are `andUDs` together in `addOccInfo`, and hence
    
    689 706
       `v` gets ManyOccs, just as it should.  Clever!
    
    690 707
     
    
    691
    -There are a couple of tricky wrinkles
    
    708
    +There are, of course, some tricky wrinkles
    
    692 709
     
    
    693 710
     (W1) Consider this example which shadows `j`:
    
    694 711
               join j = rhs in
    
    ... ... @@ -718,6 +735,8 @@ There are a couple of tricky wrinkles
    718 735
          * In `postprcess_uds`, we add the chucked-out join points to the
    
    719 736
            returned UsageDetails, with `andUDs`.
    
    720 737
     
    
    738
    +Wrinkles (W1) and (W2) are very similar to Note [Binder swap] (BS3).
    
    739
    +
    
    721 740
     (W3) Consider this example, which shadows `j`, but this time in an argument
    
    722 741
                   join j = rhs
    
    723 742
                   in f (case x of { K j -> ...; ... })
    
    ... ... @@ -732,12 +751,36 @@ There are a couple of tricky wrinkles
    732 751
          NB: this is just about efficiency: it is always safe /not/ to zap the
    
    733 752
          occ_join_points.
    
    734 753
     
    
    735
    -(W4) What if the join point binding has a stable unfolding, or RULES?
    
    736
    -     They are just alternative right-hand sides, and at each call site we
    
    737
    -     will use only one of them. So again, we can use `orUDs` to combine
    
    738
    -     usage info from all these alternatives RHSs.
    
    739
    -
    
    740
    -Wrinkles (W1) and (W2) are very similar to Note [Binder swap] (BS3).
    
    754
    +(W4) Other things being equal, we want keep the OccInfoEnv stored in
    
    755
    +  `occ_join_points` as small as possible, because it is /duplicated/ at
    
    756
    +  /every occurrence/ of the join point.  We really only want to include
    
    757
    +  OccInfo for
    
    758
    +       * Local, non-recursive let-bound Ids
    
    759
    +       * that occur just once in the RHS of the join point
    
    760
    +  particularly including
    
    761
    +       * thunks (that's the original point) and
    
    762
    +       * join points (so that the trick works recursively).
    
    763
    +  We call these the "tracked Ids of j".
    
    764
    +
    
    765
    +  Including lambda binders is pointless, and slows down the occurrence analyser.
    
    766
    +
    
    767
    +  e.g.    \x. let y = x+1 in
    
    768
    +              join j v = ..x..y..(f z z)..
    
    769
    +              in ...
    
    770
    +  In the `occ_join_points` binding for `j`, we want to track `y`, but
    
    771
    +  not `x` (lambda bound) nor `z` (occurs many times).
    
    772
    +
    
    773
    +  To exploit this:
    
    774
    +     * `occ_nested_lets` tracks which Ids are
    
    775
    +              nested (not-top-level), non-recursive lets
    
    776
    +     * `addJoinPoint` only populates j's entry with occ-info for the "tracked Ids"
    
    777
    +       of `j`; that is, that are (a) in occ_nested_lets and (b) have OneOcc.
    
    778
    +     * `combineJoinPointUDs` uses
    
    779
    +          orLocalOcc  for local-let Ids
    
    780
    +          andLocalOcc for non-local-let Ids
    
    781
    +
    
    782
    +  This fancy footwork can matter in extreme cases: it gave a 25% reduction in
    
    783
    +  total compiler allocation in #26425..
    
    741 784
     
    
    742 785
     Note [Finding join points]
    
    743 786
     ~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -759,45 +802,45 @@ rest of 'OccInfo' until it goes on the binder.
    759 802
     
    
    760 803
     Note [Join arity prediction based on joinRhsArity]
    
    761 804
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    762
    -In general, the join arity from tail occurrences of a join point (O) may be
    
    763
    -higher or lower than the manifest join arity of the join body (M). E.g.,
    
    805
    +In general, the join arity from tail occurrences of a join point (OAr) may be
    
    806
    +higher or lower than the manifest join arity of the join body (MAr). E.g.,
    
    764 807
     
    
    765
    -  -- M > O:
    
    766
    -  let f x y = x + y              -- M = 2
    
    767
    -  in if b then f 1 else f 2      -- O = 1
    
    808
    +  -- MAr > Oar:
    
    809
    +  let f x y = x + y              -- MAr = 2
    
    810
    +  in if b then f 1 else f 2      -- OAr = 1
    
    768 811
       ==> { Contify for join arity 1 }
    
    769 812
       join f x = \y -> x + y
    
    770 813
       in if b then jump f 1 else jump f 2
    
    771 814
     
    
    772
    -  -- M < O
    
    773
    -  let f = id                     -- M = 0
    
    774
    -  in if ... then f 12 else f 13  -- O = 1
    
    815
    +  -- MAr < Oar
    
    816
    +  let f = id                     -- MAr = 0
    
    817
    +  in if ... then f 12 else f 13  -- OAr = 1
    
    775 818
       ==> { Contify for join arity 1, eta-expand f }
    
    776 819
       join f x = id x
    
    777 820
       in if b then jump f 12 else jump f 13
    
    778 821
     
    
    779
    -But for *recursive* let, it is crucial that both arities match up, consider
    
    822
    +But for *recursive* let, it is crucial MAr=OAr.  Consider:
    
    780 823
     
    
    781 824
       letrec f x y = if ... then f x else True
    
    782 825
       in f 42
    
    783 826
     
    
    784
    -Here, M=2 but O=1. If we settled for a joinrec arity of 1, the recursive jump
    
    827
    +Here, MAr=2 but OAr=1. If we settled for a joinrec arity of 1, the recursive jump
    
    785 828
     would not happen in a tail context! Contification is invalid here.
    
    786
    -So indeed it is crucial to demand that M=O.
    
    829
    +So indeed it is crucial to demand that MAr=OAr.
    
    787 830
     
    
    788
    -(Side note: Actually, we could be more specific: Let O1 be the join arity of
    
    789
    -occurrences from the letrec RHS and O2 the join arity from the let body. Then
    
    790
    -we need M=O1 and M<=O2 and could simply eta-expand the RHS to match O2 later.
    
    791
    -M=O is the specific case where we don't want to eta-expand. Neither the join
    
    831
    +(Side note: Actually, we could be more specific: Let OAr1 be the join arity of
    
    832
    +occurrences from the letrec RHS and OAr2 the join arity from the let body. Then
    
    833
    +we need MAr=OAr1 and MAr<=OAr2 and could simply eta-expand the RHS to match OAr2 later.
    
    834
    +MAr=OAr is the specific case where we don't want to eta-expand. Neither the join
    
    792 835
     points paper nor GHC does this at the moment.)
    
    793 836
     
    
    794 837
     We can capitalise on this observation and conclude that *if* f could become a
    
    795
    -joinrec (without eta-expansion), it will have join arity M.
    
    796
    -Now, M is just the result of 'joinRhsArity', a rather simple, local analysis.
    
    838
    +joinrec (without eta-expansion), it will have join arity MAr.
    
    839
    +Now, MAr is just the result of 'joinRhsArity', a rather simple, local analysis.
    
    797 840
     It is also the join arity inside the 'TailUsageDetails' returned by
    
    798 841
     'occAnalLamTail', so we can predict join arity without doing any fixed-point
    
    799 842
     iteration or really doing any deep traversal of let body or RHS at all.
    
    800
    -We check for M in the 'adjustTailUsage' call inside 'tagRecBinders'.
    
    843
    +We check for MAr in the 'adjustTailUsage' call inside 'tagRecBinders'.
    
    801 844
     
    
    802 845
     All this is quite apparent if you look at the contification transformation in
    
    803 846
     Fig. 5 of "Compiling without Continuations" (which does not account for
    
    ... ... @@ -807,14 +850,14 @@ eta-expansion at all, mind you). The letrec case looks like this
    807 850
         ... and a bunch of conditions establishing that f only occurs
    
    808 851
             in app heads of join arity (len as + len xs) inside us and es ...
    
    809 852
     
    
    810
    -The syntactic form `/\as.\xs. L[us]` forces M=O iff `f` occurs in `us`. However,
    
    853
    +The syntactic form `/\as.\xs. L[us]` forces MAr=OAr iff `f` occurs in `us`. However,
    
    811 854
     for non-recursive functions, this is the definition of contification from the
    
    812 855
     paper:
    
    813 856
     
    
    814 857
       let f = /\as.\xs.u in L[es]     ... conditions ...
    
    815 858
     
    
    816
    -Note that u could be a lambda itself, as we have seen. No relationship between M
    
    817
    -and O to exploit here.
    
    859
    +Note that u could be a lambda itself, as we have seen. No relationship between MAr
    
    860
    +and OAr to exploit here.
    
    818 861
     
    
    819 862
     Note [Join points and unfoldings/rules]
    
    820 863
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -954,6 +997,22 @@ of both functions, serving as a specification:
    954 997
          Cyclic Recursive case:   'tagRecBinders'
    
    955 998
          Acyclic Recursive case:  'adjustNonRecRhs'
    
    956 999
          Non-recursive case:      'adjustNonRecRhs'
    
    1000
    +
    
    1001
    +Note [Unfoldings and RULES]
    
    1002
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1003
    +For let-bindings we treat (stable) unfoldings and RULES as "alternative right hand
    
    1004
    +sides".  That is, it's as if we had
    
    1005
    +  f = case <hiatus> of
    
    1006
    +         1 -> <the-rhs>
    
    1007
    +         2 -> <the-stable-unfolding>
    
    1008
    +         3 -> <rhs of rule1>
    
    1009
    +         4 -> <rhs of rule2>
    
    1010
    +So we combine all these with `orUDs` (#26567).  But actually it makes
    
    1011
    +very little difference whether we use `andUDs` or `orUDs` because of
    
    1012
    +Note [Occurrences in stable unfoldings and RULES]: occurrences in an unfolding
    
    1013
    +or RULE are treated as ManyOcc anyway.
    
    1014
    +
    
    1015
    +But NB that tail-call info is preserved so that we don't thereby lose join points.
    
    957 1016
     -}
    
    958 1017
     
    
    959 1018
     ------------------------------------------------------------------
    
    ... ... @@ -991,24 +1050,24 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine
    991 1050
       | mb_join@(JoinPoint {}) <- idJoinPointHood bndr
    
    992 1051
       = -- Analyse the RHS and /then/ the body
    
    993 1052
         let -- Analyse the rhs first, generating rhs_uds
    
    994
    -        !(rhs_uds_s, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs
    
    995
    -        rhs_uds = foldl1' orUDs rhs_uds_s   -- NB: orUDs.  See (W4) of
    
    996
    -                                           -- Note [Occurrence analysis for join points]
    
    1053
    +        !(rhs_uds, bndr', rhs') = occAnalNonRecRhs env lvl ire mb_join bndr rhs
    
    997 1054
     
    
    998 1055
             -- Now analyse the body, adding the join point
    
    999 1056
             -- into the environment with addJoinPoint
    
    1000
    -        !(WUD body_uds (occ, body)) = occAnalNonRecBody env bndr' $ \env ->
    
    1057
    +        env_body = addLocalLet env lvl bndr
    
    1058
    +        !(WUD body_uds (occ, body)) = occAnalNonRecBody env_body bndr' $ \env ->
    
    1001 1059
                                           thing_inside (addJoinPoint env bndr' rhs_uds)
    
    1002 1060
         in
    
    1003 1061
         if isDeadOcc occ     -- Drop dead code; see Note [Dead code]
    
    1004 1062
         then WUD body_uds body
    
    1005
    -    else WUD (rhs_uds `orUDs` body_uds)    -- Note `orUDs`
    
    1063
    +    else WUD (combineJoinPointUDs env rhs_uds body_uds)    -- Note `orUDs`
    
    1006 1064
                  (combine [NonRec (fst (tagNonRecBinder lvl occ bndr')) rhs']
    
    1007 1065
                           body)
    
    1008 1066
     
    
    1009 1067
       -- The normal case, including newly-discovered join points
    
    1010 1068
       -- Analyse the body and /then/ the RHS
    
    1011
    -  | WUD body_uds (occ,body) <- occAnalNonRecBody env bndr thing_inside
    
    1069
    +  | let env_body = addLocalLet env lvl bndr
    
    1070
    +  , WUD body_uds (occ,body) <- occAnalNonRecBody env_body bndr thing_inside
    
    1012 1071
       = if isDeadOcc occ   -- Drop dead code; see Note [Dead code]
    
    1013 1072
         then WUD body_uds body
    
    1014 1073
         else let
    
    ... ... @@ -1017,8 +1076,8 @@ occAnalBind !env lvl ire (NonRec bndr rhs) thing_inside combine
    1017 1076
             -- => join arity O of Note [Join arity prediction based on joinRhsArity]
    
    1018 1077
             (tagged_bndr, mb_join) = tagNonRecBinder lvl occ bndr
    
    1019 1078
     
    
    1020
    -        !(rhs_uds_s, final_bndr, rhs') = occAnalNonRecRhs env lvl ire mb_join tagged_bndr rhs
    
    1021
    -    in WUD (foldr andUDs body_uds rhs_uds_s)      -- Note `andUDs`
    
    1079
    +        !(rhs_uds, final_bndr, rhs') = occAnalNonRecRhs env lvl ire mb_join tagged_bndr rhs
    
    1080
    +    in WUD (rhs_uds `andUDs` body_uds)      -- Note `andUDs`
    
    1022 1081
                (combine [NonRec final_bndr rhs'] body)
    
    1023 1082
     
    
    1024 1083
     -----------------
    
    ... ... @@ -1033,15 +1092,21 @@ occAnalNonRecBody env bndr thing_inside
    1033 1092
     
    
    1034 1093
     -----------------
    
    1035 1094
     occAnalNonRecRhs :: OccEnv -> TopLevelFlag -> ImpRuleEdges
    
    1036
    -                -> JoinPointHood -> Id -> CoreExpr
    
    1037
    -                 -> (NonEmpty UsageDetails, Id, CoreExpr)
    
    1095
    +                 -> JoinPointHood -> Id -> CoreExpr
    
    1096
    +                 -> (UsageDetails, Id, CoreExpr)
    
    1038 1097
     occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs
    
    1039 1098
       | null rules, null imp_rule_infos
    
    1040 1099
       =  -- Fast path for common case of no rules. This is only worth
    
    1041 1100
          -- 0.1% perf on average, but it's also only a line or two of code
    
    1042
    -    ( adj_rhs_uds :| adj_unf_uds : [], final_bndr_no_rules, final_rhs )
    
    1101
    +    ( adj_rhs_uds `orUDs` adj_unf_uds
    
    1102
    +    , final_bndr_no_rules, final_rhs )
    
    1103
    +
    
    1043 1104
       | otherwise
    
    1044
    -  = ( adj_rhs_uds :| adj_unf_uds : adj_rule_uds, final_bndr_with_rules, final_rhs )
    
    1105
    +  = ( foldl' orUDs (adj_rhs_uds `orUDs` adj_unf_uds) adj_rule_uds
    
    1106
    +    , final_bndr_with_rules, final_rhs )
    
    1107
    +
    
    1108
    +    -- orUDs: Combine the RHS, (stable) unfolding, and RULES with orUDs
    
    1109
    +    --        See Note [Unfoldings and RULES]
    
    1045 1110
       where
    
    1046 1111
         --------- Right hand side ---------
    
    1047 1112
         -- For join points, set occ_encl to OccVanilla, via setTailCtxt.  If we have
    
    ... ... @@ -1054,7 +1119,7 @@ occAnalNonRecRhs !env lvl imp_rule_edges mb_join bndr rhs
    1054 1119
         rhs_ctxt = mkNonRecRhsCtxt lvl bndr unf
    
    1055 1120
     
    
    1056 1121
         -- See Note [Join arity prediction based on joinRhsArity]
    
    1057
    -    -- Match join arity O from mb_join_arity with manifest join arity M as
    
    1122
    +    -- Match join arity OAr from mb_join_arity with manifest join arity MAr as
    
    1058 1123
         -- returned by of occAnalLamTail. It's totally OK for them to mismatch;
    
    1059 1124
         -- hence adjust the UDs from the RHS
    
    1060 1125
     
    
    ... ... @@ -1764,7 +1829,7 @@ makeNode !env imp_rule_edges bndr_set (bndr, rhs)
    1764 1829
                                    -- here because that is what we are setting!
    
    1765 1830
         WTUD unf_tuds unf' = occAnalUnfolding rhs_env unf
    
    1766 1831
         adj_unf_uds = adjustTailArity (JoinPoint rhs_ja) unf_tuds
    
    1767
    -      -- `rhs_ja` is `joinRhsArity rhs` and is the prediction for source M
    
    1832
    +      -- `rhs_ja` is `joinRhsArity rhs` and is the prediction for source MAr
    
    1768 1833
           -- of Note [Join arity prediction based on joinRhsArity]
    
    1769 1834
     
    
    1770 1835
         --------- IMP-RULES --------
    
    ... ... @@ -1775,7 +1840,7 @@ makeNode !env imp_rule_edges bndr_set (bndr, rhs)
    1775 1840
     
    
    1776 1841
         --------- All rules --------
    
    1777 1842
         -- See Note [Join points and unfoldings/rules]
    
    1778
    -    -- `rhs_ja` is `joinRhsArity rhs'` and is the prediction for source M
    
    1843
    +    -- `rhs_ja` is `joinRhsArity rhs'` and is the prediction for source MAr
    
    1779 1844
         -- of Note [Join arity prediction based on joinRhsArity]
    
    1780 1845
         rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
    
    1781 1846
         rules_w_uds = [ (r,l,adjustTailArity (JoinPoint rhs_ja) rhs_wuds)
    
    ... ... @@ -2177,7 +2242,9 @@ occAnalLamTail :: OccEnv -> CoreExpr -> WithTailUsageDetails CoreExpr
    2177 2242
     -- See Note [Adjusting right-hand sides]
    
    2178 2243
     occAnalLamTail env expr
    
    2179 2244
       = let !(WUD usage expr') = occ_anal_lam_tail env expr
    
    2180
    -    in WTUD (TUD (joinRhsArity expr) usage) expr'
    
    2245
    +    in WTUD (TUD (joinRhsArity expr') usage) expr'
    
    2246
    +       -- If expr looks like (\x. let dead = e in \y. blah), where `dead` is dead
    
    2247
    +       -- then joinRhsArity expr' might exceed joinRhsArity expr
    
    2181 2248
     
    
    2182 2249
     occ_anal_lam_tail :: OccEnv -> CoreExpr -> WithUsageDetails CoreExpr
    
    2183 2250
     -- Does not markInsideLam etc for the outmost batch of lambdas
    
    ... ... @@ -2281,7 +2348,7 @@ occAnalUnfolding !env unf
    2281 2348
                   WTUD (TUD rhs_ja uds) rhs' = occAnalLamTail env rhs
    
    2282 2349
                   unf' = unf { uf_tmpl = rhs' }
    
    2283 2350
                 in WTUD (TUD rhs_ja (markAllMany uds)) unf'
    
    2284
    -              -- markAllMany: see Note [Occurrences in stable unfoldings]
    
    2351
    +              -- markAllMany: see Note [Occurrences in stable unfoldings and RULES]
    
    2285 2352
     
    
    2286 2353
             | otherwise -> WTUD (TUD 0 emptyDetails) unf
    
    2287 2354
                   -- For non-Stable unfoldings we leave them undisturbed, but
    
    ... ... @@ -2319,12 +2386,13 @@ occAnalRule env rule@(Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs })
    2319 2386
                               -- Note [Rules are extra RHSs]
    
    2320 2387
                               -- Note [Rule dependency info]
    
    2321 2388
         rhs_uds' = markAllMany rhs_uds
    
    2389
    +               -- markAllMany: Note [Occurrences in stable unfoldings and RULES]
    
    2322 2390
         rhs_ja = length args -- See Note [Join points and unfoldings/rules]
    
    2323 2391
     
    
    2324 2392
     occAnalRule _ other_rule = (other_rule, emptyDetails, TUD 0 emptyDetails)
    
    2325 2393
     
    
    2326
    -{- Note [Occurrences in stable unfoldings]
    
    2327
    -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    2394
    +{- Note [Occurrences in stable unfoldings and RULES]
    
    2395
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    2328 2396
     Consider
    
    2329 2397
         f p = BIG
    
    2330 2398
         {-# INLINE g #-}
    
    ... ... @@ -2338,7 +2406,7 @@ preinlineUnconditionally here!
    2338 2406
     
    
    2339 2407
     The INLINE pragma says "inline exactly this RHS"; perhaps the
    
    2340 2408
     programmer wants to expose that 'not', say. If we inline f that will make
    
    2341
    -the Stable unfoldign big, and that wasn't what the programmer wanted.
    
    2409
    +the Stable unfolding big, and that wasn't what the programmer wanted.
    
    2342 2410
     
    
    2343 2411
     Another way to think about it: if we inlined g as-is into multiple
    
    2344 2412
     call sites, now there's be multiple calls to f.
    
    ... ... @@ -2347,6 +2415,8 @@ Bottom line: treat all occurrences in a stable unfolding as "Many".
    2347 2415
     We still leave tail call information intact, though, as to not spoil
    
    2348 2416
     potential join points.
    
    2349 2417
     
    
    2418
    +The same goes for RULES.
    
    2419
    +
    
    2350 2420
     Note [Unfoldings and rules]
    
    2351 2421
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    2352 2422
     Generally unfoldings and rules are already occurrence-analysed, so we
    
    ... ... @@ -2598,7 +2668,7 @@ occAnalArgs :: OccEnv -> CoreExpr -> [CoreExpr]
    2598 2668
                 -> WithUsageDetails CoreExpr
    
    2599 2669
     -- The `fun` argument is just an accumulating parameter,
    
    2600 2670
     -- the base for building the application we return
    
    2601
    -occAnalArgs !env fun args !one_shots
    
    2671
    +occAnalArgs env fun args one_shots
    
    2602 2672
       = go emptyDetails fun args one_shots
    
    2603 2673
       where
    
    2604 2674
         env_args = setNonTailCtxt encl env
    
    ... ... @@ -2657,8 +2727,19 @@ Constructors are rather like lambdas in this way.
    2657 2727
     occAnalApp :: OccEnv
    
    2658 2728
                -> (Expr CoreBndr, [Arg CoreBndr], [CoreTickish])
    
    2659 2729
                -> WithUsageDetails (Expr CoreBndr)
    
    2660
    --- Naked variables (not applied) end up here too
    
    2661
    -occAnalApp !env (Var fun, args, ticks)
    
    2730
    +occAnalApp !env (Var fun_id, [], ticks)
    
    2731
    +  = -- Naked variables (not applied) end up here too, and it's worth giving
    
    2732
    +    -- this common case special treatment, because there is so much less to do.
    
    2733
    +    -- This is just a specialised copy of the (Var fun_id) case below
    
    2734
    +    WUD fun_uds (mkTicks ticks fun')
    
    2735
    +  where
    
    2736
    +    !(fun', fun_id')  = lookupBndrSwap env fun_id
    
    2737
    +    !fun_uds = mkOneOcc env fun_id' int_cxt 0
    
    2738
    +    !int_cxt = case occ_encl env of
    
    2739
    +                   OccScrut -> IsInteresting
    
    2740
    +                   _other   -> NotInteresting
    
    2741
    +
    
    2742
    +occAnalApp env (Var fun, args, ticks)
    
    2662 2743
       -- Account for join arity of runRW# continuation
    
    2663 2744
       -- See Note [Simplification of runRW#]
    
    2664 2745
       --
    
    ... ... @@ -2863,7 +2944,11 @@ data OccEnv
    2863 2944
                  -- Invariant: no Id maps to an empty OccInfoEnv
    
    2864 2945
                  -- See Note [Occurrence analysis for join points]
    
    2865 2946
                , occ_join_points :: !JoinPointInfo
    
    2866
    -    }
    
    2947
    +
    
    2948
    +           , occ_nested_lets :: IdSet    -- Non-top-level, non-rec-bound lets
    
    2949
    +                -- I tried making this field strict, but doing so increased
    
    2950
    +                -- compile-time allocation very slightly: 0.1% on average
    
    2951
    +           }
    
    2867 2952
     
    
    2868 2953
     type JoinPointInfo = IdEnv OccInfoEnv
    
    2869 2954
     
    
    ... ... @@ -2914,7 +2999,8 @@ initOccEnv
    2914 2999
     
    
    2915 3000
                , occ_join_points = emptyVarEnv
    
    2916 3001
                , occ_bs_env = emptyVarEnv
    
    2917
    -           , occ_bs_rng = emptyVarSet }
    
    3002
    +           , occ_bs_rng = emptyVarSet
    
    3003
    +           , occ_nested_lets = emptyVarSet }
    
    2918 3004
     
    
    2919 3005
     noBinderSwaps :: OccEnv -> Bool
    
    2920 3006
     noBinderSwaps (OccEnv { occ_bs_env = bs_env }) = isEmptyVarEnv bs_env
    
    ... ... @@ -3154,23 +3240,26 @@ postprocess_uds bndrs bad_joins uds
    3154 3240
           | uniq `elemVarEnvByKey` env = plusVarEnv_C andLocalOcc env join_env
    
    3155 3241
           | otherwise                  = env
    
    3156 3242
     
    
    3243
    +addLocalLet :: OccEnv -> TopLevelFlag -> Id -> OccEnv
    
    3244
    +addLocalLet env@(OccEnv { occ_nested_lets = ids }) top_lvl id
    
    3245
    +  | isTopLevel top_lvl = env
    
    3246
    +  | otherwise          = env { occ_nested_lets = ids `extendVarSet` id }
    
    3247
    +
    
    3157 3248
     addJoinPoint :: OccEnv -> Id -> UsageDetails -> OccEnv
    
    3158
    -addJoinPoint env bndr rhs_uds
    
    3249
    +addJoinPoint env@(OccEnv { occ_join_points = join_points, occ_nested_lets = nested_lets })
    
    3250
    +             join_bndr (UD { ud_env = rhs_occs })
    
    3159 3251
       | isEmptyVarEnv zeroed_form
    
    3160 3252
       = env
    
    3161 3253
       | otherwise
    
    3162
    -  = env { occ_join_points = extendVarEnv (occ_join_points env) bndr zeroed_form }
    
    3254
    +  = env { occ_join_points = extendVarEnv join_points join_bndr zeroed_form }
    
    3163 3255
       where
    
    3164
    -    zeroed_form = mkZeroedForm rhs_uds
    
    3256
    +    zeroed_form = mapMaybeUniqSetToUFM do_one nested_lets
    
    3257
    +     -- See Note [Occurrence analysis for join points] for "zeroed form"
    
    3165 3258
     
    
    3166
    -mkZeroedForm :: UsageDetails -> OccInfoEnv
    
    3167
    --- See Note [Occurrence analysis for join points] for "zeroed form"
    
    3168
    -mkZeroedForm (UD { ud_env = rhs_occs })
    
    3169
    -  = mapMaybeUFM do_one rhs_occs
    
    3170
    -  where
    
    3171
    -    do_one :: LocalOcc -> Maybe LocalOcc
    
    3172
    -    do_one (ManyOccL {})    = Nothing
    
    3173
    -    do_one occ@(OneOccL {}) = Just (occ { lo_n_br = 0 })
    
    3259
    +    do_one :: Var -> Maybe LocalOcc
    
    3260
    +    do_one bndr = case lookupVarEnv rhs_occs bndr of
    
    3261
    +                    Just occ@(OneOccL {}) -> Just (occ { lo_n_br = 0 })
    
    3262
    +                    _                     -> Nothing
    
    3174 3263
     
    
    3175 3264
     --------------------
    
    3176 3265
     transClosureFV :: VarEnv VarSet -> VarEnv VarSet
    
    ... ... @@ -3628,7 +3717,14 @@ data LocalOcc -- See Note [LocalOcc]
    3628 3717
                        -- Combining (AlwaysTailCalled 2) and (AlwaysTailCalled 3)
    
    3629 3718
                        -- gives NoTailCallInfo
    
    3630 3719
                   , lo_int_cxt :: !InterestingCxt }
    
    3720
    +
    
    3631 3721
         | ManyOccL !TailCallInfo
    
    3722
    +       -- Why do we need TailCallInfo on ManyOccL?
    
    3723
    +       -- Answer 1: recursive bindings are entered many times:
    
    3724
    +       --    rec { j x = ...j x'... } in j y
    
    3725
    +       -- See the uses of `andUDs` in `tagRecBinders`
    
    3726
    +       -- Answer 2: occurrences in stable unfoldings are many-ified
    
    3727
    +       --           See Note [Occurrences in stable unfoldings and RULES]
    
    3632 3728
     
    
    3633 3729
     instance Outputable LocalOcc where
    
    3634 3730
       ppr (OneOccL { lo_n_br = n, lo_tail = tci })
    
    ... ... @@ -3651,10 +3747,13 @@ data UsageDetails
    3651 3747
     
    
    3652 3748
     instance Outputable UsageDetails where
    
    3653 3749
       ppr ud@(UD { ud_env = env, ud_z_tail = z_tail })
    
    3654
    -    = text "UD" <+> (braces $ fsep $ punctuate comma $
    
    3655
    -      [ ppr uq <+> text ":->" <+> ppr (lookupOccInfoByUnique ud uq)
    
    3656
    -      | (uq, _) <- nonDetStrictFoldVarEnv_Directly do_one [] env ])
    
    3657
    -      $$ nest 2 (text "ud_z_tail" <+> ppr z_tail)
    
    3750
    +    = text "UD" <> (braces (vcat
    
    3751
    +         [ -- `final` shows the result of a proper lookupOccInfo, returning OccInfo
    
    3752
    +           --         after accounting for `ud_z_tail` etc.
    
    3753
    +           text "final =" <+> (fsep $ punctuate comma $
    
    3754
    +                 [ ppr uq <+> text ":->" <+> ppr (lookupOccInfoByUnique ud uq)
    
    3755
    +                 | (uq, _) <- nonDetStrictFoldVarEnv_Directly do_one [] env ])
    
    3756
    +         , text "ud_z_tail" <+> ppr z_tail ] ))
    
    3658 3757
         where
    
    3659 3758
           do_one :: Unique -> LocalOcc -> [(Unique,LocalOcc)] -> [(Unique,LocalOcc)]
    
    3660 3759
           do_one uniq occ occs = (uniq, occ) : occs
    
    ... ... @@ -3663,7 +3762,7 @@ instance Outputable UsageDetails where
    3663 3762
     -- | TailUsageDetails captures the result of applying 'occAnalLamTail'
    
    3664 3763
     --   to a function `\xyz.body`. The TailUsageDetails pairs together
    
    3665 3764
     --   * the number of lambdas (including type lambdas: a JoinArity)
    
    3666
    ---   * UsageDetails for the `body` of the lambda, unadjusted by `adjustTailUsage`.
    
    3765
    +--   * UsageDetails for the `body` of the lambda, /unadjusted/ by `adjustTailUsage`.
    
    3667 3766
     -- If the binding turns out to be a join point with the indicated join
    
    3668 3767
     -- arity, this unadjusted usage details is just what we need; otherwise we
    
    3669 3768
     -- need to discard tail calls. That's what `adjustTailUsage` does.
    
    ... ... @@ -3681,8 +3780,17 @@ data WithTailUsageDetails a = WTUD !TailUsageDetails !a
    3681 3780
     
    
    3682 3781
     andUDs:: UsageDetails -> UsageDetails -> UsageDetails
    
    3683 3782
     orUDs :: UsageDetails -> UsageDetails -> UsageDetails
    
    3684
    -andUDs = combineUsageDetailsWith andLocalOcc
    
    3685
    -orUDs  = combineUsageDetailsWith orLocalOcc
    
    3783
    +andUDs = combineUsageDetailsWith (\_uniq -> andLocalOcc)
    
    3784
    +orUDs  = combineUsageDetailsWith (\_uniq -> orLocalOcc)
    
    3785
    +
    
    3786
    +combineJoinPointUDs :: OccEnv -> UsageDetails -> UsageDetails -> UsageDetails
    
    3787
    +-- See (W4) in Note [Occurrence analysis for join points]
    
    3788
    +combineJoinPointUDs (OccEnv { occ_nested_lets = nested_lets }) uds1 uds2
    
    3789
    +  = combineUsageDetailsWith combine uds1 uds2
    
    3790
    +  where
    
    3791
    +    combine uniq occ1 occ2
    
    3792
    +      | uniq `elemVarSetByKey` nested_lets = orLocalOcc  occ1 occ2
    
    3793
    +      | otherwise                          = andLocalOcc occ1 occ2
    
    3686 3794
     
    
    3687 3795
     mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails
    
    3688 3796
     mkOneOcc !env id int_cxt arity
    
    ... ... @@ -3699,7 +3807,8 @@ mkOneOcc !env id int_cxt arity
    3699 3807
       = mkSimpleDetails (unitVarEnv id occ)
    
    3700 3808
     
    
    3701 3809
       where
    
    3702
    -    occ = OneOccL { lo_n_br = 1, lo_int_cxt = int_cxt
    
    3810
    +    occ = OneOccL { lo_n_br = 1
    
    3811
    +                  , lo_int_cxt = int_cxt
    
    3703 3812
                       , lo_tail = AlwaysTailCalled arity }
    
    3704 3813
     
    
    3705 3814
     -- Add several occurrences, assumed not to be tail calls
    
    ... ... @@ -3786,7 +3895,7 @@ restrictFreeVars bndrs fvs = restrictUniqSetToUFM bndrs fvs
    3786 3895
     -------------------
    
    3787 3896
     -- Auxiliary functions for UsageDetails implementation
    
    3788 3897
     
    
    3789
    -combineUsageDetailsWith :: (LocalOcc -> LocalOcc -> LocalOcc)
    
    3898
    +combineUsageDetailsWith :: (Unique -> LocalOcc -> LocalOcc -> LocalOcc)
    
    3790 3899
                             -> UsageDetails -> UsageDetails -> UsageDetails
    
    3791 3900
     {-# INLINE combineUsageDetailsWith #-}
    
    3792 3901
     combineUsageDetailsWith plus_occ_info
    
    ... ... @@ -3796,9 +3905,9 @@ combineUsageDetailsWith plus_occ_info
    3796 3905
       | isEmptyVarEnv env2 = uds1
    
    3797 3906
       | otherwise
    
    3798 3907
       -- See Note [Strictness in the occurrence analyser]
    
    3799
    -  -- Using strictPlusVarEnv here speeds up the test T26425 by about 10% by avoiding
    
    3800
    -  -- intermediate thunks.
    
    3801
    -  = UD { ud_env       = strictPlusVarEnv_C plus_occ_info env1 env2
    
    3908
    +  -- Using strictPlusVarEnv here speeds up the test T26425
    
    3909
    +  -- by about 10% by avoiding intermediate thunks.
    
    3910
    +  = UD { ud_env       = strictPlusVarEnv_C_Directly plus_occ_info env1 env2
    
    3802 3911
            , ud_z_many    = strictPlusVarEnv z_many1   z_many2
    
    3803 3912
            , ud_z_in_lam  = plusVarEnv z_in_lam1 z_in_lam2
    
    3804 3913
            , ud_z_tail    = strictPlusVarEnv z_tail1   z_tail2 }
    
    ... ... @@ -3842,8 +3951,6 @@ lookupOccInfoByUnique (UD { ud_env = env
    3842 3951
             | uniq `elemVarEnvByKey` z_tail = NoTailCallInfo
    
    3843 3952
             | otherwise                     = ti
    
    3844 3953
     
    
    3845
    -
    
    3846
    -
    
    3847 3954
     -------------------
    
    3848 3955
     -- See Note [Adjusting right-hand sides]
    
    3849 3956
     
    
    ... ... @@ -3853,21 +3960,22 @@ adjustNonRecRhs :: JoinPointHood
    3853 3960
     -- ^ This function concentrates shared logic between occAnalNonRecBind and the
    
    3854 3961
     -- AcyclicSCC case of occAnalRec.
    
    3855 3962
     -- It returns the adjusted rhs UsageDetails combined with the body usage
    
    3856
    -adjustNonRecRhs mb_join_arity rhs_wuds@(WTUD _ rhs)
    
    3857
    -  = WUD (adjustTailUsage mb_join_arity rhs_wuds) rhs
    
    3858
    -
    
    3963
    +adjustNonRecRhs mb_join_arity (WTUD (TUD rhs_ja uds) rhs)
    
    3964
    +  = WUD (adjustTailUsage exact_join rhs uds) rhs
    
    3965
    +  where
    
    3966
    +    exact_join = mb_join_arity == JoinPoint rhs_ja
    
    3859 3967
     
    
    3860
    -adjustTailUsage :: JoinPointHood
    
    3861
    -                -> WithTailUsageDetails CoreExpr    -- Rhs usage, AFTER occAnalLamTail
    
    3968
    +adjustTailUsage :: Bool        -- True <=> Exactly-matching join point; don't do markNonTail
    
    3969
    +                -> CoreExpr    -- Rhs usage, AFTER occAnalLamTail
    
    3970
    +                -> UsageDetails
    
    3862 3971
                     -> UsageDetails
    
    3863
    -adjustTailUsage mb_join_arity (WTUD (TUD rhs_ja uds) rhs)
    
    3972
    +adjustTailUsage exact_join rhs uds
    
    3864 3973
       = -- c.f. occAnal (Lam {})
    
    3865 3974
         markAllInsideLamIf (not one_shot) $
    
    3866 3975
         markAllNonTailIf (not exact_join) $
    
    3867 3976
         uds
    
    3868 3977
       where
    
    3869 3978
         one_shot   = isOneShotFun rhs
    
    3870
    -    exact_join = mb_join_arity == JoinPoint rhs_ja
    
    3871 3979
     
    
    3872 3980
     adjustTailArity :: JoinPointHood -> TailUsageDetails -> UsageDetails
    
    3873 3981
     adjustTailArity mb_rhs_ja (TUD ja usage)
    
    ... ... @@ -3914,8 +4022,9 @@ tagNonRecBinder lvl occ bndr
    3914 4022
     tagRecBinders :: TopLevelFlag           -- At top level?
    
    3915 4023
                   -> UsageDetails           -- Of body of let ONLY
    
    3916 4024
                   -> [NodeDetails]
    
    3917
    -              -> WithUsageDetails       -- Adjusted details for whole scope,
    
    3918
    -                                        -- with binders removed
    
    4025
    +              -> WithUsageDetails       -- Adjusted details for whole scope
    
    4026
    +                                        -- still including the binders;
    
    4027
    +                                        -- (they are removed by `addInScope`)
    
    3919 4028
                       [IdWithOccInfo]       -- Tagged binders
    
    3920 4029
     -- Substantially more complicated than non-recursive case. Need to adjust RHS
    
    3921 4030
     -- details *before* tagging binders (because the tags depend on the RHSes).
    
    ... ... @@ -3925,32 +4034,21 @@ tagRecBinders lvl body_uds details_s
    3925 4034
     
    
    3926 4035
          -- 1. See Note [Join arity prediction based on joinRhsArity]
    
    3927 4036
          --    Determine possible join-point-hood of whole group, by testing for
    
    3928
    -     --    manifest join arity M.
    
    3929
    -     --    This (re-)asserts that makeNode had made tuds for that same arity M!
    
    4037
    +     --    manifest join arity MAr.
    
    4038
    +     --    This (re-)asserts that makeNode had made tuds for that same arity MAr!
    
    3930 4039
          unadj_uds = foldr (andUDs . test_manifest_arity) body_uds details_s
    
    3931
    -     test_manifest_arity ND{nd_rhs = WTUD tuds rhs}
    
    3932
    -       = adjustTailArity (JoinPoint (joinRhsArity rhs)) tuds
    
    4040
    +     test_manifest_arity ND{nd_rhs = WTUD (TUD rhs_ja uds) rhs}
    
    4041
    +       = assertPpr (rhs_ja == joinRhsArity rhs) (ppr rhs_ja $$ ppr uds $$ ppr rhs) $
    
    4042
    +         uds
    
    3933 4043
     
    
    4044
    +     will_be_joins :: Bool
    
    3934 4045
          will_be_joins = decideRecJoinPointHood lvl unadj_uds bndrs
    
    3935 4046
     
    
    3936
    -     mb_join_arity :: Id -> JoinPointHood
    
    3937
    -     -- mb_join_arity: See Note [Join arity prediction based on joinRhsArity]
    
    3938
    -     -- This is the source O
    
    3939
    -     mb_join_arity bndr
    
    3940
    -         -- Can't use willBeJoinId_maybe here because we haven't tagged
    
    3941
    -         -- the binder yet (the tag depends on these adjustments!)
    
    3942
    -       | will_be_joins
    
    3943
    -       , AlwaysTailCalled arity <- lookupTailCallInfo unadj_uds bndr
    
    3944
    -       = JoinPoint arity
    
    3945
    -       | otherwise
    
    3946
    -       = assert (not will_be_joins) -- Should be AlwaysTailCalled if
    
    3947
    -         NotJoinPoint               -- we are making join points!
    
    3948
    -
    
    3949 4047
          -- 2. Adjust usage details of each RHS, taking into account the
    
    3950 4048
          --    join-point-hood decision
    
    3951
    -     rhs_udss' = [ adjustTailUsage (mb_join_arity bndr) rhs_wuds
    
    4049
    +     rhs_udss' = [ adjustTailUsage will_be_joins rhs rhs_uds
    
    3952 4050
                          -- Matching occAnalLamTail in makeNode
    
    3953
    -                 | ND { nd_bndr = bndr, nd_rhs = rhs_wuds } <- details_s ]
    
    4051
    +                 | ND { nd_rhs = WTUD (TUD _ rhs_uds) rhs } <- details_s ]
    
    3954 4052
     
    
    3955 4053
          -- 3. Compute final usage details from adjusted RHS details
    
    3956 4054
          adj_uds = foldr andUDs body_uds rhs_udss'
    
    ... ... @@ -3969,9 +4067,9 @@ setBinderOcc occ_info bndr
    3969 4067
       | otherwise                  = setIdOccInfo bndr occ_info
    
    3970 4068
     
    
    3971 4069
     -- | Decide whether some bindings should be made into join points or not, based
    
    3972
    --- on its occurrences. This is
    
    4070
    +-- on its occurrences.
    
    3973 4071
     -- Returns `False` if they can't be join points. Note that it's an
    
    4072
    +-- all-or-nothing decision: if multiple binders are given, they are
    
    3974 4073
     -- assumed to be mutually recursive.
    
    3975 4074
     --
    
    3976 4075
     -- It must, however, be a final decision. If we say `True` for 'f',
    

  • compiler/GHC/HsToCore/Binds.hs
    ... ... @@ -1620,7 +1620,7 @@ ds_hs_wrapper hs_wrap
    1620 1620
                                   do { x <- newSysLocalDs (mkScaled (subMultCoRKind w_co) t)
    
    1621 1621
                                      ; go c1 $ \w1 ->
    
    1622 1622
                                        go c2 $ \w2 ->
    
    1623
    -                                   let app f a = mkCoreApp (text "dsHsWrapper") f a
    
    1623
    +                                   let app f a = mkCoreApp f a
    
    1624 1624
                                            arg     = w1 (Var x)
    
    1625 1625
                                        in k (\e -> (Lam x (w2 (app e arg)))) }
    
    1626 1626
     
    

  • compiler/GHC/HsToCore/Expr.hs
    ... ... @@ -877,8 +877,7 @@ dsHsConLike (PatSynCon ps)
    877 877
       | Just (builder_name, _, add_void) <- patSynBuilder ps
    
    878 878
       = do { builder_id <- dsLookupGlobalId builder_name
    
    879 879
            ; return (if add_void
    
    880
    -                 then mkCoreApp (text "dsConLike" <+> ppr ps)
    
    881
    -                                (Var builder_id) unboxedUnitExpr
    
    880
    +                 then mkCoreApp (Var builder_id) unboxedUnitExpr
    
    882 881
                      else Var builder_id) }
    
    883 882
       | otherwise
    
    884 883
       = pprPanic "dsConLike" (ppr ps)
    

  • compiler/GHC/HsToCore/Match.hs
    ... ... @@ -301,7 +301,7 @@ matchView (var :| vars) ty eqns@(eqn1 :| _)
    301 301
              -- compile the view expressions
    
    302 302
             ; viewExpr' <- dsExpr viewExpr
    
    303 303
             ; return (mkViewMatchResult var'
    
    304
    -                    (mkCoreApp (text "matchView") viewExpr' (Var var))
    
    304
    +                    (mkCoreApp viewExpr' (Var var))
    
    305 305
                         match_result) }
    
    306 306
     
    
    307 307
     -- decompose the first pattern and leave the rest alone
    

  • compiler/GHC/Types/Basic.hs
    ... ... @@ -1333,7 +1333,7 @@ zapFragileOcc occ = zapOccTailCallInfo occ
    1333 1333
     
    
    1334 1334
     instance Outputable OccInfo where
    
    1335 1335
       -- only used for debugging; never parsed.  KSW 1999-07
    
    1336
    -  ppr (ManyOccs tails)     = pprShortTailCallInfo tails
    
    1336
    +  ppr (ManyOccs tails)     = text "Many" <> parens (pprShortTailCallInfo tails)
    
    1337 1337
       ppr IAmDead              = text "Dead"
    
    1338 1338
       ppr (IAmALoopBreaker rule_only tails)
    
    1339 1339
             = text "LoopBreaker" <> pp_ro <> pprShortTailCallInfo tails
    

  • compiler/GHC/Types/Unique/FM.hs
    ... ... @@ -53,7 +53,7 @@ module GHC.Types.Unique.FM (
    53 53
             plusUFM,
    
    54 54
             strictPlusUFM,
    
    55 55
             plusUFM_C,
    
    56
    -        strictPlusUFM_C,
    
    56
    +        strictPlusUFM_C, strictPlusUFM_C_Directly,
    
    57 57
             plusUFM_CD,
    
    58 58
             plusUFM_CD2,
    
    59 59
             mergeUFM,
    
    ... ... @@ -281,6 +281,9 @@ plusUFM_C f (UFM x) (UFM y) = UFM (M.unionWith f x y)
    281 281
     strictPlusUFM_C :: (elt -> elt -> elt) -> UniqFM key elt -> UniqFM key elt -> UniqFM key elt
    
    282 282
     strictPlusUFM_C f (UFM x) (UFM y) = UFM (MS.unionWith f x y)
    
    283 283
     
    
    284
    +strictPlusUFM_C_Directly :: (Unique -> elt -> elt -> elt) -> UniqFM key elt -> UniqFM key elt -> UniqFM key elt
    
    285
    +strictPlusUFM_C_Directly f (UFM x) (UFM y) = UFM (MS.unionWithKey (f . mkUniqueGrimily) x y)
    
    286
    +
    
    284 287
     -- | `plusUFM_CD f m1 d1 m2 d2` merges the maps using `f` as the
    
    285 288
     -- combinding function and `d1` resp. `d2` as the default value if
    
    286 289
     -- there is no entry in `m1` reps. `m2`. The domain is the union of
    

  • compiler/GHC/Types/Unique/Set.hs
    ... ... @@ -40,6 +40,7 @@ module GHC.Types.Unique.Set (
    40 40
             lookupUniqSet_Directly,
    
    41 41
             partitionUniqSet,
    
    42 42
             mapUniqSet,
    
    43
    +        mapUniqSetToUFM, mapMaybeUniqSetToUFM,
    
    43 44
             unsafeUFMToUniqSet,
    
    44 45
             nonDetEltsUniqSet,
    
    45 46
             nonDetKeysUniqSet,
    
    ... ... @@ -211,6 +212,14 @@ mapUniqSet f = mkUniqSet . map f . nonDetEltsUniqSet
    211 212
     mapMaybeUniqSet_sameUnique :: (a -> Maybe b) -> UniqSet a -> UniqSet b
    
    212 213
     mapMaybeUniqSet_sameUnique f (UniqSet a) = UniqSet $ mapMaybeUFM_sameUnique f a
    
    213 214
     
    
    215
    +mapUniqSetToUFM :: (a -> b) -> UniqSet a -> UniqFM a b
    
    216
    +-- Same keys, new values
    
    217
    +mapUniqSetToUFM f (UniqSet ufm) = mapUFM f ufm
    
    218
    +
    
    219
    +mapMaybeUniqSetToUFM :: (a -> Maybe b) -> UniqSet a -> UniqFM a b
    
    220
    +-- Same keys, new values
    
    221
    +mapMaybeUniqSetToUFM f (UniqSet ufm) = mapMaybeUFM f ufm
    
    222
    +
    
    214 223
     -- Two 'UniqSet's are considered equal if they contain the same
    
    215 224
     -- uniques.
    
    216 225
     instance Eq (UniqSet a) where
    

  • compiler/GHC/Types/Var/Env.hs
    ... ... @@ -12,7 +12,8 @@ module GHC.Types.Var.Env (
    12 12
             elemVarEnv, disjointVarEnv, anyVarEnv,
    
    13 13
             extendVarEnv, extendVarEnv_C, extendVarEnv_Acc,
    
    14 14
             extendVarEnvList,
    
    15
    -        strictPlusVarEnv, plusVarEnv, plusVarEnv_C, strictPlusVarEnv_C,
    
    15
    +        strictPlusVarEnv, plusVarEnv, plusVarEnv_C,
    
    16
    +        strictPlusVarEnv_C, strictPlusVarEnv_C_Directly,
    
    16 17
             plusVarEnv_CD, plusMaybeVarEnv_C,
    
    17 18
             plusVarEnvList, alterVarEnv,
    
    18 19
             delVarEnvList, delVarEnv,
    
    ... ... @@ -525,6 +526,7 @@ delVarEnv :: VarEnv a -> Var -> VarEnv a
    525 526
     minusVarEnv       :: VarEnv a -> VarEnv b -> VarEnv a
    
    526 527
     plusVarEnv_C      :: (a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
    
    527 528
     strictPlusVarEnv_C :: (a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
    
    529
    +strictPlusVarEnv_C_Directly :: (Unique -> a -> a -> a) -> VarEnv a -> VarEnv a -> VarEnv a
    
    528 530
     plusVarEnv_CD     :: (a -> a -> a) -> VarEnv a -> a -> VarEnv a -> a -> VarEnv a
    
    529 531
     plusMaybeVarEnv_C :: (a -> a -> Maybe a) -> VarEnv a -> VarEnv a -> VarEnv a
    
    530 532
     mapVarEnv         :: (a -> b) -> VarEnv a -> VarEnv b
    
    ... ... @@ -552,6 +554,7 @@ extendVarEnv_Acc = addToUFM_Acc
    552 554
     extendVarEnvList = addListToUFM
    
    553 555
     plusVarEnv_C     = plusUFM_C
    
    554 556
     strictPlusVarEnv_C = strictPlusUFM_C
    
    557
    +strictPlusVarEnv_C_Directly = strictPlusUFM_C_Directly
    
    555 558
     plusVarEnv_CD    = plusUFM_CD
    
    556 559
     plusMaybeVarEnv_C = plusMaybeUFM_C
    
    557 560
     delVarEnvList    = delListFromUFM
    

  • testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
    ... ... @@ -133,6 +133,124 @@ data U_E1 = U_E1 {-# UNPACK #-} !E1
    133 133
                      {-# UNPACK #-} !Int8
    
    134 134
         deriving (Show)
    
    135 135
     
    
    136
    +{- In `data U_E`, the {-# UNPACK #-} !E1 gives rise to a pretty clumsy expression
    
    137
    +   for the wrapper for U_E1. Here is what it looks like when ther are only 16
    
    138
    +   data constructors in E1, and we have just
    
    139
    +       data U_E1 = U_E1 {-# UNPACK #-} !E1
    
    140
    +   Blimey!
    
    141
    +
    
    142
    +Main.$WU_E1
    
    143
    +  = \ (conrep_t1N4 [Occ=Once1!] :: Main.E1) ->
    
    144
    +      case case conrep_t1N4 of {
    
    145
    +             Main.E1_1 ->
    
    146
    +               GHC.Internal.Types.(# _| | | | | | | | | | | | | | | #)
    
    147
    +                 @GHC.Internal.Types.ZeroBitRep
    
    148
    +                 @GHC.Internal.Types.ZeroBitRep
    
    149
    +                 @GHC.Internal.Types.ZeroBitRep
    
    150
    +                 @GHC.Internal.Types.ZeroBitRep
    
    151
    +                 @GHC.Internal.Types.ZeroBitRep
    
    152
    +                 @GHC.Internal.Types.ZeroBitRep
    
    153
    +                 @GHC.Internal.Types.ZeroBitRep
    
    154
    +                 @GHC.Internal.Types.ZeroBitRep
    
    155
    +                 @GHC.Internal.Types.ZeroBitRep
    
    156
    +                 @GHC.Internal.Types.ZeroBitRep
    
    157
    +                 @GHC.Internal.Types.ZeroBitRep
    
    158
    +                 @GHC.Internal.Types.ZeroBitRep
    
    159
    +                 @GHC.Internal.Types.ZeroBitRep
    
    160
    +                 @GHC.Internal.Types.ZeroBitRep
    
    161
    +                 @GHC.Internal.Types.ZeroBitRep
    
    162
    +                 @GHC.Internal.Types.ZeroBitRep
    
    163
    +                 @(# #)
    
    164
    +                 @(# #)
    
    165
    +                 @(# #)
    
    166
    +                 @(# #)
    
    167
    +                 @(# #)
    
    168
    +                 @(# #)
    
    169
    +                 @(# #)
    
    170
    +                 @(# #)
    
    171
    +                 @(# #)
    
    172
    +                 @(# #)
    
    173
    +                 @(# #)
    
    174
    +                 @(# #)
    
    175
    +                 @(# #)
    
    176
    +                 @(# #)
    
    177
    +                 @(# #)
    
    178
    +                 @(# #)
    
    179
    +                 GHC.Internal.Types.(##);
    
    180
    +             Main.E1_2 ->
    
    181
    +               GHC.Internal.Types.(# |_| | | | | | | | | | | | | | #)
    
    182
    +                 @GHC.Internal.Types.ZeroBitRep
    
    183
    +                 @GHC.Internal.Types.ZeroBitRep
    
    184
    +                 @GHC.Internal.Types.ZeroBitRep
    
    185
    +                 @GHC.Internal.Types.ZeroBitRep
    
    186
    +                 @GHC.Internal.Types.ZeroBitRep
    
    187
    +                 @GHC.Internal.Types.ZeroBitRep
    
    188
    +                 @GHC.Internal.Types.ZeroBitRep
    
    189
    +                 @GHC.Internal.Types.ZeroBitRep
    
    190
    +                 @GHC.Internal.Types.ZeroBitRep
    
    191
    +                 @GHC.Internal.Types.ZeroBitRep
    
    192
    +                 @GHC.Internal.Types.ZeroBitRep
    
    193
    +                 @GHC.Internal.Types.ZeroBitRep
    
    194
    +                 @GHC.Internal.Types.ZeroBitRep
    
    195
    +                 @GHC.Internal.Types.ZeroBitRep
    
    196
    +                 @GHC.Internal.Types.ZeroBitRep
    
    197
    +                 @GHC.Internal.Types.ZeroBitRep
    
    198
    +                 @(# #)
    
    199
    +                 @(# #)
    
    200
    +                 @(# #)
    
    201
    +                 @(# #)
    
    202
    +                 @(# #)
    
    203
    +                 @(# #)
    
    204
    +                 @(# #)
    
    205
    +                 @(# #)
    
    206
    +                 @(# #)
    
    207
    +                 @(# #)
    
    208
    +                 @(# #)
    
    209
    +                 @(# #)
    
    210
    +                 @(# #)
    
    211
    +                 @(# #)
    
    212
    +                 @(# #)
    
    213
    +                 @(# #)
    
    214
    +                 GHC.Internal.Types.(##);
    
    215
    +             Main.E1_3 ->
    
    216
    +               GHC.Internal.Types.(# | |_| | | | | | | | | | | | | #)
    
    217
    +                 @GHC.Internal.Types.ZeroBitRep
    
    218
    +                 @GHC.Internal.Types.ZeroBitRep
    
    219
    +                 @GHC.Internal.Types.ZeroBitRep
    
    220
    +                 @GHC.Internal.Types.ZeroBitRep
    
    221
    +                 @GHC.Internal.Types.ZeroBitRep
    
    222
    +                 @GHC.Internal.Types.ZeroBitRep
    
    223
    +                 @GHC.Internal.Types.ZeroBitRep
    
    224
    +                 @GHC.Internal.Types.ZeroBitRep
    
    225
    +                 @GHC.Internal.Types.ZeroBitRep
    
    226
    +                 @GHC.Internal.Types.ZeroBitRep
    
    227
    +                 @GHC.Internal.Types.ZeroBitRep
    
    228
    +                 @GHC.Internal.Types.ZeroBitRep
    
    229
    +                 @GHC.Internal.Types.ZeroBitRep
    
    230
    +                 @GHC.Internal.Types.ZeroBitRep
    
    231
    +                 @GHC.Internal.Types.ZeroBitRep
    
    232
    +                 @GHC.Internal.Types.ZeroBitRep
    
    233
    +                 @(# #)
    
    234
    +                 @(# #)
    
    235
    +                 @(# #)
    
    236
    +                 @(# #)
    
    237
    +                 @(# #)
    
    238
    +                 @(# #)
    
    239
    +                 @(# #)
    
    240
    +                 @(# #)
    
    241
    +                 @(# #)
    
    242
    +                 @(# #)
    
    243
    +                 @(# #)
    
    244
    +                 @(# #)
    
    245
    +                 @(# #)
    
    246
    +                 @(# #)
    
    247
    +                 @(# #)
    
    248
    +                 @(# #)
    
    249
    +                 GHC.Internal.Types.(##);
    
    250
    +
    
    251
    +       ... etc ....
    
    252
    +-}
    
    253
    +
    
    136 254
     data U_E2 = U_E2 {-# UNPACK #-} !E2
    
    137 255
                      {-# UNPACK #-} !Int8
    
    138 256
                      {-# UNPACK #-} !Int8