Simon Peyton Jones pushed to branch wip/T26722 at Glasgow Haskell Compiler / GHC

Commits:

25 changed files:

Changes:

  • .gitlab/generate-ci/gen_ci.hs
    ... ... @@ -1250,7 +1250,7 @@ alpine_x86 =
    1250 1250
       , fullyStaticBrokenTests (disableValidate (allowFailureGroup (standardBuildsWithConfig Amd64 (Linux Alpine312) staticNativeInt)))
    
    1251 1251
         -- Dynamically linked build, suitable for building your own static executables on alpine
    
    1252 1252
       , disableValidate (standardBuildsWithConfig Amd64 (Linux Alpine323) (splitSectionsBroken vanilla))
    
    1253
    -  , allowFailureGroup (standardBuildsWithConfig I386 (Linux Alpine323) (splitSectionsBroken vanilla))
    
    1253
    +  , standardBuildsWithConfig I386 (Linux Alpine323) (splitSectionsBroken vanilla)
    
    1254 1254
       ]
    
    1255 1255
       where
    
    1256 1256
         -- ghcilink002 broken due to #17869
    

  • .gitlab/jobs.yaml
    ... ... @@ -484,7 +484,7 @@
    484 484
           ".gitlab/ci.sh clean",
    
    485 485
           "cat ci_timings.txt"
    
    486 486
         ],
    
    487
    -    "allow_failure": true,
    
    487
    +    "allow_failure": false,
    
    488 488
         "artifacts": {
    
    489 489
           "expire_in": "2 weeks",
    
    490 490
           "paths": [
    
    ... ... @@ -1155,7 +1155,7 @@
    1155 1155
           ".gitlab/ci.sh clean",
    
    1156 1156
           "cat ci_timings.txt"
    
    1157 1157
         ],
    
    1158
    -    "allow_failure": true,
    
    1158
    +    "allow_failure": false,
    
    1159 1159
         "artifacts": {
    
    1160 1160
           "expire_in": "8 weeks",
    
    1161 1161
           "paths": [
    
    ... ... @@ -4034,7 +4034,7 @@
    4034 4034
           ".gitlab/ci.sh clean",
    
    4035 4035
           "cat ci_timings.txt"
    
    4036 4036
         ],
    
    4037
    -    "allow_failure": true,
    
    4037
    +    "allow_failure": false,
    
    4038 4038
         "artifacts": {
    
    4039 4039
           "expire_in": "1 year",
    
    4040 4040
           "paths": [
    

  • compiler/GHC.hs
    ... ... @@ -719,7 +719,7 @@ setTopSessionDynFlags dflags = do
    719 719
                           { interpCreateProcess = createIservProcessHook (hsc_hooks hsc_env)
    
    720 720
                           }
    
    721 721
     
    
    722
    -  interp <- liftIO $ initInterpreter tmpfs logger platform finder_cache unit_env interp_opts
    
    722
    +  interp <- liftIO $ initInterpreter dflags tmpfs logger platform finder_cache unit_env interp_opts
    
    723 723
     
    
    724 724
       modifySession $ \h -> hscSetFlags dflags
    
    725 725
                             h{ hsc_IC = (hsc_IC h){ ic_dflags = dflags }
    

  • compiler/GHC/CmmToLlvm/CodeGen.hs
    ... ... @@ -248,6 +248,14 @@ Since x86 PDep/PExt instructions only exist for 32/64 bit widths
    248 248
     we use the 32bit variant to compute the 8/16bit primops.
    
    249 249
     To do so we extend/truncate the argument/result around the
    
    250 250
     call.
    
    251
    +
    
    252
    +Note that the 64-bit intrinsics (`llvm.x86.bmi.pdep.64` and
    
    253
    +`llvm.x86.bmi.pext.64`) are only legal on 64-bit x86 targets, not on
    
    254
    +i386. Therefore on i386 we must fall back to the runtime helper
    
    255
    +(`hs_pdep64`/`hs_pext64`) for the 64-bit primops.
    
    256
    +
    
    257
    +See https://github.com/llvm/llvm-project/issues/172857 for upstream
    
    258
    +discussion about portable pdep/pext intrinsics.
    
    251 259
     -}
    
    252 260
     genCall (PrimTarget op@(MO_Pdep w)) [dst] args = do
    
    253 261
         cfg <- getConfig
    
    ... ... @@ -970,36 +978,34 @@ cmmPrimOpFunctions mop = do
    970 978
               W8   -> fsLit "llvm.x86.bmi.pdep.32"
    
    971 979
               W16  -> fsLit "llvm.x86.bmi.pdep.32"
    
    972 980
               W32  -> fsLit "llvm.x86.bmi.pdep.32"
    
    973
    -          W64  -> fsLit "llvm.x86.bmi.pdep.64"
    
    974
    -          W128 -> fsLit "llvm.x86.bmi.pdep.128"
    
    975
    -          W256 -> fsLit "llvm.x86.bmi.pdep.256"
    
    976
    -          W512 -> fsLit "llvm.x86.bmi.pdep.512"
    
    981
    +          W64
    
    982
    +            | is32bit   -> fsLit "hs_pdep64"
    
    983
    +            | otherwise -> fsLit "llvm.x86.bmi.pdep.64"
    
    984
    +          -- LLVM only provides x86 PDep/PExt intrinsics for 32/64 bits
    
    985
    +          _ -> unsupported
    
    977 986
           | otherwise -> case w of
    
    978 987
               W8   -> fsLit "hs_pdep8"
    
    979 988
               W16  -> fsLit "hs_pdep16"
    
    980 989
               W32  -> fsLit "hs_pdep32"
    
    981 990
               W64  -> fsLit "hs_pdep64"
    
    982
    -          W128 -> fsLit "hs_pdep128"
    
    983
    -          W256 -> fsLit "hs_pdep256"
    
    984
    -          W512 -> fsLit "hs_pdep512"
    
    991
    +          _ -> unsupported
    
    985 992
         MO_Pext w
    
    986 993
           | isBmi2Enabled -> case w of
    
    987 994
               -- See Note [LLVM PDep/PExt intrinsics]
    
    988 995
               W8   -> fsLit "llvm.x86.bmi.pext.32"
    
    989 996
               W16  -> fsLit "llvm.x86.bmi.pext.32"
    
    990 997
               W32  -> fsLit "llvm.x86.bmi.pext.32"
    
    991
    -          W64  -> fsLit "llvm.x86.bmi.pext.64"
    
    992
    -          W128 -> fsLit "llvm.x86.bmi.pext.128"
    
    993
    -          W256 -> fsLit "llvm.x86.bmi.pext.256"
    
    994
    -          W512 -> fsLit "llvm.x86.bmi.pext.512"
    
    998
    +          W64
    
    999
    +            | is32bit   -> fsLit "hs_pext64"
    
    1000
    +            | otherwise -> fsLit "llvm.x86.bmi.pext.64"
    
    1001
    +          -- LLVM only provides x86 PDep/PExt intrinsics for 32/64 bits
    
    1002
    +          _ -> unsupported
    
    995 1003
           | otherwise -> case w of
    
    996 1004
               W8   -> fsLit "hs_pext8"
    
    997 1005
               W16  -> fsLit "hs_pext16"
    
    998 1006
               W32  -> fsLit "hs_pext32"
    
    999 1007
               W64  -> fsLit "hs_pext64"
    
    1000
    -          W128 -> fsLit "hs_pext128"
    
    1001
    -          W256 -> fsLit "hs_pext256"
    
    1002
    -          W512 -> fsLit "hs_pext512"
    
    1008
    +          _ -> unsupported
    
    1003 1009
     
    
    1004 1010
         MO_AddIntC w    -> case w of
    
    1005 1011
           W8   -> fsLit "llvm.sadd.with.overflow.i8"
    

  • compiler/GHC/Core/Opt/SpecConstr.hs
    ... ... @@ -2062,7 +2062,7 @@ mkSeqs seqees res_ty rhs =
    2062 2062
           addEval :: Var -> CoreExpr -> CoreExpr
    
    2063 2063
           addEval arg_id rhs
    
    2064 2064
             -- Argument representing strict field and it's worth passing via cbv
    
    2065
    -        | shouldStrictifyIdForCbv arg_id
    
    2065
    +        | wantCbvForId arg_id
    
    2066 2066
             = Case (Var arg_id)
    
    2067 2067
                    (localiseId arg_id)  -- See (SCF1) in Note [SpecConstr and strict fields]
    
    2068 2068
                    res_ty
    

  • compiler/GHC/Core/Tidy.hs
    ... ... @@ -38,7 +38,7 @@ import GHC.Utils.Outputable
    38 38
     import GHC.Types.RepType (typePrimRep)
    
    39 39
     import GHC.Utils.Panic
    
    40 40
     import GHC.Types.Basic (isMarkedCbv, CbvMark (..))
    
    41
    -import GHC.Core.Utils (shouldUseCbvForId)
    
    41
    +import GHC.Core.Utils ( wantCbvForId )
    
    42 42
     
    
    43 43
     {-
    
    44 44
     ************************************************************************
    
    ... ... @@ -197,7 +197,7 @@ computeCbvInfo fun_id rhs
    197 197
             isSimplePrimRep _   = False
    
    198 198
     
    
    199 199
         mkMark arg
    
    200
    -      | not $ shouldUseCbvForId arg = NotMarkedCbv
    
    200
    +      | not $ wantCbvForId arg = NotMarkedCbv
    
    201 201
           -- We can only safely use cbv for strict arguments
    
    202 202
           | (isStrUsedDmd (idDemandInfo arg))
    
    203 203
           , not (isDeadEndId fun_id) = MarkedCbv
    

  • compiler/GHC/Core/Utils.hs
    ... ... @@ -58,7 +58,7 @@ module GHC.Core.Utils (
    58 58
             isJoinBind,
    
    59 59
     
    
    60 60
             -- * Tag inference
    
    61
    -        mkStrictFieldSeqs, shouldStrictifyIdForCbv, shouldUseCbvForId,
    
    61
    +        mkStrictFieldSeqs, wantCbvForId,
    
    62 62
     
    
    63 63
             -- * unsafeEqualityProof
    
    64 64
             isUnsafeEqualityCase,
    
    ... ... @@ -2902,18 +2902,24 @@ Here comes the tricky part: If we make $wloop strict in both x/y and we get:
    2902 2902
                 };
    
    2903 2903
         end Rec }
    
    2904 2904
     
    
    2905
    -Here both x and y are known to be tagged in the function body since we pass strict worker args using unlifted cbv.
    
    2906
    -This means the seqs on x and y both become no-ops and compared to the first version the seq on `y` disappears at runtime.
    
    2907
    -
    
    2908
    -The downside is that the caller of $wfoo potentially has to evaluate `y` once if we can't prove it isn't already evaluated.
    
    2909
    -But y coming out of a strict field is in WHNF so safe to evaluated. And most of the time it will be properly tagged+evaluated
    
    2910
    -already at the call site because of the EPT Invariant! See Note [EPT enforcement] for more in this.
    
    2911
    -This makes GHC itself around 1% faster despite doing slightly more work! So this is generally quite good.
    
    2912
    -
    
    2913
    -We only apply this when we think there is a benefit in doing so however. There are a number of cases in which
    
    2914
    -it would be useless to insert an extra seq. ShouldStrictifyIdForCbv tries to identify these to avoid churn in the
    
    2905
    +Here both x and y are known to be tagged in the function body since we pass
    
    2906
    +strict worker args using unlifted cbv.  This means the seqs on x and y both
    
    2907
    +become no-ops (via (EPT-codegen) in Not [EPT enforcement]) and, compared to the
    
    2908
    +first version, the seq on `y` disappears at runtime.
    
    2909
    +
    
    2910
    +The downside is that the caller of $wfoo potentially has to evaluate `y` once if
    
    2911
    +we can't prove it isn't already evaluated.  But y coming out of a strict field
    
    2912
    +is in WHNF so safe to evaluated. And most of the time it will be properly
    
    2913
    +tagged+evaluated already at the call site because of the EPT Invariant! See Note
    
    2914
    +[EPT enforcement] for more in this.  This makes GHC itself around 1% faster
    
    2915
    +despite doing slightly more work! So this is generally quite good.
    
    2916
    +
    
    2917
    +We only apply this when we think there is a benefit in doing so however. There
    
    2918
    +are a number of cases in which it would be useless to insert an extra
    
    2919
    +seq. `wantCbvForId` tries to identify these to avoid churn in the
    
    2915 2920
     simplifier. See Note [Which Ids should be strictified] for details on this.
    
    2916 2921
     -}
    
    2922
    +
    
    2917 2923
     mkStrictFieldSeqs :: [(Id,StrictnessMark)] -> CoreExpr -> (CoreExpr)
    
    2918 2924
     mkStrictFieldSeqs args rhs =
    
    2919 2925
       foldr addEval rhs args
    
    ... ... @@ -2923,7 +2929,7 @@ mkStrictFieldSeqs args rhs =
    2923 2929
           addEval (arg_id,arg_cbv) (rhs)
    
    2924 2930
             -- Argument representing strict field.
    
    2925 2931
             | isMarkedStrict arg_cbv
    
    2926
    -        , shouldStrictifyIdForCbv arg_id
    
    2932
    +        , wantCbvForId arg_id
    
    2927 2933
             -- Make sure to remove unfoldings here to avoid the simplifier dropping those for OtherCon[] unfoldings.
    
    2928 2934
             = Case (Var $! zapIdUnfolding arg_id) arg_id case_ty ([Alt DEFAULT [] rhs])
    
    2929 2935
             -- Normal argument
    
    ... ... @@ -2943,87 +2949,99 @@ There are multiple reasons why we might not want to insert a seq in the rhs to
    2943 2949
     strictify a functions argument:
    
    2944 2950
     
    
    2945 2951
     1) The argument doesn't exist at runtime.
    
    2946
    -
    
    2947
    -For zero width types (like Types) there is no benefit as we don't operate on them
    
    2948
    -at runtime at all. This includes things like void#, coercions and state tokens.
    
    2952
    +   For zero width types (like Types) there is no benefit as we don't operate on them
    
    2953
    +   at runtime at all. This includes things like void#, coercions and state tokens.
    
    2949 2954
     
    
    2950 2955
     2) The argument is a unlifted type.
    
    2951
    -
    
    2952
    -If the argument is a unlifted type the calling convention already is explicitly
    
    2953
    -cbv. This means inserting a seq on this argument wouldn't do anything as the seq
    
    2954
    -would be a no-op *and* it wouldn't affect the calling convention.
    
    2956
    +   If the argument is a unlifted type the calling convention already is explicitly
    
    2957
    +   cbv. This means inserting a seq on this argument wouldn't do anything as the seq
    
    2958
    +   would be a no-op *and* it wouldn't affect the calling convention.
    
    2955 2959
     
    
    2956 2960
     3) The argument is absent.
    
    2961
    +   If the argument is absent in the body there is no advantage to it being passed as
    
    2962
    +   cbv to the function. The function won't ever look at it so we don't save any work.
    
    2957 2963
     
    
    2958
    -If the argument is absent in the body there is no advantage to it being passed as
    
    2959
    -cbv to the function. The function won't ever look at it so we don't safe any work.
    
    2960
    -
    
    2961
    -This mostly happens for join point. For example we might have:
    
    2962
    -
    
    2963
    -    data T = MkT ![Int] [Char]
    
    2964
    -    f t = case t of MkT xs{strict} ys-> snd (xs,ys)
    
    2965
    -
    
    2966
    -and abstract the case alternative to:
    
    2964
    +   This mostly happens for join points. For example we might have:
    
    2967 2965
     
    
    2968
    -    f t = join j1 = \xs ys -> snd (xs,ys)
    
    2969
    -          in case t of MkT xs{strict} ys-> j1 xs xy
    
    2966
    +       data T = MkT ![Int] [Char]
    
    2967
    +       f t = case t of MkT xs{strict} ys-> snd (xs,ys)
    
    2970 2968
     
    
    2971
    -While we "use" xs inside `j1` it's not used inside the function `snd` we pass it to.
    
    2972
    -In short a absent demand means neither our RHS, nor any function we pass the argument
    
    2973
    -to will inspect it. So there is no work to be saved by forcing `xs` early.
    
    2969
    +   and abstract the case alternative to:
    
    2974 2970
     
    
    2975
    -NB: There is an edge case where if we rebox we *can* end up seqing an absent value.
    
    2976
    -Note [Absent fillers] has an example of this. However this is so rare it's not worth
    
    2977
    -caring about here.
    
    2971
    +       f t = join j1 = \xs ys -> snd (xs,ys)
    
    2972
    +             in case t of MkT xs{strict} ys-> j1 xs xy
    
    2978 2973
     
    
    2979
    -4) The argument is already strict.
    
    2974
    +   While we "use" xs inside `j1` it's not used inside the function `snd` we pass it to.
    
    2975
    +   In short a absent demand means neither our RHS, nor any function we pass the argument
    
    2976
    +   to will inspect it. So there is no work to be saved by forcing `xs` early.
    
    2980 2977
     
    
    2981
    -Consider this code:
    
    2982
    -
    
    2983
    -    data T = MkT ![Int]
    
    2984
    -    f t = case t of MkT xs{strict} -> reverse xs
    
    2985
    -
    
    2986
    -The `xs{strict}` indicates that `xs` is used strictly by the `reverse xs`.
    
    2987
    -If we do a w/w split, and add the extra eval on `xs`, we'll get
    
    2988
    -
    
    2989
    -    $wf xs =
    
    2990
    -        case xs of xs1 ->
    
    2991
    -            let t = MkT xs1 in
    
    2992
    -            case t of MkT xs2 -> reverse xs2
    
    2993
    -
    
    2994
    -That's not wrong; but the w/w body will simplify to
    
    2995
    -
    
    2996
    -    $wf xs = case xs of xs1 -> reverse xs1
    
    2997
    -
    
    2998
    -and now we'll drop the `case xs` because `xs1` is used strictly in its scope.
    
    2999
    -Adding that eval was a waste of time.  So don't add it for strictly-demanded Ids.
    
    2978
    +   NB: There is an edge case where if we rebox we *can* end up seqing an absent value.
    
    2979
    +   Note [Absent fillers] has an example of this. However this is so rare it's not worth
    
    2980
    +   caring about here.
    
    3000 2981
     
    
    3001 2982
     5) Functions
    
    3002
    -
    
    3003
    -Functions are tricky (see Note [TagInfo of functions] in EnforceEpt).
    
    3004
    -But the gist of it even if we make a higher order function argument strict
    
    3005
    -we can't avoid the tag check when it's used later in the body.
    
    3006
    -So there is no benefit.
    
    2983
    +   Functions are tricky (see Note [TagInfo of functions] in EnforceEpt).
    
    2984
    +   But the gist of it even if we make a higher order function argument strict
    
    2985
    +   we can't avoid the tag check when it's used later in the body.
    
    2986
    +   So there is no benefit.
    
    2987
    +
    
    2988
    +Wrinkles:
    
    2989
    +
    
    2990
    +(WIS1) You might have thought that we can omit the eval if the argument is used
    
    2991
    +   strictly demanded in the body.  But you'd be wrong. Consider this code:
    
    2992
    +          data T = MkT ![Int]
    
    2993
    +          f t = case t of MkT xs{Dmd=STR} -> reverse xs
    
    2994
    +
    
    2995
    +   The `xs{Dmd=STR}` indicates that `xs` is used strictly by the `reverse xs`.
    
    2996
    +   If we do a w/w split, and add the extra eval on `xs`, we'll get
    
    2997
    +       $wf xs = case xs of xs1 ->
    
    2998
    +                  let t = MkT xs1 in
    
    2999
    +                  case t of MkT xs2 -> reverse xs2
    
    3000
    +
    
    3001
    +   That's not wrong; but you might wonder if the eval on `xs` is needed
    
    3002
    +   when it is certainly evaluated by the `reverse`.  But yes, it is (#26722):
    
    3003
    +       g s True  t = f s t t
    
    3004
    +       g s False t = g s True t
    
    3005
    +
    
    3006
    +       f True  (MkT xs) t = f False (MkT xs) t
    
    3007
    +       f False (MkT xs) _ = xs
    
    3008
    +
    
    3009
    +   After worker/wrapper we get:
    
    3010
    +       g s b t = case t of MkT ww -> $wg s b ww
    
    3011
    +       $wg s ds ww = case ds of {
    
    3012
    +                      False -> case ww of wg { __DEFAULT -> Bar.$wg s True wg }
    
    3013
    +                      True  -> let { t1 = MkT ww } in f s t1 t1 }
    
    3014
    +
    
    3015
    +   We must make `f` inline inside `$wg`, because `f` too is ww'd, and we
    
    3016
    +   don't want to rebox `t1` before passing it to `f`.  BUT while `t1`
    
    3017
    +   looks like a HNF, `exprIsHNF` will say False because `MkT` is strict
    
    3018
    +   and `ww` isn't evaluated.  So `f` doesn't inline and we get lots of
    
    3019
    +   reboxing.
    
    3020
    +
    
    3021
    +   The Right Thing to to is to add the eval for the data con argument:
    
    3022
    +       $wg s ds ww = case ww of ww' { DEFAULT ->
    
    3023
    +                     case ds of {
    
    3024
    +                      False -> case ww of wg { __DEFAULT -> Bar.$wg s True wg }
    
    3025
    +                      True  -> let { t1 = MkT ww' } in f s t1 t1 } }
    
    3026
    +
    
    3027
    +   Now `t1` will be a HNF, and `f` will inline, and we get
    
    3028
    +       $wg s ds ww = case ww of ww' { DEFAULT ->
    
    3029
    +                     case ds of {
    
    3030
    +                      False -> Bar.$wg s True ww'
    
    3031
    +                      True  -> $wf s ww'
    
    3032
    +
    
    3033
    +  (Ultimately `$wg` will be a CBV function, so that `case ww` will be a
    
    3034
    +  no-op: see (EPT-codegen) in Note [EPT enforcement] in GHC.Stg.EnforceEpt.)
    
    3007 3035
     
    
    3008 3036
     -}
    
    3009
    --- | Do we expect there to be any benefit if we make this var strict
    
    3010
    --- in order for it to get treated as as cbv argument?
    
    3011
    --- See Note [Which Ids should be strictified]
    
    3012
    --- See Note [CBV Function Ids] for more background.
    
    3013
    -shouldStrictifyIdForCbv :: Var -> Bool
    
    3014
    -shouldStrictifyIdForCbv = wantCbvForId False
    
    3015
    -
    
    3016
    --- Like shouldStrictifyIdForCbv but also wants to use cbv for strict args.
    
    3017
    -shouldUseCbvForId :: Var -> Bool
    
    3018
    -shouldUseCbvForId = wantCbvForId True
    
    3019 3037
     
    
    3020 3038
     -- When we strictify we want to skip strict args otherwise the logic is the same
    
    3039
    +-- as for wantCbvForId so we common up the logic here.
    
    3021 3040
     -- Basically returns true if it would be beneficial for runtime to pass this argument
    
    3022 3041
     -- as CBV independent of weither or not it's correct. E.g. it might return true for lazy args
    
    3023 3042
     -- we are not allowed to force.
    
    3024
    --- we are not allowed to force.
    
    3025
    -wantCbvForId :: Bool -> Var -> Bool
    
    3043
    +wantCbvForId :: Var -> Bool
    
    3044
    +wantCbvForId v
    
    3026 3045
       -- Must be a runtime var.
    
    3027 3046
       -- See Note [Which Ids should be strictified] point 1)
    
    3028 3047
       | isId v
    
    ... ... @@ -3037,9 +3055,6 @@ wantCbvForId cbv_for_strict v
    3037 3055
       , not $ isFunTy ty
    
    3038 3056
       -- If the var is strict already a seq is redundant.
    
    3039 3057
       -- See Note [Which Ids should be strictified] point 4)
    
    3040
    -  , not (isStrictDmd dmd) || cbv_for_strict
    
    3041
    -  -- If the var is absent a seq is almost always useless.
    
    3042
    -  -- See Note [Which Ids should be strictified] point 3)
    
    3043 3058
       , not (isAbsDmd dmd)
    
    3044 3059
       = True
    
    3045 3060
       | otherwise
    

  • compiler/GHC/Driver/Session.hs
    ... ... @@ -197,6 +197,8 @@ module GHC.Driver.Session (
    197 197
             -- * Compiler configuration suitable for display to the user
    
    198 198
             compilerInfo,
    
    199 199
     
    
    200
    +        targetHasRTSWays,
    
    201
    +
    
    200 202
             wordAlignment,
    
    201 203
     
    
    202 204
             setUnsafeGlobalDynFlags,
    
    ... ... @@ -3635,6 +3637,15 @@ compilerInfo dflags
    3635 3637
         queryCmdMaybe p f = expandDirectories (query (maybe "" (prgPath . p) . f))
    
    3636 3638
         queryFlagsMaybe p f = query (maybe "" (unwords . map escapeArg . prgFlags . p) . f)
    
    3637 3639
     
    
    3640
    +-- | Query if the target RTS has the given 'Ways'. It's computed from
    
    3641
    +-- the @"RTS ways"@ field in the settings file.
    
    3642
    +targetHasRTSWays :: DynFlags -> Ways -> Bool
    
    3643
    +targetHasRTSWays dflags ways
    
    3644
    +  | Just ws <- lookup "RTS ways" $ compilerInfo dflags =
    
    3645
    +      waysTag ways
    
    3646
    +        `elem` words ws
    
    3647
    +  | otherwise = panic "RTS ways not found in settings"
    
    3648
    +
    
    3638 3649
     -- Note [Special unit-ids]
    
    3639 3650
     -- ~~~~~~~~~~~~~~~~~~~~~~~
    
    3640 3651
     -- Certain units are special to the compiler:
    

  • compiler/GHC/Runtime/Interpreter/C.hs
    ... ... @@ -8,7 +8,9 @@ where
    8 8
     
    
    9 9
     import GHC.Prelude
    
    10 10
     import GHC.Platform
    
    11
    +import GHC.Platform.Ways
    
    11 12
     import GHC.Data.FastString
    
    13
    +import GHC.Driver.Session
    
    12 14
     import GHC.Utils.Logger
    
    13 15
     import GHC.Utils.TmpFs
    
    14 16
     import GHC.Unit.Types
    
    ... ... @@ -18,11 +20,10 @@ import GHC.Unit.State
    18 20
     import GHC.Utils.Panic.Plain
    
    19 21
     import GHC.Linker.Executable
    
    20 22
     import GHC.Linker.Config
    
    21
    -import GHC.Utils.CliOption
    
    22 23
     
    
    23 24
     -- | Generate iserv program for the target
    
    24
    -generateIservC :: Logger -> TmpFs -> ExecutableLinkOpts -> UnitEnv -> IO FilePath
    
    25
    -generateIservC logger tmpfs opts unit_env = do
    
    25
    +generateIservC :: DynFlags -> Logger -> TmpFs -> ExecutableLinkOpts -> UnitEnv -> IO FilePath
    
    26
    +generateIservC dflags logger tmpfs opts unit_env = do
    
    26 27
       -- get the unit-id of the ghci package. We need this to load the
    
    27 28
       -- interpreter code.
    
    28 29
       let unit_state = ue_homeUnitState unit_env
    
    ... ... @@ -60,6 +61,12 @@ generateIservC logger tmpfs opts unit_env = do
    60 61
               -- must retain CAFs for running interpreted code.
    
    61 62
             , leKeepCafs = True
    
    62 63
     
    
    64
    +          -- link with -threaded if target has threaded RTS
    
    65
    +        , leWays =
    
    66
    +            let ways = leWays opts
    
    67
    +                ways' = addWay WayThreaded ways
    
    68
    +            in if targetHasRTSWays dflags ways' then ways' else ways
    
    69
    +
    
    63 70
               -- enable all rts options
    
    64 71
             , leRtsOptsEnabled = RtsOptsAll
    
    65 72
     
    

  • compiler/GHC/Runtime/Interpreter/Init.hs
    ... ... @@ -9,6 +9,7 @@ where
    9 9
     
    
    10 10
     
    
    11 11
     import GHC.Prelude
    
    12
    +import GHC.Driver.DynFlags
    
    12 13
     import GHC.Platform
    
    13 14
     import GHC.Platform.Ways
    
    14 15
     import GHC.Settings
    
    ... ... @@ -57,14 +58,15 @@ data InterpOpts = InterpOpts
    57 58
     
    
    58 59
     -- | Initialize code interpreter
    
    59 60
     initInterpreter
    
    60
    -  :: TmpFs
    
    61
    +  :: DynFlags
    
    62
    +  -> TmpFs
    
    61 63
       -> Logger
    
    62 64
       -> Platform
    
    63 65
       -> FinderCache
    
    64 66
       -> UnitEnv
    
    65 67
       -> InterpOpts
    
    66 68
       -> IO (Maybe Interp)
    
    67
    -initInterpreter tmpfs logger platform finder_cache unit_env opts = do
    
    69
    +initInterpreter dflags tmpfs logger platform finder_cache unit_env opts = do
    
    68 70
     
    
    69 71
       lookup_cache  <- liftIO $ mkInterpSymbolCache
    
    70 72
     
    
    ... ... @@ -125,7 +127,7 @@ initInterpreter tmpfs logger platform finder_cache unit_env opts = do
    125 127
               dynamic  = interpWays opts `hasWay` WayDyn
    
    126 128
             prog <- case interpProg opts of
    
    127 129
               -- build iserv program if none specified
    
    128
    -          "" -> generateIservC logger tmpfs (interpExecutableLinkOpts opts) unit_env
    
    130
    +          "" -> generateIservC dflags logger tmpfs (interpExecutableLinkOpts opts) unit_env
    
    129 131
               _ -> pure (interpProg opts ++ flavour)
    
    130 132
                 where
    
    131 133
                   flavour
    

  • compiler/GHC/Stg/EnforceEpt.hs
    ... ... @@ -140,9 +140,11 @@ Afterwards, the *EPT rewriter* inserts the actual evals realising Upcasts.
    140 140
     Implementation
    
    141 141
     --------------
    
    142 142
     
    
    143
    -* EPT analysis is implemented in GHC.Stg.EnforceEpt.inferTags.
    
    143
    +(EPT-anal) EPT analysis is implemented in `GHC.Stg.EnforceEpt.inferTags.`
    
    144 144
       It attaches its result to /binders/, not occurrence sites.
    
    145
    -* The EPT rewriter establishes the EPT invariant by inserting evals. That is, if
    
    145
    +
    
    146
    +(EPT-rewrite) The EPT rewriter, `GHC.Stg.EnforceEpt.Rewrite.rewriteTopBinds`,
    
    147
    +   establishes the EPT invariant by inserting evals. That is, if
    
    146 148
         (a) a binder x is used to
    
    147 149
               * construct a strict field (`SP x y`), or
    
    148 150
               * passed as a CBV argument (`$wf x`),
    
    ... ... @@ -152,17 +154,27 @@ Implementation
    152 154
         case x of x' { __ DEFAULT -> SP x' y }.
    
    153 155
         case x of x' { __ DEFAULT -> $wf x' }.
    
    154 156
       (Recall that the case binder x' is always EPT.)
    
    155
    -  This is implemented in GHC.Stg.EnforceEpt.Rewrite.rewriteTopBinds.
    
    157
    +
    
    156 158
       This pass also propagates the EPTness from binders to occurrences.
    
    159
    +
    
    157 160
       It is sound to insert evals on strict fields (Note [Strict fields in Core]),
    
    158 161
       and on CBV arguments as well (Note [CBV Function Ids]).
    
    159
    -* We also export the EPTness of top level bindings to allow this optimisation
    
    162
    +
    
    163
    +(EPT-codegen) Finally, code generation for (case x of alts) skips the thunk check
    
    164
    +  when `x` is EPT. This is done (a bit indirectly) thus:
    
    165
    +   * GHC.StgToCmm.Expr.cgCase: builds a `sequel`, and recurses into `cgExpr` on `x`.
    
    166
    +   * When `cgExpr` sees a `x` goes to `cgIdApp`, which uses `getCallMethod`.
    
    167
    +   * Then `getCallMethod` sees that `x` is EPT (via `idTagSigMaybe`), and
    
    168
    +     returns `InferredReturnIt`.
    
    169
    +   * Now `cgIdApp` can jump straight to the case-alternative switch in the `sequel`
    
    170
    +     constructed by `cgCase`.
    
    171
    +
    
    172
    +(EPT-export) We also export the EPTness of top level bindings to allow this optimisation
    
    160 173
       to work across module boundaries.
    
    174
    +
    
    161 175
       NB: The EPT Invariant *must* be upheld, regardless of the optimisation level;
    
    162 176
       hence EPTness is practically part of the internal ABI of a strict data
    
    163 177
       constructor or CBV function. Note [CBV Function Ids] contains the details.
    
    164
    -* Finally, code generation skips the thunk check when branching on binders that
    
    165
    -  are EPT. This is done by `cgExpr`/`cgCase` in the backend.
    
    166 178
     
    
    167 179
     Evaluation
    
    168 180
     ----------
    

  • compiler/GHC/StgToCmm/Closure.hs
    ... ... @@ -617,12 +617,15 @@ getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun)
    617 617
     getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _cg_locs _self_loop_info
    
    618 618
       | n_args == 0
    
    619 619
       , Just sig <- idTagSig_maybe id
    
    620
    -  , isTaggedSig sig -- Infered to be already evaluated by EPT analysis
    
    621
    -  -- When profiling we must enter all potential functions to make sure we update the SCC
    
    622
    -  -- even if the function itself is already evaluated.
    
    620
    +  , isTaggedSig sig -- This `id` is evaluated and properly tagged; no need to enter it
    
    621
    +                    -- See (EPT-codegen) in Note [EPT enforcement] in GHC.Stg.EnforceEpt
    
    622
    +
    
    623
    +  -- When profiling we must enter all potential functions to make sure we update
    
    624
    +  -- the SCC even if the function itself is already evaluated.
    
    623 625
       -- See Note [Evaluating functions with profiling] in rts/Apply.cmm
    
    624 626
       , not (profileIsProfiling (stgToCmmProfile cfg) && might_be_a_function)
    
    625
    -  = InferedReturnIt -- See Note [EPT enforcement]
    
    627
    +
    
    628
    +  = InferedReturnIt -- See (EPT-codegen) in Note [EPT enforcement]
    
    626 629
     
    
    627 630
       | might_be_a_function = SlowCall
    
    628 631
     
    

  • compiler/GHC/StgToCmm/Expr.hs
    ... ... @@ -1053,6 +1053,7 @@ cgIdApp fun_id args = do
    1053 1053
               | otherwise                -> emitReturn [fun]
    
    1054 1054
     
    
    1055 1055
             -- A value infered to be in WHNF, so we can just return it.
    
    1056
    +        -- See (EPT-codegen) in Note [EPT enforcement] in GHC.Stg.EnforceEpt
    
    1056 1057
             InferedReturnIt
    
    1057 1058
               | isZeroBitTy (idType fun_id) -> trace >> emitReturn []
    
    1058 1059
               | otherwise                   -> trace >> assertTag >>
    

  • hadrian/src/Flavour.hs
    ... ... @@ -70,7 +70,8 @@ flavourTransformers = M.fromList
    70 70
         , "fully_static"     =: fullyStatic
    
    71 71
         , "host_fully_static" =: hostFullyStatic
    
    72 72
         , "collect_timings"  =: collectTimings
    
    73
    -    , "assertions"       =: enableAssertions
    
    73
    +    , "assertions"        =: enableAssertions Stage2
    
    74
    +    , "assertions_stage1" =: enableAssertions Stage1
    
    74 75
         , "debug_ghc"        =: debugGhc Stage2
    
    75 76
         , "debug_stage1_ghc" =: debugGhc Stage1
    
    76 77
         , "lint"             =: enableLinting
    
    ... ... @@ -394,11 +395,11 @@ enableLateCCS = addArgs
    394 395
       ? arg "-fprof-late"
    
    395 396
     
    
    396 397
     -- | Enable assertions for the stage2 compiler
    
    397
    -enableAssertions :: Flavour -> Flavour
    
    398
    -enableAssertions flav = flav { ghcDebugAssertions = f }
    
    398
    +enableAssertions :: Stage -> Flavour -> Flavour
    
    399
    +enableAssertions stage flav = flav { ghcDebugAssertions = f }
    
    399 400
       where
    
    400
    -    f Stage2 = True
    
    401
    -    f st = ghcDebugAssertions flav st
    
    401
    +    f s | s == stage = True
    
    402
    +        | otherwise  = ghcDebugAssertions flav s
    
    402 403
     
    
    403 404
     -- | Build the stage3 compiler using the non-moving GC.
    
    404 405
     enableBootNonmovingGc :: Flavour -> Flavour
    

  • hadrian/src/Packages.hs
    ... ... @@ -217,7 +217,7 @@ timeoutPath = "testsuite/timeout/install-inplace/bin/timeout" <.> exe
    217 217
     -- TODO: Can we extract this information from Cabal files?
    
    218 218
     -- | Some program packages should not be linked with Haskell main function.
    
    219 219
     nonHsMainPackage :: Package -> Bool
    
    220
    -nonHsMainPackage = (`elem` [hp2ps, iserv, unlit, ghciWrapper])
    
    220
    +nonHsMainPackage = (`elem` [hp2ps, unlit, ghciWrapper])
    
    221 221
     
    
    222 222
     
    
    223 223
     {-
    

  • hadrian/src/Rules/Gmp.hs
    ... ... @@ -126,6 +126,12 @@ gmpRules = do
    126 126
                     interpretInContext ctx $
    
    127 127
                     mconcat
    
    128 128
                         [ getStagedCCFlags
    
    129
    +                    -- gmp fails to configure with newer compilers
    
    130
    +                    -- that default to c23:
    
    131
    +                    -- https://gmplib.org/list-archives/gmp-devel/2025-January/006279.html.
    
    132
    +                    -- for now just manually specify -std=gnu11 until
    
    133
    +                    -- next upstream release.
    
    134
    +                    , arg "-std=gnu11"
    
    129 135
                         -- gmp symbols are only used by bignum logic in
    
    130 136
                         -- ghc-internal and shouldn't be exported by the
    
    131 137
                         -- ghc-internal shared library.
    

  • hadrian/src/Settings/Packages.hs
    ... ... @@ -41,6 +41,8 @@ packageArgs = do
    41 41
         libzstdLibraryDir <- getSetting LibZstdLibDir
    
    42 42
         stageVersion <- readVersion <$> (expr $ ghcVersionStage stage)
    
    43 43
     
    
    44
    +    rtsWays <- getRtsWays
    
    45
    +
    
    44 46
         mconcat
    
    45 47
             --------------------------------- base ---------------------------------
    
    46 48
             [ package base ? mconcat
    
    ... ... @@ -185,11 +187,15 @@ packageArgs = do
    185 187
             --
    
    186 188
             -- The Solaris linker does not support --export-dynamic option. It also
    
    187 189
             -- does not need it since it exports all dynamic symbols by default
    
    188
    -        , package iserv
    
    189
    -          ? expr isElfTarget
    
    190
    +        , package iserv ? mconcat [
    
    191
    +            expr isElfTarget
    
    190 192
               ? notM (expr $ anyTargetOs [OSFreeBSD, OSSolaris2])? mconcat
    
    191 193
               [ builder (Ghc LinkHs) ? arg "-optl-Wl,--export-dynamic" ]
    
    192 194
     
    
    195
    +            -- Link iserv with -threaded if possible
    
    196
    +          , builder (Cabal Flags) ? any (wayUnit Threaded) rtsWays `cabalFlag` "threaded"
    
    197
    +        ]
    
    198
    +
    
    193 199
             -------------------------------- haddock -------------------------------
    
    194 200
             , package haddockApi ?
    
    195 201
               builder (Cabal Flags) ? arg "in-ghc-tree"
    

  • libraries/ghc-internal/configure.ac
    ... ... @@ -195,28 +195,10 @@ dnl--------------------------------------------------------------------
    195 195
        if test "$HaveFrameworkGMP" = "YES" || test "$HaveLibGmp" = "YES"
    
    196 196
        then
    
    197 197
            AC_MSG_RESULT([no])
    
    198
    -       UseIntreeGmp=0
    
    199 198
            AC_CHECK_HEADER([gmp.h], , [AC_MSG_ERROR([Cannot find gmp.h])])
    
    200
    -
    
    201
    -       AC_MSG_CHECKING([GMP version])
    
    202
    -       AC_COMPUTE_INT(GhcGmpVerMj, __GNU_MP_VERSION, [#include <gmp.h>],
    
    203
    -           AC_MSG_ERROR([Unable to get value of __GNU_MP_VERSION]))
    
    204
    -       AC_COMPUTE_INT(GhcGmpVerMi, __GNU_MP_VERSION_MINOR, [#include <gmp.h>],
    
    205
    -           AC_MSG_ERROR([Unable to get value of __GNU_MP_VERSION_MINOR]))
    
    206
    -       AC_COMPUTE_INT(GhcGmpVerPl, __GNU_MP_VERSION_PATCHLEVEL, [#include <gmp.h>],
    
    207
    -           AC_MSG_ERROR([Unable to get value of __GNU_MP_VERSION_PATCHLEVEL]))
    
    208
    -       AC_MSG_RESULT([$GhcGmpVerMj.$GhcGmpVerMi.$GhcGmpVerPl])
    
    209
    -
    
    210 199
        else
    
    211 200
            AC_MSG_RESULT([yes])
    
    212
    -       UseIntreeGmp=1
    
    213 201
            HaveSecurePowm=1
    
    214
    -
    
    215
    -       AC_MSG_CHECKING([GMP version])
    
    216
    -       GhcGmpVerMj=6
    
    217
    -       GhcGmpVerMi=1
    
    218
    -       GhcGmpVerPl=2
    
    219
    -       AC_MSG_RESULT([$GhcGmpVerMj.$GhcGmpVerMi.$GhcGmpVerPl])
    
    220 202
        fi
    
    221 203
     
    
    222 204
        GMP_INSTALL_INCLUDES="HsIntegerGmp.h ghc-gmp.h"
    
    ... ... @@ -231,10 +213,6 @@ AC_SUBST(GMP_INSTALL_INCLUDES)
    231 213
     AC_SUBST(HaveLibGmp)
    
    232 214
     AC_SUBST(HaveFrameworkGMP)
    
    233 215
     AC_SUBST(HaveSecurePowm)
    
    234
    -AC_SUBST(UseIntreeGmp)
    
    235
    -AC_SUBST(GhcGmpVerMj)
    
    236
    -AC_SUBST(GhcGmpVerMi)
    
    237
    -AC_SUBST(GhcGmpVerPl)
    
    238 216
     
    
    239 217
     # Compute offsets/sizes used by jsbits/base.js
    
    240 218
     if test "$host" = "javascript-ghcjs"
    

  • libraries/ghc-internal/include/HsIntegerGmp.h.in
    1 1
     #pragma once
    
    2 2
     
    
    3
    -/* Whether GMP is embedded into ghc-internal */
    
    4
    -#define GHC_GMP_INTREE     @UseIntreeGmp@
    
    5
    -
    
    6
    -/* The following values denote the GMP version used during GHC build-time */
    
    7
    -#define GHC_GMP_VERSION_MJ @GhcGmpVerMj@
    
    8
    -#define GHC_GMP_VERSION_MI @GhcGmpVerMi@
    
    9
    -#define GHC_GMP_VERSION_PL @GhcGmpVerPl@
    
    10
    -#define GHC_GMP_VERSION \
    
    11
    -    (@GhcGmpVerMj@ * 10000 + @GhcGmpVerMi@ * 100 + @GhcGmpVerPl@)
    
    12
    -
    
    13 3
     /* Whether GMP supports mpz_powm_sec */
    
    14 4
     #define HAVE_SECURE_POWM @HaveSecurePowm@

  • testsuite/tests/simplCore/should_compile/T18013.stderr
    ... ... @@ -143,14 +143,14 @@ T18013.$wmapMaybeRule [InlPrag=NOINLINE]
    143 143
      Unf=OtherCon []]
    
    144 144
     T18013.$wmapMaybeRule
    
    145 145
       = \ (@a) (@b) (@s) (ww :: s) (ww1 :: s -> a -> IO (Result s b)) ->
    
    146
    +      case ww of ww2 { __DEFAULT ->
    
    146 147
           case ww1 of wild { __DEFAULT ->
    
    147
    -      case ww of wild1 { __DEFAULT ->
    
    148 148
           T18013a.Rule
    
    149 149
             @IO
    
    150 150
             @(Maybe a)
    
    151 151
             @(Maybe b)
    
    152 152
             @s
    
    153
    -        wild1
    
    153
    +        ww2
    
    154 154
             ((\ (s2 :: s)
    
    155 155
                 (a1 :: Maybe a)
    
    156 156
                 (s1 :: GHC.Internal.Prim.State# GHC.Internal.Prim.RealWorld) ->
    
    ... ... @@ -158,7 +158,7 @@ T18013.$wmapMaybeRule
    158 158
                   Nothing ->
    
    159 159
                     (# s1,
    
    160 160
                        T18013a.Result
    
    161
    -                     @s @(Maybe b) wild1 (GHC.Internal.Maybe.Nothing @b) #);
    
    161
    +                     @s @(Maybe b) ww2 (GHC.Internal.Maybe.Nothing @b) #);
    
    162 162
                   Just x ->
    
    163 163
                     case ((wild s2 x)
    
    164 164
                           `cast` <Co:4> :: IO (Result s b)
    

  • testsuite/tests/simplCore/should_compile/T26722.hs
    1
    +module T26722 where
    
    2
    +
    
    3
    +data T = MkT ![Int]
    
    4
    +
    
    5
    +g s True  t = f s t t
    
    6
    +g s False t = g s True t
    
    7
    +
    
    8
    +f True  (MkT xs) t = f False (MkT xs) t
    
    9
    +f False (MkT xs) _ = xs

  • testsuite/tests/simplCore/should_compile/T26722.stderr
    1
    + 
    \ No newline at end of file

  • testsuite/tests/simplCore/should_compile/all.T
    ... ... @@ -563,3 +563,6 @@ test('T26115', [grep_errmsg(r'DFun')], compile, ['-O -ddump-simpl -dsuppress-uni
    563 563
     test('T26116', normal, compile, ['-O -ddump-rules'])
    
    564 564
     test('T26117', [grep_errmsg(r'==')], compile, ['-O -ddump-simpl -dsuppress-uniques'])
    
    565 565
     test('T26349',  normal, compile, ['-O -ddump-rules'])
    
    566
    +
    
    567
    +# T26722: there should be no reboxing in $wg
    
    568
    +test('T26722', [grep_errmsg(r'SPEC')], compile, ['-O -dno-typeable-binds'])

  • utils/iserv/cbits/iservmain.c deleted
    1
    -#include <ghcversion.h>
    
    2
    -#  include <rts/PosixSource.h>
    
    3
    -#include <Rts.h>
    
    4
    -
    
    5
    -#include <HsFFI.h>
    
    6
    -
    
    7
    -int main (int argc, char *argv[])
    
    8
    -{
    
    9
    -    RtsConfig conf = defaultRtsConfig;
    
    10
    -
    
    11
    -    // We never know what symbols GHC will look up in the future, so
    
    12
    -    // we must retain CAFs for running interpreted code.
    
    13
    -    conf.keep_cafs = 1;
    
    14
    -
    
    15
    -    conf.rts_opts_enabled = RtsOptsAll;
    
    16
    -    extern StgClosure ZCMain_main_closure;
    
    17
    -    hs_main(argc, argv, &ZCMain_main_closure, conf);
    
    18
    -}

  • utils/iserv/iserv.cabal.in
    ... ... @@ -23,11 +23,17 @@ Category: Development
    23 23
     build-type: Simple
    
    24 24
     cabal-version: >=1.10
    
    25 25
     
    
    26
    +Flag threaded
    
    27
    +    Description: Link the iserv executable against the threaded RTS
    
    28
    +    Default: True
    
    29
    +    Manual: True
    
    30
    +
    
    26 31
     Executable iserv
    
    27 32
         Default-Language: Haskell2010
    
    28
    -    ghc-options: -no-hs-main
    
    33
    +    ghc-options: -fkeep-cafs -rtsopts
    
    34
    +    if flag(threaded)
    
    35
    +      ghc-options: -threaded
    
    29 36
         Main-Is: Main.hs
    
    30
    -    C-Sources: cbits/iservmain.c
    
    31 37
         Hs-Source-Dirs: src
    
    32 38
         include-dirs: .
    
    33 39
         Build-Depends: