Cheng Shao pushed to branch wip/deepseq-primop at Glasgow Haskell Compiler / GHC

Commits:

8 changed files:

Changes:

  • libraries/ghc-experimental/cbits/DeepSeq.cmm
    1
    +/* -----------------------------------------------------------------------------
    
    2
    + *
    
    3
    + * (c) The GHC Team, 2026
    
    4
    + *
    
    5
    + * Support for the deepseq# primcall.
    
    6
    + *
    
    7
    + * ---------------------------------------------------------------------------*/
    
    8
    +
    
    9
    +#include "Cmm.h"
    
    10
    +
    
    11
    +/* -----------------------------------------------------------------------------
    
    12
    +   deepseq#
    
    13
    +
    
    14
    +   Deeply evaluate a value to (approximate) normal form, without requiring an
    
    15
    +   NFData constraint. This is used to provide a primitive analogue of
    
    16
    +   Control.DeepSeq.force / rnf.
    
    17
    +
    
    18
    +   See the GHC.DeepSeq documentation for the intended semantics and
    
    19
    +   limitations.
    
    20
    +   -------------------------------------------------------------------------- */
    
    21
    +
    
    22
    +// Worker which performs deep evaluation. This lets us tail-call when traversing
    
    23
    +// the final pointer field, avoiding stack blowup on common spine-recursive
    
    24
    +// structures (e.g. lists).
    
    25
    +//
    
    26
    +// The second argument is a boolean (0/1) accumulator tracking whether any
    
    27
    +// evaluation was forced in the transitive closure so far.
    
    28
    +stg_deepseqWorkzh (P_ p, W_ forced)
    
    29
    +{
    
    30
    +    W_ type, info;
    
    31
    +
    
    32
    +    again: MAYBE_GC(again);
    
    33
    +    STK_CHK_GEN();
    
    34
    +
    
    35
    +    p = UNTAG(p);
    
    36
    +    // Values in compact regions are already fully evaluated.
    
    37
    +    (W_ in_compact) = call stg_compactContainsAnyzh(p);
    
    38
    +    if (in_compact != 0) {
    
    39
    +        return (forced);
    
    40
    +    }
    
    41
    +    info  = %INFO_PTR(p);
    
    42
    +    type = TO_W_(%INFO_TYPE(%STD_INFO(info)));
    
    43
    +
    
    44
    +    switch [0 .. N_CLOSURE_TYPES] type {
    
    45
    +
    
    46
    +    // Unevaluated things must be evaluated first:
    
    47
    +    case
    
    48
    +        THUNK,
    
    49
    +        THUNK_1_0,
    
    50
    +        THUNK_0_1,
    
    51
    +        THUNK_2_0,
    
    52
    +        THUNK_1_1,
    
    53
    +        THUNK_0_2,
    
    54
    +        THUNK_STATIC,
    
    55
    +        AP,
    
    56
    +        AP_STACK,
    
    57
    +        BLACKHOLE,
    
    58
    +        THUNK_SELECTOR : {
    
    59
    +        (P_ evald) = call %ENTRY_CODE(info) (p);
    
    60
    +        jump stg_deepseqWorkzh(evald, 1);
    
    61
    +    }
    
    62
    +
    
    63
    +    // Follow indirections:
    
    64
    +    case IND, IND_STATIC: {
    
    65
    +        p = %acquire StgInd_indirectee(p);
    
    66
    +        jump stg_deepseqWorkzh(p, forced);
    
    67
    +    }
    
    68
    +
    
    69
    +    // WHITEHOLEs are transient. Yield and try again.
    
    70
    +    case WHITEHOLE: {
    
    71
    +        goto again;
    
    72
    +    }
    
    73
    +
    
    74
    +    // Arrays of pointers: evaluate elements.
    
    75
    +    case
    
    76
    +        MUT_ARR_PTRS_DIRTY,
    
    77
    +        MUT_ARR_PTRS_CLEAN,
    
    78
    +        MUT_ARR_PTRS_FROZEN_DIRTY,
    
    79
    +        MUT_ARR_PTRS_FROZEN_CLEAN: {
    
    80
    +        W_ i_arr, ptrs_arr;
    
    81
    +        ptrs_arr = StgMutArrPtrs_ptrs(p);
    
    82
    +        if (ptrs_arr == 0) { return (forced); }
    
    83
    +        i_arr = ptrs_arr - 1;
    
    84
    +      deepseq_arr_loop0:
    
    85
    +        if (i_arr == 0) {
    
    86
    +            // Tail-call the final element to avoid building up a deep stack
    
    87
    +            // when traversing large immutable arrays.
    
    88
    +            jump stg_deepseqWorkzh(P_[p + SIZEOF_StgMutArrPtrs], forced);
    
    89
    +        }
    
    90
    +        (W_ forced_arr) = call stg_deepseqWorkzh(P_[p + SIZEOF_StgMutArrPtrs + WDS(i_arr)], forced);
    
    91
    +        forced = forced_arr;
    
    92
    +        i_arr = i_arr - 1;
    
    93
    +        goto deepseq_arr_loop0;
    
    94
    +    }
    
    95
    +
    
    96
    +    case
    
    97
    +        SMALL_MUT_ARR_PTRS_DIRTY,
    
    98
    +        SMALL_MUT_ARR_PTRS_CLEAN,
    
    99
    +        SMALL_MUT_ARR_PTRS_FROZEN_DIRTY,
    
    100
    +        SMALL_MUT_ARR_PTRS_FROZEN_CLEAN: {
    
    101
    +        W_ i_sarr, ptrs_sarr;
    
    102
    +        ptrs_sarr = StgSmallMutArrPtrs_ptrs(p);
    
    103
    +        if (ptrs_sarr == 0) { return (forced); }
    
    104
    +        i_sarr = ptrs_sarr - 1;
    
    105
    +      deepseq_arr_loop1:
    
    106
    +        if (i_sarr == 0) {
    
    107
    +            // Tail-call the final element to avoid building up a deep stack
    
    108
    +            // when traversing large immutable arrays.
    
    109
    +            jump stg_deepseqWorkzh(P_[p + SIZEOF_StgSmallMutArrPtrs], forced);
    
    110
    +        }
    
    111
    +        (W_ forced_sarr) = call stg_deepseqWorkzh(P_[p + SIZEOF_StgSmallMutArrPtrs + WDS(i_sarr)], forced);
    
    112
    +        forced = forced_sarr;
    
    113
    +        i_sarr = i_sarr - 1;
    
    114
    +        goto deepseq_arr_loop1;
    
    115
    +    }
    
    116
    +
    
    117
    +    // Constructors: evaluate their pointer fields.
    
    118
    +    case
    
    119
    +        CONSTR,
    
    120
    +        CONSTR_1_0,
    
    121
    +        CONSTR_2_0,
    
    122
    +        CONSTR_1_1,
    
    123
    +        CONSTR_NOCAF: {
    
    124
    +        W_ i_constr, ptrs_constr;
    
    125
    +        ptrs_constr  = TO_W_(%INFO_PTRS(%STD_INFO(info)));
    
    126
    +        if (ptrs_constr == 0) { return (forced); }
    
    127
    +        i_constr = 0;
    
    128
    +      deepseq_constr_loop:
    
    129
    +        if (i_constr < ptrs_constr) {
    
    130
    +            // Tail-call the last one. This avoids building up a deep stack
    
    131
    +            // when traversing long lists. We count up so the final pointer
    
    132
    +            // field (e.g. the tail of a list cell) is tail-called.
    
    133
    +            if (i_constr == ptrs_constr - 1) {
    
    134
    +                jump stg_deepseqWorkzh(StgClosure_payload(p,i_constr), forced);
    
    135
    +            }
    
    136
    +            (W_ forced_constr) = call stg_deepseqWorkzh(StgClosure_payload(p,i_constr), forced);
    
    137
    +            forced = forced_constr;
    
    138
    +            i_constr = i_constr + 1;
    
    139
    +            goto deepseq_constr_loop;
    
    140
    +        }
    
    141
    +        return (forced);
    
    142
    +    }
    
    143
    +
    
    144
    +    case
    
    145
    +        MUT_VAR_CLEAN,
    
    146
    +        MUT_VAR_DIRTY: {
    
    147
    +        p = StgMutVar_var(p);
    
    148
    +        jump stg_deepseqWorkzh(p, forced);
    
    149
    +    }
    
    150
    +
    
    151
    +    case
    
    152
    +        MVAR_CLEAN,
    
    153
    +        MVAR_DIRTY: {
    
    154
    +        p = StgMVar_value(p);
    
    155
    +        jump stg_deepseqWorkzh(p, forced);
    
    156
    +    }
    
    157
    +
    
    158
    +    case TVAR: {
    
    159
    +        (P_ tvar_val) = call stg_readTVarIOzh(p);
    
    160
    +        jump stg_deepseqWorkzh(tvar_val, forced);
    
    161
    +    }
    
    162
    +
    
    163
    +    case WEAK: {
    
    164
    +        // Follow the value of a live weak pointer.
    
    165
    +        jump stg_deepseqWorkzh(StgWeak_value(p), forced);
    
    166
    +    }
    
    167
    +
    
    168
    +    // Anything else: conservatively stop.
    
    169
    +    //
    
    170
    +    // This includes (among other closure types) function-like closures, TSOs, etc,
    
    171
    +    // matching the intended "mimic typical NFData instances" semantics
    
    172
    +    // described in the primop documentation.
    
    173
    +    //
    
    174
    +    // We should never see frames here, but if we do, returning is safer than
    
    175
    +    // entering arbitrary things.
    
    176
    +    default: {
    
    177
    +        return (forced);
    
    178
    +    }}
    
    179
    +}
    
    180
    +
    
    181
    +// deepseq# primop entry point.
    
    182
    +//   deepseq# :: forall a s. a -> State# s -> (# State# s, Int#, a #)
    
    183
    +//
    
    184
    +// The State# argument/result has no runtime representation, so the RTS entry
    
    185
    +// only takes the value being forced.
    
    186
    +stg_deepseqzh (P_ p)
    
    187
    +{
    
    188
    +    (W_ forced) = call stg_deepseqWorkzh(p, 0);
    
    189
    +    return (forced, p);
    
    190
    +}

  • libraries/ghc-experimental/ghc-experimental.cabal.in
    ... ... @@ -35,6 +35,7 @@ library
    35 35
         exposed-modules:
    
    36 36
           Data.Sum.Experimental
    
    37 37
           Data.Tuple.Experimental
    
    38
    +      GHC.DeepSeq
    
    38 39
           GHC.PrimOps
    
    39 40
           GHC.Profiling.Eras
    
    40 41
           GHC.TypeLits.Experimental
    
    ... ... @@ -51,4 +52,5 @@ library
    51 52
         build-depends:    base >=4.20 && < 4.23,
    
    52 53
                           ghc-internal == @ProjectVersionForLib@.*
    
    53 54
         hs-source-dirs:   src
    
    55
    +    cmm-sources:      cbits/DeepSeq.cmm
    
    54 56
         default-language: Haskell2010

  • libraries/ghc-experimental/src/GHC/DeepSeq.hs
    1
    +{-# LANGUAGE ForeignFunctionInterface #-}
    
    2
    +{-# LANGUAGE GHCForeignImportPrim #-}
    
    3
    +{-# LANGUAGE MagicHash #-}
    
    4
    +{-# LANGUAGE UnboxedTuples #-}
    
    5
    +{-# LANGUAGE UnliftedFFITypes #-}
    
    6
    +
    
    7
    +module GHC.DeepSeq
    
    8
    +  ( force,
    
    9
    +    forceIO,
    
    10
    +    forceST,
    
    11
    +  )
    
    12
    +where
    
    13
    +
    
    14
    +import GHC.Internal.Exts
    
    15
    +import GHC.Internal.IO
    
    16
    +import GHC.Internal.ST
    
    17
    +
    
    18
    +-- | Pure wrapper around 'forceST'.
    
    19
    +force :: a -> (Bool, a)
    
    20
    +force a = runST (forceST a)
    
    21
    +
    
    22
    +-- | Deeply evaluate a value in the 'IO' monad, returning the forced value and
    
    23
    +-- a flag indicating whether any unevaluated closure was forced.
    
    24
    +--
    
    25
    +-- This is a primitive analogue of 'Control.DeepSeq.force' / @rnf@ that does
    
    26
    +-- not require an 'NFData' constraint. It traverses algebraic data (constructor
    
    27
    +-- fields), immutable arrays, and the contents of 'MutVar#', 'MVar#',
    
    28
    +-- 'MutableArray#', 'SmallMutableArray#', 'TVar#', and live 'Weak#' values.
    
    29
    +--
    
    30
    +-- To mimic typical 'Control.DeepSeq.NFData' instances, it stops at
    
    31
    +-- function-like closures (e.g. functions and partial applications) and at
    
    32
    +-- mutable objects which are not plain containers (e.g. 'MutableByteArray#').
    
    33
    +-- Consequently
    
    34
    +-- it is not a drop-in replacement for user-defined 'NFData' instances, which
    
    35
    +-- may choose to force less (or more) depending on semantics.
    
    36
    +--
    
    37
    +-- === Pointer traversal policy
    
    38
    +--
    
    39
    +-- We only follow a pointer when doing so is also possible in Haskell via a
    
    40
    +-- corresponding API. For example, we traverse 'MutVar#', 'MVar#', mutable
    
    41
    +-- arrays, and live weak pointers because you can observe their contents with
    
    42
    +-- operations like @readIORef@, @readMVar@, @readArray@, or @deRefWeak@.
    
    43
    +-- Conversely, we do not peek inside closures whose internals are not
    
    44
    +-- observable from Haskell, such as function closures and their captured free
    
    45
    +-- variables.
    
    46
    +--
    
    47
    +-- Like any deep evaluation, it may not terminate on cyclic structures.
    
    48
    +forceIO :: a -> IO (Bool, a)
    
    49
    +forceIO a = stToIO (forceST a)
    
    50
    +
    
    51
    +-- | Deeply evaluate a value in the strict 'ST' monad.
    
    52
    +forceST :: a -> ST s (Bool, a)
    
    53
    +forceST a = ST $ \s0 ->
    
    54
    +  case deepseq# (unsafeCoerce# a) s0 of
    
    55
    +    (# s1, flag#, a' #) -> (# s1, (isTrue# flag#, unsafeCoerce# a') #)
    
    56
    +
    
    57
    +foreign import prim "stg_deepseqzh" deepseq# :: Any -> State# s -> (# State# s, Int#, Any #)

  • testsuite/tests/interface-stability/ghc-experimental-exports.stdout
    ... ... @@ -4454,6 +4454,12 @@ module Data.Tuple.Experimental where
    4454 4454
       data Unit# = ...
    
    4455 4455
       getSolo :: forall a. Solo a -> a
    
    4456 4456
     
    
    4457
    +module GHC.DeepSeq where
    
    4458
    +  -- Safety: None
    
    4459
    +  force :: forall a. a -> (GHC.Internal.Types.Bool, a)
    
    4460
    +  forceIO :: forall a. a -> GHC.Internal.Types.IO (GHC.Internal.Types.Bool, a)
    
    4461
    +  forceST :: forall a s. a -> GHC.Internal.ST.ST s (GHC.Internal.Types.Bool, a)
    
    4462
    +
    
    4457 4463
     module GHC.Exception.Backtrace.Experimental where
    
    4458 4464
       -- Safety: None
    
    4459 4465
       type BacktraceMechanism :: *
    

  • testsuite/tests/interface-stability/ghc-experimental-exports.stdout-mingw32
    ... ... @@ -4454,6 +4454,12 @@ module Data.Tuple.Experimental where
    4454 4454
       data Unit# = ...
    
    4455 4455
       getSolo :: forall a. Solo a -> a
    
    4456 4456
     
    
    4457
    +module GHC.DeepSeq where
    
    4458
    +  -- Safety: None
    
    4459
    +  force :: forall a. a -> (GHC.Internal.Types.Bool, a)
    
    4460
    +  forceIO :: forall a. a -> GHC.Internal.Types.IO (GHC.Internal.Types.Bool, a)
    
    4461
    +  forceST :: forall a s. a -> GHC.Internal.ST.ST s (GHC.Internal.Types.Bool, a)
    
    4462
    +
    
    4457 4463
     module GHC.Exception.Backtrace.Experimental where
    
    4458 4464
       -- Safety: None
    
    4459 4465
       type BacktraceMechanism :: *
    

  • testsuite/tests/primops/should_run/DeepSeqPrimOp.hs
    1
    +module Main (main) where
    
    2
    +
    
    3
    +import Control.Concurrent.MVar (MVar, newEmptyMVar, putMVar)
    
    4
    +import Control.Concurrent.STM (TVar, newTVarIO)
    
    5
    +import Control.Exception (SomeException, evaluate, try)
    
    6
    +import Data.Array (Array, listArray)
    
    7
    +import Data.Array.IO (IOArray, newArray)
    
    8
    +import Data.IORef (IORef, newIORef)
    
    9
    +import GHC.DeepSeq (force, forceIO, forceST)
    
    10
    +import GHC.Compact (compactWithSharing, getCompact)
    
    11
    +import GHC.ST (runST)
    
    12
    +import System.Mem.Weak (mkWeak)
    
    13
    +import System.Timeout (timeout)
    
    14
    +
    
    15
    +deepEvaluate :: a -> IO a
    
    16
    +deepEvaluate a = do
    
    17
    +  (_, a') <- forceIO a
    
    18
    +  pure a'
    
    19
    +
    
    20
    +deepEvaluateWithFlag :: a -> IO (Bool, a)
    
    21
    +deepEvaluateWithFlag = forceIO
    
    22
    +
    
    23
    +mkThunk :: Int -> Int
    
    24
    +mkThunk x = x + 1
    
    25
    +{-# NOINLINE mkThunk #-}
    
    26
    +
    
    27
    +boomVal :: Int
    
    28
    +boomVal = error "boom"
    
    29
    +{-# NOINLINE boomVal #-}
    
    30
    +
    
    31
    +funVal :: Int -> Int
    
    32
    +funVal _ = boomVal
    
    33
    +{-# NOINLINE funVal #-}
    
    34
    +
    
    35
    +main :: IO ()
    
    36
    +main = do
    
    37
    +  r1 <- try (deepEvaluate (1 :: Int, error "boom") >> pure ()) :: IO (Either SomeException ())
    
    38
    +  case r1 of
    
    39
    +    Left _  -> putStrLn "thunk-forced"
    
    40
    +    Right _ -> putStrLn "unexpected-no-exn"
    
    41
    +
    
    42
    +  r2 <- try (deepEvaluate funVal) :: IO (Either SomeException (Int -> Int))
    
    43
    +  case r2 of
    
    44
    +    Left _ -> putStrLn "unexpected-exn"
    
    45
    +    Right _ -> putStrLn "fun-ok"
    
    46
    +
    
    47
    +  (forced2, ()) <- deepEvaluateWithFlag ()
    
    48
    +  if not forced2
    
    49
    +    then putStrLn "noforce-ok"
    
    50
    +    else putStrLn "noforce-bad"
    
    51
    +
    
    52
    +  x <- evaluate (42 :: Int)
    
    53
    +  (forced2b, _) <- deepEvaluateWithFlag x
    
    54
    +  if not forced2b
    
    55
    +    then putStrLn "noforce-int-ok"
    
    56
    +    else putStrLn "noforce-int-bad"
    
    57
    +
    
    58
    +  let (forced2c, _) = force x
    
    59
    +  if not forced2c
    
    60
    +    then putStrLn "force-int-ok"
    
    61
    +    else putStrLn "force-int-bad"
    
    62
    +
    
    63
    +  let (forced2d, _) = runST (forceST x)
    
    64
    +  if not forced2d
    
    65
    +    then putStrLn "forcest-int-ok"
    
    66
    +    else putStrLn "forcest-int-bad"
    
    67
    +
    
    68
    +  let v = (1 :: Int, mkThunk 2)
    
    69
    +  (forced3, v') <- deepEvaluateWithFlag v
    
    70
    +  if forced3 && snd v' == 3
    
    71
    +    then putStrLn "thunk-ok"
    
    72
    +    else putStrLn "unexpected"
    
    73
    +
    
    74
    +  let arr :: Array Int Int
    
    75
    +      arr = listArray (0, 0) [boomVal]
    
    76
    +  r3 <- try (deepEvaluate arr >> pure ()) :: IO (Either SomeException ())
    
    77
    +  case r3 of
    
    78
    +    Left _  -> putStrLn "array-thunk-forced"
    
    79
    +    Right _ -> putStrLn "array-unforced"
    
    80
    +
    
    81
    +  ioArr <- newArray (0, 0) boomVal :: IO (IOArray Int Int)
    
    82
    +  r4 <- try (deepEvaluate ioArr >> pure ()) :: IO (Either SomeException ())
    
    83
    +  case r4 of
    
    84
    +    Left _  -> putStrLn "ioarray-thunk-forced"
    
    85
    +    Right _ -> putStrLn "ioarray-unforced"
    
    86
    +
    
    87
    +  ref <- newIORef boomVal :: IO (IORef Int)
    
    88
    +  r5 <- try (deepEvaluate ref >> pure ()) :: IO (Either SomeException ())
    
    89
    +  case r5 of
    
    90
    +    Left _  -> putStrLn "ioref-thunk-forced"
    
    91
    +    Right _ -> putStrLn "ioref-unforced"
    
    92
    +
    
    93
    +  mvar <- newEmptyMVar :: IO (MVar Int)
    
    94
    +  putMVar mvar boomVal
    
    95
    +  r6 <- try (deepEvaluate mvar >> pure ()) :: IO (Either SomeException ())
    
    96
    +  case r6 of
    
    97
    +    Left _  -> putStrLn "mvar-thunk-forced"
    
    98
    +    Right _ -> putStrLn "mvar-unforced"
    
    99
    +
    
    100
    +  tvar <- newTVarIO boomVal :: IO (TVar Int)
    
    101
    +  r6b <- try (deepEvaluate tvar >> pure ()) :: IO (Either SomeException ())
    
    102
    +  case r6b of
    
    103
    +    Left _  -> putStrLn "tvar-thunk-forced"
    
    104
    +    Right _ -> putStrLn "tvar-unforced"
    
    105
    +
    
    106
    +  keyRef <- newIORef ()
    
    107
    +  weak <- mkWeak keyRef boomVal Nothing
    
    108
    +  r7 <- try (deepEvaluate weak >> pure ()) :: IO (Either SomeException ())
    
    109
    +  case r7 of
    
    110
    +    Left _  -> putStrLn "weak-thunk-forced"
    
    111
    +    Right _ -> putStrLn "weak-unforced"
    
    112
    +
    
    113
    +  let cyclic :: [Int]
    
    114
    +      cyclic = let xs = 1 : xs in xs
    
    115
    +  compacted <- compactWithSharing cyclic
    
    116
    +  let cyclic' = getCompact compacted
    
    117
    +  _ <- evaluate cyclic'
    
    118
    +  r8 <- timeout 2000000 (deepEvaluateWithFlag cyclic')
    
    119
    +  case r8 of
    
    120
    +    Nothing -> putStrLn "compact-loop-timeout"
    
    121
    +    Just _ -> putStrLn "compact-loop-ok"

  • testsuite/tests/primops/should_run/DeepSeqPrimOp.stdout
    1
    +thunk-forced
    
    2
    +fun-ok
    
    3
    +noforce-ok
    
    4
    +noforce-int-ok
    
    5
    +force-int-ok
    
    6
    +forcest-int-ok
    
    7
    +thunk-ok
    
    8
    +array-thunk-forced
    
    9
    +ioarray-thunk-forced
    
    10
    +ioref-thunk-forced
    
    11
    +mvar-thunk-forced
    
    12
    +tvar-thunk-forced
    
    13
    +weak-thunk-forced
    
    14
    +compact-loop-ok

  • testsuite/tests/primops/should_run/all.T
    ... ... @@ -17,6 +17,7 @@ test('T13825-compile', normal, compile_and_run, [''])
    17 17
     test('T16164', normal, compile_and_run, [''])
    
    18 18
     test('ShowPrim', normal, compile_and_run, [''])
    
    19 19
     test('T12492', normal, compile_and_run, [''])
    
    20
    +test('DeepSeqPrimOp', [js_skip, extra_ways(['ghci','ghci-opt']), extra_hc_opts('-package ghc-experimental -package ghc-compact')], compile_and_run, [''])
    
    20 21
     
    
    21 22
     test('ArithInt8', normal, compile_and_run, [''])
    
    22 23
     test('ArithWord8', normal, compile_and_run, [''])