Cheng Shao pushed to branch wip/deepseq-primop at Glasgow Haskell Compiler / GHC Commits: ea5fe7e8 by Cheng Shao at 2025-12-23T20:55:39+01:00 WIP - - - - - 16 changed files: - compiler/GHC/Builtin/primops.txt.pp - compiler/GHC/StgToCmm/Prim.hs - compiler/GHC/StgToJS/Prim.hs - libraries/base/src/GHC/Base.hs - libraries/base/src/GHC/Exts.hs - libraries/ghc-experimental/ghc-experimental.cabal.in - + libraries/ghc-experimental/src/GHC/DeepSeq.hs - + rts/DeepSeq.cmm - rts/RtsSymbols.c - rts/include/stg/MiscClosures.h - rts/rts.cabal - testsuite/tests/interface-stability/ghc-experimental-exports.stdout - testsuite/tests/interface-stability/ghc-prim-exports.stdout - + testsuite/tests/primops/should_run/DeepSeqPrimOp.hs - + testsuite/tests/primops/should_run/DeepSeqPrimOp.stdout - testsuite/tests/primops/should_run/all.T Changes: ===================================== compiler/GHC/Builtin/primops.txt.pp ===================================== @@ -4537,6 +4537,23 @@ primop PrefetchValueOp0 "prefetchValue0#" GenPrimOp with effect = ReadWriteEffect +------------------------------------------------------------------------ +section "Forcing evaluation" + {Primitives for forcing evaluation within a state thread.} +------------------------------------------------------------------------ + +primop DeepSeqOp "deepseq#" GenPrimOp + a -> State# s -> (# State# s, Int#, a #) + { @'deepseq#' x s@ deeply evaluates @x@ in the state thread; see + 'GHC.DeepSeq.forceIO' for the user-facing semantics. } + with + out_of_line = True + effect = ReadWriteEffect + -- See Note [seq# magic] in GHC.Types.Id.Make: we must not let strictness + -- analysis see through the sequencing effect. + strictness = { \ _arity -> mkClosedDmdSig [ topDmd, topDmd ] topDiv } + + -- Note [RuntimeRep polymorphism in continuation-style primops] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- See below. ===================================== compiler/GHC/StgToCmm/Prim.hs ===================================== @@ -1705,6 +1705,7 @@ emitPrimOp cfg primop = AtomicModifyMutVar2Op -> alwaysExternal AtomicModifyMutVar_Op -> alwaysExternal CasMutVarOp -> alwaysExternal + DeepSeqOp -> alwaysExternal CatchOp -> alwaysExternal RaiseOp -> alwaysExternal RaiseUnderflowOp -> alwaysExternal ===================================== compiler/GHC/StgToJS/Prim.hs ===================================== @@ -1155,6 +1155,8 @@ genPrim prof bound ty op = case op of ------------------------------ Unhandled primops ------------------- + DeepSeqOp -> unhandledPrimop op + AnnotateStackOp -> unhandledPrimop op NewPromptTagOp -> unhandledPrimop op ===================================== libraries/base/src/GHC/Base.hs ===================================== @@ -149,6 +149,9 @@ import GHC.Prim hiding -- whereFrom# is similarly internal. , whereFrom# , isByteArrayWeaklyPinned#, isMutableByteArrayWeaklyPinned# + -- Users should use the interface provided by GHC.DeepSeq in + -- ghc-experimental. + , deepseq# -- Don't re-export vector FMA instructions , fmaddFloatX4# , fmsubFloatX4# ===================================== libraries/base/src/GHC/Exts.hs ===================================== @@ -124,6 +124,10 @@ import GHC.Prim hiding , whereFrom# , isByteArrayWeaklyPinned#, isMutableByteArrayWeaklyPinned# + -- Users should use the interface provided by GHC.DeepSeq in + -- ghc-experimental. + , deepseq# + -- Don't re-export vector FMA instructions , fmaddFloatX4# , fmsubFloatX4# ===================================== libraries/ghc-experimental/ghc-experimental.cabal.in ===================================== @@ -31,6 +31,7 @@ library exposed-modules: Data.Sum.Experimental Data.Tuple.Experimental + GHC.DeepSeq GHC.PrimOps GHC.Profiling.Eras GHC.TypeLits.Experimental ===================================== libraries/ghc-experimental/src/GHC/DeepSeq.hs ===================================== @@ -0,0 +1,44 @@ +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} + +module GHC.DeepSeq + ( force, + forceIO, + ) +where + +import GHC.IO +import GHC.Internal.Exts + +-- | Pure wrapper around 'forceIO'. +force :: a -> (Bool, a) +force = unsafePerformIO . forceIO + +-- | Deeply evaluate a value in the 'IO' monad, returning the forced value and +-- a flag indicating whether any unevaluated closure was forced. +-- +-- This is a primitive analogue of 'Control.DeepSeq.force' / @rnf@ that does +-- not require an 'NFData' constraint. It traverses algebraic data (constructor +-- fields), immutable arrays, and the contents of 'MutVar#', 'MVar#', +-- 'MutableArray#', 'SmallMutableArray#', and live 'Weak#' values. +-- +-- To mimic typical 'Control.DeepSeq.NFData' instances, it stops at +-- function-like closures (e.g. functions and partial applications) and at +-- mutable objects which are not plain containers (e.g. 'TVar#'). Consequently +-- it is not a drop-in replacement for user-defined 'NFData' instances, which +-- may choose to force less (or more) depending on semantics. +-- +-- === Pointer traversal policy +-- +-- We only follow a pointer when doing so is also possible in Haskell via a +-- corresponding API. For example, we traverse 'MutVar#', 'MVar#', mutable +-- arrays, and live weak pointers because you can observe their contents with +-- operations like @readIORef@, @readMVar@, @readArray@, or @deRefWeak@. +-- Conversely, we do not peek inside closures whose internals are not +-- observable from Haskell, such as function closures and their captured free +-- variables. +-- +-- Like any deep evaluation, it may not terminate on cyclic structures. +forceIO :: a -> IO (Bool, a) +forceIO a = IO $ \s0 -> case deepseq# a s0 of + (# s1, flag#, a' #) -> (# s1, (isTrue# flag#, a') #) ===================================== rts/DeepSeq.cmm ===================================== @@ -0,0 +1,246 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2025 + * + * Support for the deepseq# primop. + * + * ---------------------------------------------------------------------------*/ + +#include "Cmm.h" + +/* +Note [import CLOSURE annotations] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +See Note [import CLOSURE annotations] in rts/Apply.cmm. +*/ + +#if !defined(UnregisterisedCompiler) +import CLOSURE g0; +import CLOSURE large_alloc_lim; +#endif + +/* ----------------------------------------------------------------------------- + deepseq# + + Deeply evaluate a value to (approximate) normal form, without requiring an + NFData constraint. This is used to provide a primitive analogue of + Control.DeepSeq.force / rnf. + + See the primop documentation in compiler/GHC/Builtin/primops.txt.pp for the + intended semantics and limitations. + -------------------------------------------------------------------------- */ + +// Worker which performs deep evaluation. This lets us tail-call when traversing +// the final pointer field, avoiding stack blowup on common spine-recursive +// structures (e.g. lists). +// +// The second argument is a boolean (0/1) accumulator tracking whether any +// evaluation was forced in the transitive closure so far. +stg_deepseqWorkzh (P_ p, W_ forced) +{ + W_ type, info; + + again: MAYBE_GC(again); + STK_CHK_GEN(); + + p = UNTAG(p); + info = %INFO_PTR(p); + type = TO_W_(%INFO_TYPE(%STD_INFO(info))); + + switch [0 .. N_CLOSURE_TYPES] type { + + // Unevaluated things must be evaluated first: + case + THUNK, + THUNK_1_0, + THUNK_0_1, + THUNK_2_0, + THUNK_1_1, + THUNK_0_2, + THUNK_STATIC, + AP, + AP_STACK, + BLACKHOLE, + THUNK_SELECTOR : { + (P_ evald) = call %ENTRY_CODE(info) (p); + jump stg_deepseqWorkzh(evald, 1); + } + + // Follow indirections: + case IND, IND_STATIC: { + p = %acquire StgInd_indirectee(p); + jump stg_deepseqWorkzh(p, forced); + } + + // WHITEHOLEs are transient. Yield and try again. + case WHITEHOLE: { + goto again; + } + + // Arrays of pointers: evaluate elements. + case + MUT_ARR_PTRS_DIRTY, + MUT_ARR_PTRS_CLEAN, + MUT_ARR_PTRS_FROZEN_DIRTY, + MUT_ARR_PTRS_FROZEN_CLEAN: { + W_ i_arr, ptrs_arr; + ptrs_arr = StgMutArrPtrs_ptrs(p); + if (ptrs_arr == 0) { return (forced); } + i_arr = ptrs_arr - 1; + deepseq_arr_loop0: + if (i_arr == 0) ( likely: False ) { + // Tail-call the final element to avoid building up a deep stack + // when traversing large immutable arrays. + jump stg_deepseqWorkzh(P_[p + SIZEOF_StgMutArrPtrs], forced); + } + (W_ forced_arr) = call stg_deepseqWorkzh(P_[p + SIZEOF_StgMutArrPtrs + WDS(i_arr)], forced); + forced = forced_arr; + i_arr = i_arr - 1; + goto deepseq_arr_loop0; + } + + case + SMALL_MUT_ARR_PTRS_DIRTY, + SMALL_MUT_ARR_PTRS_CLEAN, + SMALL_MUT_ARR_PTRS_FROZEN_DIRTY, + SMALL_MUT_ARR_PTRS_FROZEN_CLEAN: { + W_ i_sarr, ptrs_sarr; + ptrs_sarr = StgSmallMutArrPtrs_ptrs(p); + if (ptrs_sarr == 0) { return (forced); } + i_sarr = ptrs_sarr - 1; + deepseq_arr_loop1: + if (i_sarr == 0) ( likely: False ) { + // Tail-call the final element to avoid building up a deep stack + // when traversing large immutable arrays. + jump stg_deepseqWorkzh(P_[p + SIZEOF_StgSmallMutArrPtrs], forced); + } + (W_ forced_sarr) = call stg_deepseqWorkzh(P_[p + SIZEOF_StgSmallMutArrPtrs + WDS(i_sarr)], forced); + forced = forced_sarr; + i_sarr = i_sarr - 1; + goto deepseq_arr_loop1; + } + + // Constructors: evaluate their pointer fields. + case + CONSTR, + CONSTR_1_0, + CONSTR_0_1, + CONSTR_2_0, + CONSTR_1_1, + CONSTR_0_2, + CONSTR_NOCAF: { + W_ i_constr, ptrs_constr; + ptrs_constr = TO_W_(%INFO_PTRS(%STD_INFO(info))); + if (ptrs_constr == 0) { return (forced); } + i_constr = 0; + deepseq_constr_loop: + if (i_constr < ptrs_constr) { + // Tail-call the last one. This avoids building up a deep stack + // when traversing long lists. We count up so the final pointer + // field (e.g. the tail of a list cell) is tail-called. + if (i_constr == ptrs_constr - 1) { + jump stg_deepseqWorkzh(StgClosure_payload(p,i_constr), forced); + } + (W_ forced_constr) = call stg_deepseqWorkzh(StgClosure_payload(p,i_constr), forced); + forced = forced_constr; + i_constr = i_constr + 1; + goto deepseq_constr_loop; + } + return (forced); + } + + case + MUT_VAR_CLEAN, + MUT_VAR_DIRTY: { + p = %relaxed StgMutVar_var(p); + jump stg_deepseqWorkzh(p, forced); + } + + case + MVAR_CLEAN, + MVAR_DIRTY: { + p = %relaxed StgMVar_value(p); + jump stg_deepseqWorkzh(p, forced); + } + + case WEAK: { + // Follow the value of a live weak pointer. + jump stg_deepseqWorkzh(StgWeak_value(p), forced); + } + + // Anything else: conservatively stop. + // + // This includes (among other closure types) function-like closures and + // mutable objects which are not plain containers (e.g. TVar#), + // matching the intended "mimic typical NFData instances" semantics + // described in the primop documentation. + // + // We should never see frames here, but if we do, returning is safer than + // entering arbitrary things. + default: { + return (forced); + }} +} + +// deepseq# primop entry point. +// deepseq# :: forall a s. a -> State# s -> (# State# s, Int#, a #) +// +// The State# argument/result has no runtime representation, so the RTS entry +// only takes the value being forced. +stg_deepseqzh (P_ p) +{ + jump stg_deepseqLoopzh(p, 0); +} + +// Worker which evaluates to a root and then delegates to the deep traversal. +// The second argument is a boolean (0/1) accumulator tracking whether any +// evaluation was forced in the transitive closure so far. +stg_deepseqLoopzh (P_ p, W_ forced) +{ + W_ type, info, tag; + + again: MAYBE_GC(again); + STK_CHK_GEN(); + + tag = GETTAG(p); + p = UNTAG(p); + info = %INFO_PTR(p); + type = TO_W_(%INFO_TYPE(%STD_INFO(info))); + + switch [0 .. N_CLOSURE_TYPES] type { + + // Unevaluated things must be evaluated first: + case + THUNK, + THUNK_1_0, + THUNK_0_1, + THUNK_2_0, + THUNK_1_1, + THUNK_0_2, + THUNK_STATIC, + AP, + AP_STACK, + BLACKHOLE, + THUNK_SELECTOR : { + (P_ evald) = call %ENTRY_CODE(info) (p); + jump stg_deepseqLoopzh(evald, 1); + } + + // Follow indirections: + case IND, IND_STATIC: { + p = %acquire StgInd_indirectee(p); + jump stg_deepseqLoopzh(p, forced); + } + + // WHITEHOLEs are transient. Yield and try again. + case WHITEHOLE: { + goto again; + } + + default: { + P_ root; + root = tag | p; + (W_ forced1) = call stg_deepseqWorkzh(root, forced); + return (forced1, root); + }} +} ===================================== rts/RtsSymbols.c ===================================== @@ -644,6 +644,7 @@ extern char **environ; SymI_HasDataProto(stg_newMutVarzh) \ SymI_HasDataProto(stg_newTVarzh) \ SymI_HasDataProto(stg_noDuplicatezh) \ + SymI_HasDataProto(stg_deepseqzh) \ SymI_HasDataProto(stg_atomicModifyMutVar2zh) \ SymI_HasDataProto(stg_atomicModifyMutVarzuzh) \ SymI_HasDataProto(stg_casMutVarzh) \ ===================================== rts/include/stg/MiscClosures.h ===================================== @@ -531,6 +531,7 @@ RTS_FUN_DECL(stg_raiseUnderflowzh); RTS_FUN_DECL(stg_raiseOverflowzh); RTS_FUN_DECL(stg_raiseIOzh); RTS_FUN_DECL(stg_paniczh); +RTS_FUN_DECL(stg_deepseqzh); RTS_FUN_DECL(stg_keepAlivezh); RTS_FUN_DECL(stg_absentErrorzh); ===================================== rts/rts.cabal ===================================== @@ -359,6 +359,7 @@ library cmm-sources: Apply.cmm Compact.cmm ContinuationOps.cmm + DeepSeq.cmm Exception.cmm HeapStackCheck.cmm Jumps_D.cmm ===================================== testsuite/tests/interface-stability/ghc-experimental-exports.stdout ===================================== @@ -4454,6 +4454,11 @@ module Data.Tuple.Experimental where data Unit# = ... getSolo :: forall a. Solo a -> a +module GHC.DeepSeq where + -- Safety: None + force :: forall a. a -> (GHC.Internal.Types.Bool, a) + forceIO :: forall a. a -> GHC.Internal.Types.IO (GHC.Internal.Types.Bool, a) + module GHC.PrimOps where -- Safety: Unsafe (*#) :: Int# -> Int# -> Int# @@ -4924,6 +4929,7 @@ module GHC.PrimOps where decodeDouble_2Int# :: Double# -> (# Int#, Word#, Word#, Int# #) decodeDouble_Int64# :: Double# -> (# Int64#, Int# #) decodeFloat_Int# :: Float# -> (# Int#, Int# #) + deepseq# :: forall a d. a -> State# d -> (# State# d, Int#, a #) delay# :: forall d. Int# -> State# d -> State# d divideDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# divideDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# ===================================== testsuite/tests/interface-stability/ghc-prim-exports.stdout ===================================== @@ -1592,6 +1592,7 @@ module GHC.Prim where decodeDouble_2Int# :: Double# -> (# Int#, Word#, Word#, Int# #) decodeDouble_Int64# :: Double# -> (# Int64#, Int# #) decodeFloat_Int# :: Float# -> (# Int#, Int# #) + deepseq# :: forall a d. a -> State# d -> (# State# d, Int#, a #) delay# :: forall d. Int# -> State# d -> State# d divideDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# divideDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# @@ -3156,6 +3157,7 @@ module GHC.PrimopWrappers where decodeDouble_2Int# :: GHC.Internal.Prim.Double# -> (# GHC.Internal.Prim.Int#, GHC.Internal.Prim.Word#, GHC.Internal.Prim.Word#, GHC.Internal.Prim.Int# #) decodeDouble_Int64# :: GHC.Internal.Prim.Double# -> (# GHC.Internal.Prim.Int64#, GHC.Internal.Prim.Int# #) decodeFloat_Int# :: GHC.Internal.Prim.Float# -> (# GHC.Internal.Prim.Int#, GHC.Internal.Prim.Int# #) + deepseq# :: forall a s. a -> GHC.Internal.Prim.State# s -> (# GHC.Internal.Prim.State# s, GHC.Internal.Prim.Int#, a #) delay# :: forall s. GHC.Internal.Prim.Int# -> GHC.Internal.Prim.State# s -> GHC.Internal.Prim.State# s divideFloat# :: GHC.Internal.Prim.Float# -> GHC.Internal.Prim.Float# -> GHC.Internal.Prim.Float# double2Float# :: GHC.Internal.Prim.Double# -> GHC.Internal.Prim.Float# ===================================== testsuite/tests/primops/should_run/DeepSeqPrimOp.hs ===================================== @@ -0,0 +1,85 @@ +module Main (main) where + +import Control.Concurrent.MVar (MVar, newEmptyMVar, putMVar) +import Control.Exception (SomeException, try) +import Data.Array (Array, listArray) +import Data.Array.IO (IOArray, newArray) +import Data.IORef (IORef, newIORef) +import GHC.DeepSeq (forceIO) +import System.Mem.Weak (mkWeak) + +deepEvaluate :: a -> IO a +deepEvaluate a = do + (_, a') <- forceIO a + pure a' + +deepEvaluateWithFlag :: a -> IO (Bool, a) +deepEvaluateWithFlag = forceIO + +mkThunk :: Int -> Int +mkThunk x = x + 1 +{-# NOINLINE mkThunk #-} + +boomVal :: Int +boomVal = error "boom" +{-# NOINLINE boomVal #-} + +funVal :: Int -> Int +funVal _ = boomVal +{-# NOINLINE funVal #-} + +main :: IO () +main = do + r1 <- try (deepEvaluate (1 :: Int, error "boom") >> pure ()) :: IO (Either SomeException ()) + case r1 of + Left _ -> putStrLn "thunk-forced" + Right _ -> putStrLn "unexpected-no-exn" + + r2 <- try (deepEvaluate funVal) :: IO (Either SomeException (Int -> Int)) + case r2 of + Left _ -> putStrLn "unexpected-exn" + Right _ -> putStrLn "fun-ok" + + (forced2, ()) <- deepEvaluateWithFlag () + if not forced2 + then putStrLn "noforce-ok" + else putStrLn "noforce-bad" + + let v = (1 :: Int, mkThunk 2) + (forced3, v') <- deepEvaluateWithFlag v + if forced3 && snd v' == 3 + then putStrLn "thunk-ok" + else putStrLn "unexpected" + + let arr :: Array Int Int + arr = listArray (0, 0) [boomVal] + r3 <- try (deepEvaluate arr >> pure ()) :: IO (Either SomeException ()) + case r3 of + Left _ -> putStrLn "array-thunk-forced" + Right _ -> putStrLn "array-unforced" + + ioArr <- newArray (0, 0) boomVal :: IO (IOArray Int Int) + r4 <- try (deepEvaluate ioArr >> pure ()) :: IO (Either SomeException ()) + case r4 of + Left _ -> putStrLn "ioarray-thunk-forced" + Right _ -> putStrLn "ioarray-unforced" + + ref <- newIORef boomVal :: IO (IORef Int) + r5 <- try (deepEvaluate ref >> pure ()) :: IO (Either SomeException ()) + case r5 of + Left _ -> putStrLn "ioref-thunk-forced" + Right _ -> putStrLn "ioref-unforced" + + mvar <- newEmptyMVar :: IO (MVar Int) + putMVar mvar boomVal + r6 <- try (deepEvaluate mvar >> pure ()) :: IO (Either SomeException ()) + case r6 of + Left _ -> putStrLn "mvar-thunk-forced" + Right _ -> putStrLn "mvar-unforced" + + keyRef <- newIORef () + weak <- mkWeak keyRef boomVal Nothing + r7 <- try (deepEvaluate weak >> pure ()) :: IO (Either SomeException ()) + case r7 of + Left _ -> putStrLn "weak-thunk-forced" + Right _ -> putStrLn "weak-unforced" ===================================== testsuite/tests/primops/should_run/DeepSeqPrimOp.stdout ===================================== @@ -0,0 +1,9 @@ +thunk-forced +fun-ok +noforce-ok +thunk-ok +array-thunk-forced +ioarray-thunk-forced +ioref-thunk-forced +mvar-thunk-forced +weak-thunk-forced ===================================== testsuite/tests/primops/should_run/all.T ===================================== @@ -17,6 +17,7 @@ test('T13825-compile', normal, compile_and_run, ['']) test('T16164', normal, compile_and_run, ['']) test('ShowPrim', normal, compile_and_run, ['']) test('T12492', normal, compile_and_run, ['']) +test('DeepSeqPrimOp', [js_skip, extra_ways(['ghci','ghci-opt']), extra_hc_opts('-package ghc-experimental')], compile_and_run, ['']) test('ArithInt8', normal, compile_and_run, ['']) test('ArithWord8', normal, compile_and_run, ['']) View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/ea5fe7e8a34d93db03eb3af1d4d838e6... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/ea5fe7e8a34d93db03eb3af1d4d838e6... You're receiving this email because of your account on gitlab.haskell.org.