[Git][ghc/ghc][wip/romes/25636] Allocate static constructors for bytecode
Rodrigo Mesquita pushed to branch wip/romes/25636 at Glasgow Haskell Compiler / GHC
Commits:
0fa83d73 by Rodrigo Mesquita at 2025-12-23T20:00:03+00:00
Allocate static constructors for bytecode
This commit adds support for static constructors when compiling and
linking ByteCode objects.
Top-level StgRhsCon get lowered to ProtoStaticCons rather than to
ProtoBCOs. A ProtoStaticCon gets allocated directly as a data con
application on the heap (using the new primop newConApp#).
Previously, we would allocate a ProtoBCO which, when evaluated, would
PACK and return the constructor.
A few more details are given in Note [Static constructors in Bytecode].
Secondly, this commit also fixes issue #25636 which was caused by
linking *unlifted* constructors in BCO instructions as
- (1) a thunk indexing the array of BCOs in a module
- (2) which evaluated to a BCO which still had to be evaluated to
return the unlifted constructor proper.
The (2) issue has been resolved by allocating the static constructors
directly. The (1) issue can be resolved by ensuring that we allocate all
unlifted top-level constructors eagerly, and leave the knot-tying for
the lifted BCOs and top-level constructors only.
The top-level unlifted constructors are never mutually recursive, so we
can allocate them all in one go as long as we do it in topological
order. Lifted fields of unlifted constructors can still be filled by the
knot-tied lifted variables since in those fields it is fine to keep
those thunks. See Note [Tying the knot in createBCOs] for more details.
Fixes #25636
- - - - -
21 changed files:
- compiler/GHC/Builtin/primops.txt.pp
- compiler/GHC/ByteCode/Asm.hs
- compiler/GHC/ByteCode/Instr.hs
- compiler/GHC/ByteCode/Linker.hs
- compiler/GHC/ByteCode/Serialize.hs
- compiler/GHC/ByteCode/Types.hs
- compiler/GHC/Cmm/Liveness.hs
- compiler/GHC/Linker/Loader.hs
- compiler/GHC/StgToByteCode.hs
- compiler/GHC/StgToCmm/Closure.hs
- compiler/GHC/StgToCmm/Prim.hs
- compiler/GHC/StgToJS/Prim.hs
- libraries/ghci/GHCi/CreateBCO.hs
- libraries/ghci/GHCi/ResolvedBCO.hs
- rts/Interpreter.c
- rts/PrimOps.cmm
- rts/RtsSymbols.c
- rts/include/rts/storage/ClosureMacros.h
- rts/include/stg/MiscClosures.h
- + testsuite/tests/codeGen/should_run/T23146/T25636.stdout
- utils/deriveConstants/Main.hs
Changes:
=====================================
compiler/GHC/Builtin/primops.txt.pp
=====================================
@@ -3947,6 +3947,18 @@ primop NewBCOOp "newBCO#" GenPrimOp
effect = ReadWriteEffect
out_of_line = True
+primop NewConAppObjOp "newConAppObj#" GenPrimOp
+ Addr# -> ByteArray# -> Array# a_levpoly -> Word# -> State# s -> (# State# s, b_levpoly #)
+ { @'newConAppObj#' datacon_itbl lits ptrs arity@ creates a new constructor
+ application object on the heap from the info table pointer of the data
+ constructor and the data arguments given in @ptrs@ and @lits@. The
+ resulting object is a heap closure for the constructor application. It is
+ evaluated and properly tagged. The given @arity@ gives the total size of
+ pointers and literals in number of words. }
+ with
+ effect = ReadWriteEffect
+ out_of_line = True
+
primop UnpackClosureOp "unpackClosure#" GenPrimOp
a -> (# Addr#, ByteArray#, Array# b #)
{ @'unpackClosure#' closure@ copies the closure and pointers in the
=====================================
compiler/GHC/ByteCode/Asm.hs
=====================================
@@ -4,6 +4,7 @@
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE PatternSynonyms #-}
{-# OPTIONS_GHC -optc-DNON_POSIX_SOURCE #-}
+{-# LANGUAGE LambdaCase #-}
--
--
-- (c) The University of Glasgow 2002-2006
@@ -21,9 +22,9 @@ module GHC.ByteCode.Asm (
assembleBCO
) where
-import GHC.Prelude hiding ( any )
-
+import GHC.Prelude hiding ( any, words )
+import Data.Maybe
import GHC.ByteCode.Instr
import GHC.ByteCode.InfoTable
import GHC.ByteCode.Types
@@ -37,9 +38,12 @@ import GHC.Types.SptEntry
import GHC.Types.Unique.FM
import GHC.Unit.Types
-import GHC.Utils.Outputable
+import GHC.Utils.Outputable ( Outputable(..), text, (<+>), vcat )
import GHC.Utils.Panic
+import GHC.Builtin.Types.Prim ( addrPrimTy )
+import GHC.Core.Type ( isUnliftedType )
+import GHC.Core.TyCo.Compare ( eqType )
import GHC.Core.TyCon
import GHC.Data.SizedSeq
import GHC.Data.SmallArray
@@ -66,11 +70,13 @@ import Data.Array.Base ( unsafeWrite )
import Foreign hiding (shiftL, shiftR)
import Data.ByteString (ByteString)
import Data.Char (ord)
-import Data.Maybe (fromMaybe)
import GHC.Float (castFloatToWord32, castDoubleToWord64)
import qualified Data.List as List ( any )
import GHC.Exts
+import GHC.Core.DataCon
+import GHC.Data.FlatBag
+import GHC.Types.Id
-- -----------------------------------------------------------------------------
@@ -83,14 +89,23 @@ import GHC.Exts
-- defined by this group of BCOs themselves
bcoFreeNames :: UnlinkedBCO -> UniqDSet Name
bcoFreeNames bco
- = bco_refs bco `uniqDSetMinusUniqSet` mkNameSet [unlinkedBCOName bco]
+ = bco_refs bco
where
- bco_refs (UnlinkedBCO _ _ _ _ nonptrs ptrs)
+ bco_refs UnlinkedBCO{unlinkedBCOName, unlinkedBCOLits, unlinkedBCOPtrs}
+ = unionManyUniqDSets (
+ mkUniqDSet [ n | BCOPtrName n <- elemsFlatBag unlinkedBCOPtrs ] :
+ mkUniqDSet [ n | BCONPtrItbl n <- elemsFlatBag unlinkedBCOLits ] :
+ map bco_refs [ bco | BCOPtrBCO bco <- elemsFlatBag unlinkedBCOPtrs ]
+ ) `uniqDSetMinusUniqSet` mkNameSet [unlinkedBCOName]
+ bco_refs UnlinkedStaticCon{ unlinkedStaticConName, unlinkedStaticConDataConName
+ , unlinkedStaticConLits, unlinkedStaticConPtrs }
= unionManyUniqDSets (
- mkUniqDSet [ n | BCOPtrName n <- elemsFlatBag ptrs ] :
- mkUniqDSet [ n | BCONPtrItbl n <- elemsFlatBag nonptrs ] :
- map bco_refs [ bco | BCOPtrBCO bco <- elemsFlatBag ptrs ]
+ mkUniqDSet [ unlinkedStaticConDataConName ] :
+ mkUniqDSet [ n | BCOPtrName n <- elemsFlatBag unlinkedStaticConPtrs ] :
+ mkUniqDSet [ n | BCONPtrItbl n <- elemsFlatBag unlinkedStaticConLits ] :
+ map bco_refs [ bco | BCOPtrBCO bco <- elemsFlatBag unlinkedStaticConPtrs ]
)
+ `uniqDSetMinusUniqSet` mkNameSet [ unlinkedStaticConName ]
-- -----------------------------------------------------------------------------
-- The bytecode assembler
@@ -147,9 +162,9 @@ assembleBCOs profile proto_bcos tycons top_strs modbreaks spt_entries = do
--
data RunAsmReader = RunAsmReader { isn_array :: {-# UNPACK #-} !(Array.IOUArray Int Word16)
- , ptr_array :: {-# UNPACK #-} !(SmallMutableArrayIO BCOPtr)
- , lit_array :: {-# UNPACK #-} !(SmallMutableArrayIO BCONPtr )
- }
+ , ptr_array :: {-# UNPACK #-} !(SmallMutableArrayIO BCOPtr)
+ , lit_array :: {-# UNPACK #-} !(SmallMutableArrayIO BCONPtr)
+ }
data RunAsmResult = RunAsmResult { final_isn_array :: !(Array.UArray Int Word16)
, final_ptr_array :: !(SmallArray BCOPtr)
@@ -195,6 +210,40 @@ assembleInspectAsm :: Platform -> BCInstr -> InspectAsm ()
assembleInspectAsm p i = assembleI @InspectAsm p i
assembleBCO :: Platform -> ProtoBCO -> IO UnlinkedBCO
+assembleBCO platform
+ (ProtoStaticCon { protoStaticConName
+ , protoStaticCon = dc
+ , protoStaticConData = args
+ }) = do
+ let ptrs = foldr mappendFlatBag emptyFlatBag (mapMaybe idBCOArg args)
+ let nonptrs = foldr mappendFlatBag emptyFlatBag (mapMaybe litBCOArg args)
+ pure UnlinkedStaticCon
+ { unlinkedStaticConName = protoStaticConName
+ , unlinkedStaticConDataConName = dataConName dc
+ , unlinkedStaticConLits = nonptrs
+ , unlinkedStaticConPtrs = ptrs
+ , unlinkedStaticConIsUnlifted = isUnliftedType (idType (dataConWrapId dc))
+ }
+ where
+ litBCOArg (Left l) = Just $ case literal platform l of
+ OnlyOne np -> unitFlatBag np
+ OnlyTwo np1 np2 -> TupleFlatBag np1 np2
+ litBCOArg (Right var)
+ -- Addr# literals are non-pointers
+ | idType var `eqType` addrPrimTy
+ = Just $ unitFlatBag (BCONPtrAddr (getName var))
+ | otherwise
+ = Nothing
+
+ idBCOArg (Left _) = Nothing
+ idBCOArg (Right var)
+ | idType var `eqType` addrPrimTy
+ = Nothing
+ | Just prim <- isPrimOpId_maybe var
+ = Just $ unitFlatBag (BCOPtrPrimOp prim)
+ | otherwise
+ = Just $ unitFlatBag (BCOPtrName (getName var))
+
assembleBCO platform
(ProtoBCO { protoBCOName = nm
, protoBCOInstrs = instrs
@@ -561,9 +610,9 @@ oneTwoLength (OnlyTwo {}) = 2
class Monad m => MonadAssembler m where
ioptr :: IO BCOPtr -> m Word
- lit :: OneOrTwo BCONPtr -> m Word
+ lit :: OneOrTwo BCONPtr -> m Word
label :: LocalLabel -> m ()
- emit :: PlatformWordSize -> Word16 -> [Operand] -> m ()
+ emit :: PlatformWordSize -> Word16 -> [Operand] -> m ()
lit1 :: MonadAssembler m => BCONPtr -> m Word
lit1 p = lit (OnlyOne p)
@@ -603,20 +652,20 @@ assembleI platform i = case i of
tuple_proto
p <- ioptr (liftM BCOPtrBCO ul_bco)
p_tup <- ioptr (liftM BCOPtrBCO ul_tuple_bco)
- info <- word (fromIntegral $
- mkNativeCallInfoSig platform call_info)
+ info <- lit $ word $ fromIntegral $
+ mkNativeCallInfoSig platform call_info
emit_ bci_PUSH_ALTS_T
[Op p, Op info, Op p_tup]
PUSH_PAD8 -> emit_ bci_PUSH_PAD8 []
PUSH_PAD16 -> emit_ bci_PUSH_PAD16 []
PUSH_PAD32 -> emit_ bci_PUSH_PAD32 []
- PUSH_UBX8 lit -> do np <- literal lit
+ PUSH_UBX8 litv -> do np <- lit $ literal platform litv
emit_ bci_PUSH_UBX8 [Op np]
- PUSH_UBX16 lit -> do np <- literal lit
+ PUSH_UBX16 litv -> do np <- lit $ literal platform litv
emit_ bci_PUSH_UBX16 [Op np]
- PUSH_UBX32 lit -> do np <- literal lit
+ PUSH_UBX32 litv -> do np <- lit $ literal platform litv
emit_ bci_PUSH_UBX32 [Op np]
- PUSH_UBX lit nws -> do np <- literal lit
+ PUSH_UBX litv nws -> do np <- lit $ literal platform litv
emit_ bci_PUSH_UBX [Op np, wOp nws]
-- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode
PUSH_ADDR nm -> do np <- lit1 (BCONPtrAddr nm)
@@ -644,53 +693,53 @@ assembleI platform i = case i of
PACK dcon sz -> do itbl_no <- lit1 (BCONPtrItbl (getName dcon))
emit_ bci_PACK [Op itbl_no, wOp sz]
LABEL lbl -> label lbl
- TESTLT_I i l -> do np <- int i
+ TESTLT_I i l -> do np <- lit $ int i
emit_ bci_TESTLT_I [Op np, LabelOp l]
- TESTEQ_I i l -> do np <- int i
+ TESTEQ_I i l -> do np <- lit $ int i
emit_ bci_TESTEQ_I [Op np, LabelOp l]
- TESTLT_W w l -> do np <- word w
+ TESTLT_W w l -> do np <- lit $ word w
emit_ bci_TESTLT_W [Op np, LabelOp l]
- TESTEQ_W w l -> do np <- word w
+ TESTEQ_W w l -> do np <- lit $ word w
emit_ bci_TESTEQ_W [Op np, LabelOp l]
- TESTLT_I64 i l -> do np <- word64 (fromIntegral i)
+ TESTLT_I64 i l -> do np <- lit $ word64 platform (fromIntegral i)
emit_ bci_TESTLT_I64 [Op np, LabelOp l]
- TESTEQ_I64 i l -> do np <- word64 (fromIntegral i)
+ TESTEQ_I64 i l -> do np <- lit $ word64 platform (fromIntegral i)
emit_ bci_TESTEQ_I64 [Op np, LabelOp l]
- TESTLT_I32 i l -> do np <- word (fromIntegral i)
+ TESTLT_I32 i l -> do np <- lit $ word (fromIntegral i)
emit_ bci_TESTLT_I32 [Op np, LabelOp l]
- TESTEQ_I32 i l -> do np <- word (fromIntegral i)
+ TESTEQ_I32 i l -> do np <- lit $ word (fromIntegral i)
emit_ bci_TESTEQ_I32 [Op np, LabelOp l]
- TESTLT_I16 i l -> do np <- word (fromIntegral i)
+ TESTLT_I16 i l -> do np <- lit $ word (fromIntegral i)
emit_ bci_TESTLT_I16 [Op np, LabelOp l]
- TESTEQ_I16 i l -> do np <- word (fromIntegral i)
+ TESTEQ_I16 i l -> do np <- lit $ word (fromIntegral i)
emit_ bci_TESTEQ_I16 [Op np, LabelOp l]
- TESTLT_I8 i l -> do np <- word (fromIntegral i)
+ TESTLT_I8 i l -> do np <- lit $ word (fromIntegral i)
emit_ bci_TESTLT_I8 [Op np, LabelOp l]
- TESTEQ_I8 i l -> do np <- word (fromIntegral i)
+ TESTEQ_I8 i l -> do np <- lit $ word (fromIntegral i)
emit_ bci_TESTEQ_I8 [Op np, LabelOp l]
- TESTLT_W64 w l -> do np <- word64 w
+ TESTLT_W64 w l -> do np <- lit $ word64 platform w
emit_ bci_TESTLT_W64 [Op np, LabelOp l]
- TESTEQ_W64 w l -> do np <- word64 w
+ TESTEQ_W64 w l -> do np <- lit $ word64 platform w
emit_ bci_TESTEQ_W64 [Op np, LabelOp l]
- TESTLT_W32 w l -> do np <- word (fromIntegral w)
+ TESTLT_W32 w l -> do np <- lit $ word (fromIntegral w)
emit_ bci_TESTLT_W32 [Op np, LabelOp l]
- TESTEQ_W32 w l -> do np <- word (fromIntegral w)
+ TESTEQ_W32 w l -> do np <- lit $ word (fromIntegral w)
emit_ bci_TESTEQ_W32 [Op np, LabelOp l]
- TESTLT_W16 w l -> do np <- word (fromIntegral w)
+ TESTLT_W16 w l -> do np <- lit $ word (fromIntegral w)
emit_ bci_TESTLT_W16 [Op np, LabelOp l]
- TESTEQ_W16 w l -> do np <- word (fromIntegral w)
+ TESTEQ_W16 w l -> do np <- lit $ word (fromIntegral w)
emit_ bci_TESTEQ_W16 [Op np, LabelOp l]
- TESTLT_W8 w l -> do np <- word (fromIntegral w)
+ TESTLT_W8 w l -> do np <- lit $ word (fromIntegral w)
emit_ bci_TESTLT_W8 [Op np, LabelOp l]
- TESTEQ_W8 w l -> do np <- word (fromIntegral w)
+ TESTEQ_W8 w l -> do np <- lit $ word (fromIntegral w)
emit_ bci_TESTEQ_W8 [Op np, LabelOp l]
- TESTLT_F f l -> do np <- float f
+ TESTLT_F f l -> do np <- lit $ float platform f
emit_ bci_TESTLT_F [Op np, LabelOp l]
- TESTEQ_F f l -> do np <- float f
+ TESTEQ_F f l -> do np <- lit $ float platform f
emit_ bci_TESTEQ_F [Op np, LabelOp l]
- TESTLT_D d l -> do np <- double d
+ TESTLT_D d l -> do np <- lit $ double platform d
emit_ bci_TESTLT_D [Op np, LabelOp l]
- TESTEQ_D d l -> do np <- double d
+ TESTEQ_D d l -> do np <- lit $ double platform d
emit_ bci_TESTEQ_D [Op np, LabelOp l]
TESTLT_P i l -> emit_ bci_TESTLT_P [SmallOp i, LabelOp l]
TESTEQ_P i l -> emit_ bci_TESTEQ_P [SmallOp i, LabelOp l]
@@ -864,84 +913,86 @@ assembleI platform i = case i of
where
unsupported_width = panic "GHC.ByteCode.Asm: Unsupported Width"
- emit_ = emit word_size
-
- literal :: Literal -> m Word
- literal (LitLabel fs _) = litlabel fs
- literal LitNullAddr = word 0
- literal (LitFloat r) = float (fromRational r)
- literal (LitDouble r) = double (fromRational r)
- literal (LitChar c) = int (ord c)
- literal (LitString bs) = lit1 (BCONPtrStr bs)
- -- LitString requires a zero-terminator when emitted
- literal (LitNumber nt i) = case nt of
- LitNumInt -> word (fromIntegral i)
- LitNumWord -> word (fromIntegral i)
- LitNumInt8 -> word8 (fromIntegral i)
- LitNumWord8 -> word8 (fromIntegral i)
- LitNumInt16 -> word16 (fromIntegral i)
- LitNumWord16 -> word16 (fromIntegral i)
- LitNumInt32 -> word32 (fromIntegral i)
- LitNumWord32 -> word32 (fromIntegral i)
- LitNumInt64 -> word64 (fromIntegral i)
- LitNumWord64 -> word64 (fromIntegral i)
- LitNumBigNat -> panic "GHC.ByteCode.Asm.literal: LitNumBigNat"
+ emit_ = emit (platformWordSize platform)
+
+literal :: Platform -> Literal -> OneOrTwo BCONPtr
+literal platform = \case
+ LitLabel fs _ -> OnlyOne (BCONPtrLbl fs)
+ LitNullAddr -> word 0
+ LitFloat r -> float platform (fromRational r)
+ LitDouble r -> double platform (fromRational r)
+ LitChar c -> int (ord c)
+ LitString bs -> OnlyOne (BCONPtrStr bs)
+ -- LitString requires a zero-terminator when emitted
+ LitNumber nt i -> case nt of
+ LitNumInt -> word (fromIntegral i)
+ LitNumWord -> word (fromIntegral i)
+ LitNumInt8 -> word8 platform (fromIntegral i)
+ LitNumWord8 -> word8 platform (fromIntegral i)
+ LitNumInt16 -> word16 platform (fromIntegral i)
+ LitNumWord16 -> word16 platform (fromIntegral i)
+ LitNumInt32 -> word32 platform (fromIntegral i)
+ LitNumWord32 -> word32 platform (fromIntegral i)
+ LitNumInt64 -> word64 platform (fromIntegral i)
+ LitNumWord64 -> word64 platform (fromIntegral i)
+ LitNumBigNat -> panic "GHC.ByteCode.Asm.literal: LitNumBigNat"
-- We can lower 'LitRubbish' to an arbitrary constant, but @NULL@ is most
-- likely to elicit a crash (rather than corrupt memory) in case absence
-- analysis messed up.
- literal (LitRubbish {}) = word 0
-
- litlabel fs = lit1 (BCONPtrLbl fs)
- words ws = lit (fmap BCONPtrWord ws)
- word w = words (OnlyOne w)
- word2 w1 w2 = words (OnlyTwo w1 w2)
- word_size = platformWordSize platform
- word_size_bits = platformWordSizeInBits platform
-
- -- Make lists of host-sized words for literals, so that when the
- -- words are placed in memory at increasing addresses, the
- -- bit pattern is correct for the host's word size and endianness.
- --
- -- Note that we only support host endianness == target endianness for now,
- -- even with the external interpreter. This would need to be fixed to
- -- support host endianness /= target endianness
- int :: Int -> m Word
- int i = word (fromIntegral i)
-
- float :: Float -> m Word
- float f = word32 (castFloatToWord32 f)
-
- double :: Double -> m Word
- double d = word64 (castDoubleToWord64 d)
-
- word64 :: Word64 -> m Word
- word64 ww = case word_size of
- PW4 ->
- let !wl = fromIntegral ww
- !wh = fromIntegral (ww `unsafeShiftR` 32)
- in case platformByteOrder platform of
- LittleEndian -> word2 wl wh
- BigEndian -> word2 wh wl
- PW8 -> word (fromIntegral ww)
-
- word8 :: Word8 -> m Word
- word8 x = case platformByteOrder platform of
- LittleEndian -> word (fromIntegral x)
- BigEndian -> word (fromIntegral x `unsafeShiftL` (word_size_bits - 8))
-
- word16 :: Word16 -> m Word
- word16 x = case platformByteOrder platform of
- LittleEndian -> word (fromIntegral x)
- BigEndian -> word (fromIntegral x `unsafeShiftL` (word_size_bits - 16))
-
- word32 :: Word32 -> m Word
- word32 x = case platformByteOrder platform of
- LittleEndian -> word (fromIntegral x)
- BigEndian -> case word_size of
- PW4 -> word (fromIntegral x)
- PW8 -> word (fromIntegral x `unsafeShiftL` 32)
+ LitRubbish {} -> word 0
+
+words :: OneOrTwo Word -> OneOrTwo BCONPtr
+words ws = fmap BCONPtrWord ws
+
+word :: Word -> OneOrTwo BCONPtr
+word w = words (OnlyOne w)
+word2 :: Word -> Word -> OneOrTwo BCONPtr
+word2 w1 w2 = words (OnlyTwo w1 w2)
+
+-- Make lists of host-sized words for literals, so that when the
+-- words are placed in memory at increasing addresses, the
+-- bit pattern is correct for the host's word size and endianness.
+--
+-- Note that we only support host endianness == target endianness for now,
+-- even with the external interpreter. This would need to be fixed to
+-- support host endianness /= target endianness
+int :: Int -> OneOrTwo BCONPtr
+int i = word (fromIntegral i)
+
+float :: Platform -> Float -> OneOrTwo BCONPtr
+float platform f = word32 platform (castFloatToWord32 f)
+
+double :: Platform -> Double -> OneOrTwo BCONPtr
+double p d = word64 p (castDoubleToWord64 d)
+
+word64 :: Platform -> Word64 -> OneOrTwo BCONPtr
+word64 platform ww = case platformWordSize platform of
+ PW4 ->
+ let !wl = fromIntegral ww
+ !wh = fromIntegral (ww `unsafeShiftR` 32)
+ in case platformByteOrder platform of
+ LittleEndian -> word2 wl wh
+ BigEndian -> word2 wh wl
+ PW8 -> word (fromIntegral ww)
+
+word8 :: Platform -> Word8 -> OneOrTwo BCONPtr
+word8 platform x = case platformByteOrder platform of
+ LittleEndian -> word (fromIntegral x)
+ BigEndian -> word (fromIntegral x `unsafeShiftL` (platformWordSizeInBits platform - 8))
+
+word16 :: Platform -> Word16 -> OneOrTwo BCONPtr
+word16 platform x = case platformByteOrder platform of
+ LittleEndian -> word (fromIntegral x)
+ BigEndian -> word (fromIntegral x `unsafeShiftL` (platformWordSizeInBits platform - 16))
+
+word32 :: Platform -> Word32 -> OneOrTwo BCONPtr
+word32 platform x = case platformByteOrder platform of
+ LittleEndian -> word (fromIntegral x)
+ BigEndian -> case platformWordSize platform of
+ PW4 -> word (fromIntegral x)
+ PW8 -> word (fromIntegral x `unsafeShiftL` 32)
isLargeW :: Word -> Bool
isLargeW n = n > 65535
=====================================
compiler/GHC/ByteCode/Instr.hs
=====================================
@@ -16,6 +16,7 @@ import GHC.Cmm.Type (Width)
import GHC.StgToCmm.Layout ( ArgRep(..) )
import GHC.Utils.Outputable
import GHC.Types.Name
+import GHC.Types.Id
import GHC.Types.Literal
import GHC.Types.Unique
import GHC.Core.DataCon
@@ -43,9 +44,63 @@ data ProtoBCO
protoBCOBitmap :: [StgWord],
protoBCOBitmapSize :: Word,
protoBCOArity :: Int,
- -- what the BCO came from, for debugging only
+ -- | What the BCO came from, for debugging only
protoBCOExpr :: Either [CgStgAlt] CgStgRhs
}
+ -- | A top-level static constructor application object
+ -- See Note [Static constructors in Bytecode]
+ | ProtoStaticCon {
+ protoStaticConName :: Name,
+ -- ^ The name to which this static constructor is bound,
+ -- not to be confused with the DataCon itself.
+ protoStaticCon :: DataCon,
+ -- ^ The DataCon being constructed.
+ -- We use this to construct the right info table.
+ protoStaticConData :: [Either Literal Id],
+ -- ^ The static constructor pointer and non-pointer arguments, sorted
+ -- in the order they should appear at runtime (see 'mkVirtConstrOffsets').
+ -- The pointers always come first, followed by the non-pointers.
+ protoStaticConExpr :: CgStgRhs
+ -- ^ What the static con came from, for debugging only
+ }
+
+{-
+Note [Static constructors in Bytecode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In bytecode, top-level 'StgRhsCon's are lowered to 'ProtoStaticCon' rather than
+'ProtoBCO'. A 'ProtoStaticCon' represents directly a heap allocated data
+constructor application. We can do this only for top-level 'StgRhsCon's, where
+all the data arguments to the constructor are statically known.
+
+'StgRhsCon's which have free variables are compiled down to BCOs which push the
+arguments and then 'PACK' the constructor, just like 'StgConApp's.
+
+Example:
+
+ Haskell:
+
+ data X = X Char# Char
+ x = X 'a'# 'b'
+
+ Stg:
+
+ x1 = GHC.Types.C#! ['b'#];
+ X.x = X.X! ['a#' x2];
+
+ X.X = \r [arg1 arg2] X.X [arg1 arg2];
+
+ ByteCode:
+ ProtoStaticCon x1:
+ C# [Left 'b'#]
+ ProtoStaticCon X.x:
+ X.X [Left 'a'#, Right x1]
+
+ ProtoBCO X.X:
+ PUSH_LL 0 1
+ PACK X.X 2
+ SLIDE 1 2
+ RETURN P
+-}
-- | A local block label (e.g. identifying a case alternative).
newtype LocalLabel = LocalLabel { getLocalLabel :: Word32 }
@@ -278,6 +333,11 @@ data BCInstr
-- Printing bytecode instructions
instance Outputable ProtoBCO where
+ ppr (ProtoStaticCon nm con args origin)
+ = text "ProtoStaticCon" <+> ppr nm <> colon
+ $$ nest 3 (pprStgRhsShort shortStgPprOpts origin)
+ $$ nest 3 (text "constructor: " <+> ppr con)
+ $$ nest 3 (text "sorted args: " <+> ppr args)
ppr (ProtoBCO { protoBCOName = name
, protoBCOInstrs = instrs
, protoBCOBitmap = bitmap
@@ -469,7 +529,8 @@ instance Outputable BCInstr where
-- stack high water mark, but it doesn't seem worth the hassle.
protoBCOStackUse :: ProtoBCO -> Word
-protoBCOStackUse bco = sum (map bciStackUse (protoBCOInstrs bco))
+protoBCOStackUse ProtoBCO{protoBCOInstrs} = sum (map bciStackUse protoBCOInstrs)
+protoBCOStackUse ProtoStaticCon{} = 0
bciStackUse :: BCInstr -> Word
bciStackUse STKCHECK{} = 0
=====================================
compiler/GHC/ByteCode/Linker.hs
=====================================
@@ -12,6 +12,7 @@ module GHC.ByteCode.Linker
, lookupStaticPtr
, lookupIE
, linkFail
+ , BCOIx(..)
)
where
@@ -46,31 +47,68 @@ import Data.Array.Unboxed
import Foreign.Ptr
import GHC.Exts
-{-
+{- |
Linking interpretables into something we can run
-}
-
linkBCO
:: Interp
-> PkgsLoaded
-> BytecodeLoaderState
- -> NameEnv Int
+ -> NameEnv BCOIx
+ -- ^ A mapping from names to references to other BCOs
+ -- or static constructors in this group.
-> UnlinkedBCO
-> IO ResolvedBCO
-linkBCO interp pkgs_loaded bytecode_state bco_ix
- (UnlinkedBCO _ arity insns bitmap lits0 ptrs0) = do
- -- fromIntegral Word -> Word64 should be a no op if Word is Word64
- -- otherwise it will result in a cast to longlong on 32bit systems.
- (lits :: [Word]) <- mapM (fmap fromIntegral . lookupLiteral interp pkgs_loaded bytecode_state) (elemsFlatBag lits0)
- ptrs <- mapM (resolvePtr interp pkgs_loaded bytecode_state bco_ix) (elemsFlatBag ptrs0)
- let lits' = listArray (0 :: Int, fromIntegral (sizeFlatBag lits0)-1) lits
- return $ ResolvedBCO { resolvedBCOIsLE = isLittleEndian
- , resolvedBCOArity = arity
- , resolvedBCOInstrs = insns
- , resolvedBCOBitmap = bitmap
- , resolvedBCOLits = mkBCOByteArray lits'
- , resolvedBCOPtrs = addListToSS emptySS ptrs
- }
+linkBCO interp pkgs_loaded bytecode_state bco_ix unl_bco = do
+ case unl_bco of
+ UnlinkedBCO _ arity insns
+ bitmap lits0 ptrs0 -> do
+ lits <- doLits lits0
+ ptrs <- doPtrs ptrs0
+ return ResolvedBCO
+ { resolvedBCOIsLE = isLittleEndian
+ , resolvedBCOArity = arity
+ , resolvedBCOInstrs = insns
+ , resolvedBCOBitmap = bitmap
+ , resolvedBCOLits = lits
+ , resolvedBCOPtrs = ptrs
+ }
+
+ UnlinkedStaticCon
+ { unlinkedStaticConLits = lits0
+ , unlinkedStaticConPtrs = ptrs0
+ , unlinkedStaticConDataConName
+ , unlinkedStaticConIsUnlifted
+ } -> do
+ Ptr itbl_ptr# <- lookupIE interp pkgs_loaded bytecode_state unlinkedStaticConDataConName
+ lits <- doLits lits0
+ ptrs <- doPtrs ptrs0
+ return ResolvedStaticCon
+ { resolvedBCOIsLE = isLittleEndian
+ , resolvedStaticConInfoPtr = W# (int2Word# (addr2Int# itbl_ptr#))
+ , resolvedStaticConArity = sizeFlatBag lits0 + sizeFlatBag ptrs0
+ , resolvedStaticConLits = lits
+ , resolvedStaticConPtrs = ptrs
+ , resolvedStaticConIsUnlifted = unlinkedStaticConIsUnlifted
+ }
+ where
+ doLits lits0 = do
+ (lits :: [Word]) <- mapM (lookupLiteral interp pkgs_loaded bytecode_state) (elemsFlatBag lits0)
+ let lits' = listArray (0 :: Int, fromIntegral (sizeFlatBag lits0)-1) lits
+ return $ mkBCOByteArray lits'
+ doPtrs ptrs0 = addListToSS emptySS <$> do
+ mapM (resolvePtr interp pkgs_loaded bytecode_state bco_ix) (elemsFlatBag ptrs0)
+
+-- | An index into a BCO or Static Constructor in this group.
+--
+-- We distinguish between lifted and unlifted static constructors because
+-- lifted ones get resolved by tying a knot, since there may be circular
+-- dependencies between them, whereas unlifted ones get constructed in a first
+-- pass.
+data BCOIx = BCOIx !Int
+ | LiftedStaticConIx !Int
+ | UnliftedStaticConIx !Int
+ deriving (Eq, Ord, Show)
lookupLiteral :: Interp -> PkgsLoaded -> BytecodeLoaderState -> BCONPtr -> IO Word
lookupLiteral interp pkgs_loaded bytecode_state ptr = case ptr of
@@ -154,13 +192,16 @@ resolvePtr
:: Interp
-> PkgsLoaded
-> BytecodeLoaderState
- -> NameEnv Int
+ -> NameEnv BCOIx
-> BCOPtr
-> IO ResolvedBCOPtr
resolvePtr interp pkgs_loaded bco_loader_state bco_ix ptr = case ptr of
BCOPtrName nm
- | Just ix <- lookupNameEnv bco_ix nm
- -> return (ResolvedBCORef ix) -- ref to another BCO in this group
+ | Just bix <- lookupNameEnv bco_ix nm
+ -> return $ case bix of
+ BCOIx ix -> ResolvedBCORef ix
+ LiftedStaticConIx ix -> ResolvedStaticConRef ix
+ UnliftedStaticConIx ix -> ResolvedUnliftedStaticConRef ix
| Just (_, rhv) <- lookupNameBytecodeState bco_loader_state nm
-> return (ResolvedBCOPtr (unsafeForeignRefToRemoteRef rhv))
=====================================
compiler/GHC/ByteCode/Serialize.hs
=====================================
@@ -315,22 +315,39 @@ instance Binary CompiledByteCode where
put_ bh bc_spt_entries
instance Binary UnlinkedBCO where
- get bh =
- UnlinkedBCO
- <$> getViaBinName bh
- <*> get bh
- <*> (Binary.decode <$> get bh)
- <*> (Binary.decode <$> get bh)
- <*> get bh
- <*> get bh
+ get bh = do
+ t <- getByte bh
+ case t of
+ 0 -> UnlinkedBCO
+ <$> getViaBinName bh
+ <*> get bh
+ <*> (Binary.decode <$> get bh)
+ <*> (Binary.decode <$> get bh)
+ <*> get bh
+ <*> get bh
+ 1 -> UnlinkedStaticCon
+ <$> getViaBinName bh
+ <*> getViaBinName bh
+ <*> get bh
+ <*> get bh
+ <*> get bh
+ _ -> panic "Binary UnlinkedBCO: invalid byte"
put_ bh UnlinkedBCO {..} = do
+ putByte bh 0
putViaBinName bh unlinkedBCOName
put_ bh unlinkedBCOArity
put_ bh $ Binary.encode unlinkedBCOInstrs
put_ bh $ Binary.encode unlinkedBCOBitmap
put_ bh unlinkedBCOLits
put_ bh unlinkedBCOPtrs
+ put_ bh UnlinkedStaticCon {..} = do
+ putByte bh 1
+ putViaBinName bh unlinkedStaticConName
+ putViaBinName bh unlinkedStaticConDataConName
+ put_ bh unlinkedStaticConLits
+ put_ bh unlinkedStaticConPtrs
+ put_ bh unlinkedStaticConIsUnlifted
instance Binary BCOPtr where
get bh = do
=====================================
compiler/GHC/ByteCode/Types.hs
=====================================
@@ -29,6 +29,7 @@ module GHC.ByteCode.Types
) where
import GHC.Prelude
+import qualified Data.ByteString.Char8 as BS8
import GHC.Data.FastString
import GHC.Data.FlatBag
@@ -248,16 +249,31 @@ data UnlinkedBCO
= UnlinkedBCO {
unlinkedBCOName :: !Name,
unlinkedBCOArity :: {-# UNPACK #-} !Int,
- unlinkedBCOInstrs :: !(BCOByteArray Word16), -- insns
- unlinkedBCOBitmap :: !(BCOByteArray Word), -- bitmap
- unlinkedBCOLits :: !(FlatBag BCONPtr), -- non-ptrs
- unlinkedBCOPtrs :: !(FlatBag BCOPtr) -- ptrs
+ unlinkedBCOInstrs :: !(BCOByteArray Word16), -- insns
+ unlinkedBCOBitmap :: !(BCOByteArray Word), -- bitmap
+ unlinkedBCOLits :: !(FlatBag BCONPtr), -- non-ptrs
+ unlinkedBCOPtrs :: !(FlatBag BCOPtr) -- ptrs
+ }
+ -- | An unlinked top-level static constructor
+ -- See Note [Static constructors in Bytecode]
+ | UnlinkedStaticCon {
+ unlinkedStaticConName :: !Name,
+ -- ^ The name to which this static constructor is bound, not to be
+ -- confused with the name of the static constructor itself
+ -- ('unlinkedStaticConDataConName')
+ unlinkedStaticConDataConName :: !Name,
+ unlinkedStaticConLits :: !(FlatBag BCONPtr), -- non-ptrs
+ unlinkedStaticConPtrs :: !(FlatBag BCOPtr), -- ptrs
+ unlinkedStaticConIsUnlifted :: !Bool
}
instance NFData UnlinkedBCO where
rnf UnlinkedBCO{..} =
rnf unlinkedBCOLits `seq`
rnf unlinkedBCOPtrs
+ rnf UnlinkedStaticCon{..} =
+ rnf unlinkedStaticConLits `seq`
+ rnf unlinkedStaticConPtrs
data BCOPtr
= BCOPtrName !Name
@@ -270,6 +286,12 @@ instance NFData BCOPtr where
rnf (BCOPtrBCO bco) = rnf bco
rnf x = x `seq` ()
+instance Outputable BCOPtr where
+ ppr (BCOPtrName nm) = text "BCOPtrName" <+> ppr nm
+ ppr (BCOPtrPrimOp op) = text "BCOPtrPrimOp" <+> ppr op
+ ppr (BCOPtrBCO bco) = text "BCOPtrBCO" <+> ppr bco
+ ppr (BCOPtrBreakArray mod) = text "
participants (1)
-
Rodrigo Mesquita (@alt-romes)