[Git][ghc/ghc][wip/bytecode-serialize-3] compiler: implement and test bytecode serialization logic

Cheng Shao pushed to branch wip/bytecode-serialize-3 at Glasgow Haskell Compiler / GHC Commits: dfad5e2e by Cheng Shao at 2025-08-11T08:43:48+00:00 compiler: implement and test bytecode serialization logic - - - - - 14 changed files: - compiler/GHC/Builtin/PrimOps.hs - compiler/GHC/ByteCode/Breakpoints.hs - + compiler/GHC/ByteCode/Serialize.hs - compiler/GHC/ByteCode/Types.hs - compiler/GHC/Data/FlatBag.hs - compiler/GHC/Data/SmallArray.hs - compiler/GHC/Driver/Main.hs - compiler/GHC/HsToCore/Breakpoints.hs - compiler/GHC/Types/SptEntry.hs - compiler/GHC/Types/Tickish.hs - compiler/GHC/Utils/Binary.hs - compiler/ghc.cabal.in - testsuite/tests/count-deps/CountDepsAst.stdout - testsuite/tests/count-deps/CountDepsParser.stdout Changes: ===================================== compiler/GHC/Builtin/PrimOps.hs ===================================== @@ -53,10 +53,12 @@ import GHC.Types.Unique ( Unique ) import GHC.Unit.Types ( Unit ) +import GHC.Utils.Binary import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Data.FastString +import GHC.Data.SmallArray {- ************************************************************************ @@ -929,3 +931,12 @@ primOpIsReallyInline = \case DataToTagSmallOp -> False DataToTagLargeOp -> False p -> not (primOpOutOfLine p) + +instance Binary PrimOp where + get bh = (allThePrimOpsArr `indexSmallArray`) <$> get bh + + put_ bh = put_ bh . primOpTag + +allThePrimOpsArr :: SmallArray PrimOp +{-# NOINLINE allThePrimOpsArr #-} +allThePrimOpsArr = listToArray (maxPrimOpTag + 1) primOpTag id allThePrimOps ===================================== compiler/GHC/ByteCode/Breakpoints.hs ===================================== @@ -1,5 +1,6 @@ {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE DerivingVia #-} -- | Breakpoint information constructed during ByteCode generation. -- @@ -44,6 +45,7 @@ import GHC.HsToCore.Breakpoints import GHC.Iface.Syntax import GHC.Unit.Module (Module) +import GHC.Utils.Binary import GHC.Utils.Outputable import GHC.Utils.Panic import Data.Array @@ -297,3 +299,26 @@ instance Outputable CgBreakInfo where parens (ppr (cgb_vars info) <+> ppr (cgb_resty info) <+> ppr (cgb_tick_id info)) + +instance Binary CgBreakInfo where + put_ bh CgBreakInfo {..} = + put_ bh cgb_tyvars + *> put_ bh cgb_vars + *> put_ bh cgb_resty + *> put_ bh cgb_tick_id + + get bh = CgBreakInfo <$> get bh <*> get bh <*> get bh <*> get bh + +instance Binary InternalModBreaks where + get bh = InternalModBreaks <$> get bh <*> get bh + + put_ bh InternalModBreaks {..} = + put_ bh imodBreaks_breakInfo *> put_ bh imodBreaks_modBreaks + +deriving via BreakpointId instance Binary InternalBreakLoc + +instance Binary InternalBreakpointId where + get bh = InternalBreakpointId <$> get bh <*> get bh + + put_ bh InternalBreakpointId {..} = + put_ bh ibi_info_mod *> put_ bh ibi_info_index ===================================== compiler/GHC/ByteCode/Serialize.hs ===================================== @@ -0,0 +1,205 @@ +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE RecordWildCards #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module GHC.ByteCode.Serialize + ( testBinByteCode, + ) +where + +import Control.Monad +import Data.Binary qualified as Binary +import Data.Foldable +import Data.IORef +import Data.Proxy +import Data.Word +import GHC.ByteCode.Types +import GHC.Data.FastString +import GHC.Driver.Env +import GHC.Iface.Binary +import GHC.Prelude +import GHC.Types.Name +import GHC.Types.Name.Cache +import GHC.Types.SrcLoc +import GHC.Utils.Binary +import GHC.Utils.Exception +import GHC.Utils.Panic +import GHC.Utils.TmpFs +import System.FilePath + +testBinByteCode :: HscEnv -> CompiledByteCode -> IO CompiledByteCode +testBinByteCode hsc_env cbc = withSystemTempDirectory "ghc-bbc" $ \tmpdir -> do + let f = tmpdir > "ghc-bbc" + roundtripBinByteCode hsc_env f cbc + +roundtripBinByteCode :: + HscEnv -> FilePath -> CompiledByteCode -> IO CompiledByteCode +roundtripBinByteCode hsc_env f cbc = do + writeBinByteCode f cbc + readBinByteCode hsc_env f + +readBinByteCode :: HscEnv -> FilePath -> IO CompiledByteCode +readBinByteCode hsc_env f = do + bh' <- readBinMem f + bh <- addBinNameReader hsc_env bh' + getWithUserData (hsc_NC hsc_env) bh + +writeBinByteCode :: FilePath -> CompiledByteCode -> IO () +writeBinByteCode f cbc = do + bh' <- openBinMem (1024 * 1024) + bh <- addBinNameWriter bh' + putWithUserData QuietBinIFace NormalCompression bh cbc + writeBinMem bh f + +instance Binary CompiledByteCode where + get bh = do + bc_bcos <- get bh + bc_itbls_len <- get bh + bc_itbls <- replicateM bc_itbls_len $ do + nm <- getViaBinName bh + itbl <- get bh + pure (nm, itbl) + bc_strs_len <- get bh + bc_strs <- + replicateM bc_strs_len $ (,) <$> getViaBinName bh <*> get bh + bc_breaks <- get bh + bc_spt_entries <- get bh + evaluate + CompiledByteCode + { bc_bcos, + bc_itbls, + bc_strs, + bc_breaks, + bc_spt_entries + } + + put_ bh CompiledByteCode {..} = do + put_ bh bc_bcos + put_ bh $ length bc_itbls + for_ bc_itbls $ \(nm, itbl) -> do + putViaBinName bh nm + put_ bh itbl + put_ bh $ length bc_strs + for_ bc_strs $ \(nm, str) -> putViaBinName bh nm *> put_ bh str + put_ bh bc_breaks + put_ bh bc_spt_entries + +instance Binary UnlinkedBCO where + get bh = + UnlinkedBCO + <$> getViaBinName bh + <*> get bh + <*> (Binary.decode <$> get bh) + <*> (Binary.decode <$> get bh) + <*> get bh + <*> get bh + + put_ bh UnlinkedBCO {..} = do + putViaBinName bh unlinkedBCOName + put_ bh unlinkedBCOArity + put_ bh $ Binary.encode unlinkedBCOInstrs + put_ bh $ Binary.encode unlinkedBCOBitmap + put_ bh unlinkedBCOLits + put_ bh unlinkedBCOPtrs + +instance Binary BCOPtr where + get bh = do + t <- getByte bh + case t of + 0 -> BCOPtrName <$> getViaBinName bh + 1 -> BCOPtrPrimOp <$> get bh + 2 -> BCOPtrBCO <$> get bh + 3 -> BCOPtrBreakArray <$> get bh + _ -> panic "Binary BCOPtr: invalid byte" + + put_ bh ptr = case ptr of + BCOPtrName nm -> putByte bh 0 *> putViaBinName bh nm + BCOPtrPrimOp op -> putByte bh 1 *> put_ bh op + BCOPtrBCO bco -> putByte bh 2 *> put_ bh bco + BCOPtrBreakArray info_mod -> putByte bh 3 *> put_ bh info_mod + +instance Binary BCONPtr where + get bh = do + t <- getByte bh + case t of + 0 -> BCONPtrWord . fromIntegral <$> (get bh :: IO Word64) + 1 -> BCONPtrLbl <$> get bh + 2 -> BCONPtrItbl <$> getViaBinName bh + 3 -> BCONPtrAddr <$> getViaBinName bh + 4 -> BCONPtrStr <$> get bh + 5 -> BCONPtrFS <$> get bh + 6 -> BCONPtrFFIInfo <$> get bh + 7 -> BCONPtrCostCentre <$> get bh + _ -> panic "Binary BCONPtr: invalid byte" + + put_ bh ptr = case ptr of + BCONPtrWord lit -> putByte bh 0 *> put_ bh (fromIntegral lit :: Word64) + BCONPtrLbl sym -> putByte bh 1 *> put_ bh sym + BCONPtrItbl nm -> putByte bh 2 *> putViaBinName bh nm + BCONPtrAddr nm -> putByte bh 3 *> putViaBinName bh nm + BCONPtrStr str -> putByte bh 4 *> put_ bh str + BCONPtrFS fs -> putByte bh 5 *> put_ bh fs + BCONPtrFFIInfo ffi -> putByte bh 6 *> put_ bh ffi + BCONPtrCostCentre ibi -> putByte bh 7 *> put_ bh ibi + +newtype BinName = BinName {unBinName :: Name} + +getViaBinName :: ReadBinHandle -> IO Name +getViaBinName bh = case findUserDataReader Proxy bh of + BinaryReader f -> unBinName <$> f bh + +putViaBinName :: WriteBinHandle -> Name -> IO () +putViaBinName bh nm = case findUserDataWriter Proxy bh of + BinaryWriter f -> f bh $ BinName nm + +addBinNameWriter :: WriteBinHandle -> IO WriteBinHandle +addBinNameWriter bh' = + evaluate + $ flip addWriterToUserData bh' + $ BinaryWriter + $ \bh (BinName nm) -> + if + | isExternalName nm -> do + putByte bh 0 + put_ bh nm + | otherwise -> do + putByte bh 1 + put_ bh + $ occNameFS (occName nm) + `appendFS` mkFastString + (show $ nameUnique nm) + +addBinNameReader :: HscEnv -> ReadBinHandle -> IO ReadBinHandle +addBinNameReader HscEnv {..} bh' = do + env_ref <- newIORef emptyOccEnv + pure $ flip addReaderToUserData bh' $ BinaryReader $ \bh -> do + t <- getByte bh + case t of + 0 -> do + nm <- get bh + pure $ BinName nm + 1 -> do + occ <- mkVarOccFS <$> get bh + u <- takeUniqFromNameCache hsc_NC + nm' <- evaluate $ mkInternalName u occ noSrcSpan + fmap BinName $ atomicModifyIORef' env_ref $ \env -> + case lookupOccEnv env occ of + Just nm -> (env, nm) + _ -> (extendOccEnv env occ nm', nm') + _ -> panic "Binary BinName: invalid byte" + +-- Note [Serializing Names in bytecode] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- The bytecode related types contain various Names which we need to +-- serialize. Unfortunately, we can't directly use the Binary instance +-- of Name: it is only meant to be used for serializing external Names +-- in BinIface logic, but bytecode does contain internal Names. +-- +-- We also need to maintain the invariant that: any pair of internal +-- Names with equal/different uniques must also be deserialized to +-- have the same equality. So normally uniques aren't supposed to be +-- serialized, but for this invariant to work, we do append uniques to +-- OccNames of internal Names, so that they can be uniquely identified +-- by OccName alone. When deserializing, we check a global cached +-- mapping from OccName to Unique, and create the real Name with the +-- right Unique if it's already deserialized at least once. ===================================== compiler/GHC/ByteCode/Types.hs ===================================== @@ -35,6 +35,7 @@ import GHC.Data.FastString import GHC.Data.FlatBag import GHC.Types.Name import GHC.Types.Name.Env +import GHC.Utils.Binary import GHC.Utils.Outputable import GHC.Builtin.PrimOps import GHC.Types.SptEntry @@ -296,3 +297,8 @@ instance Outputable UnlinkedBCO where ppr (sizeFlatBag lits), text "lits", ppr (sizeFlatBag ptrs), text "ptrs" ] +instance Binary FFIInfo where + get bh = FFIInfo <$> get bh <*> get bh + + put_ bh FFIInfo {..} = put_ bh ffiInfoArgs *> put_ bh ffiInfoRet + ===================================== compiler/GHC/Data/FlatBag.hs ===================================== @@ -16,6 +16,8 @@ import GHC.Prelude import Control.DeepSeq import GHC.Data.SmallArray +import GHC.Utils.Binary +import GHC.Utils.Panic -- | Store elements in a flattened representation. -- @@ -66,6 +68,21 @@ instance NFData a => NFData (FlatBag a) where rnf (TupleFlatBag a b) = rnf a `seq` rnf b rnf (FlatBag arr) = rnfSmallArray arr +instance (Binary a) => Binary (FlatBag a) where + get bh = do + t <- getByte bh + case t of + 0 -> pure EmptyFlatBag + 1 -> UnitFlatBag <$> get bh + 2 -> TupleFlatBag <$> get bh <*> get bh + 3 -> FlatBag <$> get bh + _ -> panic "Binary FlatBag: invalid byte" + + put_ bh EmptyFlatBag = putByte bh 0 + put_ bh (UnitFlatBag a) = putByte bh 1 *> put_ bh a + put_ bh (TupleFlatBag a b) = putByte bh 2 *> put_ bh a *> put_ bh b + put_ bh (FlatBag arr) = putByte bh 3 *> put_ bh arr + -- | Create an empty 'FlatBag'. -- -- The empty 'FlatBag' is shared over all instances. @@ -129,4 +146,3 @@ fromSmallArray s = case sizeofSmallArray s of 1 -> UnitFlatBag (indexSmallArray s 0) 2 -> TupleFlatBag (indexSmallArray s 0) (indexSmallArray s 1) _ -> FlatBag s - ===================================== compiler/GHC/Data/SmallArray.hs ===================================== @@ -29,7 +29,9 @@ import GHC.Exts import GHC.Prelude import GHC.IO import GHC.ST +import GHC.Utils.Binary import Control.DeepSeq +import Data.Foldable data SmallArray a = SmallArray (SmallArray# a) @@ -166,3 +168,17 @@ listToArray (I# size) index_of value_of xs = runST $ ST \s -> (# s', ma #) -> case write_elems ma xs s' of s'' -> case unsafeFreezeSmallArray# ma s'' of (# s''', a #) -> (# s''', SmallArray a #) + +instance (Binary a) => Binary (SmallArray a) where + get bh = do + len <- get bh + ma <- newSmallArrayIO len undefined + for_ [0 .. len - 1] $ \i -> do + a <- get bh + writeSmallArrayIO ma i a + unsafeFreezeSmallArrayIO ma + + put_ bh sa = do + let len = sizeofSmallArray sa + put_ bh len + for_ [0 .. len - 1] $ \i -> put_ bh $ sa `indexSmallArray` i ===================================== compiler/GHC/Driver/Main.hs ===================================== @@ -305,6 +305,8 @@ import Data.Bifunctor import qualified GHC.Unit.Home.Graph as HUG import GHC.Unit.Home.PackageTable +import GHC.ByteCode.Serialize + {- ********************************************************************** %* * Initialisation @@ -2169,7 +2171,8 @@ generateByteCode :: HscEnv -> ModLocation -> IO (CompiledByteCode, [FilePath]) generateByteCode hsc_env cgguts mod_location = do - (hasStub, comp_bc) <- hscInteractive hsc_env cgguts mod_location + (hasStub, comp_bc') <- hscInteractive hsc_env cgguts mod_location + comp_bc <- testBinByteCode hsc_env comp_bc' compile_for_interpreter hsc_env $ \ i_env -> do stub_o <- traverse (compileForeign i_env LangC) hasStub foreign_files_o <- traverse (uncurry (compileForeign i_env)) (cgi_foreign_files cgguts) ===================================== compiler/GHC/HsToCore/Breakpoints.hs ===================================== @@ -30,6 +30,7 @@ import GHC.Types.SrcLoc (SrcSpan) import GHC.Types.Name (OccName) import GHC.Types.Tickish (BreakTickIndex, BreakpointId(..)) import GHC.Unit.Module (Module) +import GHC.Utils.Binary import GHC.Utils.Outputable import Data.List (intersperse) @@ -106,3 +107,13 @@ The breakpoint is in the function called "baz" that is declared in a `let` or `where` clause of a declaration called "bar", which itself is declared in a `let` or `where` clause of the top-level function called "foo". -} + +instance Binary ModBreaks where + get bh = ModBreaks <$> get bh <*> get bh <*> get bh <*> get bh <*> get bh + + put_ bh ModBreaks {..} = + put_ bh modBreaks_locs + *> put_ bh modBreaks_vars + *> put_ bh modBreaks_decls + *> put_ bh modBreaks_ccs + *> put_ bh modBreaks_module ===================================== compiler/GHC/Types/SptEntry.hs ===================================== @@ -3,9 +3,14 @@ module GHC.Types.SptEntry ) where -import GHC.Types.Var ( Id ) +import GHC.Builtin.Types +import GHC.Types.Id +import GHC.Types.Name import GHC.Fingerprint.Type ( Fingerprint ) +import GHC.Prelude +import GHC.Utils.Binary import GHC.Utils.Outputable +import GHC.Utils.Panic.Plain -- | An entry to be inserted into a module's static pointer table. -- See Note [Grand plan for static forms] in "GHC.Iface.Tidy.StaticPtrTable". @@ -14,3 +19,13 @@ data SptEntry = SptEntry Id Fingerprint instance Outputable SptEntry where ppr (SptEntry id fpr) = ppr id <> colon <+> ppr fpr +instance Binary SptEntry where + get bh = do + nm <- get bh + fp <- get bh + -- static pointer logic only uses the associated Name without Type + pure $ SptEntry (mkVanillaGlobal nm anyTy) fp + + put_ bh (SptEntry var fp) = do + massert $ isGlobalId var + put_ bh (getName var) *> put_ bh fp ===================================== compiler/GHC/Types/Tickish.hs ===================================== @@ -4,6 +4,7 @@ {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE RecordWildCards #-} module GHC.Types.Tickish ( GenTickish(..), @@ -44,6 +45,7 @@ import GHC.Utils.Panic import Language.Haskell.Syntax.Extension ( NoExtField ) import Data.Data +import GHC.Utils.Binary import GHC.Utils.Outputable (Outputable (ppr), text, (<+>)) {- ********************************************************************* @@ -202,6 +204,11 @@ instance NFData BreakpointId where rnf BreakpointId{bi_tick_mod, bi_tick_index} = rnf bi_tick_mod `seq` rnf bi_tick_index +instance Binary BreakpointId where + get bh = BreakpointId <$> get bh <*> get bh + + put_ bh BreakpointId {..} = put_ bh bi_tick_mod *> put_ bh bi_tick_index + -------------------------------------------------------------------------------- -- | A "counting tick" (where tickishCounts is True) is one that ===================================== compiler/GHC/Utils/Binary.hs ===================================== @@ -125,6 +125,7 @@ import Language.Haskell.Syntax.ImpExp.IsBoot (IsBootInterface(..)) import {-# SOURCE #-} GHC.Types.Name (Name) import GHC.Data.FastString import GHC.Data.TrieMap +import GHC.Utils.Exception import GHC.Utils.Panic.Plain import GHC.Types.Unique.FM import GHC.Data.FastMutInt @@ -133,6 +134,8 @@ import GHC.Types.SrcLoc import GHC.Types.Unique import qualified GHC.Data.Strict as Strict import GHC.Utils.Outputable( JoinPointHood(..) ) +import GHCi.FFI +import GHCi.Message import Control.DeepSeq import Control.Monad ( when, (<$!>), unless, forM_, void ) @@ -140,8 +143,10 @@ import Foreign hiding (bit, setBit, clearBit, shiftL, shiftR, void) import Data.Array import Data.Array.IO import Data.Array.Unsafe +import qualified Data.Binary as Binary import Data.ByteString (ByteString, copy) import Data.Coerce +import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Internal as BS import qualified Data.ByteString.Unsafe as BS import qualified Data.ByteString.Short.Internal as SBS @@ -929,6 +934,12 @@ instance Binary Char where put_ bh c = put_ bh (fromIntegral (ord c) :: Word32) get bh = do x <- get bh; return $! (chr (fromIntegral (x :: Word32))) +instance Binary Word where + put_ bh i = put_ bh (fromIntegral i :: Word64) + get bh = do + x <- get bh + return $! (fromIntegral (x :: Word64)) + instance Binary Int where put_ bh i = put_ bh (fromIntegral i :: Int64) get bh = do @@ -1849,6 +1860,18 @@ instance Binary ByteString where put_ bh f = putBS bh f get bh = getBS bh +instance Binary LBS.ByteString where + put_ bh lbs = do + put_ bh (fromIntegral (LBS.length lbs) :: Int) + let f bs acc = + ( BS.unsafeUseAsCStringLen bs $ + \(ptr, l) -> putPrim bh l $ \op -> copyBytes op (castPtr ptr) l + ) + *> acc + LBS.foldrChunks f (pure ()) lbs + + get bh = LBS.fromStrict <$> get bh + instance Binary FastString where put_ bh f = case findUserDataWriter (Proxy :: Proxy FastString) bh of @@ -2106,6 +2129,7 @@ instance Binary BinSrcSpan where _ -> do s <- get bh return $ BinSrcSpan (UnhelpfulSpan s) +deriving via BinSrcSpan instance Binary SrcSpan {- Note [Source Location Wrappers] @@ -2163,3 +2187,40 @@ instance Binary a => Binary (FingerprintWithValue a) where instance NFData a => NFData (FingerprintWithValue a) where rnf (FingerprintWithValue fp mflags) = rnf fp `seq` rnf mflags `seq` () + +instance Binary ConInfoTable where + get bh = Binary.decode <$> get bh + + put_ bh = put_ bh . Binary.encode + +instance Binary FFIType where + get bh = do + t <- getByte bh + evaluate $ case t of + 0 -> FFIVoid + 1 -> FFIPointer + 2 -> FFIFloat + 3 -> FFIDouble + 4 -> FFISInt8 + 5 -> FFISInt16 + 6 -> FFISInt32 + 7 -> FFISInt64 + 8 -> FFIUInt8 + 9 -> FFIUInt16 + 10 -> FFIUInt32 + 11 -> FFIUInt64 + _ -> panic "Binary FFIType: invalid byte" + + put_ bh t = putByte bh $ case t of + FFIVoid -> 0 + FFIPointer -> 1 + FFIFloat -> 2 + FFIDouble -> 3 + FFISInt8 -> 4 + FFISInt16 -> 5 + FFISInt32 -> 6 + FFISInt64 -> 7 + FFIUInt8 -> 8 + FFIUInt16 -> 9 + FFIUInt32 -> 10 + FFIUInt64 -> 11 ===================================== compiler/ghc.cabal.in ===================================== @@ -228,6 +228,7 @@ Library GHC.ByteCode.InfoTable GHC.ByteCode.Instr GHC.ByteCode.Linker + GHC.ByteCode.Serialize GHC.ByteCode.Types GHC.Cmm GHC.Cmm.BlockId ===================================== testsuite/tests/count-deps/CountDepsAst.stdout ===================================== @@ -71,6 +71,7 @@ GHC.Data.Maybe GHC.Data.OrdList GHC.Data.OsPath GHC.Data.Pair +GHC.Data.SmallArray GHC.Data.Strict GHC.Data.StringBuffer GHC.Data.TrieMap ===================================== testsuite/tests/count-deps/CountDepsParser.stdout ===================================== @@ -73,6 +73,7 @@ GHC.Data.Maybe GHC.Data.OrdList GHC.Data.OsPath GHC.Data.Pair +GHC.Data.SmallArray GHC.Data.Strict GHC.Data.StringBuffer GHC.Data.TrieMap View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/dfad5e2ec454dd05ea720def9a001b49... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/dfad5e2ec454dd05ea720def9a001b49... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Cheng Shao (@TerrorJack)