
Cheng Shao pushed to branch wip/bytecode-serialize-3 at Glasgow Haskell Compiler / GHC Commits: 1362d5a8 by Cheng Shao at 2025-08-10T13:39:51+00:00 compiler: WIP GHC.ByteCode.Serialize - - - - - fdebadb5 by Cheng Shao at 2025-08-10T13:39:56+00:00 driver: test bytecode roundtrip serialization - - - - - 10 changed files: - compiler/GHC/Builtin/PrimOps.hs - compiler/GHC/ByteCode/Breakpoints.hs - + compiler/GHC/ByteCode/Serialize.hs - compiler/GHC/ByteCode/Types.hs - compiler/GHC/Data/FlatBag.hs - compiler/GHC/Driver/Main.hs - compiler/GHC/Types/SptEntry.hs - compiler/GHC/Types/Tickish.hs - compiler/GHC/Utils/Binary.hs - compiler/ghc.cabal.in Changes: ===================================== compiler/GHC/Builtin/PrimOps.hs ===================================== @@ -53,6 +53,7 @@ import GHC.Types.Unique ( Unique ) import GHC.Unit.Types ( Unit ) +import GHC.Utils.Binary import GHC.Utils.Outputable import GHC.Utils.Panic @@ -929,3 +930,8 @@ primOpIsReallyInline = \case DataToTagSmallOp -> False DataToTagLargeOp -> False p -> not (primOpOutOfLine p) + +instance Binary PrimOp where + get bh = (allThePrimOps !!) <$> get bh + + put_ bh = put_ bh . primOpTag ===================================== compiler/GHC/ByteCode/Breakpoints.hs ===================================== @@ -44,6 +44,7 @@ import GHC.HsToCore.Breakpoints import GHC.Iface.Syntax import GHC.Unit.Module (Module) +import GHC.Utils.Binary import GHC.Utils.Outputable import GHC.Utils.Panic import Data.Array @@ -297,3 +298,11 @@ instance Outputable CgBreakInfo where parens (ppr (cgb_vars info) <+> ppr (cgb_resty info) <+> ppr (cgb_tick_id info)) + +deriving newtype instance Binary InternalBreakLoc + +instance Binary InternalBreakpointId where + get bh = InternalBreakpointId <$> get bh <*> get bh + + put_ bh InternalBreakpointId {..} = + put_ bh ibi_info_mod *> put_ bh ibi_info_index ===================================== compiler/GHC/ByteCode/Serialize.hs ===================================== @@ -0,0 +1,224 @@ +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE RecordWildCards #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module GHC.ByteCode.Serialize + ( testBinByteCode, + ) +where + +import Control.Monad +import Data.Binary qualified as Binary +import Data.ByteString.Lazy qualified as LBS +import Data.Foldable +import Data.IORef +import Data.Proxy +import Data.Word +import GHC.ByteCode.Breakpoints +import GHC.ByteCode.Types +import GHC.Data.FastString +import GHC.Driver.Env +import GHC.Iface.Binary +import GHC.Prelude +import GHC.Types.Name +import GHC.Types.Name.Cache +import GHC.Types.SrcLoc +import GHC.Utils.Binary +import GHC.Utils.Exception +import GHC.Utils.TmpFs +import GHCi.Message +import System.FilePath + +testBinByteCode :: HscEnv -> CompiledByteCode -> IO CompiledByteCode +testBinByteCode hsc_env cbc = withSystemTempDirectory "ghc-bbc" $ \tmpdir -> do + let f = tmpdir > "ghc-bbc" + roundtripBinByteCode hsc_env f cbc + +roundtripBinByteCode :: + HscEnv -> FilePath -> CompiledByteCode -> IO CompiledByteCode +roundtripBinByteCode hsc_env f cbc = do + writeBinByteCode f cbc + readBinByteCode hsc_env f + +readBinByteCode :: HscEnv -> FilePath -> IO CompiledByteCode +readBinByteCode hsc_env f = do + bh' <- readBinMem f + bh <- addSerializableNameReader hsc_env bh' + getWithUserData (hsc_NC hsc_env) bh + +writeBinByteCode :: FilePath -> CompiledByteCode -> IO () +writeBinByteCode f cbc = do + bh' <- openBinMem (1024 * 1024) + bh <- addSerializableNameWriter bh' + putWithUserData QuietBinIFace NormalCompression bh cbc + writeBinMem bh f + +instance Binary CompiledByteCode where + get bh = do + bc_bcos <- get bh + bc_itbls_len <- get bh + bc_itbls <- replicateM bc_itbls_len $ do + nm <- getViaSerializableName bh + itbl <- get bh + pure (nm, itbl) + bc_strs_len <- get bh + bc_strs <- + replicateM bc_strs_len $ (,) <$> getViaSerializableName bh <*> get bh + bc_breaks <- get bh + bc_spt_entries <- get bh + evaluate + CompiledByteCode + { bc_bcos, + bc_itbls, + bc_strs, + bc_breaks, + bc_spt_entries + } + + put_ bh CompiledByteCode {..} = do + put_ bh bc_bcos + put_ bh $ length bc_itbls + for_ bc_itbls $ \(nm, itbl) -> do + putViaSerializableName bh nm + put_ bh itbl + put_ bh $ length bc_strs + for_ bc_strs $ \(nm, str) -> putViaSerializableName bh nm *> put_ bh str + put_ bh bc_breaks + put_ bh bc_spt_entries + +instance Binary InternalModBreaks where + get bh = InternalModBreaks <$> get bh <*> get bh + + put_ bh InternalModBreaks {..} = + put_ bh imodBreaks_breakInfo *> put_ bh imodBreaks_modBreaks + +instance Binary ModBreaks where + get bh = ModBreaks <$> get bh <*> get bh <*> get bh <*> get bh <*> get bh + + put_ bh ModBreaks {..} = + put_ bh modBreaks_locs + *> put_ bh modBreaks_vars + *> put_ bh modBreaks_decls + *> put_ bh modBreaks_ccs + *> put_ bh modBreaks_module + +instance Binary SrcSpan where + get bh = unBinSrcSpan <$> get bh + + put_ bh = put_ bh . BinSrcSpan + +instance Binary CgBreakInfo where + put_ bh CgBreakInfo {..} = + put_ bh cgb_tyvars + *> put_ bh cgb_vars + *> put_ bh cgb_resty + *> put_ bh cgb_tick_id + + get bh = CgBreakInfo <$> get bh <*> get bh <*> get bh <*> get bh + +instance Binary ConInfoTable where + get bh = Binary.decode . LBS.fromStrict <$> get bh + + put_ bh = put_ bh . LBS.toStrict . Binary.encode + +instance Binary UnlinkedBCO where + get bh = + UnlinkedBCO + <$> getViaSerializableName bh + <*> get bh + <*> (Binary.decode . LBS.fromStrict <$> get bh) + <*> (Binary.decode . LBS.fromStrict <$> get bh) + <*> get bh + <*> get bh + + put_ bh UnlinkedBCO {..} = do + putViaSerializableName bh unlinkedBCOName + put_ bh unlinkedBCOArity + put_ bh $ LBS.toStrict $ Binary.encode unlinkedBCOInstrs + put_ bh $ LBS.toStrict $ Binary.encode unlinkedBCOBitmap + put_ bh unlinkedBCOLits + put_ bh unlinkedBCOPtrs + +instance Binary BCOPtr where + get bh = do + t <- getByte bh + case t of + 0 -> BCOPtrName <$> getViaSerializableName bh + 1 -> BCOPtrPrimOp <$> get bh + 2 -> BCOPtrBCO <$> get bh + _ -> BCOPtrBreakArray <$> get bh + + put_ bh ptr = case ptr of + BCOPtrName nm -> putByte bh 0 *> putViaSerializableName bh nm + BCOPtrPrimOp op -> putByte bh 1 *> put_ bh op + BCOPtrBCO bco -> putByte bh 2 *> put_ bh bco + BCOPtrBreakArray info_mod -> putByte bh 3 *> put_ bh info_mod + +instance Binary BCONPtr where + get bh = do + t <- getByte bh + case t of + 0 -> BCONPtrWord . fromIntegral <$> (get bh :: IO Word64) + 1 -> BCONPtrLbl <$> get bh + 2 -> BCONPtrItbl <$> getViaSerializableName bh + 3 -> BCONPtrAddr <$> getViaSerializableName bh + 4 -> BCONPtrStr <$> get bh + 5 -> BCONPtrFS <$> get bh + 6 -> BCONPtrFFIInfo <$> get bh + _ -> BCONPtrCostCentre <$> get bh + + put_ bh ptr = case ptr of + BCONPtrWord lit -> putByte bh 0 *> put_ bh (fromIntegral lit :: Word64) + BCONPtrLbl sym -> putByte bh 1 *> put_ bh sym + BCONPtrItbl nm -> putByte bh 2 *> putViaSerializableName bh nm + BCONPtrAddr nm -> putByte bh 3 *> putViaSerializableName bh nm + BCONPtrStr str -> putByte bh 4 *> put_ bh str + BCONPtrFS fs -> putByte bh 5 *> put_ bh fs + BCONPtrFFIInfo ffi -> putByte bh 6 *> put_ bh ffi + BCONPtrCostCentre ibi -> putByte bh 7 *> put_ bh ibi + +newtype SerializableName = SerializableName {unSerializableName :: Name} + +getViaSerializableName :: ReadBinHandle -> IO Name +getViaSerializableName bh = case findUserDataReader Proxy bh of + BinaryReader f -> unSerializableName <$> f bh + +putViaSerializableName :: WriteBinHandle -> Name -> IO () +putViaSerializableName bh nm = case findUserDataWriter Proxy bh of + BinaryWriter f -> f bh $ SerializableName nm + +addSerializableNameWriter :: WriteBinHandle -> IO WriteBinHandle +addSerializableNameWriter bh' = + evaluate + $ flip addWriterToUserData bh' + $ BinaryWriter + $ \bh (SerializableName nm) -> + if + | isExternalName nm -> do + putByte bh 0 + put_ bh nm + | otherwise -> do + putByte bh 1 + put_ bh + $ occNameFS (occName nm) + `appendFS` mkFastString + (show $ nameUnique nm) + +addSerializableNameReader :: HscEnv -> ReadBinHandle -> IO ReadBinHandle +addSerializableNameReader HscEnv {..} bh' = do + nc <- evaluate hsc_NC + env_ref <- newIORef emptyOccEnv + evaluate $ flip addReaderToUserData bh' $ BinaryReader $ \bh -> do + t <- getByte bh + case t of + 0 -> do + nm <- get bh + evaluate $ SerializableName nm + _ -> do + occ <- mkVarOccFS <$> get bh + u <- takeUniqFromNameCache nc + nm' <- evaluate $ mkInternalName u occ noSrcSpan + fmap SerializableName $ atomicModifyIORef' env_ref $ \env -> + case lookupOccEnv env occ of + Just nm -> (env, nm) + _ -> (extendOccEnv env occ nm', nm') ===================================== compiler/GHC/ByteCode/Types.hs ===================================== @@ -35,6 +35,7 @@ import GHC.Data.FastString import GHC.Data.FlatBag import GHC.Types.Name import GHC.Types.Name.Env +import GHC.Utils.Binary import GHC.Utils.Outputable import GHC.Builtin.PrimOps import GHC.Types.SptEntry @@ -296,3 +297,8 @@ instance Outputable UnlinkedBCO where ppr (sizeFlatBag lits), text "lits", ppr (sizeFlatBag ptrs), text "ptrs" ] +instance Binary FFIInfo where + get bh = FFIInfo <$> get bh <*> get bh + + put_ bh FFIInfo {..} = put_ bh ffiInfoArgs *> put_ bh ffiInfoRet + ===================================== compiler/GHC/Data/FlatBag.hs ===================================== @@ -16,6 +16,8 @@ import GHC.Prelude import Control.DeepSeq import GHC.Data.SmallArray +import GHC.Utils.Binary +import GHC.Utils.Exception -- | Store elements in a flattened representation. -- @@ -66,6 +68,13 @@ instance NFData a => NFData (FlatBag a) where rnf (TupleFlatBag a b) = rnf a `seq` rnf b rnf (FlatBag arr) = rnfSmallArray arr +instance (Binary a) => Binary (FlatBag a) where + get bh = do + xs <- get bh + evaluate $ fromList (fromIntegral $ length xs) xs + + put_ bh = put_ bh . elemsFlatBag + -- | Create an empty 'FlatBag'. -- -- The empty 'FlatBag' is shared over all instances. @@ -129,4 +138,3 @@ fromSmallArray s = case sizeofSmallArray s of 1 -> UnitFlatBag (indexSmallArray s 0) 2 -> TupleFlatBag (indexSmallArray s 0) (indexSmallArray s 1) _ -> FlatBag s - ===================================== compiler/GHC/Driver/Main.hs ===================================== @@ -305,6 +305,8 @@ import Data.Bifunctor import qualified GHC.Unit.Home.Graph as HUG import GHC.Unit.Home.PackageTable +import GHC.ByteCode.Serialize + {- ********************************************************************** %* * Initialisation @@ -2169,7 +2171,8 @@ generateByteCode :: HscEnv -> ModLocation -> IO (CompiledByteCode, [FilePath]) generateByteCode hsc_env cgguts mod_location = do - (hasStub, comp_bc) <- hscInteractive hsc_env cgguts mod_location + (hasStub, comp_bc') <- hscInteractive hsc_env cgguts mod_location + comp_bc <- testBinByteCode hsc_env comp_bc' compile_for_interpreter hsc_env $ \ i_env -> do stub_o <- traverse (compileForeign i_env LangC) hasStub foreign_files_o <- traverse (uncurry (compileForeign i_env)) (cgi_foreign_files cgguts) ===================================== compiler/GHC/Types/SptEntry.hs ===================================== @@ -3,8 +3,12 @@ module GHC.Types.SptEntry ) where -import GHC.Types.Var ( Id ) +import GHC.Builtin.Types +import GHC.Types.Id +import GHC.Types.Name import GHC.Fingerprint.Type ( Fingerprint ) +import GHC.Prelude +import GHC.Utils.Binary import GHC.Utils.Outputable -- | An entry to be inserted into a module's static pointer table. @@ -14,3 +18,11 @@ data SptEntry = SptEntry Id Fingerprint instance Outputable SptEntry where ppr (SptEntry id fpr) = ppr id <> colon <+> ppr fpr +instance Binary SptEntry where + get bh = do + nm <- get bh + fp <- get bh + pure $ SptEntry (mkVanillaGlobal nm anyTy) fp + + put_ bh (SptEntry nm fp) = + put_ bh (getName nm) *> put_ bh fp ===================================== compiler/GHC/Types/Tickish.hs ===================================== @@ -4,6 +4,7 @@ {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE RecordWildCards #-} module GHC.Types.Tickish ( GenTickish(..), @@ -44,6 +45,7 @@ import GHC.Utils.Panic import Language.Haskell.Syntax.Extension ( NoExtField ) import Data.Data +import GHC.Utils.Binary import GHC.Utils.Outputable (Outputable (ppr), text, (<+>)) {- ********************************************************************* @@ -202,6 +204,11 @@ instance NFData BreakpointId where rnf BreakpointId{bi_tick_mod, bi_tick_index} = rnf bi_tick_mod `seq` rnf bi_tick_index +instance Binary BreakpointId where + get bh = BreakpointId <$> get bh <*> get bh + + put_ bh BreakpointId {..} = put_ bh bi_tick_mod *> put_ bh bi_tick_index + -------------------------------------------------------------------------------- -- | A "counting tick" (where tickishCounts is True) is one that ===================================== compiler/GHC/Utils/Binary.hs ===================================== @@ -125,6 +125,7 @@ import Language.Haskell.Syntax.ImpExp.IsBoot (IsBootInterface(..)) import {-# SOURCE #-} GHC.Types.Name (Name) import GHC.Data.FastString import GHC.Data.TrieMap +import GHC.Utils.Exception import GHC.Utils.Panic.Plain import GHC.Types.Unique.FM import GHC.Data.FastMutInt @@ -133,6 +134,7 @@ import GHC.Types.SrcLoc import GHC.Types.Unique import qualified GHC.Data.Strict as Strict import GHC.Utils.Outputable( JoinPointHood(..) ) +import GHCi.FFI import Control.DeepSeq import Control.Monad ( when, (<$!>), unless, forM_, void ) @@ -929,6 +931,12 @@ instance Binary Char where put_ bh c = put_ bh (fromIntegral (ord c) :: Word32) get bh = do x <- get bh; return $! (chr (fromIntegral (x :: Word32))) +instance Binary Word where + put_ bh i = put_ bh (fromIntegral i :: Word64) + get bh = do + x <- get bh + return $! (fromIntegral (x :: Word64)) + instance Binary Int where put_ bh i = put_ bh (fromIntegral i :: Int64) get bh = do @@ -2163,3 +2171,35 @@ instance Binary a => Binary (FingerprintWithValue a) where instance NFData a => NFData (FingerprintWithValue a) where rnf (FingerprintWithValue fp mflags) = rnf fp `seq` rnf mflags `seq` () + +instance Binary FFIType where + get bh = do + t <- getByte bh + evaluate $ case t of + 0 -> FFIVoid + 1 -> FFIPointer + 2 -> FFIFloat + 3 -> FFIDouble + 4 -> FFISInt8 + 5 -> FFISInt16 + 6 -> FFISInt32 + 7 -> FFISInt64 + 8 -> FFIUInt8 + 9 -> FFIUInt16 + 10 -> FFIUInt32 + 11 -> FFIUInt64 + _ -> panic "Binary FFIType: invalid byte" + + put_ bh t = putByte bh $ case t of + FFIVoid -> 0 + FFIPointer -> 1 + FFIFloat -> 2 + FFIDouble -> 3 + FFISInt8 -> 4 + FFISInt16 -> 5 + FFISInt32 -> 6 + FFISInt64 -> 7 + FFIUInt8 -> 8 + FFIUInt16 -> 9 + FFIUInt32 -> 10 + FFIUInt64 -> 11 ===================================== compiler/ghc.cabal.in ===================================== @@ -228,6 +228,7 @@ Library GHC.ByteCode.InfoTable GHC.ByteCode.Instr GHC.ByteCode.Linker + GHC.ByteCode.Serialize GHC.ByteCode.Types GHC.Cmm GHC.Cmm.BlockId View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e1707f1ced6e0912607993af4cb8fbf... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e1707f1ced6e0912607993af4cb8fbf... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Cheng Shao (@TerrorJack)