[Git][ghc/ghc][wip/bytecode-serialize-pre] 8 commits: compiler: do not allocate strings in bytecode assembler

Cheng Shao pushed to branch wip/bytecode-serialize-pre at Glasgow Haskell Compiler / GHC Commits: 55e22336 by Cheng Shao at 2025-05-14T01:17:16+00:00 compiler: do not allocate strings in bytecode assembler This patch refactors the compiler to avoid allocating iserv buffers for BCONPtrStr at assemble-time. Now BCONPtrStr ByteStrings are recorded as a part of CompiledByteCode, and actual allocation only happens at link-time. This refactoring is necessary for adding bytecode serialization functionality, as explained by the revised comments in this commit. ------------------------- Metric Increase: MultiLayerModulesDefsGhciReload ------------------------- - - - - - 739d2f38 by Cheng Shao at 2025-05-14T01:17:26+00:00 compiler: make bc_strs serializable This commit makes the bc_strs field in CompiledByteCode serializable; similar to previous commit, we preserve the ByteString directly and defer the actual allocation to link-time, as mentioned in updated comment. - - - - - 6a2710ff by Cheng Shao at 2025-05-14T01:17:27+00:00 compiler: make bc_itbls serializable This commit makes bc_itbls in CompiledByteCode serializable. A dedicated ConInfoTable datatype has been added in ghci which is the recipe for dynamically making a datacon's info table, containing the payload of the MkConInfoTable iserv message. - - - - - 553c5fd5 by Cheng Shao at 2025-05-14T01:17:27+00:00 compiler: remove FFIInfo bookkeeping in BCO This commit removes the bc_ffis field from CompiledByteCode completely, as well as all the related bookkeeping logic in GHC.StgToByteCode. bc_ffis is actually *unused* in the rest of GHC codebase! It is merely a list of FFIInfo, which is just a remote pointer of the libffi ffi_cif struct; once we allocate the ffi_cif struct and put its pointer in a CCALL instruction, we'll never free it anyway. So there is no point of bookkeeping. - - - - - 75ab80d0 by Cheng Shao at 2025-05-14T01:17:27+00:00 compiler: make FFIInfo serializable in BCO This commit makes all the FFIInfo needed in CCALL instructions serializable. Previously, when doing STG to BCO lowering, we would allocate a libffi ffi_cif struct and keep its remote pointer as FFIInfo; but actually we can just keep the type signature as FFIInfo and defer the actual allocation to link-time. - - - - - a9c42238 by Cheng Shao at 2025-05-14T01:17:27+00:00 ghci: remove redundant NewBreakModule message This commit removes the redundant NewBreakModule message from ghci: it just allocates two strings! This functionality can be implemented with existing MallocStrings in one iserv call. - - - - - bad310cb by Cheng Shao at 2025-05-14T01:17:27+00:00 compiler: make breakpoint module name and unit id serializable This commit makes breakpoint module name and unit id serializable, in BRK_FUN instructions as well as ModBreaks. We can simply keep the module name and unit ids, and defer the buffer allocation to link time. - - - - - 981efb4a by Cheng Shao at 2025-05-14T01:17:27+00:00 compiler: remove unused newModule This commit removes the now unused newModule function from GHC. - - - - - 13 changed files: - compiler/GHC/ByteCode/Asm.hs - compiler/GHC/ByteCode/InfoTable.hs - compiler/GHC/ByteCode/Instr.hs - compiler/GHC/ByteCode/Linker.hs - compiler/GHC/ByteCode/Types.hs - compiler/GHC/HsToCore/Breakpoints.hs - compiler/GHC/Linker/Loader.hs - compiler/GHC/Runtime/Interpreter.hs - compiler/GHC/StgToByteCode.hs - libraries/ghci/GHCi/Message.hs - libraries/ghci/GHCi/Run.hs - testsuite/tests/bytecode/T22376/all.T - testsuite/tests/perf/should_run/ByteCodeAsm.hs Changes: ===================================== compiler/GHC/ByteCode/Asm.hs ===================================== @@ -29,7 +29,6 @@ import GHC.ByteCode.Instr import GHC.ByteCode.InfoTable import GHC.ByteCode.Types import GHCi.RemoteTypes -import GHC.Runtime.Interpreter import GHC.Runtime.Heap.Layout ( fromStgWord, StgWord ) import GHC.Types.Name @@ -38,11 +37,13 @@ import GHC.Types.Literal import GHC.Types.Unique.DSet import GHC.Types.SptEntry import GHC.Types.Unique.FM +import GHC.Unit.Types import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Core.TyCon +import GHC.Data.FastString import GHC.Data.SizedSeq import GHC.Data.SmallArray @@ -52,6 +53,7 @@ import GHC.Cmm.Reg ( GlobalArgRegs(..) ) import GHC.Cmm.CallConv ( allArgRegsCover ) import GHC.Platform import GHC.Platform.Profile +import Language.Haskell.Syntax.Module.Name import Control.Monad import qualified Control.Monad.Trans.State.Strict as MTL @@ -65,6 +67,7 @@ import Data.Array.Base ( unsafeWrite ) #endif import Foreign hiding (shiftL, shiftR) +import Data.ByteString (ByteString) import Data.Char (ord) import Data.Maybe (fromMaybe) import GHC.Float (castFloatToWord32, castDoubleToWord64) @@ -104,24 +107,21 @@ bcoFreeNames bco -- Top level assembler fn. assembleBCOs - :: Interp - -> Profile + :: Profile -> FlatBag (ProtoBCO Name) -> [TyCon] - -> AddrEnv + -> [(Name, ByteString)] -> Maybe ModBreaks -> [SptEntry] -> IO CompiledByteCode -assembleBCOs interp profile proto_bcos tycons top_strs modbreaks spt_entries = do +assembleBCOs profile proto_bcos tycons top_strs modbreaks spt_entries = do -- TODO: the profile should be bundled with the interpreter: the rts ways are -- fixed for an interpreter - itblenv <- mkITbls interp profile tycons + let itbls = mkITbls profile tycons bcos <- mapM (assembleBCO (profilePlatform profile)) proto_bcos - bcos' <- mallocStrings interp bcos return CompiledByteCode - { bc_bcos = bcos' - , bc_itbls = itblenv - , bc_ffis = concatMap protoBCOFFIs proto_bcos + { bc_bcos = bcos + , bc_itbls = itbls , bc_strs = top_strs , bc_breaks = modbreaks , bc_spt_entries = spt_entries @@ -137,50 +137,17 @@ assembleBCOs interp profile proto_bcos tycons top_strs modbreaks spt_entries = d -- memory for them, and bake the resulting addresses into the instruction stream -- in the form of BCONPtrWord arguments. -- --- Since we do this when assembling, we only allocate the memory when we compile --- the module, not each time we relink it. However, we do want to take care to --- malloc the memory all in one go, since that is more efficient with --- -fexternal-interpreter, especially when compiling in parallel. +-- We used to allocate remote buffers for BCONPtrStr ByteStrings when +-- assembling, but this gets in the way of bytecode serialization: we +-- want the ability to serialize and reload assembled bytecode, so +-- it's better to preserve BCONPtrStr as-is, and only perform the +-- actual allocation at link-time. -- -- Note that, as with top-level string literal bindings, this memory is never -- freed, so it just leaks if the BCO is unloaded. See Note [Generating code for -- top-level string literal bindings] in GHC.StgToByteCode for some discussion -- about why. -- -mallocStrings :: Interp -> FlatBag UnlinkedBCO -> IO (FlatBag UnlinkedBCO) -mallocStrings interp ulbcos = do - let bytestrings = reverse (MTL.execState (mapM_ collect ulbcos) []) - ptrs <- interpCmd interp (MallocStrings bytestrings) - return (MTL.evalState (mapM splice ulbcos) ptrs) - where - splice bco@UnlinkedBCO{..} = do - lits <- mapM spliceLit unlinkedBCOLits - ptrs <- mapM splicePtr unlinkedBCOPtrs - return bco { unlinkedBCOLits = lits, unlinkedBCOPtrs = ptrs } - - spliceLit (BCONPtrStr _) = do - rptrs <- MTL.get - case rptrs of - (RemotePtr p : rest) -> do - MTL.put rest - return (BCONPtrWord (fromIntegral p)) - _ -> panic "mallocStrings:spliceLit" - spliceLit other = return other - - splicePtr (BCOPtrBCO bco) = BCOPtrBCO <$> splice bco - splicePtr other = return other - - collect UnlinkedBCO{..} = do - mapM_ collectLit unlinkedBCOLits - mapM_ collectPtr unlinkedBCOPtrs - - collectLit (BCONPtrStr bs) = do - strs <- MTL.get - MTL.put (bs:strs) - collectLit _ = return () - - collectPtr (BCOPtrBCO bco) = collect bco - collectPtr _ = return () data RunAsmReader = RunAsmReader { isn_array :: {-# UNPACK #-} !(Array.IOUArray Int Word16) , ptr_array :: {-# UNPACK #-} !(SmallMutableArrayIO BCOPtr) @@ -729,15 +696,15 @@ assembleI platform i = case i of ENTER -> emit_ bci_ENTER [] RETURN rep -> emit_ (return_non_tuple rep) [] RETURN_TUPLE -> emit_ bci_RETURN_T [] - CCALL off m_addr i -> do np <- addr m_addr + CCALL off ffi i -> do np <- lit1 $ BCONPtrFFIInfo ffi emit_ bci_CCALL [wOp off, Op np, SmallOp i] PRIMCALL -> emit_ bci_PRIMCALL [] BRK_FUN arr tick_mod tick_mod_id tickx info_mod info_mod_id infox cc -> do p1 <- ptr (BCOPtrBreakArray arr) - tick_addr <- addr tick_mod - tick_unitid_addr <- addr tick_mod_id - info_addr <- addr info_mod - info_unitid_addr <- addr info_mod_id + tick_addr <- lit1 $ BCONPtrStr $ bytesFS $ moduleNameFS tick_mod + info_addr <- lit1 $ BCONPtrStr $ bytesFS $ moduleNameFS info_mod + tick_unitid_addr <- lit1 $ BCONPtrStr $ bytesFS $ unitIdFS tick_mod_id + info_unitid_addr <- lit1 $ BCONPtrStr $ bytesFS $ unitIdFS info_mod_id np <- addr cc emit_ bci_BRK_FUN [ Op p1 , Op tick_addr, Op info_addr ===================================== compiler/GHC/ByteCode/InfoTable.hs ===================================== @@ -13,11 +13,9 @@ import GHC.Prelude import GHC.Platform import GHC.Platform.Profile -import GHC.ByteCode.Types -import GHC.Runtime.Interpreter +import GHCi.Message import GHC.Types.Name ( Name, getName ) -import GHC.Types.Name.Env import GHC.Types.RepType import GHC.Core.DataCon ( DataCon, dataConRepArgTys, dataConIdentity ) @@ -35,33 +33,38 @@ import GHC.Utils.Panic -} -- Make info tables for the data decls in this module -mkITbls :: Interp -> Profile -> [TyCon] -> IO ItblEnv -mkITbls interp profile tcs = - foldr plusNameEnv emptyNameEnv <$> - mapM mkITbl (filter isDataTyCon tcs) +mkITbls :: Profile -> [TyCon] -> [(Name, ConInfoTable)] +mkITbls profile tcs = concatMap mkITbl (filter isDataTyCon tcs) where - mkITbl :: TyCon -> IO ItblEnv + mkITbl :: TyCon -> [(Name, ConInfoTable)] mkITbl tc | dcs `lengthIs` n -- paranoia; this is an assertion. - = make_constr_itbls interp profile dcs + = make_constr_itbls profile dcs where dcs = tyConDataCons tc n = tyConFamilySize tc mkITbl _ = panic "mkITbl" -mkItblEnv :: [(Name,ItblPtr)] -> ItblEnv -mkItblEnv pairs = mkNameEnv [(n, (n,p)) | (n,p) <- pairs] - -- Assumes constructors are numbered from zero, not one -make_constr_itbls :: Interp -> Profile -> [DataCon] -> IO ItblEnv -make_constr_itbls interp profile cons = +make_constr_itbls :: Profile -> [DataCon] -> [(Name, ConInfoTable)] +make_constr_itbls profile cons = -- TODO: the profile should be bundled with the interpreter: the rts ways are -- fixed for an interpreter - mkItblEnv <$> mapM (uncurry mk_itbl) (zip cons [0..]) - where - mk_itbl :: DataCon -> Int -> IO (Name,ItblPtr) - mk_itbl dcon conNo = do - let rep_args = [ prim_rep + map (uncurry mk_itbl) (zip cons [0..]) + where + mk_itbl :: DataCon -> Int -> (Name, ConInfoTable) + mk_itbl dcon conNo = + ( getName dcon, + ConInfoTable + tables_next_to_code + ptrs' + nptrs_really + conNo + (tagForCon platform dcon) + descr + ) + where + rep_args = [ prim_rep | arg <- dataConRepArgTys dcon , prim_rep <- typePrimRep (scaledThing arg) ] @@ -79,7 +82,3 @@ make_constr_itbls interp profile cons = platform = profilePlatform profile constants = platformConstants platform tables_next_to_code = platformTablesNextToCode platform - - r <- interpCmd interp (MkConInfoTable tables_next_to_code ptrs' nptrs_really - conNo (tagForCon platform dcon) descr) - return (getName dcon, ItblPtr r) ===================================== compiler/GHC/ByteCode/Instr.hs ===================================== @@ -15,7 +15,6 @@ import GHC.Prelude import GHC.ByteCode.Types import GHCi.RemoteTypes -import GHCi.FFI (C_ffi_cif) import GHC.StgToCmm.Layout ( ArgRep(..) ) import GHC.Utils.Outputable import GHC.Types.Name @@ -51,9 +50,7 @@ data ProtoBCO a protoBCOBitmapSize :: Word, protoBCOArity :: Int, -- what the BCO came from, for debugging only - protoBCOExpr :: Either [CgStgAlt] CgStgRhs, - -- malloc'd pointers - protoBCOFFIs :: [FFIInfo] + protoBCOExpr :: Either [CgStgAlt] CgStgRhs } -- | A local block label (e.g. identifying a case alternative). @@ -209,7 +206,7 @@ data BCInstr -- For doing calls to C (via glue code generated by libffi) | CCALL !WordOff -- stack frame size - (RemotePtr C_ffi_cif) -- addr of the glue code + !FFIInfo -- libffi ffi_cif function prototype !Word16 -- flags. -- -- 0x1: call is interruptible @@ -233,11 +230,11 @@ data BCInstr -- Breakpoints | BRK_FUN (ForeignRef BreakArray) - (RemotePtr ModuleName) -- breakpoint tick module - (RemotePtr UnitId) -- breakpoint tick module unit id + !ModuleName -- breakpoint tick module + !UnitId -- breakpoint tick module unit id !Word16 -- breakpoint tick index - (RemotePtr ModuleName) -- breakpoint info module - (RemotePtr UnitId) -- breakpoint info module unit id + !ModuleName -- breakpoint info module + !UnitId -- breakpoint info module unit id !Word16 -- breakpoint info index (RemotePtr CostCentre) @@ -266,10 +263,9 @@ instance Outputable a => Outputable (ProtoBCO a) where , protoBCOBitmap = bitmap , protoBCOBitmapSize = bsize , protoBCOArity = arity - , protoBCOExpr = origin - , protoBCOFFIs = ffis }) + , protoBCOExpr = origin }) = (text "ProtoBCO" <+> ppr name <> char '#' <> int arity - <+> text (show ffis) <> colon) + <> colon) $$ nest 3 (case origin of Left alts -> vcat (zipWith (<+>) (char '{' : repeat (char ';')) @@ -393,9 +389,9 @@ instance Outputable BCInstr where ppr (TESTEQ_P i lab) = text "TESTEQ_P" <+> ppr i <+> text "__" <> ppr lab ppr CASEFAIL = text "CASEFAIL" ppr (JMP lab) = text "JMP" <+> ppr lab - ppr (CCALL off marshal_addr flags) = text "CCALL " <+> ppr off + ppr (CCALL off ffi flags) = text "CCALL " <+> ppr off <+> text "marshal code at" - <+> text (show marshal_addr) + <+> text (show ffi) <+> (case flags of 0x1 -> text "(interruptible)" 0x2 -> text "(unsafe)" ===================================== compiler/GHC/ByteCode/Linker.hs ===================================== @@ -3,6 +3,7 @@ {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DataKinds #-} +{-# LANGUAGE RecordWildCards #-} {-# OPTIONS_GHC -optc-DNON_POSIX_SOURCE #-} -- -- (c) The University of Glasgow 2002-2006 @@ -83,9 +84,12 @@ lookupLiteral interp pkgs_loaded le ptr = case ptr of BCONPtrAddr nm -> do Ptr a# <- lookupAddr interp pkgs_loaded (addr_env le) nm return (W# (int2Word# (addr2Int# a#))) - BCONPtrStr _ -> - -- should be eliminated during assembleBCOs - panic "lookupLiteral: BCONPtrStr" + BCONPtrStr bs -> do + RemotePtr p <- fmap head $ interpCmd interp $ MallocStrings [bs] + pure $ fromIntegral p + BCONPtrFFIInfo (FFIInfo {..}) -> do + RemotePtr p <- interpCmd interp $ PrepFFI ffiInfoArgs ffiInfoRet + pure $ fromIntegral p lookupStaticPtr :: Interp -> FastString -> IO (Ptr ()) lookupStaticPtr interp addr_of_label_string = do ===================================== compiler/GHC/ByteCode/Types.hs ===================================== @@ -35,6 +35,7 @@ import GHC.Builtin.PrimOps import GHC.Types.SptEntry import GHC.Types.SrcLoc import GHCi.BreakArray +import GHCi.Message import GHCi.RemoteTypes import GHCi.FFI import Control.DeepSeq @@ -49,8 +50,8 @@ import qualified GHC.Exts.Heap as Heap import GHC.Stack.CCS import GHC.Cmm.Expr ( GlobalRegSet, emptyRegSet, regSetToList ) import GHC.Iface.Syntax -import Language.Haskell.Syntax.Module.Name (ModuleName) -import GHC.Unit.Types (UnitId) +import Language.Haskell.Syntax.Module.Name (ModuleName, mkModuleNameFS) +import GHC.Unit.Types (UnitId(..)) -- ----------------------------------------------------------------------------- -- Compiled Byte Code @@ -59,13 +60,10 @@ data CompiledByteCode = CompiledByteCode { bc_bcos :: FlatBag UnlinkedBCO -- ^ Bunch of interpretable bindings - , bc_itbls :: ItblEnv + , bc_itbls :: [(Name, ConInfoTable)] -- ^ Mapping from DataCons to their info tables - , bc_ffis :: [FFIInfo] - -- ^ ffi blocks we allocated - - , bc_strs :: AddrEnv + , bc_strs :: [(Name, ByteString)] -- ^ top-level strings (heap allocated) , bc_breaks :: Maybe ModBreaks @@ -76,9 +74,10 @@ data CompiledByteCode = CompiledByteCode -- BCOs. See Note [Grand plan for static forms] in -- "GHC.Iface.Tidy.StaticPtrTable". } - -- ToDo: we're not tracking strings that we malloc'd -newtype FFIInfo = FFIInfo (RemotePtr C_ffi_cif) - deriving (Show, NFData) + +-- | A libffi ffi_cif function prototype. +data FFIInfo = FFIInfo { ffiInfoArgs :: ![FFIType], ffiInfoRet :: !FFIType } + deriving (Show) instance Outputable CompiledByteCode where ppr CompiledByteCode{..} = ppr $ elemsFlatBag bc_bcos @@ -88,9 +87,8 @@ instance Outputable CompiledByteCode where seqCompiledByteCode :: CompiledByteCode -> () seqCompiledByteCode CompiledByteCode{..} = rnf bc_bcos `seq` - seqEltsNameEnv rnf bc_itbls `seq` - rnf bc_ffis `seq` - seqEltsNameEnv rnf bc_strs `seq` + rnf bc_itbls `seq` + rnf bc_strs `seq` rnf (fmap seqModBreaks bc_breaks) newtype ByteOff = ByteOff Int @@ -200,10 +198,11 @@ data BCONPtr -- | A reference to a top-level string literal; see -- Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode. | BCONPtrAddr !Name - -- | Only used internally in the assembler in an intermediate representation; - -- should never appear in a fully-assembled UnlinkedBCO. + -- | A top-level string literal. -- Also see Note [Allocating string literals] in GHC.ByteCode.Asm. | BCONPtrStr !ByteString + -- | A libffi ffi_cif function prototype. + | BCONPtrFFIInfo !FFIInfo instance NFData BCONPtr where rnf x = x `seq` () @@ -263,9 +262,9 @@ data ModBreaks -- ^ Array pointing to cost centre for each breakpoint , modBreaks_breakInfo :: IntMap CgBreakInfo -- ^ info about each breakpoint from the bytecode generator - , modBreaks_module :: RemotePtr ModuleName + , modBreaks_module :: !ModuleName -- ^ info about the module in which we are setting the breakpoint - , modBreaks_module_unitid :: RemotePtr UnitId + , modBreaks_module_unitid :: !UnitId -- ^ The 'UnitId' of the 'ModuleName' } @@ -290,8 +289,8 @@ emptyModBreaks = ModBreaks , modBreaks_decls = array (0,-1) [] , modBreaks_ccs = array (0,-1) [] , modBreaks_breakInfo = IntMap.empty - , modBreaks_module = toRemotePtr nullPtr - , modBreaks_module_unitid = toRemotePtr nullPtr + , modBreaks_module = mkModuleNameFS nilFS + , modBreaks_module_unitid = UnitId nilFS } {- ===================================== compiler/GHC/HsToCore/Breakpoints.hs ===================================== @@ -34,7 +34,6 @@ mkModBreaks interp mod extendedMixEntries breakArray <- GHCi.newBreakArray interp count ccs <- mkCCSArray interp mod count entries - (mod_ptr, mod_id_ptr) <- GHCi.newModule interp mod let locsTicks = listArray (0,count-1) [ tick_loc t | t <- entries ] varsTicks = listArray (0,count-1) [ tick_ids t | t <- entries ] @@ -45,8 +44,8 @@ mkModBreaks interp mod extendedMixEntries , modBreaks_vars = varsTicks , modBreaks_decls = declsTicks , modBreaks_ccs = ccs - , modBreaks_module = mod_ptr - , modBreaks_module_unitid = mod_id_ptr + , modBreaks_module = moduleName mod + , modBreaks_module_unitid = toUnitId $ moduleUnit mod } mkCCSArray ===================================== compiler/GHC/Linker/Loader.hs ===================================== @@ -54,7 +54,7 @@ import GHC.Tc.Utils.Monad import GHC.Runtime.Interpreter import GHCi.RemoteTypes import GHC.Iface.Load -import GHCi.Message (LoadedDLL) +import GHCi.Message (ConInfoTable(..), LoadedDLL) import GHC.ByteCode.Linker import GHC.ByteCode.Asm @@ -95,6 +95,7 @@ import GHC.Linker.Types -- Standard libraries import Control.Monad +import Data.ByteString (ByteString) import qualified Data.Set as Set import Data.Char (isSpace) import qualified Data.Foldable as Foldable @@ -688,8 +689,10 @@ loadDecls interp hsc_env span linkable = do else do -- Link the expression itself let le = linker_env pls - le2 = le { itbl_env = foldl' (\acc cbc -> plusNameEnv acc (bc_itbls cbc)) (itbl_env le) cbcs - , addr_env = foldl' (\acc cbc -> plusNameEnv acc (bc_strs cbc)) (addr_env le) cbcs } + le2_itbl_env <- linkITbls interp (itbl_env le) (concat $ map bc_itbls cbcs) + le2_addr_env <- foldlM (\env cbc -> allocateTopStrings interp (bc_strs cbc) env) (addr_env le) cbcs + let le2 = le { itbl_env = le2_itbl_env + , addr_env = le2_addr_env } -- Link the necessary packages and linkables new_bindings <- linkSomeBCOs interp (pkgs_loaded pls) le2 cbcs @@ -911,9 +914,9 @@ dynLinkBCOs interp pls bcos = do le1 = linker_env pls - ie2 = foldr plusNameEnv (itbl_env le1) (map bc_itbls cbcs) - ae2 = foldr plusNameEnv (addr_env le1) (map bc_strs cbcs) - le2 = le1 { itbl_env = ie2, addr_env = ae2 } + ie2 <- linkITbls interp (itbl_env le1) (concatMap bc_itbls cbcs) + ae2 <- foldlM (\env cbc -> allocateTopStrings interp (bc_strs cbc) env) (addr_env le1) cbcs + let le2 = le1 { itbl_env = ie2, addr_env = ae2 } names_and_refs <- linkSomeBCOs interp (pkgs_loaded pls) le2 cbcs @@ -958,6 +961,11 @@ makeForeignNamedHValueRefs makeForeignNamedHValueRefs interp bindings = mapM (\(n, hvref) -> (n,) <$> mkFinalizedHValue interp hvref) bindings +linkITbls :: Interp -> ItblEnv -> [(Name, ConInfoTable)] -> IO ItblEnv +linkITbls interp = foldlM $ \env (nm, itbl) -> do + r <- interpCmd interp $ MkConInfoTable itbl + evaluate $ extendNameEnv env nm (nm, ItblPtr r) + {- ********************************************************************** Unload some object modules @@ -1614,3 +1622,13 @@ maybePutStr logger s = maybePutSDoc logger (text s) maybePutStrLn :: Logger -> String -> IO () maybePutStrLn logger s = maybePutSDoc logger (text s <> text "\n") + +-- | see Note [Generating code for top-level string literal bindings] +allocateTopStrings :: + Interp -> [(Name, ByteString)] -> AddrEnv -> IO AddrEnv +allocateTopStrings interp topStrings prev_env = do + let (bndrs, strings) = unzip topStrings + ptrs <- interpCmd interp $ MallocStrings strings + evaluate $ extendNameEnvList prev_env (zipWith mk_entry bndrs ptrs) + where + mk_entry nm ptr = (nm, (nm, AddrPtr ptr)) ===================================== compiler/GHC/Runtime/Interpreter.hs ===================================== @@ -21,7 +21,6 @@ module GHC.Runtime.Interpreter , mkCostCentres , costCentreStackInfo , newBreakArray - , newModule , storeBreakpoint , breakpointStatus , getBreakpointVar @@ -376,14 +375,6 @@ newBreakArray interp size = do breakArray <- interpCmd interp (NewBreakArray size) mkFinalizedHValue interp breakArray -newModule :: Interp -> Module -> IO (RemotePtr ModuleName, RemotePtr UnitId) -newModule interp mod = do - let - mod_name = moduleNameString $ moduleName mod - mod_id = fastStringToShortByteString $ unitIdFS $ toUnitId $ moduleUnit mod - (mod_ptr, mod_id_ptr) <- interpCmd interp (NewBreakModule mod_name mod_id) - pure (castRemotePtr mod_ptr, castRemotePtr mod_id_ptr) - storeBreakpoint :: Interp -> ForeignRef BreakArray -> Int -> Int -> IO () storeBreakpoint interp ref ix cnt = do -- #19157 withForeignRef ref $ \breakarray -> ===================================== compiler/GHC/StgToByteCode.hs ===================================== @@ -67,7 +67,6 @@ import GHC.Data.Bitmap import GHC.Data.FlatBag as FlatBag import GHC.Data.OrdList import GHC.Data.Maybe -import GHC.Types.Name.Env (mkNameEnv) import GHC.Types.Tickish import GHC.Types.SptEntry @@ -82,7 +81,6 @@ import GHC.Unit.Home.PackageTable (lookupHpt) import Data.Array import Data.Coerce (coerce) -import Data.ByteString (ByteString) #if MIN_VERSION_rts(1,0,3) import qualified Data.ByteString.Char8 as BS #endif @@ -118,19 +116,15 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks spt_entries bnd <- binds case bnd of StgTopLifted bnd -> [Right bnd] - StgTopStringLit b str -> [Left (b, str)] + StgTopStringLit b str -> [Left (getName b, str)] flattenBind (StgNonRec b e) = [(b,e)] flattenBind (StgRec bs) = bs - stringPtrs <- allocateTopStrings interp strings (BcM_State{..}, proto_bcos) <- runBc hsc_env this_mod mb_modBreaks $ do let flattened_binds = concatMap flattenBind (reverse lifted_binds) FlatBag.fromList (fromIntegral $ length flattened_binds) <$> mapM schemeTopBind flattened_binds - when (notNull ffis) - (panic "GHC.StgToByteCode.byteCodeGen: missing final emitBc?") - putDumpFileMaybe logger Opt_D_dump_BCOs "Proto-BCOs" FormatByteCode (vcat (intersperse (char ' ') (map ppr $ elemsFlatBag proto_bcos))) @@ -138,7 +132,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks spt_entries let mod_breaks = case modBreaks of Nothing -> Nothing Just mb -> Just mb{ modBreaks_breakInfo = breakInfo } - cbc <- assembleBCOs interp profile proto_bcos tycs stringPtrs mod_breaks spt_entries + cbc <- assembleBCOs profile proto_bcos tycs strings mod_breaks spt_entries -- Squash space leaks in the CompiledByteCode. This is really -- important, because when loading a set of modules into GHCi @@ -152,22 +146,8 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks spt_entries where dflags = hsc_dflags hsc_env logger = hsc_logger hsc_env - interp = hscInterp hsc_env profile = targetProfile dflags --- | see Note [Generating code for top-level string literal bindings] -allocateTopStrings - :: Interp - -> [(Id, ByteString)] - -> IO AddrEnv -allocateTopStrings interp topStrings = do - let !(bndrs, strings) = unzip topStrings - ptrs <- interpCmd interp $ MallocStrings strings - return $ mkNameEnv (zipWith mk_entry bndrs ptrs) - where - mk_entry bndr ptr = let nm = getName bndr - in (nm, (nm, AddrPtr ptr)) - {- Note [Generating code for top-level string literal bindings] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ As described in Note [Compilation plan for top-level string literals] @@ -178,9 +158,9 @@ the bytecode compiler: (1) compiling the bindings themselves, and we deal with them: 1. Top-level string literal bindings are separated from the rest of - the module. Memory for them is allocated immediately, via - interpCmd, in allocateTopStrings, and the resulting AddrEnv is - recorded in the bc_strs field of the CompiledByteCode result. + the module. Memory is not allocated until bytecode link-time, the + bc_strs field of the CompiledByteCode result records [(Name, ByteString)] + directly. 2. When we encounter a reference to a top-level string literal, we generate a PUSH_ADDR pseudo-instruction, which is assembled to @@ -254,17 +234,15 @@ mkProtoBCO -> WordOff -- ^ bitmap size -> [StgWord] -- ^ bitmap -> Bool -- ^ True <=> is a return point, rather than a function - -> [FFIInfo] -> ProtoBCO Name -mkProtoBCO platform _add_bco_name nm instrs_ordlist origin arity bitmap_size bitmap is_ret ffis +mkProtoBCO platform _add_bco_name nm instrs_ordlist origin arity bitmap_size bitmap is_ret = ProtoBCO { protoBCOName = nm, protoBCOInstrs = maybe_add_bco_name $ maybe_add_stack_check peep_d, protoBCOBitmap = bitmap, protoBCOBitmapSize = fromIntegral bitmap_size, protoBCOArity = arity, - protoBCOExpr = origin, - protoBCOFFIs = ffis + protoBCOExpr = origin } where #if MIN_VERSION_rts(1,0,3) @@ -334,7 +312,7 @@ schemeTopBind (id, rhs) -- by just re-using the single top-level definition. So -- for the worker itself, we must allocate it directly. -- ioToBc (putStrLn $ "top level BCO") - emitBc (mkProtoBCO platform add_bco_name + pure (mkProtoBCO platform add_bco_name (getName id) (toOL [PACK data_con 0, RETURN P]) (Right rhs) 0 0 [{-no bitmap-}] False{-not alts-}) @@ -399,7 +377,7 @@ schemeR_wrk fvs nm original_body (args, body) bitmap = mkBitmap platform bits body_code <- schemeER_wrk sum_szsb_args p_init body - emitBc (mkProtoBCO platform add_bco_name nm body_code (Right original_body) + pure (mkProtoBCO platform add_bco_name nm body_code (Right original_body) arity bitmap_size bitmap False{-not alts-}) -- | Introduce break instructions for ticked expressions. @@ -478,7 +456,7 @@ break_info hsc_env mod current_mod current_mod_breaks where check_mod_ptr mb | mod_ptr <- modBreaks_module mb - , fromRemotePtr mod_ptr /= nullPtr + , not $ nullFS $ moduleNameFS mod_ptr = Just mb | otherwise = Nothing @@ -546,7 +524,7 @@ returnUnliftedReps d s szb reps = do -- otherwise use RETURN_TUPLE with a tuple descriptor nv_reps -> do let (call_info, args_offsets) = layoutNativeCall profile NativeTupleReturn 0 id nv_reps - tuple_bco <- emitBc (tupleBCO platform call_info args_offsets) + tuple_bco = tupleBCO platform call_info args_offsets return $ PUSH_UBX (mkNativeCallInfoLit platform call_info) 1 `consOL` PUSH_BCO tuple_bco `consOL` unitOL RETURN_TUPLE @@ -1097,16 +1075,15 @@ doCase d s p scrut bndr alts scrut_code <- schemeE (d + ret_frame_size_b + save_ccs_size_b) (d + ret_frame_size_b + save_ccs_size_b) p scrut - alt_bco' <- emitBc alt_bco if ubx_tuple_frame - then do tuple_bco <- emitBc (tupleBCO platform call_info args_offsets) - return (PUSH_ALTS_TUPLE alt_bco' call_info tuple_bco + then do let tuple_bco = tupleBCO platform call_info args_offsets + return (PUSH_ALTS_TUPLE alt_bco call_info tuple_bco `consOL` scrut_code) else let scrut_rep = case non_void_arg_reps of [] -> V [rep] -> rep _ -> panic "schemeE(StgCase).push_alts" - in return (PUSH_ALTS alt_bco' scrut_rep `consOL` scrut_code) + in return (PUSH_ALTS alt_bco scrut_rep `consOL` scrut_code) -- ----------------------------------------------------------------------------- @@ -1398,7 +1375,7 @@ Note [unboxed tuple bytecodes and tuple_BCO] -} -tupleBCO :: Platform -> NativeCallInfo -> [(PrimRep, ByteOff)] -> [FFIInfo] -> ProtoBCO Name +tupleBCO :: Platform -> NativeCallInfo -> [(PrimRep, ByteOff)] -> ProtoBCO Name tupleBCO platform args_info args = mkProtoBCO platform Nothing invented_name body_code (Left []) 0{-no arity-} bitmap_size bitmap False{-is alts-} @@ -1419,7 +1396,7 @@ tupleBCO platform args_info args = body_code = mkSlideW 0 1 -- pop frame header `snocOL` RETURN_TUPLE -- and add it again -primCallBCO :: Platform -> NativeCallInfo -> [(PrimRep, ByteOff)] -> [FFIInfo] -> ProtoBCO Name +primCallBCO :: Platform -> NativeCallInfo -> [(PrimRep, ByteOff)] -> ProtoBCO Name primCallBCO platform args_info args = mkProtoBCO platform Nothing invented_name body_code (Left []) 0{-no arity-} bitmap_size bitmap False{-is alts-} @@ -1528,7 +1505,7 @@ generatePrimCall d s p target _mb_unit _result_ty args massert (off == dd + szb) go (dd + szb) (push:pushes) cs push_args <- go d [] shifted_args_offsets - args_bco <- emitBc (primCallBCO platform args_info prim_args_offsets) + let args_bco = primCallBCO platform args_info prim_args_offsets return $ mconcat push_args `appOL` (push_target `consOL` push_info `consOL` @@ -1706,13 +1683,10 @@ generateCCall d0 s p (CCallSpec target _ safety) result_ty args let ffires = primRepToFFIType platform r_rep ffiargs = map (primRepToFFIType platform) a_reps - interp <- hscInterp <$> getHscEnv - token <- ioToBc $ interpCmd interp (PrepFFI ffiargs ffires) - recordFFIBc token let -- do the call - do_call = unitOL (CCALL stk_offset token flags) + do_call = unitOL (CCALL stk_offset (FFIInfo ffiargs ffires) flags) where flags = case safety of PlaySafe -> 0x0 PlayInterruptible -> 0x1 @@ -2311,8 +2285,6 @@ data BcM_State { bcm_hsc_env :: HscEnv , thisModule :: Module -- current module (for breakpoints) , nextlabel :: Word32 -- for generating local labels - , ffis :: [FFIInfo] -- ffi info blocks, to free later - -- Should be free()d when it is GCd , modBreaks :: Maybe ModBreaks -- info about breakpoints , breakInfo :: IntMap CgBreakInfo -- ^ Info at breakpoint occurrence. @@ -2333,7 +2305,7 @@ runBc :: HscEnv -> Module -> Maybe ModBreaks -> BcM r -> IO (BcM_State, r) runBc hsc_env this_mod modBreaks (BcM m) - = m (BcM_State hsc_env this_mod 0 [] modBreaks IntMap.empty 0) + = m (BcM_State hsc_env this_mod 0 modBreaks IntMap.empty 0) thenBc :: BcM a -> (a -> BcM b) -> BcM b thenBc (BcM expr) cont = BcM $ \st0 -> do @@ -2376,14 +2348,6 @@ shouldAddBcoName = do then Just <$> getCurrentModule else return Nothing -emitBc :: ([FFIInfo] -> ProtoBCO Name) -> BcM (ProtoBCO Name) -emitBc bco - = BcM $ \st -> return (st{ffis=[]}, bco (ffis st)) - -recordFFIBc :: RemotePtr C_ffi_cif -> BcM () -recordFFIBc a - = BcM $ \st -> return (st{ffis = FFIInfo a : ffis st}, ()) - getLabelBc :: BcM LocalLabel getLabelBc = BcM $ \st -> do let nl = nextlabel st ===================================== libraries/ghci/GHCi/Message.hs ===================================== @@ -11,6 +11,7 @@ -- module GHCi.Message ( Message(..), Msg(..) + , ConInfoTable(..) , THMessage(..), THMsg(..) , QResult(..) , EvalStatus_(..), EvalStatus, EvalResult(..), EvalOpts(..), EvalExpr(..) @@ -41,6 +42,7 @@ import GHC.ForeignSrcLang import GHC.Fingerprint import GHC.Conc (pseq, par) import Control.Concurrent +import Control.DeepSeq import Control.Exception #if MIN_VERSION_base(4,20,0) import Control.Exception.Context @@ -117,12 +119,7 @@ data Message a where -- | Create an info table for a constructor MkConInfoTable - :: Bool -- TABLES_NEXT_TO_CODE - -> Int -- ptr words - -> Int -- non-ptr words - -> Int -- constr tag - -> Int -- pointer tag - -> ByteString -- constructor desccription + :: !ConInfoTable -> Message (RemotePtr Heap.StgInfoTable) -- | Evaluate a statement @@ -244,16 +241,23 @@ data Message a where :: RemoteRef (ResumeContext ()) -> Message (EvalStatus ()) - -- | Allocate a string for a breakpoint module name. - -- This uses an empty dummy type because @ModuleName@ isn't available here. - NewBreakModule - :: String -- ^ @ModuleName@ - -> BS.ShortByteString -- ^ @UnitId@ for the given @ModuleName@ - -> Message (RemotePtr BreakModule, RemotePtr BreakUnitId) +deriving instance Show (Message a) +-- | Used to dynamically create a data constructor's info table at +-- run-time. +data ConInfoTable = ConInfoTable { + conItblTablesNextToCode :: !Bool, -- ^ TABLES_NEXT_TO_CODE + conItblPtrs :: !Int, -- ^ ptr words + conItblNPtrs :: !Int, -- ^ non-ptr words + conItblConTag :: !Int, -- ^ constr tag + conItblPtrTag :: !Int, -- ^ pointer tag + conItblDescr :: !ByteString -- ^ constructor desccription +} + deriving (Generic, Show) -deriving instance Show (Message a) +instance Binary ConInfoTable +instance NFData ConInfoTable -- | Template Haskell return values data QResult a @@ -568,7 +572,7 @@ getMessage = do 15 -> Msg <$> MallocStrings <$> get 16 -> Msg <$> (PrepFFI <$> get <*> get) 17 -> Msg <$> FreeFFI <$> get - 18 -> Msg <$> (MkConInfoTable <$> get <*> get <*> get <*> get <*> get <*> get) + 18 -> Msg <$> MkConInfoTable <$> get 19 -> Msg <$> (EvalStmt <$> get <*> get) 20 -> Msg <$> (ResumeStmt <$> get <*> get) 21 -> Msg <$> (AbandonStmt <$> get) @@ -589,9 +593,8 @@ getMessage = do 36 -> Msg <$> (Seq <$> get) 37 -> Msg <$> return RtsRevertCAFs 38 -> Msg <$> (ResumeSeq <$> get) - 39 -> Msg <$> (NewBreakModule <$> get <*> get) - 40 -> Msg <$> (LookupSymbolInDLL <$> get <*> get) - 41 -> Msg <$> (WhereFrom <$> get) + 39 -> Msg <$> (LookupSymbolInDLL <$> get <*> get) + 40 -> Msg <$> (WhereFrom <$> get) _ -> error $ "Unknown Message code " ++ (show b) putMessage :: Message a -> Put @@ -615,7 +618,7 @@ putMessage m = case m of MallocStrings bss -> putWord8 15 >> put bss PrepFFI args res -> putWord8 16 >> put args >> put res FreeFFI p -> putWord8 17 >> put p - MkConInfoTable tc p n t pt d -> putWord8 18 >> put tc >> put p >> put n >> put t >> put pt >> put d + MkConInfoTable itbl -> putWord8 18 >> put itbl EvalStmt opts val -> putWord8 19 >> put opts >> put val ResumeStmt opts val -> putWord8 20 >> put opts >> put val AbandonStmt val -> putWord8 21 >> put val @@ -636,9 +639,8 @@ putMessage m = case m of Seq a -> putWord8 36 >> put a RtsRevertCAFs -> putWord8 37 ResumeSeq a -> putWord8 38 >> put a - NewBreakModule name unitid -> putWord8 39 >> put name >> put unitid - LookupSymbolInDLL dll str -> putWord8 40 >> put dll >> put str - WhereFrom a -> putWord8 41 >> put a + LookupSymbolInDLL dll str -> putWord8 39 >> put dll >> put str + WhereFrom a -> putWord8 40 >> put a {- Note [Parallelize CreateBCOs serialization] ===================================== libraries/ghci/GHCi/Run.hs ===================================== @@ -74,7 +74,7 @@ run m = case m of UnloadObj str -> unloadObj str AddLibrarySearchPath str -> toRemotePtr <$> addLibrarySearchPath str RemoveLibrarySearchPath ptr -> removeLibrarySearchPath (fromRemotePtr ptr) - MkConInfoTable tc ptrs nptrs tag ptrtag desc -> + MkConInfoTable (ConInfoTable tc ptrs nptrs tag ptrtag desc) -> toRemotePtr <$> mkConInfoTable tc ptrs nptrs tag ptrtag desc ResolveObjs -> resolveObjs FindSystemLibrary str -> findSystemLibrary str @@ -96,10 +96,6 @@ run m = case m of MkCostCentres mod ccs -> mkCostCentres mod ccs CostCentreStackInfo ptr -> ccsToStrings (fromRemotePtr ptr) NewBreakArray sz -> mkRemoteRef =<< newBreakArray sz - NewBreakModule name unitid -> do - namePtr <- newModuleName name - uidPtr <- newUnitId unitid - pure (namePtr, uidPtr) SetupBreakpoint ref ix cnt -> do arr <- localRef ref; _ <- setupBreakpoint arr ix cnt @@ -440,13 +436,6 @@ mkString0 bs = B.unsafeUseAsCStringLen bs $ \(cstr,len) -> do pokeElemOff (ptr :: Ptr CChar) len 0 return (castRemotePtr (toRemotePtr ptr)) -mkShortByteString0 :: BS.ShortByteString -> IO (RemotePtr ()) -mkShortByteString0 bs = BS.useAsCStringLen bs $ \(cstr,len) -> do - ptr <- mallocBytes (len+1) - copyBytes ptr cstr len - pokeElemOff (ptr :: Ptr CChar) len 0 - return (castRemotePtr (toRemotePtr ptr)) - mkCostCentres :: String -> [(String,String)] -> IO [RemotePtr CostCentre] #if defined(PROFILING) mkCostCentres mod ccs = do @@ -464,14 +453,6 @@ foreign import ccall unsafe "mkCostCentre" mkCostCentres _ _ = return [] #endif -newModuleName :: String -> IO (RemotePtr BreakModule) -newModuleName name = - castRemotePtr . toRemotePtr <$> newCString name - -newUnitId :: BS.ShortByteString -> IO (RemotePtr BreakUnitId) -newUnitId name = - castRemotePtr <$> mkShortByteString0 name - getIdValFromApStack :: HValue -> Int -> IO (Maybe HValue) getIdValFromApStack apStack (I# stackDepth) = do case getApStackVal# apStack stackDepth of ===================================== testsuite/tests/bytecode/T22376/all.T ===================================== @@ -1,2 +1,2 @@ -test('T22376', [req_interp, extra_files(['A.hs', 'B.hs'])], multimod_compile_and_run, +test('T22376', [extra_files(['A.hs', 'B.hs'])], multimod_compile_and_run, ['T22376', '-O1 -fwrite-if-simplified-core -fbyte-code-and-object-code -fprefer-byte-code']) ===================================== testsuite/tests/perf/should_run/ByteCodeAsm.hs ===================================== @@ -49,11 +49,11 @@ instrs = [ STKCHECK 1234 ++ [ PUSH_G appAName | _ <- [0..100] ] ++ [ PUSH_BCO fake_proto2 ] -fake_proto = ProtoBCO appAName instrs [] 0 0 (Left []) [] +fake_proto = ProtoBCO appAName instrs [] 0 0 (Left []) instrs2 = [ STKCHECK 77, UNPACK 4, SLIDE 0 4, ENTER ] -fake_proto2 = ProtoBCO appAName instrs2 [] 0 0 (Left []) [] +fake_proto2 = ProtoBCO appAName instrs2 [] 0 0 (Left []) main :: IO () main = do View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/3d08b572dfe99ea851f0c506b19c29d... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/3d08b572dfe99ea851f0c506b19c29d... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Cheng Shao (@TerrorJack)