Cheng Shao pushed to branch wip/bytecode-serialize-3 at Glasgow Haskell Compiler / GHC
Commits:
-
bca57385
by Cheng Shao at 2025-08-10T13:06:05+00:00
-
c9eb4883
by Cheng Shao at 2025-08-10T13:06:11+00:00
7 changed files:
- compiler/GHC/Builtin/PrimOps.hs
- + compiler/GHC/ByteCode/Serialize.hs
- compiler/GHC/ByteCode/Types.hs
- compiler/GHC/Data/FlatBag.hs
- compiler/GHC/Driver/Main.hs
- compiler/GHC/Utils/Binary.hs
- compiler/ghc.cabal.in
Changes:
... | ... | @@ -53,6 +53,7 @@ import GHC.Types.Unique ( Unique ) |
53 | 53 | |
54 | 54 | import GHC.Unit.Types ( Unit )
|
55 | 55 | |
56 | +import GHC.Utils.Binary
|
|
56 | 57 | import GHC.Utils.Outputable
|
57 | 58 | import GHC.Utils.Panic
|
58 | 59 | |
... | ... | @@ -929,3 +930,8 @@ primOpIsReallyInline = \case |
929 | 930 | DataToTagSmallOp -> False
|
930 | 931 | DataToTagLargeOp -> False
|
931 | 932 | p -> not (primOpOutOfLine p)
|
933 | + |
|
934 | +instance Binary PrimOp where
|
|
935 | + get bh = (allThePrimOps !!) <$> get bh
|
|
936 | + |
|
937 | + put_ bh = put_ bh . primOpTag |
1 | +{-# LANGUAGE MultiWayIf #-}
|
|
2 | +{-# LANGUAGE RecordWildCards #-}
|
|
3 | +{-# OPTIONS_GHC -Wno-orphans #-}
|
|
4 | + |
|
5 | +module GHC.ByteCode.Serialize
|
|
6 | + ( testBinByteCode,
|
|
7 | + )
|
|
8 | +where
|
|
9 | + |
|
10 | +import Control.Monad
|
|
11 | +import Data.Binary qualified as Binary
|
|
12 | +import Data.ByteString.Lazy qualified as LBS
|
|
13 | +import Data.Foldable
|
|
14 | +import Data.IORef
|
|
15 | +import Data.Proxy
|
|
16 | +import Data.Word
|
|
17 | +import GHC.Builtin.Types
|
|
18 | +import GHC.ByteCode.Breakpoints
|
|
19 | +import GHC.ByteCode.Types
|
|
20 | +import GHC.Data.FastString
|
|
21 | +import GHC.Driver.Env
|
|
22 | +import GHC.Iface.Binary
|
|
23 | +import GHC.Prelude
|
|
24 | +import GHC.Types.Id
|
|
25 | +import GHC.Types.Name
|
|
26 | +import GHC.Types.Name.Cache
|
|
27 | +import GHC.Types.SptEntry
|
|
28 | +import GHC.Types.SrcLoc
|
|
29 | +import GHC.Utils.Binary
|
|
30 | +import GHC.Utils.Exception
|
|
31 | +import GHC.Utils.TmpFs
|
|
32 | +import GHCi.Message
|
|
33 | +import System.FilePath
|
|
34 | + |
|
35 | +testBinByteCode :: HscEnv -> CompiledByteCode -> IO CompiledByteCode
|
|
36 | +testBinByteCode hsc_env cbc = withSystemTempDirectory "ghc-bbc" $ \tmpdir -> do
|
|
37 | + let f = tmpdir </> "ghc-bbc"
|
|
38 | + roundtripBinByteCode hsc_env f cbc
|
|
39 | + |
|
40 | +roundtripBinByteCode ::
|
|
41 | + HscEnv -> FilePath -> CompiledByteCode -> IO CompiledByteCode
|
|
42 | +roundtripBinByteCode hsc_env f cbc = do
|
|
43 | + writeBinByteCode f cbc
|
|
44 | + readBinByteCode hsc_env f
|
|
45 | + |
|
46 | +readBinByteCode :: HscEnv -> FilePath -> IO CompiledByteCode
|
|
47 | +readBinByteCode hsc_env f = do
|
|
48 | + bh' <- readBinMem f
|
|
49 | + bh <- addSerializableNameReader hsc_env bh'
|
|
50 | + getWithUserData (hsc_NC hsc_env) bh
|
|
51 | + |
|
52 | +writeBinByteCode :: FilePath -> CompiledByteCode -> IO ()
|
|
53 | +writeBinByteCode f cbc = do
|
|
54 | + bh' <- openBinMem (1024 * 1024)
|
|
55 | + bh <- addSerializableNameWriter bh'
|
|
56 | + putWithUserData QuietBinIFace NormalCompression bh cbc
|
|
57 | + writeBinMem bh f
|
|
58 | + |
|
59 | +instance Binary CompiledByteCode where
|
|
60 | + get bh = do
|
|
61 | + bc_bcos <- get bh
|
|
62 | + bc_itbls_len <- get bh
|
|
63 | + bc_itbls <- replicateM bc_itbls_len $ do
|
|
64 | + nm <- getViaSerializableName bh
|
|
65 | + itbl <- get bh
|
|
66 | + pure (nm, itbl)
|
|
67 | + bc_strs_len <- get bh
|
|
68 | + bc_strs <-
|
|
69 | + replicateM bc_strs_len $ (,) <$> getViaSerializableName bh <*> get bh
|
|
70 | + bc_breaks <- get bh
|
|
71 | + bc_spt_entries <- get bh
|
|
72 | + evaluate
|
|
73 | + CompiledByteCode
|
|
74 | + { bc_bcos,
|
|
75 | + bc_itbls,
|
|
76 | + bc_strs,
|
|
77 | + bc_breaks,
|
|
78 | + bc_spt_entries
|
|
79 | + }
|
|
80 | + |
|
81 | + put_ bh CompiledByteCode {..} = do
|
|
82 | + put_ bh bc_bcos
|
|
83 | + put_ bh $ length bc_itbls
|
|
84 | + for_ bc_itbls $ \(nm, itbl) -> do
|
|
85 | + putViaSerializableName bh nm
|
|
86 | + put_ bh itbl
|
|
87 | + put_ bh $ length bc_strs
|
|
88 | + for_ bc_strs $ \(nm, str) -> putViaSerializableName bh nm *> put_ bh str
|
|
89 | + put_ bh bc_breaks
|
|
90 | + put_ bh bc_spt_entries
|
|
91 | + |
|
92 | +instance Binary InternalModBreaks where
|
|
93 | + get bh = InternalModBreaks <$> get bh <*> get bh
|
|
94 | + |
|
95 | + put_ bh InternalModBreaks {..} =
|
|
96 | + put_ bh imodBreaks_breakInfo *> put_ bh imodBreaks_modBreaks
|
|
97 | + |
|
98 | +instance Binary ModBreaks where
|
|
99 | + get bh = ModBreaks <$> get bh <*> get bh <*> get bh <*> get bh <*> get bh
|
|
100 | + |
|
101 | + put_ bh ModBreaks {..} =
|
|
102 | + put_ bh modBreaks_locs
|
|
103 | + *> put_ bh modBreaks_vars
|
|
104 | + *> put_ bh modBreaks_decls
|
|
105 | + *> put_ bh modBreaks_ccs
|
|
106 | + *> put_ bh modBreaks_module
|
|
107 | + |
|
108 | +instance Binary SrcSpan where
|
|
109 | + get bh = unBinSrcSpan <$> get bh
|
|
110 | + |
|
111 | + put_ bh = put_ bh . BinSrcSpan
|
|
112 | + |
|
113 | +instance Binary CgBreakInfo where
|
|
114 | + put_ bh CgBreakInfo {..} =
|
|
115 | + put_ bh cgb_tyvars
|
|
116 | + *> put_ bh cgb_vars
|
|
117 | + *> put_ bh cgb_resty
|
|
118 | + *> put_ bh cgb_tick_id
|
|
119 | + |
|
120 | + get bh = CgBreakInfo <$> get bh <*> get bh <*> get bh <*> get bh
|
|
121 | + |
|
122 | +instance Binary ConInfoTable where
|
|
123 | + get bh = Binary.decode . LBS.fromStrict <$> get bh
|
|
124 | + |
|
125 | + put_ bh = put_ bh . LBS.toStrict . Binary.encode
|
|
126 | + |
|
127 | +instance Binary UnlinkedBCO where
|
|
128 | + get bh =
|
|
129 | + UnlinkedBCO
|
|
130 | + <$> getViaSerializableName bh
|
|
131 | + <*> get bh
|
|
132 | + <*> (Binary.decode . LBS.fromStrict <$> get bh)
|
|
133 | + <*> (Binary.decode . LBS.fromStrict <$> get bh)
|
|
134 | + <*> get bh
|
|
135 | + <*> get bh
|
|
136 | + |
|
137 | + put_ bh UnlinkedBCO {..} = do
|
|
138 | + putViaSerializableName bh unlinkedBCOName
|
|
139 | + put_ bh unlinkedBCOArity
|
|
140 | + put_ bh $ LBS.toStrict $ Binary.encode unlinkedBCOInstrs
|
|
141 | + put_ bh $ LBS.toStrict $ Binary.encode unlinkedBCOBitmap
|
|
142 | + put_ bh unlinkedBCOLits
|
|
143 | + put_ bh unlinkedBCOPtrs
|
|
144 | + |
|
145 | +instance Binary BCOPtr where
|
|
146 | + get bh = do
|
|
147 | + t <- getByte bh
|
|
148 | + case t of
|
|
149 | + 0 -> BCOPtrName <$> getViaSerializableName bh
|
|
150 | + 1 -> BCOPtrPrimOp <$> get bh
|
|
151 | + 2 -> BCOPtrBCO <$> get bh
|
|
152 | + _ -> BCOPtrBreakArray <$> get bh
|
|
153 | + |
|
154 | + put_ bh ptr = case ptr of
|
|
155 | + BCOPtrName nm -> putByte bh 0 *> putViaSerializableName bh nm
|
|
156 | + BCOPtrPrimOp op -> putByte bh 1 *> put_ bh op
|
|
157 | + BCOPtrBCO bco -> putByte bh 2 *> put_ bh bco
|
|
158 | + BCOPtrBreakArray info_mod -> putByte bh 3 *> put_ bh info_mod
|
|
159 | + |
|
160 | +instance Binary BCONPtr where
|
|
161 | + get bh = do
|
|
162 | + t <- getByte bh
|
|
163 | + case t of
|
|
164 | + 0 -> BCONPtrWord . fromIntegral <$> (get bh :: IO Word64)
|
|
165 | + 1 -> BCONPtrLbl <$> get bh
|
|
166 | + 2 -> BCONPtrItbl <$> getViaSerializableName bh
|
|
167 | + 3 -> BCONPtrAddr <$> getViaSerializableName bh
|
|
168 | + 4 -> BCONPtrStr <$> get bh
|
|
169 | + 5 -> BCONPtrFS <$> get bh
|
|
170 | + 6 -> BCONPtrFFIInfo <$> get bh
|
|
171 | + _ -> BCONPtrCostCentre <$> get bh
|
|
172 | + |
|
173 | + put_ bh ptr = case ptr of
|
|
174 | + BCONPtrWord lit -> putByte bh 0 *> put_ bh (fromIntegral lit :: Word64)
|
|
175 | + BCONPtrLbl sym -> putByte bh 1 *> put_ bh sym
|
|
176 | + BCONPtrItbl nm -> putByte bh 2 *> putViaSerializableName bh nm
|
|
177 | + BCONPtrAddr nm -> putByte bh 3 *> putViaSerializableName bh nm
|
|
178 | + BCONPtrStr str -> putByte bh 4 *> put_ bh str
|
|
179 | + BCONPtrFS fs -> putByte bh 5 *> put_ bh fs
|
|
180 | + BCONPtrFFIInfo ffi -> putByte bh 6 *> put_ bh ffi
|
|
181 | + BCONPtrCostCentre ibi -> putByte bh 7 *> put_ bh ibi
|
|
182 | + |
|
183 | +instance Binary InternalBreakLoc where
|
|
184 | + get bh = InternalBreakLoc <$> get bh
|
|
185 | + |
|
186 | + put_ bh InternalBreakLoc {..} = put_ bh internalBreakLoc
|
|
187 | + |
|
188 | +instance Binary BreakpointId where
|
|
189 | + get bh = BreakpointId <$> get bh <*> get bh
|
|
190 | + |
|
191 | + put_ bh BreakpointId {..} = put_ bh bi_tick_mod *> put_ bh bi_tick_index
|
|
192 | + |
|
193 | +instance Binary InternalBreakpointId where
|
|
194 | + get bh = InternalBreakpointId <$> get bh <*> get bh
|
|
195 | + |
|
196 | + put_ bh InternalBreakpointId {..} =
|
|
197 | + put_ bh ibi_info_mod *> put_ bh ibi_info_index
|
|
198 | + |
|
199 | +instance Binary SptEntry where
|
|
200 | + get bh = do
|
|
201 | + nm <- getViaSerializableName bh
|
|
202 | + fp <- get bh
|
|
203 | + pure $ SptEntry (mkVanillaGlobal nm anyTy) fp
|
|
204 | + |
|
205 | + put_ bh (SptEntry nm fp) =
|
|
206 | + putViaSerializableName bh (getName nm) *> put_ bh fp
|
|
207 | + |
|
208 | +newtype SerializableName = SerializableName {unSerializableName :: Name}
|
|
209 | + |
|
210 | +getViaSerializableName :: ReadBinHandle -> IO Name
|
|
211 | +getViaSerializableName bh = case findUserDataReader Proxy bh of
|
|
212 | + BinaryReader f -> unSerializableName <$> f bh
|
|
213 | + |
|
214 | +putViaSerializableName :: WriteBinHandle -> Name -> IO ()
|
|
215 | +putViaSerializableName bh nm = case findUserDataWriter Proxy bh of
|
|
216 | + BinaryWriter f -> f bh $ SerializableName nm
|
|
217 | + |
|
218 | +addSerializableNameWriter :: WriteBinHandle -> IO WriteBinHandle
|
|
219 | +addSerializableNameWriter bh' =
|
|
220 | + evaluate
|
|
221 | + $ flip addWriterToUserData bh'
|
|
222 | + $ BinaryWriter
|
|
223 | + $ \bh (SerializableName nm) ->
|
|
224 | + if
|
|
225 | + | isExternalName nm -> do
|
|
226 | + putByte bh 0
|
|
227 | + put_ bh nm
|
|
228 | + | otherwise -> do
|
|
229 | + putByte bh 1
|
|
230 | + put_ bh
|
|
231 | + $ occNameFS (occName nm)
|
|
232 | + `appendFS` mkFastString
|
|
233 | + (show $ nameUnique nm)
|
|
234 | + |
|
235 | +addSerializableNameReader :: HscEnv -> ReadBinHandle -> IO ReadBinHandle
|
|
236 | +addSerializableNameReader HscEnv {..} bh' = do
|
|
237 | + nc <- evaluate hsc_NC
|
|
238 | + env_ref <- newIORef emptyOccEnv
|
|
239 | + evaluate $ flip addReaderToUserData bh' $ BinaryReader $ \bh -> do
|
|
240 | + t <- getByte bh
|
|
241 | + case t of
|
|
242 | + 0 -> do
|
|
243 | + nm <- get bh
|
|
244 | + evaluate $ SerializableName nm
|
|
245 | + _ -> do
|
|
246 | + occ <- mkVarOccFS <$> get bh
|
|
247 | + u <- takeUniqFromNameCache nc
|
|
248 | + nm' <- evaluate $ mkInternalName u occ noSrcSpan
|
|
249 | + fmap SerializableName $ atomicModifyIORef' env_ref $ \env ->
|
|
250 | + case lookupOccEnv env occ of
|
|
251 | + Just nm -> (env, nm)
|
|
252 | + _ -> (extendOccEnv env occ nm', nm') |
... | ... | @@ -35,6 +35,7 @@ import GHC.Data.FastString |
35 | 35 | import GHC.Data.FlatBag
|
36 | 36 | import GHC.Types.Name
|
37 | 37 | import GHC.Types.Name.Env
|
38 | +import GHC.Utils.Binary
|
|
38 | 39 | import GHC.Utils.Outputable
|
39 | 40 | import GHC.Builtin.PrimOps
|
40 | 41 | import GHC.Types.SptEntry
|
... | ... | @@ -296,3 +297,8 @@ instance Outputable UnlinkedBCO where |
296 | 297 | ppr (sizeFlatBag lits), text "lits",
|
297 | 298 | ppr (sizeFlatBag ptrs), text "ptrs" ]
|
298 | 299 | |
300 | +instance Binary FFIInfo where
|
|
301 | + get bh = FFIInfo <$> get bh <*> get bh
|
|
302 | + |
|
303 | + put_ bh FFIInfo {..} = put_ bh ffiInfoArgs *> put_ bh ffiInfoRet
|
|
304 | + |
... | ... | @@ -16,6 +16,8 @@ import GHC.Prelude |
16 | 16 | import Control.DeepSeq
|
17 | 17 | |
18 | 18 | import GHC.Data.SmallArray
|
19 | +import GHC.Utils.Binary
|
|
20 | +import GHC.Utils.Exception
|
|
19 | 21 | |
20 | 22 | -- | Store elements in a flattened representation.
|
21 | 23 | --
|
... | ... | @@ -66,6 +68,13 @@ instance NFData a => NFData (FlatBag a) where |
66 | 68 | rnf (TupleFlatBag a b) = rnf a `seq` rnf b
|
67 | 69 | rnf (FlatBag arr) = rnfSmallArray arr
|
68 | 70 | |
71 | +instance (Binary a) => Binary (FlatBag a) where
|
|
72 | + get bh = do
|
|
73 | + xs <- get bh
|
|
74 | + evaluate $ fromList (fromIntegral $ length xs) xs
|
|
75 | + |
|
76 | + put_ bh = put_ bh . elemsFlatBag
|
|
77 | + |
|
69 | 78 | -- | Create an empty 'FlatBag'.
|
70 | 79 | --
|
71 | 80 | -- The empty 'FlatBag' is shared over all instances.
|
... | ... | @@ -129,4 +138,3 @@ fromSmallArray s = case sizeofSmallArray s of |
129 | 138 | 1 -> UnitFlatBag (indexSmallArray s 0)
|
130 | 139 | 2 -> TupleFlatBag (indexSmallArray s 0) (indexSmallArray s 1)
|
131 | 140 | _ -> FlatBag s |
132 | - |
... | ... | @@ -305,6 +305,8 @@ import Data.Bifunctor |
305 | 305 | import qualified GHC.Unit.Home.Graph as HUG
|
306 | 306 | import GHC.Unit.Home.PackageTable
|
307 | 307 | |
308 | +import GHC.ByteCode.Serialize
|
|
309 | + |
|
308 | 310 | {- **********************************************************************
|
309 | 311 | %* *
|
310 | 312 | Initialisation
|
... | ... | @@ -2169,7 +2171,8 @@ generateByteCode :: HscEnv |
2169 | 2171 | -> ModLocation
|
2170 | 2172 | -> IO (CompiledByteCode, [FilePath])
|
2171 | 2173 | generateByteCode hsc_env cgguts mod_location = do
|
2172 | - (hasStub, comp_bc) <- hscInteractive hsc_env cgguts mod_location
|
|
2174 | + (hasStub, comp_bc') <- hscInteractive hsc_env cgguts mod_location
|
|
2175 | + comp_bc <- testBinByteCode hsc_env comp_bc'
|
|
2173 | 2176 | compile_for_interpreter hsc_env $ \ i_env -> do
|
2174 | 2177 | stub_o <- traverse (compileForeign i_env LangC) hasStub
|
2175 | 2178 | foreign_files_o <- traverse (uncurry (compileForeign i_env)) (cgi_foreign_files cgguts)
|
... | ... | @@ -125,6 +125,7 @@ import Language.Haskell.Syntax.ImpExp.IsBoot (IsBootInterface(..)) |
125 | 125 | import {-# SOURCE #-} GHC.Types.Name (Name)
|
126 | 126 | import GHC.Data.FastString
|
127 | 127 | import GHC.Data.TrieMap
|
128 | +import GHC.Utils.Exception
|
|
128 | 129 | import GHC.Utils.Panic.Plain
|
129 | 130 | import GHC.Types.Unique.FM
|
130 | 131 | import GHC.Data.FastMutInt
|
... | ... | @@ -133,6 +134,7 @@ import GHC.Types.SrcLoc |
133 | 134 | import GHC.Types.Unique
|
134 | 135 | import qualified GHC.Data.Strict as Strict
|
135 | 136 | import GHC.Utils.Outputable( JoinPointHood(..) )
|
137 | +import GHCi.FFI
|
|
136 | 138 | |
137 | 139 | import Control.DeepSeq
|
138 | 140 | import Control.Monad ( when, (<$!>), unless, forM_, void )
|
... | ... | @@ -929,6 +931,12 @@ instance Binary Char where |
929 | 931 | put_ bh c = put_ bh (fromIntegral (ord c) :: Word32)
|
930 | 932 | get bh = do x <- get bh; return $! (chr (fromIntegral (x :: Word32)))
|
931 | 933 | |
934 | +instance Binary Word where
|
|
935 | + put_ bh i = put_ bh (fromIntegral i :: Word64)
|
|
936 | + get bh = do
|
|
937 | + x <- get bh
|
|
938 | + return $! (fromIntegral (x :: Word64))
|
|
939 | + |
|
932 | 940 | instance Binary Int where
|
933 | 941 | put_ bh i = put_ bh (fromIntegral i :: Int64)
|
934 | 942 | get bh = do
|
... | ... | @@ -2163,3 +2171,35 @@ instance Binary a => Binary (FingerprintWithValue a) where |
2163 | 2171 | instance NFData a => NFData (FingerprintWithValue a) where
|
2164 | 2172 | rnf (FingerprintWithValue fp mflags)
|
2165 | 2173 | = rnf fp `seq` rnf mflags `seq` ()
|
2174 | + |
|
2175 | +instance Binary FFIType where
|
|
2176 | + get bh = do
|
|
2177 | + t <- getByte bh
|
|
2178 | + evaluate $ case t of
|
|
2179 | + 0 -> FFIVoid
|
|
2180 | + 1 -> FFIPointer
|
|
2181 | + 2 -> FFIFloat
|
|
2182 | + 3 -> FFIDouble
|
|
2183 | + 4 -> FFISInt8
|
|
2184 | + 5 -> FFISInt16
|
|
2185 | + 6 -> FFISInt32
|
|
2186 | + 7 -> FFISInt64
|
|
2187 | + 8 -> FFIUInt8
|
|
2188 | + 9 -> FFIUInt16
|
|
2189 | + 10 -> FFIUInt32
|
|
2190 | + 11 -> FFIUInt64
|
|
2191 | + _ -> panic "Binary FFIType: invalid byte"
|
|
2192 | + |
|
2193 | + put_ bh t = putByte bh $ case t of
|
|
2194 | + FFIVoid -> 0
|
|
2195 | + FFIPointer -> 1
|
|
2196 | + FFIFloat -> 2
|
|
2197 | + FFIDouble -> 3
|
|
2198 | + FFISInt8 -> 4
|
|
2199 | + FFISInt16 -> 5
|
|
2200 | + FFISInt32 -> 6
|
|
2201 | + FFISInt64 -> 7
|
|
2202 | + FFIUInt8 -> 8
|
|
2203 | + FFIUInt16 -> 9
|
|
2204 | + FFIUInt32 -> 10
|
|
2205 | + FFIUInt64 -> 11 |
... | ... | @@ -228,6 +228,7 @@ Library |
228 | 228 | GHC.ByteCode.InfoTable
|
229 | 229 | GHC.ByteCode.Instr
|
230 | 230 | GHC.ByteCode.Linker
|
231 | + GHC.ByteCode.Serialize
|
|
231 | 232 | GHC.ByteCode.Types
|
232 | 233 | GHC.Cmm
|
233 | 234 | GHC.Cmm.BlockId
|