[Git][ghc/ghc][wip/fendor/linkable-usage] 2 commits: determinism: Use a deterministic renaming when writing bytecode files
Hannes Siebenhandl pushed to branch wip/fendor/linkable-usage at Glasgow Haskell Compiler / GHC Commits: dd08f8bc by Matthew Pickering at 2026-02-19T12:30:00+01:00 determinism: Use a deterministic renaming when writing bytecode files Now when writing the bytecode file, a counter and substitution are used to provide deterministic keys to local variables (rather than relying on uniques). This change ensures that `.gbc` are produced deterministically. Fixes #26499 - - - - - ef364443 by fendor at 2026-02-19T14:26:28+01:00 WIP: fix fingerprinting - - - - - 1 changed file: - compiler/GHC/ByteCode/Serialize.hs Changes: ===================================== compiler/GHC/ByteCode/Serialize.hs ===================================== @@ -47,9 +47,11 @@ import qualified Data.ByteString as BS import Data.Traversable import GHC.Utils.Logger import GHC.Linker.Types -import System.IO.Unsafe (unsafeInterleaveIO) +import System.IO.Unsafe (unsafeInterleaveIO, unsafePerformIO) import GHC.Utils.Outputable -import GHC.Utils.Fingerprint (Fingerprint, fingerprintByteString) +import GHC.Utils.Fingerprint (Fingerprint) +import GHC.Types.Name.Env +import GHC.Iface.Recomp.Binary (putNameLiterally, fingerprintBinMem) {- Note [Overview of persistent bytecode] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -289,12 +291,8 @@ writeBinByteCode f cbc = do fingerprintModuleByteCodeContents :: Module -> CompiledByteCode -> [FilePath] -> IO Fingerprint fingerprintModuleByteCodeContents modl cbc foreign_files = do - bh' <- openBinMem (1024 * 1024) - bh <- addBinNameWriter bh' foreign_contents <- readObjectFiles foreign_files - putWithUserData QuietBinIFace NormalCompression bh - (modl, cbc, foreign_contents) - withBinBuffer bh (pure . 
fingerprintByteString) + pure $ computeFingerprint2 putNameLiterally (modl, cbc, foreign_contents) instance Binary CompiledByteCode where get bh = do @@ -397,8 +395,30 @@ putViaBinName :: WriteBinHandle -> Name -> IO () putViaBinName bh nm = case findUserDataWriter Proxy bh of BinaryWriter f -> f bh $ BinName nm +data BytecodeNameEnv = ByteCodeNameEnv { _bytecode_next_id :: !Word64 + , _bytecode_name_subst :: NameEnv Word64 + } + + +computeFingerprint2 :: (Binary a) + => (WriteBinHandle -> Name -> IO ()) + -> a + -> Fingerprint +computeFingerprint2 put_nonbinding_name a = unsafePerformIO $ do + bh <- fmap set_user_data $ openBinMem (3*1024) -- just less than a block + bh' <- addBinNameWriter bh + put_ bh' a + fingerprintBinMem bh' + where + set_user_data bh = setWriterUserData bh $ mkWriterUserData + [ mkSomeBinaryWriter $ mkWriter put_nonbinding_name + , mkSomeBinaryWriter $ simpleBindingNameWriter $ mkWriter putNameLiterally + , mkSomeBinaryWriter $ mkWriter putFS + ] + addBinNameWriter :: WriteBinHandle -> IO WriteBinHandle -addBinNameWriter bh' = +addBinNameWriter bh' = do + env_ref <- newIORef (ByteCodeNameEnv 0 emptyNameEnv) evaluate $ flip addWriterToUserData bh' $ BinaryWriter @@ -409,10 +429,17 @@ addBinNameWriter bh' = put_ bh nm | otherwise -> do putByte bh 1 - put_ bh - $ occNameFS (occName nm) - `appendFS` mkFastString - (show $ nameUnique nm) + key <- getBinNameKey env_ref nm + -- Delimit the OccName from the deterministic counter to keep the + -- encoding injective, avoiding collisions like "foo1" vs "foo#1". + put_ bh (occNameFS (occName nm) `appendFS` mkFastString ('#' : show key)) + where + -- Find a deterministic key for local names. 
This reuses the key already assigned to a name, or hands out the next counter value. + getBinNameKey ref name = do + atomicModifyIORef ref (\b@(ByteCodeNameEnv next subst) -> + case lookupNameEnv subst name of + Just idx -> (b, idx) + Nothing -> (ByteCodeNameEnv (next + 1) (extendNameEnv subst name next), next)) addBinNameReader :: HscEnv -> ReadBinHandle -> IO ReadBinHandle addBinNameReader HscEnv {..} bh' = do @@ -438,9 +465,6 @@ addBinNameReader HscEnv {..} bh' = do -- Note [Serializing Names in bytecode] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- NOTE: This approach means that bytecode objects are not deterministic. --- We need to revisit this in order to make the output deterministic. --- -- The bytecode related types contain various Names which we need to -- serialize. Unfortunately, we can't directly use the Binary instance -- of Name: it is only meant to be used for serializing external Names @@ -448,9 +472,8 @@ addBinNameReader HscEnv {..} bh' = do -- -- We also need to maintain the invariant that: any pair of internal -- Names with equal/different uniques must also be deserialized to --- have the same equality. So normally uniques aren't supposed to be --- serialized, but for this invariant to work, we do append uniques to --- OccNames of internal Names, so that they can be uniquely identified --- by OccName alone. When deserializing, we check a global cached --- mapping from OccName to Unique, and create the real Name with the --- right Unique if it's already deserialized at least once. +-- have the same equality. Therefore when we write the names to the interface, we +-- use an incrementing counter to give each local name its own unique number. A substitution +-- is maintained to give each occurrence of the Name the same unique key. When the interface +-- is read, a reverse mapping is used from these unique keys to a Name. +-- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/fbe69319e44e9dbdaef8a68c50de1f2... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/fbe69319e44e9dbdaef8a68c50de1f2... 
You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Hannes Siebenhandl (@fendor)