Cheng Shao pushed to branch wip/wasm-iserv-fast-binary at Glasgow Haskell Compiler / GHC Commits: 1520fd8f by Cheng Shao at 2026-03-12T21:56:35+00:00 ghci: serialize BCOByteArray buffer directly when possible This patch changes the `Binary` instances of `BCOByteArray` to directly serialize the underlying buffer when possible, while also taking into account the issue of host-dependent `Word` width. See added comments and amended `Note [BCOByteArray serialization]` for detailed explanation. Closes #27020. - - - - - 2 changed files: - libraries/ghci/GHCi/ResolvedBCO.hs - testsuite/tests/ghci/should_run/BinaryArray.hs Changes: ===================================== libraries/ghci/GHCi/ResolvedBCO.hs ===================================== @@ -9,12 +9,24 @@ module GHCi.ResolvedBCO , mkBCOByteArray ) where +#include "MachDeps.h" + import Prelude -- See note [Why do we import Prelude here?] import GHC.Data.SizedSeq import GHCi.RemoteTypes import GHCi.BreakArray -import Data.Binary +#if SIZEOF_HSWORD == 4 +import Control.Monad +import Data.Array.Base (foldrArray, listArray) +import Data.ByteString.Builder.Extra +#endif + +import Data.Binary (Binary(..)) +import Data.Binary.Get +import Data.Binary.Put +import Data.ByteString.Short (ShortByteString(..)) +import Data.Word import GHC.Generics import Foreign.Storable @@ -58,15 +70,23 @@ data BCOByteArray a getBCOByteArray :: !ByteArray# } +#if SIZEOF_HSWORD == 4 fromBCOByteArray :: forall a . Storable a => BCOByteArray a -> UArray Int a fromBCOByteArray (BCOByteArray ba#) = UArray 0 (n - 1) n ba# where len# = sizeofByteArray# ba# n = (I# len#) `div` sizeOf (undefined :: a) +#endif mkBCOByteArray :: UArray Int a -> BCOByteArray a mkBCOByteArray (UArray _ _ _ arr) = BCOByteArray arr +putFixedWidthBCOByteArray :: BCOByteArray a -> Put +putFixedWidthBCOByteArray (BCOByteArray ba#) = put $ SBS ba# + +getFixedWidthBCOByteArray :: Get (BCOByteArray a) +getFixedWidthBCOByteArray = (\(SBS ba#) -> BCOByteArray ba#) <$> get + instance Show (BCOByteArray Word16) where showsPrec _ _ = showString "BCOByteArray Word16" @@ -89,10 +109,40 @@ instance Binary ResolvedBCO where get = ResolvedBCO <$> get <*> get <*> get <*> get <*> get <*> get -- See Note [BCOByteArray serialization] -instance (Binary a, Storable a, IArray UArray a) => Binary (BCOByteArray a) where - put = put . fromBCOByteArray - get = mkBCOByteArray <$> get - +instance Binary (BCOByteArray Word16) where + put = putFixedWidthBCOByteArray + get = getFixedWidthBCOByteArray + +-- Word size depends on host, which is tricky when host/target word +-- sizes differ. We always serialize `BCOByteArray Word` as +-- `BCOByteArray Word64`. +instance Binary (BCOByteArray Word) where +#if SIZEOF_HSWORD == 8 + -- 64-bit fast path. `BCOByteArray` is directly serialized via the + -- `Binary ShortByteString` instance, which serializes the `Int` + -- bytelength first (via `Int64` transparently), then copies the + -- buffer. + put = putFixedWidthBCOByteArray + + get = getFixedWidthBCOByteArray +#else + -- 32-bit slow path. Pretend it's a `BCOByteArray Word64` and handle + -- the bytelength & buffer elements directly. + put ba32@(BCOByteArray ba32#) = + put len64 *> + putBuilder + (foldrArray (\w32 acc -> word64Host (fromIntegral w32) <> acc) mempty arr32) + where + len32# = sizeofByteArray# ba32# + len64 = I# len32# * 2 + arr32 = fromBCOByteArray ba32 + + get = do + len64 <- get + let len = len64 `div` 8 + w32s <- replicateM len (fromIntegral <$> getWord64host) + pure $ mkBCOByteArray $ listArray (0, len - 1) w32s +#endif data ResolvedBCOPtr = ResolvedBCORef {-# UNPACK #-} !Int @@ -124,12 +174,17 @@ instance Binary ResolvedBCOPtr -- The root issue here is the usage of platform sized integer types in -- BCO (and any messages we pass between ghc/iserv really), we should -- do what we already do for RemotePtr: always use Word64 instead of --- Word. But that takes much more work, and there's an easier --- mitigation: keep BCOByteArray as ByteArray#, but serialize it as --- UArray, given the Binary instances are independent of platform word --- size and endianness, so each Word/Int is always serialized as --- 64-bit big-endian Word64/Int64, and the entire UArray is serialized --- as a list (length+elements). +-- Word. +-- +-- When we serialize `BCOByteArray Word16`, element is fixed width on +-- 32/64-bit host, so we can directly serialize the buffer per se. For +-- `BCOByteArray Word`, we must always serialize it as `BCOByteArray +-- Word64`, and hence it has fast-path/slow-path decided at +-- compile-time, see comments of `instance Binary (BCOByteArray Word)` +-- for explanation. These are the only two `Binary` instances we ever +-- use, so to avoid unnecessary complexity, we're fine with flexible +-- instances here, instead of generalizing to any element type that +-- may be fixed-width or not. -- -- Since we erase the metadata in UArray, we need to find a way to -- calculate the item count by dividing the ByteArray# length with ===================================== testsuite/tests/ghci/should_run/BinaryArray.hs ===================================== @@ -24,7 +24,7 @@ roundtripTest arr = Left _ -> putStrLn "deserialization failed" -- See Note [BCOByteArray serialization] -roundtripTestByteArray :: forall a . (IArray UArray a, MArray IOUArray a IO, Eq a, Binary a, Storable a) +roundtripTestByteArray :: forall a . (IArray UArray a, Eq a, Binary (BCOByteArray a)) => UArray Int a -> IO () roundtripTestByteArray (UArray _ _ _ arr#) = let val = BCOByteArray arr# :: BCOByteArray a @@ -44,10 +44,5 @@ main = do roundtripTest (AU.listArray (1,500) [1..] :: UArray Int Word32) roundtripTest (AU.listArray (1,500) [1..] :: UArray Int Word64) roundtripTest (AU.listArray (1,500) ['a'..] :: UArray Int Char) - roundtripTestByteArray (AU.listArray (1,500) [1..] :: UArray Int Int) roundtripTestByteArray (AU.listArray (1,500) [1..] :: UArray Int Word) - roundtripTestByteArray (AU.listArray (1,500) [1..] :: UArray Int Word8) roundtripTestByteArray (AU.listArray (1,500) [1..] :: UArray Int Word16) - roundtripTestByteArray (AU.listArray (1,500) [1..] :: UArray Int Word32) - roundtripTestByteArray (AU.listArray (1,500) [1..] :: UArray Int Word64) - roundtripTestByteArray (AU.listArray (1,500) ['a'..] :: UArray Int Char) View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/1520fd8f67a39f96d36af8ae16be41ee... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/1520fd8f67a39f96d36af8ae16be41ee... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Cheng Shao (@TerrorJack)