Zubin pushed to branch wip/inline-fs at Glasgow Haskell Compiler / GHC

Commits:

1 changed file:

Changes:

  • compiler/GHC/Data/FastString.hs
    ... ... @@ -130,8 +130,10 @@ import GHC.Data.FastMutInt
    130 130
     import Control.Concurrent.MVar
    
    131 131
     import Control.DeepSeq
    
    132 132
     import Control.Monad
    
    133
    +import Data.Bits
    
    133 134
     import Data.ByteString (ByteString)
    
    134 135
     import Data.ByteString.Short (ShortByteString)
    
    136
    +import Data.Word
    
    135 137
     import qualified Data.ByteString          as BS
    
    136 138
     import qualified Data.ByteString.Char8    as BSC
    
    137 139
     import qualified Data.ByteString.Unsafe   as BS
    
    ... ... @@ -171,9 +173,6 @@ fastZStringToByteString (FastZString bs) = bs
    171 173
     unsafeMkByteString :: String -> ByteString
    
    172 174
     unsafeMkByteString = BSC.pack
    
    173 175
     
    
    174
    -hashFastString :: FastString -> Int
    
    175
    -hashFastString fs = hashStr $ fs_sbs fs
    
    176
    -
    
    177 176
     -- -----------------------------------------------------------------------------
    
    178 177
     
    
    179 178
     newtype FastZString = FastZString ByteString
    
    ... ... @@ -206,36 +205,102 @@ mkFastZStringString str = FastZString (BSC.pack str)
    206 205
     All 'FastString's are stored in a global hashtable to support fast O(1)
    
    207 206
     comparison.
    
    208 207
     
    
    209
    -It is also associated with a lazy reference to the Z-encoding
    
    210
    -of this string which is used by the compiler internally.
    
    211 208
     -}
    
    209
    +-- | Payload for table-referenced FastStrings
    
    212 210
     data FastStringPayload = FastString {
    
    213
    -      uniq    :: {-# UNPACK #-} !Int, -- unique id
    
    214
    -      n_chars :: {-# UNPACK #-} !Int, -- number of chars
    
    215
    -      fs_sbs  :: {-# UNPACK #-} !ShortByteString
    
    211
    +      payload_sbs :: {-# UNPACK #-} !ShortByteString
    
    216 212
       }
    
    217 213
     
    
    214
    +{- Note [FastString Encoding]
    
    215
    +   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    216
    +
    
    217
    +   A FastString is uniformly represented as a single Word64 which serves as both
    
    218
    +   the unique identifier and the string representation (for short strings).
    
    219
    +
    
    220
    +   Inline strings (strings ≤ 7 bytes):
    
    221
    +   - Top 5 bits are always 11111 (0x1F at bit position 59-63)
    
    222
    +   - Next 3 bits encode the byte length (0-7)
    
    223
    +   - Remaining 56 bits contain the actual string bytes
    
    224
    +   - Top byte ranges: 0xF8-0xFF (248-255)
    
    225
    +   - "a" (0x61): 0xF900000000000061
    
    226
    +
    
    227
    +   Table-referenced strings (strings > 7 bytes):
    
    228
    +   - Top 16 bits encode the UTF-8 character count (2-63487, 0x0002-0xF7FF)
    
    229
    +   - Lower 48 bits encode an index into the FastStringPayload table
    
    230
    +   - Top byte ranges: 0x00-0xF7 (0-247)
    
    231
    +   - Top byte < 0xF8 to prevent ambiguity with inline encoding
    
    232
    +   - Maximum character count: 63487 (0xF7FF)
    
    233
    +   - Example: 10-char string at index 42: 0x000A00000000002A
    
    234
    +
    
    235
    +   Top byte value determines encoding type:
    
    236
    +   - 0xF8-0xFF: Inline string
    
    237
    +   - 0x00-0xF7: Table reference
    
    238
    +-}
    
    218 239
     data FastString = FastStringId {-# UNPACK #-} !Word64
    
    219 240
     
    
    241
    +-- | Encode a ShortByteString (≤7 bytes) directly into a Word64
    
    242
    +-- Top 5 bits = 0x1F (11111), next 3 bits = length, remaining 56 bits = data
    
    243
    +encodeInline :: ShortByteString -> Word64
    
    244
    +encodeInline sbs =
    
    245
    +  let !len = SBS.length sbs
    
    246
    +      !lenBits = (0xf8 .|. fromIntegral len) `unsafeShiftL` 56
    
    247
    +      w8 :: Int -> Word64
    
    248
    +      w8 shift = fromIntegral (SBS.index sbs (shift `unsafeShiftR` 3)) `unsafeShiftL` shift
    
    249
    +  in case len of
    
    250
    +       0 -> lenBits
    
    251
    +       1 -> lenBits .|. w8 0
    
    252
    +       2 -> lenBits .|. w8 8 .|. w8 0
    
    253
    +       3 -> lenBits .|. w8 16 .|. w8 8 .|. w8 0
    
    254
    +       4 -> lenBits .|. w8 24 .|. w8 16 .|. w8 8 .|. w8 0
    
    255
    +       5 -> lenBits .|. w8 32 .|. w8 24 .|. w8 16 .|. w8 8 .|. w8 0
    
    256
    +       6 -> lenBits .|. w8 40 .|. w8 32 .|. w8 24 .|. w8 16 .|. w8 8 .|. w8 0
    
    257
    +       7 -> lenBits .|. w8 48 .|. w8 40 .|. w8 32 .|. w8 24 .|. w8 16 .|. w8 8 .|. w8 0
    
    258
    +       _ -> panic "encodeInline: string too long"
    
    259
    +
    
    260
    +-- | Encode a table reference into a Word64
    
    261
    +-- Top 16 bits = UTF-8 character count, lower 48 bits = table index
    
    262
    +encodeTableRef :: Int -> Int -> Word64
    
    263
    +encodeTableRef n_chars idx
    
    264
    +  | n_chars < 2 = panic "encodeTableRef: character count too small (< 2)"
    
    265
    +  | n_chars > 0xf7ff = panic "encodeTableRef: character count too large (> 63487)"
    
    266
    +  | otherwise =
    
    267
    +      let charBits = fromIntegral n_chars `Data.Bits.shiftL` 48
    
    268
    +          idxBits = fromIntegral idx .&. 0x0000ffffffffffff -- mask to 48 bits
    
    269
    +      in charBits .|. idxBits
    
    270
    +
    
    220 271
     fs_sbs :: FastString -> ShortByteString
    
    221 272
     fs_sbs (FastStringId w)
    
    222 273
       | w .&. 0xf8000000_00000000 == 0xf8000000_00000000
    
    223 274
       = let bs_len = (w .&. 0x07000000_00000000) `unsafeShiftR` 56
    
    224
    -        w8_64 x = wordToWord8# (word64ToWord# x)
    
    225
    -        go 0 _ = []
    
    226
    -        go n cur = wordToWord8# (word64ToWord# cur) : go (n-1) (cur `uncheckedShiftRL64#` 8#)
    
    275
    +        w8 :: Int -> Word8
    
    276
    +        w8 shift = fromIntegral ((w `unsafeShiftR` shift) .&. 0xFF)
    
    227 277
         in case bs_len of
    
    228 278
              0 -> SBS.empty
    
    229
    -         1 -> SBS.singleton $ wordToWord8# (word64ToWord# w)
    
    230
    -         2 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 8#) , w8_64 w]
    
    231
    -         3 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]
    
    232
    -         4 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 24#), w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]
    
    233
    -         5 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 32#), w8_64 (w `uncheckedShiftRL64#` 24#), w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]
    
    234
    -         6 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 40#), w8_64 (w `uncheckedShiftRL64#` 32#), w8_64 (w `uncheckedShiftRL64#` 24#), w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]
    
    235
    -         7 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 48#), w8_64 (w `uncheckedShiftRL64#` 40#), w8_64 (w `uncheckedShiftRL64#` 32#), w8_64 (w `uncheckedShiftRL64#` 24#), w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]
    
    236
    -  | otherwise
    
    279
    +         1 -> SBS.singleton $ w8 0
    
    280
    +         2 -> SBS.pack [ w8 8, w8 0 ]
    
    281
    +         3 -> SBS.pack [ w8 16, w8 8, w8 0 ]
    
    282
    +         4 -> SBS.pack [ w8 24, w8 16, w8 8, w8 0 ]
    
    283
    +         5 -> SBS.pack [ w8 32, w8 24, w8 16, w8 8, w8 0 ]
    
    284
    +         6 -> SBS.pack [ w8 40, w8 32, w8 24, w8 16, w8 8, w8 0 ]
    
    285
    +         7 -> SBS.pack [ w8 48, w8 40, w8 32, w8 24, w8 16, w8 8, w8 0 ]
    
    286
    +         _ -> panic "fs_sbs: invalid inline string length"
    
    287
    +  | otherwise =
    
    288
    +    -- Table reference: need to extract index from lower 48 bits and look up
    
    289
    +    let idx = fromIntegral (w .&. 0x0000FFFFFFFFFFFF) :: Int
    
    290
    +        payload = lookupPayloadByIndex idx
    
    291
    +    in payload_sbs payload
    
    292
    +
    
    237 293
     n_chars :: FastString -> Int
    
    294
    +n_chars (FastStringId w)
    
    295
    +  | w .&. 0xf8000000_00000000 == 0xf8000000_00000000 =
    
    296
    +    -- Inline string: count UTF-8 characters
    
    297
    +    utf8CountCharsShortByteString (fs_sbs (FastStringId w))
    
    298
    +  | otherwise =
    
    299
    +    -- Table reference: extract character count from top 16 bits
    
    300
    +    fromIntegral (w `unsafeShiftR` 48)
    
    301
    +
    
    238 302
     uniq :: FastString -> Int
    
    303
    +uniq (FastStringId w) = fromIntegral w
    
    239 304
     
    
    240 305
     instance Eq FastString where
    
    241 306
       f1 == f2  =  uniq f1 == uniq f2
    
    ... ... @@ -326,23 +391,120 @@ and updates to multiple buckets with low synchronization overhead.
    326 391
     
    
    327 392
     See Note [Updating the FastString table] on how it's updated.
    
    328 393
     -}
    
    394
    +-- | A segment of the payload array
    
    395
    +data PayloadSegment = PayloadSegment
    
    396
    +  (MutableArray# RealWorld FastStringPayload)
    
    397
    +
    
    398
    +-- | Boxed wrapper for Array# to allow storage in IORef
    
    399
    +data SegmentArray = SegmentArray (Array# (IORef PayloadSegment))
    
    400
    +
    
    401
    +-- | Append-only array of FastStringPayload for O(1) lookup by index
    
    402
    +data PayloadArray = PayloadArray
    
    403
    +  {-# UNPACK #-} !FastMutInt  -- ^ Next free index
    
    404
    +  {-# UNPACK #-} !(IORef SegmentArray)  -- ^ Growable segmented array
    
    405
    +  {-# UNPACK #-} !(MVar ())  -- ^ Lock for growing the array
    
    406
    +
    
    407
    +-- Number of payloads per segment
    
    408
    +payloadSegmentSize :: Int
    
    409
    +payloadSegmentSize = 1024
    
    410
    +
    
    411
    +payloadSegmentBits :: Int
    
    412
    +payloadSegmentBits = 10  -- 2^10 = 1024
    
    413
    +
    
    414
    +initialPayloadSegments :: Int
    
    415
    +initialPayloadSegments = 256
    
    416
    +
    
    417
    +-- | Ensure a payload segment exists, growing the array if necessary
    
    418
    +-- Returns the segment IORef for the given segment index
    
    419
    +ensurePayloadSegment :: Int -> IO (IORef PayloadSegment)
    
    420
    +ensurePayloadSegment segmentIdx = do
    
    421
    +  let !(FastStringTable _ (PayloadArray _ segmentsRef growLock)) = stringTable
    
    422
    +      !(I# segmentIdx#) = segmentIdx
    
    423
    +  SegmentArray segments# <- readIORef segmentsRef
    
    424
    +  let currentSize# = sizeofArray# segments#
    
    425
    +
    
    426
    +  if isTrue# (segmentIdx# <# currentSize#)
    
    427
    +    then do
    
    428
    +      -- Segment exists, return it
    
    429
    +      let (# segmentRef #) = indexArray# segments# segmentIdx#
    
    430
    +      return segmentRef
    
    431
    +    else do
    
    432
    +      -- Need to grow the array
    
    433
    +      withMVar growLock $ \_ -> do
    
    434
    +        -- Re-check after acquiring lock (another thread might have grown it)
    
    435
    +        SegmentArray segments'# <- readIORef segmentsRef
    
    436
    +        let currentSize'# = sizeofArray# segments'#
    
    437
    +        if isTrue# (segmentIdx# <# currentSize'#)
    
    438
    +          then do
    
    439
    +            let (# segmentRef #) = indexArray# segments'# segmentIdx#
    
    440
    +            return segmentRef
    
    441
    +          else do
    
    442
    +            -- Double the size
    
    443
    +            let newSize# = currentSize'# *# 2#
    
    444
    +                !(I# payloadSegmentSize#) = payloadSegmentSize
    
    445
    +            SegmentArray newSegments# <- IO $ \s1# ->
    
    446
    +              case newArray# newSize# (panic "unallocated_segment") s1# of
    
    447
    +                (# s2#, arr# #) ->
    
    448
    +                  case copyArray# segments'# 0# arr# 0# currentSize'# s2# of
    
    449
    +                    s3# ->
    
    450
    +                      -- Allocate new segments
    
    451
    +                      let allocLoop i# s#
    
    452
    +                            | isTrue# (i# ==# newSize#) = s#
    
    453
    +                            | otherwise =
    
    454
    +                                case newArray# payloadSegmentSize# (panic "uninitialized payload") s# of
    
    455
    +                                  (# s'#, seg# #) -> case newIORef (PayloadSegment seg#) `unIO` s'# of
    
    456
    +                                    (# s''#, segRef #) -> case writeArray# arr# i# segRef s''# of
    
    457
    +                                      s'''# -> allocLoop (i# +# 1#) s'''#
    
    458
    +                      in case allocLoop currentSize'# s3# of
    
    459
    +                           s4# -> case unsafeFreezeArray# arr# s4# of
    
    460
    +                             (# s5#, frozen# #) -> (# s5#, SegmentArray frozen# #)
    
    461
    +            writeIORef segmentsRef (SegmentArray newSegments#)
    
    462
    +            let (# segmentRef #) = indexArray# newSegments# segmentIdx#
    
    463
    +            return segmentRef
    
    464
    +
    
    465
    +-- | Allocate a payload in the global payload array and return its index
    
    466
    +allocatePayload :: FastStringPayload -> IO Int
    
    467
    +allocatePayload payload = do
    
    468
    +  let !(FastStringTable _ (PayloadArray nextIdx _ _)) = stringTable
    
    469
    +  idx <- atomicFetchAddFastMut nextIdx 1
    
    470
    +  let !(I# idx#) = idx
    
    471
    +      !(I# payloadSegmentBits#) = payloadSegmentBits
    
    472
    +      !(I# payloadSegmentSize#) = payloadSegmentSize
    
    473
    +      segmentIdx = I# (idx# `uncheckedIShiftRL#` payloadSegmentBits#)
    
    474
    +      offsetInSegment# = idx# `andI#` (payloadSegmentSize# -# 1#)
    
    475
    +  segmentRef <- ensurePayloadSegment segmentIdx
    
    476
    +  PayloadSegment segment# <- readIORef segmentRef
    
    477
    +  IO $ \s# ->
    
    478
    +    case writeArray# segment# offsetInSegment# payload s# of
    
    479
    +      s'# -> (# s'#, () #)
    
    480
    +  return idx
    
    481
    +
    
    482
    +-- | Look up a FastStringPayload by its index
    
    483
    +-- The segment is guaranteed to exist because indices only come from allocatePayload
    
    484
    +lookupPayloadByIndex :: Int -> FastStringPayload
    
    485
    +lookupPayloadByIndex idx = inlinePerformIO $ do
    
    486
    +  let !(FastStringTable _ (PayloadArray _ segmentsRef _)) = stringTable
    
    487
    +      !(I# idx#) = idx
    
    488
    +      !(I# payloadSegmentBits#) = payloadSegmentBits
    
    489
    +      !(I# payloadSegmentSize#) = payloadSegmentSize
    
    490
    +      segmentIdx# = idx# `uncheckedIShiftRL#` payloadSegmentBits#
    
    491
    +      offsetInSegment# = idx# `andI#` (payloadSegmentSize# -# 1#)
    
    492
    +  SegmentArray segments# <- readIORef segmentsRef
    
    493
    +  let (# segmentRef #) = indexArray# segments# segmentIdx#
    
    494
    +  PayloadSegment segment# <- readIORef segmentRef
    
    495
    +  IO $ readArray# segment# offsetInSegment#
    
    496
    +
    
    329 497
     data FastStringTable = FastStringTable
    
    330
    -  {-# UNPACK #-} !FastMutInt
    
    331
    -  -- ^ The unique ID counter shared with all buckets
    
    332
    -  --
    
    333
    -  -- We unpack the 'FastMutInt' counter as it is always consumed strictly.
    
    334
    -  -- We mark this as 'NOUNPACK' as this 'FastMutInt' is retained by a thunk
    
    335
    -  -- in 'mkFastStringWith' and needs to be boxed any way.
    
    336
    -  -- If this is unpacked, then we box this single 'FastMutInt' once for each
    
    337
    -  -- allocated FastString.
    
    338 498
       (Array# (IORef FastStringTableSegment)) -- ^  concurrent segments
    
    499
    +  !PayloadArray  -- ^ Global payload array
    
    339 500
     
    
    340 501
     data TableSegment a = TableSegment
    
    341 502
       {-# UNPACK #-} !(MVar ())  -- the lock for write in each segment
    
    342 503
       {-# UNPACK #-} !FastMutInt -- the number of elements
    
    343 504
       (MutableArray# RealWorld [a]) -- buckets in this segment
    
    344 505
     
    
    345
    -type FastStringTableSegment = TableSegment FastString
    
    506
    +-- Hash table now stores indices into the payload array
    
    507
    +type FastStringTableSegment = TableSegment Int
    
    346 508
     
    
    347 509
     data FastZStringTable = FastZStringTable
    
    348 510
       {-# UNPACK #-} !FastMutInt
    
    ... ... @@ -415,6 +577,9 @@ stringTable :: FastStringTable
    415 577
     stringTable = unsafePerformIO $ do
    
    416 578
       let !(I# numSegments#) = numSegments
    
    417 579
           !(I# initialNumBuckets#) = initialNumBuckets
    
    580
    +      !(I# payloadSegmentSize#) = payloadSegmentSize
    
    581
    +      !(I# numPayloadSegments#) = initialPayloadSegments
    
    582
    +
    
    418 583
           loop a# i# s1#
    
    419 584
             | isTrue# (i# ==# numSegments#) = s1#
    
    420 585
             | otherwise = case newMVar () `unIO` s1# of
    
    ... ... @@ -424,13 +589,33 @@ stringTable = unsafePerformIO $ do
    424 589
                         (TableSegment lock counter buckets#) `unIO` s4# of
    
    425 590
                       (# s5#, segment #) -> case writeArray# a# i# segment s5# of
    
    426 591
                         s6# -> loop a# (i# +# 1#) s6#
    
    427
    -  uid <- newFastMutInt 603979776 -- ord '$' * 0x01000000
    
    592
    +
    
    593
    +      -- Initialize payload array segments
    
    594
    +      payloadLoop a# i# s1#
    
    595
    +        | isTrue# (i# ==# numPayloadSegments#) = s1#
    
    596
    +        | otherwise = case newArray# payloadSegmentSize#
    
    597
    +                           (panic "uninitialized payload") s1# of
    
    598
    +            (# s2#, segment# #) -> case newIORef (PayloadSegment segment#) `unIO` s2# of
    
    599
    +              (# s3#, segmentRef #) -> case writeArray# a# i# segmentRef s3# of
    
    600
    +                s4# -> payloadLoop a# (i# +# 1#) s4#
    
    601
    +
    
    602
    +  nextPayloadIdx <- newFastMutInt 0
    
    603
    +  payloadGrowLock <- newMVar ()
    
    604
    +
    
    605
    +  payloadArray <- IO $ \s1# ->
    
    606
    +    case newArray# numPayloadSegments# (panic "payload_array") s1# of
    
    607
    +      (# s2#, arr# #) -> case payloadLoop arr# 0# s2# of
    
    608
    +        s3# -> case unsafeFreezeArray# arr# s3# of
    
    609
    +          (# s4#, segments# #) -> case newIORef (SegmentArray segments#) `unIO` s4# of
    
    610
    +            (# s5#, segmentsRef #) ->
    
    611
    +              (# s5#, PayloadArray nextPayloadIdx segmentsRef payloadGrowLock #)
    
    612
    +
    
    428 613
       tab <- IO $ \s1# ->
    
    429 614
         case newArray# numSegments# (panic "string_table") s1# of
    
    430 615
           (# s2#, arr# #) -> case loop arr# 0# s2# of
    
    431 616
             s3# -> case unsafeFreezeArray# arr# s3# of
    
    432 617
               (# s4#, segments# #) ->
    
    433
    -            (# s4#, FastStringTable uid segments# #)
    
    618
    +            (# s4#, FastStringTable segments# payloadArray #)
    
    434 619
     
    
    435 620
       -- use the support wired into the RTS to share this CAF among all images of
    
    436 621
       -- libHSghc
    
    ... ... @@ -540,49 +725,69 @@ The procedure goes like this:
    540 725
        * Otherwise, insert and return the string we created.
    
    541 726
     -}
    
    542 727
     
    
    543
    -mkFastStringWith
    
    544
    -    :: (Int -> IO FastString) -> ShortByteString -> IO FastString
    
    545
    -mkFastStringWith mk_fs sbs = do
    
    546
    -  TableSegment lock _ buckets# <- readIORef segmentRef
    
    547
    -  let idx# = hashToIndex# buckets# hash#
    
    548
    -  bucket <- IO $ readArray# buckets# idx#
    
    549
    -  case bucket_match bucket sbs of
    
    550
    -    Just found -> return found
    
    551
    -    Nothing -> do
    
    552
    -      -- The withMVar below is not dupable. It can lead to deadlock if it is
    
    553
    -      -- only run partially and putMVar is not called after takeMVar.
    
    554
    -      noDuplicate
    
    555
    -      n <- get_uid
    
    556
    -      new_fs <- mk_fs n
    
    557
    -      withMVar lock $ \_ -> insert new_fs
    
    728
    +mkFastStringWith :: ShortByteString -> IO FastString
    
    729
    +mkFastStringWith sbs = do
    
    730
    +  let len = SBS.length sbs
    
    731
    +  if len <= 7
    
    732
    +    then do
    
    733
    +      -- Inline strings: encode directly, no table lookup needed
    
    734
    +      return $ FastStringId (encodeInline sbs)
    
    735
    +    else do
    
    736
    +      -- Table strings: check for existing index, or create new one
    
    737
    +      let chars = utf8CountCharsShortByteString sbs
    
    738
    +      TableSegment lock _ buckets# <- readIORef segmentRef
    
    739
    +      let idx# = hashToIndex# buckets# hash#
    
    740
    +      bucket <- IO $ readArray# buckets# idx#
    
    741
    +      case bucket_match bucket sbs of
    
    742
    +        Just payloadIdx -> do
    
    743
    +          -- Found existing payload index, convert to FastString reference
    
    744
    +          return $ FastStringId (encodeTableRef chars payloadIdx)
    
    745
    +        Nothing -> do
    
    746
    +          -- Create new payload and insert into table
    
    747
    +          noDuplicate
    
    748
    +          withMVar lock $ \_ -> insert
    
    558 749
       where
    
    559
    -    !(FastStringTable uid segments#) = stringTable
    
    560
    -    get_uid = atomicFetchAddFastMut uid 1
    
    750
    +    !(FastStringTable segments# _) = stringTable
    
    561 751
     
    
    562 752
         !(I# hash#) = hashStr sbs
    
    563 753
         (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
    
    564
    -    insert fs = do
    
    565
    -      TableSegment _ counter buckets# <- maybeResizeSegment hashFastString segmentRef
    
    754
    +
    
    755
    +    -- Helper to hash an index for table operations
    
    756
    +    hashIndex :: Int -> Int
    
    757
    +    hashIndex idx = hashStr (payload_sbs (lookupPayloadByIndex idx))
    
    758
    +
    
    759
    +    insert :: IO FastString
    
    760
    +    insert = do
    
    761
    +      let chars = utf8CountCharsShortByteString sbs
    
    762
    +          payload = FastString { payload_sbs = sbs }
    
    763
    +      -- Allocate payload in global array and get its index
    
    764
    +      payloadIdx <- allocatePayload payload
    
    765
    +      TableSegment _ counter buckets# <- maybeResizeSegment hashIndex segmentRef
    
    566 766
           let idx# = hashToIndex# buckets# hash#
    
    567 767
           bucket <- IO $ readArray# buckets# idx#
    
    568 768
           case bucket_match bucket sbs of
    
    569
    -        -- The FastString was added by another thread after previous read and
    
    769
    +        -- The payload was added by another thread after previous read and
    
    570 770
             -- before we acquired the write lock.
    
    571
    -        Just found -> return found
    
    771
    +        Just existingIdx -> do
    
    772
    +          return $ FastStringId (encodeTableRef chars existingIdx)
    
    572 773
             Nothing -> do
    
    774
    +          -- Insert the payload index into the hash table
    
    573 775
               IO $ \s1# ->
    
    574
    -            case writeArray# buckets# idx# (fs : bucket) s1# of
    
    776
    +            case writeArray# buckets# idx# (payloadIdx : bucket) s1# of
    
    575 777
                   s2# -> (# s2#, () #)
    
    576 778
               _ <- atomicFetchAddFastMut counter 1
    
    577
    -          return fs
    
    779
    +          return $ FastStringId (encodeTableRef chars payloadIdx)
    
    578 780
     
    
    579
    -bucket_match :: [FastString] -> ShortByteString -> Maybe FastString
    
    580
    -bucket_match fs sbs = go fs
    
    781
    +-- | Match a ShortByteString against a bucket of payload indices
    
    782
    +-- Returns the matching index if found
    
    783
    +bucket_match :: [Int] -> ShortByteString -> Maybe Int
    
    784
    +bucket_match indices sbs = go indices
    
    581 785
       where go [] = Nothing
    
    582
    -        go (fs@(FastString {n_chars = n_chars, fs_sbs=fs_sbs}) : ls)
    
    583
    -          | length fs_sbs /= SBS.length fs_sbs
    
    584
    -          | fs_sbs == sbs = Just fs
    
    585
    -          | otherwise     = go ls
    
    786
    +        go (idx : rest) =
    
    787
    +          let payload = lookupPayloadByIndex idx
    
    788
    +          in if payload_sbs payload == sbs
    
    789
    +             then Just idx
    
    790
    +             else go rest
    
    586 791
     -- bucket_match used to inline before changes to instance Eq ShortByteString
    
    587 792
     -- in bytestring-0.12, which made it slightly larger than inlining threshold.
    
    588 793
     -- Non-inlining causes a small, but measurable performance regression, so let's force it.
    
    ... ... @@ -592,7 +797,8 @@ bucket_match fs sbs = go fs
    592 797
     {-# INLINE mkNewFastZString #-}
    
    593 798
     
    
    594 799
     mkNewFastZString :: FastString -> IO FastZString
    
    595
    -mkNewFastZString (FastString uniq _ sbs) = do
    
    800
    +mkNewFastZString fs = do
    
    801
    +  let sbs = fs_sbs fs
    
    596 802
       TableSegment lock _ buckets# <- readIORef segmentRef
    
    597 803
       let idx# = hashToIndex# buckets# hash#
    
    598 804
       bucket <- IO $ readArray# buckets# idx#
    
    ... ... @@ -611,7 +817,7 @@ mkNewFastZString (FastString uniq _ sbs) = do
    611 817
     
    
    612 818
         -- FastString uniques are sequential, pass them through a linear
    
    613 819
         -- congruential generator to randomise
    
    614
    -    !(I# hash#) = uniq*6364136223846793005 + 1
    
    820
    +    !(I# hash#) = uniq fs * 6364136223846793005 + 1
    
    615 821
         (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
    
    616 822
         insert n fs = do
    
    617 823
           TableSegment _ counter buckets# <- maybeResizeSegment zStringHash segmentRef
    
    ... ... @@ -642,7 +848,7 @@ mkFastStringBytes !ptr !len =
    642 848
         -- idempotent.
    
    643 849
         unsafeDupablePerformIO $ do
    
    644 850
             sbs <- newSBSFromPtr ptr len
    
    645
    -        mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
    
    851
    +        mkFastStringWith sbs
    
    646 852
     
    
    647 853
     newSBSFromPtr :: Ptr a -> Int -> IO ShortByteString
    
    648 854
     newSBSFromPtr (Ptr src#) (I# len#) =
    
    ... ... @@ -656,14 +862,13 @@ newSBSFromPtr (Ptr src#) (I# len#) =
    656 862
     mkFastStringByteString :: ByteString -> FastString
    
    657 863
     mkFastStringByteString bs =
    
    658 864
       let sbs = SBS.toShort bs in
    
    659
    -  inlinePerformIO $
    
    660
    -      mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
    
    865
    +  inlinePerformIO $ mkFastStringWith sbs
    
    661 866
     
    
    662 867
     -- | Create a 'FastString' from an existing 'ShortByteString' without
    
    663 868
     -- copying.
    
    664 869
     mkFastStringShortByteString :: ShortByteString -> FastString
    
    665 870
     mkFastStringShortByteString sbs =
    
    666
    -  inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
    
    871
    +  inlinePerformIO $ mkFastStringWith sbs
    
    667 872
     
    
    668 873
     -- | Creates a UTF-8 encoded 'FastString' from a 'String'
    
    669 874
     mkFastString :: String -> FastString
    
    ... ... @@ -671,7 +876,7 @@ mkFastString :: String -> FastString
    671 876
     mkFastString str =
    
    672 877
       inlinePerformIO $ do
    
    673 878
         let !sbs = utf8EncodeShortByteString str
    
    674
    -    mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
    
    879
    +    mkFastStringWith sbs
    
    675 880
     
    
    676 881
     -- The following rule is used to avoid polluting the non-reclaimable FastString
    
    677 882
     -- table with transient strings when we only want their encoding.
    
    ... ... @@ -687,11 +892,6 @@ mkFastStringByteList str = mkFastStringShortByteString (SBS.pack str)
    687 892
     mkZFastString :: ShortByteString -> FastZString
    
    688 893
     mkZFastString sbs = mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs))
    
    689 894
     
    
    690
    -mkNewFastStringShortByteString :: ShortByteString -> Int -> IO FastString
    
    691
    -mkNewFastStringShortByteString sbs uid = do
    
    692
    -  let chars = utf8CountCharsShortByteString sbs
    
    693
    -  return (FastString uid chars sbs)
    
    694
    -
    
    695 895
     hashStr  :: ShortByteString -> Int
    
    696 896
      -- produce a hash value between 0 & m (inclusive)
    
    697 897
     hashStr sbs@(SBS.SBS ba#) = loop 0# 0#
    
    ... ... @@ -719,7 +919,7 @@ lengthFS fs = n_chars fs
    719 919
     
    
    720 920
     -- | Returns @True@ if the 'FastString' is empty
    
    721 921
     nullFS :: FastString -> Bool
    
    722
    -nullFS fs = SBS.null $ fs_sbs fs
    
    922
    +nullFS (FastStringId w) = w == 0xF800000000000000
    
    723 923
     
    
    724 924
     -- | Lazily unpacks and decodes the FastString
    
    725 925
     unpackFS :: FastString -> String
    
    ... ... @@ -755,12 +955,14 @@ uniqueOfFS :: FastString -> Int
    755 955
     uniqueOfFS fs = uniq fs
    
    756 956
     
    
    757 957
     nilFS :: FastString
    
    758
    -nilFS = mkFastString ""
    
    958
    +nilFS = FastStringId 0xF800000000000000  -- Empty inline string
    
    759 959
     
    
    760 960
     -- -----------------------------------------------------------------------------
    
    761 961
     -- Stats
    
    762 962
     
    
    763
    -getFastStringTable :: IO [[[FastString]]]
    
    963
    +-- | Get all FastString payloads from the table (for debugging/stats)
    
    964
    +-- Returns indices organized by segment and bucket
    
    965
    +getFastStringTable :: IO [[[Int]]]
    
    764 966
     getFastStringTable =
    
    765 967
       forM [0 .. numSegments - 1] $ \(I# i#) -> do
    
    766 968
         let (# segmentRef #) = indexArray# segments# i#
    
    ... ... @@ -769,7 +971,7 @@ getFastStringTable =
    769 971
         forM [0 .. bucketSize - 1] $ \(I# j#) ->
    
    770 972
           IO $ readArray# buckets# j#
    
    771 973
       where
    
    772
    -    !(FastStringTable _ segments#) = stringTable
    
    974
    +    !(FastStringTable segments# _) = stringTable
    
    773 975
     
    
    774 976
     getFastZStringTable :: IO [[[FastZString]]]
    
    775 977
     getFastZStringTable =