| ... |
... |
@@ -104,6 +104,8 @@ module GHC.Data.FastString |
|
104
|
104
|
getFastStringTable,
|
|
105
|
105
|
getFastZStringTable,
|
|
106
|
106
|
getFastStringZEncCounter,
|
|
|
107
|
+ lookupPayloadByIndex,
|
|
|
108
|
+ FastStringPayload(..),
|
|
107
|
109
|
|
|
108
|
110
|
-- * PtrStrings
|
|
109
|
111
|
PtrString (..),
|
| ... |
... |
@@ -130,8 +132,10 @@ import GHC.Data.FastMutInt |
|
130
|
132
|
import Control.Concurrent.MVar
|
|
131
|
133
|
import Control.DeepSeq
|
|
132
|
134
|
import Control.Monad
|
|
|
135
|
+import Data.Bits
|
|
133
|
136
|
import Data.ByteString (ByteString)
|
|
134
|
137
|
import Data.ByteString.Short (ShortByteString)
|
|
|
138
|
+import Data.Word
|
|
135
|
139
|
import qualified Data.ByteString as BS
|
|
136
|
140
|
import qualified Data.ByteString.Char8 as BSC
|
|
137
|
141
|
import qualified Data.ByteString.Unsafe as BS
|
| ... |
... |
@@ -171,9 +175,6 @@ fastZStringToByteString (FastZString bs) = bs |
|
171
|
175
|
unsafeMkByteString :: String -> ByteString
|
|
172
|
176
|
unsafeMkByteString = BSC.pack
|
|
173
|
177
|
|
|
174
|
|
-hashFastString :: FastString -> Int
|
|
175
|
|
-hashFastString fs = hashStr $ fs_sbs fs
|
|
176
|
|
-
|
|
177
|
178
|
-- -----------------------------------------------------------------------------
|
|
178
|
179
|
|
|
179
|
180
|
newtype FastZString = FastZString ByteString
|
| ... |
... |
@@ -206,36 +207,98 @@ mkFastZStringString str = FastZString (BSC.pack str) |
|
206
|
207
|
All 'FastString's are stored in a global hashtable to support fast O(1)
|
|
207
|
208
|
comparison.
|
|
208
|
209
|
|
|
209
|
|
-It is also associated with a lazy reference to the Z-encoding
|
|
210
|
|
-of this string which is used by the compiler internally.
|
|
211
|
210
|
-}
|
|
|
211
|
+-- | Payload for table-referenced FastStrings
|
|
212
|
212
|
data FastStringPayload = FastString {
|
|
213
|
|
- uniq :: {-# UNPACK #-} !Int, -- unique id
|
|
214
|
|
- n_chars :: {-# UNPACK #-} !Int, -- number of chars
|
|
215
|
|
- fs_sbs :: {-# UNPACK #-} !ShortByteString
|
|
|
213
|
+ payload_sbs :: {-# UNPACK #-} !ShortByteString
|
|
216
|
214
|
}
|
|
217
|
215
|
|
|
|
216
|
+{- Note [FastString Encoding]
|
|
|
217
|
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
218
|
+
|
|
|
219
|
+ A FastString is uniformly represented as a single Word64 which serves as both
|
|
|
220
|
+ the unique identifier and the string representation (for short strings).
|
|
|
221
|
+
|
|
|
222
|
+ Inline strings (strings ≤ 7 bytes):
|
|
|
223
|
+ - Top 5 bits are always 11111 (0x1F at bit position 59-63)
|
|
|
224
|
+ - Next 3 bits encode the byte length (0-7)
|
|
|
225
|
+ - Remaining 56 bits contain the actual string bytes
|
|
|
226
|
+ - Top byte ranges: 0xF8-0xFF (248-255)
|
|
|
227
|
+ - "a" (0x61): 0xF900000000000061
|
|
|
228
|
+
|
|
|
229
|
+ Table-referenced strings (strings > 7 bytes):
|
|
|
230
|
+ - Top 16 bits encode the UTF-8 character count (2-63487, 0x0002-0xF7FF)
|
|
|
231
|
+ - Lower 48 bits encode an index into the FastStringPayload table
|
|
|
232
|
+ - Top byte ranges: 0x00-0xF7 (0-247)
|
|
|
233
|
+ - Top byte < 0xF8 to prevent ambiguity with inline encoding
|
|
|
234
|
+ - Maximum character count: 63487 (0xF7FF)
|
|
|
235
|
+ - Example: 10-char string at index 42: 0x000A00000000002A
|
|
|
236
|
+
|
|
|
237
|
+ Top byte value determines encoding type:
|
|
|
238
|
+ - 0xF8-0xFF: Inline string
|
|
|
239
|
+ - 0x00-0xF7: Table reference
|
|
|
240
|
+-}
|
|
218
|
241
|
data FastString = FastStringId {-# UNPACK #-} !Word64
|
|
219
|
242
|
|
|
|
243
|
+encodeInline :: ShortByteString -> Word64
|
|
|
244
|
+encodeInline sbs =
|
|
|
245
|
+ let !len = SBS.length sbs
|
|
|
246
|
+ !lenBits = (0xf8 .|. fromIntegral len) `unsafeShiftL` 56
|
|
|
247
|
+ w8 :: Int -> Word64
|
|
|
248
|
+ w8 shift = fromIntegral (SBS.index sbs (shift `unsafeShiftR` 3)) `unsafeShiftL` shift
|
|
|
249
|
+ in case len of
|
|
|
250
|
+ 0 -> lenBits
|
|
|
251
|
+ 1 -> lenBits .|. w8 0
|
|
|
252
|
+ 2 -> lenBits .|. w8 8 .|. w8 0
|
|
|
253
|
+ 3 -> lenBits .|. w8 16 .|. w8 8 .|. w8 0
|
|
|
254
|
+ 4 -> lenBits .|. w8 24 .|. w8 16 .|. w8 8 .|. w8 0
|
|
|
255
|
+ 5 -> lenBits .|. w8 32 .|. w8 24 .|. w8 16 .|. w8 8 .|. w8 0
|
|
|
256
|
+ 6 -> lenBits .|. w8 40 .|. w8 32 .|. w8 24 .|. w8 16 .|. w8 8 .|. w8 0
|
|
|
257
|
+ 7 -> lenBits .|. w8 48 .|. w8 40 .|. w8 32 .|. w8 24 .|. w8 16 .|. w8 8 .|. w8 0
|
|
|
258
|
+ _ -> panic "encodeInline: string too long"
|
|
|
259
|
+
|
|
|
260
|
+encodeTableRef :: Int -> Int -> Word64
|
|
|
261
|
+encodeTableRef n_chars idx
|
|
|
262
|
+ | n_chars < 2 = panic "encodeTableRef: character count too small (< 2)"
|
|
|
263
|
+ | n_chars > 0xf7ff = panic "encodeTableRef: character count too large (> 63487)"
|
|
|
264
|
+ | otherwise =
|
|
|
265
|
+ let charBits = fromIntegral n_chars `Data.Bits.shiftL` 48
|
|
|
266
|
+ idxBits = fromIntegral idx .&. 0x0000ffffffffffff -- mask to 48 bits
|
|
|
267
|
+ in charBits .|. idxBits
|
|
|
268
|
+
|
|
220
|
269
|
fs_sbs :: FastString -> ShortByteString
|
|
221
|
270
|
fs_sbs (FastStringId w)
|
|
222
|
271
|
| w .&. 0xf8000000_00000000 == 0xf8000000_00000000
|
|
223
|
272
|
= let bs_len = (w .&. 0x07000000_00000000) `unsafeShiftR` 56
|
|
224
|
|
- w8_64 x = wordToWord8# (word64ToWord# x)
|
|
225
|
|
- go 0 _ = []
|
|
226
|
|
- go n cur = wordToWord8# (word64ToWord# cur) : go (n-1) (cur `uncheckedShiftRL64#` 8#)
|
|
|
273
|
+ w8 :: Int -> Word8
|
|
|
274
|
+ w8 shift = fromIntegral ((w `unsafeShiftR` shift) .&. 0xFF)
|
|
227
|
275
|
in case bs_len of
|
|
228
|
276
|
0 -> SBS.empty
|
|
229
|
|
- 1 -> SBS.singleton $ wordToWord8# (word64ToWord# w)
|
|
230
|
|
- 2 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 8#) , w8_64 w]
|
|
231
|
|
- 3 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]
|
|
232
|
|
- 4 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 24#), w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]
|
|
233
|
|
- 5 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 32#), w8_64 (w `uncheckedShiftRL64#` 24#), w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]
|
|
234
|
|
- 6 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 40#), w8_64 (w `uncheckedShiftRL64#` 32#), w8_64 (w `uncheckedShiftRL64#` 24#), w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]
|
|
235
|
|
- 7 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 48#), w8_64 (w `uncheckedShiftRL64#` 40#), w8_64 (w `uncheckedShiftRL64#` 32#), w8_64 (w `uncheckedShiftRL64#` 24#), w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]
|
|
236
|
|
- | otherwise
|
|
|
277
|
+ 1 -> SBS.singleton $ w8 0
|
|
|
278
|
+ 2 -> SBS.pack [ w8 0, w8 8 ]
|
|
|
279
|
+ 3 -> SBS.pack [ w8 0, w8 8, w8 16 ]
|
|
|
280
|
+ 4 -> SBS.pack [ w8 0, w8 8, w8 16, w8 24 ]
|
|
|
281
|
+ 5 -> SBS.pack [ w8 0, w8 8, w8 16, w8 24, w8 32 ]
|
|
|
282
|
+ 6 -> SBS.pack [ w8 0, w8 8, w8 16, w8 24, w8 32, w8 40 ]
|
|
|
283
|
+ 7 -> SBS.pack [ w8 0, w8 8, w8 16, w8 24, w8 32, w8 40, w8 48 ]
|
|
|
284
|
+ _ -> panic "fs_sbs: invalid inline string length"
|
|
|
285
|
+ | otherwise =
|
|
|
286
|
+ -- Table reference: need to extract index from lower 48 bits and look up
|
|
|
287
|
+ let idx = fromIntegral (w .&. 0x0000FFFFFFFFFFFF) :: Int
|
|
|
288
|
+ payload = lookupPayloadByIndex idx
|
|
|
289
|
+ in payload_sbs payload
|
|
|
290
|
+
|
|
237
|
291
|
n_chars :: FastString -> Int
|
|
|
292
|
+n_chars (FastStringId w)
|
|
|
293
|
+ | w .&. 0xf8000000_00000000 == 0xf8000000_00000000 =
|
|
|
294
|
+ -- Inline string: count UTF-8 characters
|
|
|
295
|
+ utf8CountCharsShortByteString (fs_sbs (FastStringId w))
|
|
|
296
|
+ | otherwise =
|
|
|
297
|
+ -- Table reference: extract character count from top 16 bits
|
|
|
298
|
+ fromIntegral (w `unsafeShiftR` 48)
|
|
|
299
|
+
|
|
238
|
300
|
uniq :: FastString -> Int
|
|
|
301
|
+uniq (FastStringId w) = fromIntegral w
|
|
239
|
302
|
|
|
240
|
303
|
instance Eq FastString where
|
|
241
|
304
|
f1 == f2 = uniq f1 == uniq f2
|
| ... |
... |
@@ -326,23 +389,120 @@ and updates to multiple buckets with low synchronization overhead. |
|
326
|
389
|
|
|
327
|
390
|
See Note [Updating the FastString table] on how it's updated.
|
|
328
|
391
|
-}
|
|
|
392
|
+-- | A segment of the payload array
|
|
|
393
|
+data PayloadSegment = PayloadSegment
|
|
|
394
|
+ (MutableArray# RealWorld FastStringPayload)
|
|
|
395
|
+
|
|
|
396
|
+-- | Boxed wrapper for Array# to allow storage in IORef
|
|
|
397
|
+data SegmentArray = SegmentArray (Array# (IORef PayloadSegment))
|
|
|
398
|
+
|
|
|
399
|
+-- | Append-only array of FastStringPayload for O(1) lookup by index
|
|
|
400
|
+data PayloadArray = PayloadArray
|
|
|
401
|
+ {-# UNPACK #-} !FastMutInt -- ^ Next free index
|
|
|
402
|
+ {-# UNPACK #-} !(IORef SegmentArray) -- ^ Growable segmented array
|
|
|
403
|
+ {-# UNPACK #-} !(MVar ()) -- ^ Lock for growing the array
|
|
|
404
|
+
|
|
|
405
|
+-- Number of payloads per segment
|
|
|
406
|
+payloadSegmentSize :: Int
|
|
|
407
|
+payloadSegmentSize = 1024
|
|
|
408
|
+
|
|
|
409
|
+payloadSegmentBits :: Int
|
|
|
410
|
+payloadSegmentBits = 10 -- 2^10 = 1024
|
|
|
411
|
+
|
|
|
412
|
+initialPayloadSegments :: Int
|
|
|
413
|
+initialPayloadSegments = 256
|
|
|
414
|
+
|
|
|
415
|
+-- | Ensure a payload segment exists, growing the array if necessary
|
|
|
416
|
+-- Returns the segment IORef for the given segment index
|
|
|
417
|
+ensurePayloadSegment :: Int -> IO (IORef PayloadSegment)
|
|
|
418
|
+ensurePayloadSegment segmentIdx = do
|
|
|
419
|
+ let !(FastStringTable _ (PayloadArray _ segmentsRef growLock)) = stringTable
|
|
|
420
|
+ !(I# segmentIdx#) = segmentIdx
|
|
|
421
|
+ SegmentArray segments# <- readIORef segmentsRef
|
|
|
422
|
+ let currentSize# = sizeofArray# segments#
|
|
|
423
|
+
|
|
|
424
|
+ if isTrue# (segmentIdx# <# currentSize#)
|
|
|
425
|
+ then do
|
|
|
426
|
+ -- Segment exists, return it
|
|
|
427
|
+ let (# segmentRef #) = indexArray# segments# segmentIdx#
|
|
|
428
|
+ return segmentRef
|
|
|
429
|
+ else do
|
|
|
430
|
+ -- Need to grow the array
|
|
|
431
|
+ withMVar growLock $ \_ -> do
|
|
|
432
|
+ -- Re-check after acquiring lock (another thread might have grown it)
|
|
|
433
|
+ SegmentArray segments'# <- readIORef segmentsRef
|
|
|
434
|
+ let currentSize'# = sizeofArray# segments'#
|
|
|
435
|
+ if isTrue# (segmentIdx# <# currentSize'#)
|
|
|
436
|
+ then do
|
|
|
437
|
+ let (# segmentRef #) = indexArray# segments'# segmentIdx#
|
|
|
438
|
+ return segmentRef
|
|
|
439
|
+ else do
|
|
|
440
|
+ -- Double the size
|
|
|
441
|
+ let newSize# = currentSize'# *# 2#
|
|
|
442
|
+ !(I# payloadSegmentSize#) = payloadSegmentSize
|
|
|
443
|
+ SegmentArray newSegments# <- IO $ \s1# ->
|
|
|
444
|
+ case newArray# newSize# (panic "unallocated_segment") s1# of
|
|
|
445
|
+ (# s2#, arr# #) ->
|
|
|
446
|
+ case copyArray# segments'# 0# arr# 0# currentSize'# s2# of
|
|
|
447
|
+ s3# ->
|
|
|
448
|
+ -- Allocate new segments
|
|
|
449
|
+ let allocLoop i# s#
|
|
|
450
|
+ | isTrue# (i# ==# newSize#) = s#
|
|
|
451
|
+ | otherwise =
|
|
|
452
|
+ case newArray# payloadSegmentSize# (panic "uninitialized payload") s# of
|
|
|
453
|
+ (# s'#, seg# #) -> case newIORef (PayloadSegment seg#) `unIO` s'# of
|
|
|
454
|
+ (# s''#, segRef #) -> case writeArray# arr# i# segRef s''# of
|
|
|
455
|
+ s'''# -> allocLoop (i# +# 1#) s'''#
|
|
|
456
|
+ in case allocLoop currentSize'# s3# of
|
|
|
457
|
+ s4# -> case unsafeFreezeArray# arr# s4# of
|
|
|
458
|
+ (# s5#, frozen# #) -> (# s5#, SegmentArray frozen# #)
|
|
|
459
|
+ writeIORef segmentsRef (SegmentArray newSegments#)
|
|
|
460
|
+ let (# segmentRef #) = indexArray# newSegments# segmentIdx#
|
|
|
461
|
+ return segmentRef
|
|
|
462
|
+
|
|
|
463
|
+-- | Allocate a payload in the global payload array and return its index
|
|
|
464
|
+allocatePayload :: FastStringPayload -> IO Int
|
|
|
465
|
+allocatePayload payload = do
|
|
|
466
|
+ let !(FastStringTable _ (PayloadArray nextIdx _ _)) = stringTable
|
|
|
467
|
+ idx <- atomicFetchAddFastMut nextIdx 1
|
|
|
468
|
+ let !(I# idx#) = idx
|
|
|
469
|
+ !(I# payloadSegmentBits#) = payloadSegmentBits
|
|
|
470
|
+ !(I# payloadSegmentSize#) = payloadSegmentSize
|
|
|
471
|
+ segmentIdx = I# (idx# `uncheckedIShiftRL#` payloadSegmentBits#)
|
|
|
472
|
+ offsetInSegment# = idx# `andI#` (payloadSegmentSize# -# 1#)
|
|
|
473
|
+ segmentRef <- ensurePayloadSegment segmentIdx
|
|
|
474
|
+ PayloadSegment segment# <- readIORef segmentRef
|
|
|
475
|
+ IO $ \s# ->
|
|
|
476
|
+ case writeArray# segment# offsetInSegment# payload s# of
|
|
|
477
|
+ s'# -> (# s'#, () #)
|
|
|
478
|
+ return idx
|
|
|
479
|
+
|
|
|
480
|
+-- | Look up a FastStringPayload by its index
|
|
|
481
|
+-- The segment is guaranteed to exist because indices only come from allocatePayload
|
|
|
482
|
+lookupPayloadByIndex :: Int -> FastStringPayload
|
|
|
483
|
+lookupPayloadByIndex idx = inlinePerformIO $ do
|
|
|
484
|
+ let !(FastStringTable _ (PayloadArray _ segmentsRef _)) = stringTable
|
|
|
485
|
+ !(I# idx#) = idx
|
|
|
486
|
+ !(I# payloadSegmentBits#) = payloadSegmentBits
|
|
|
487
|
+ !(I# payloadSegmentSize#) = payloadSegmentSize
|
|
|
488
|
+ segmentIdx# = idx# `uncheckedIShiftRL#` payloadSegmentBits#
|
|
|
489
|
+ offsetInSegment# = idx# `andI#` (payloadSegmentSize# -# 1#)
|
|
|
490
|
+ SegmentArray segments# <- readIORef segmentsRef
|
|
|
491
|
+ let (# segmentRef #) = indexArray# segments# segmentIdx#
|
|
|
492
|
+ PayloadSegment segment# <- readIORef segmentRef
|
|
|
493
|
+ IO $ readArray# segment# offsetInSegment#
|
|
|
494
|
+
|
|
329
|
495
|
data FastStringTable = FastStringTable
|
|
330
|
|
- {-# UNPACK #-} !FastMutInt
|
|
331
|
|
- -- ^ The unique ID counter shared with all buckets
|
|
332
|
|
- --
|
|
333
|
|
- -- We unpack the 'FastMutInt' counter as it is always consumed strictly.
|
|
334
|
|
- -- We mark this as 'NOUNPACK' as this 'FastMutInt' is retained by a thunk
|
|
335
|
|
- -- in 'mkFastStringWith' and needs to be boxed any way.
|
|
336
|
|
- -- If this is unpacked, then we box this single 'FastMutInt' once for each
|
|
337
|
|
- -- allocated FastString.
|
|
338
|
496
|
(Array# (IORef FastStringTableSegment)) -- ^ concurrent segments
|
|
|
497
|
+ !PayloadArray -- ^ Global payload array
|
|
339
|
498
|
|
|
340
|
499
|
data TableSegment a = TableSegment
|
|
341
|
500
|
{-# UNPACK #-} !(MVar ()) -- the lock for write in each segment
|
|
342
|
501
|
{-# UNPACK #-} !FastMutInt -- the number of elements
|
|
343
|
502
|
(MutableArray# RealWorld [a]) -- buckets in this segment
|
|
344
|
503
|
|
|
345
|
|
-type FastStringTableSegment = TableSegment FastString
|
|
|
504
|
+-- Hash table now stores indices into the payload array
|
|
|
505
|
+type FastStringTableSegment = TableSegment Int
|
|
346
|
506
|
|
|
347
|
507
|
data FastZStringTable = FastZStringTable
|
|
348
|
508
|
{-# UNPACK #-} !FastMutInt
|
| ... |
... |
@@ -415,6 +575,9 @@ stringTable :: FastStringTable |
|
415
|
575
|
stringTable = unsafePerformIO $ do
|
|
416
|
576
|
let !(I# numSegments#) = numSegments
|
|
417
|
577
|
!(I# initialNumBuckets#) = initialNumBuckets
|
|
|
578
|
+ !(I# payloadSegmentSize#) = payloadSegmentSize
|
|
|
579
|
+ !(I# numPayloadSegments#) = initialPayloadSegments
|
|
|
580
|
+
|
|
418
|
581
|
loop a# i# s1#
|
|
419
|
582
|
| isTrue# (i# ==# numSegments#) = s1#
|
|
420
|
583
|
| otherwise = case newMVar () `unIO` s1# of
|
| ... |
... |
@@ -424,13 +587,33 @@ stringTable = unsafePerformIO $ do |
|
424
|
587
|
(TableSegment lock counter buckets#) `unIO` s4# of
|
|
425
|
588
|
(# s5#, segment #) -> case writeArray# a# i# segment s5# of
|
|
426
|
589
|
s6# -> loop a# (i# +# 1#) s6#
|
|
427
|
|
- uid <- newFastMutInt 603979776 -- ord '$' * 0x01000000
|
|
|
590
|
+
|
|
|
591
|
+ -- Initialize payload array segments
|
|
|
592
|
+ payloadLoop a# i# s1#
|
|
|
593
|
+ | isTrue# (i# ==# numPayloadSegments#) = s1#
|
|
|
594
|
+ | otherwise = case newArray# payloadSegmentSize#
|
|
|
595
|
+ (panic "uninitialized payload") s1# of
|
|
|
596
|
+ (# s2#, segment# #) -> case newIORef (PayloadSegment segment#) `unIO` s2# of
|
|
|
597
|
+ (# s3#, segmentRef #) -> case writeArray# a# i# segmentRef s3# of
|
|
|
598
|
+ s4# -> payloadLoop a# (i# +# 1#) s4#
|
|
|
599
|
+
|
|
|
600
|
+ nextPayloadIdx <- newFastMutInt 0
|
|
|
601
|
+ payloadGrowLock <- newMVar ()
|
|
|
602
|
+
|
|
|
603
|
+ payloadArray <- IO $ \s1# ->
|
|
|
604
|
+ case newArray# numPayloadSegments# (panic "payload_array") s1# of
|
|
|
605
|
+ (# s2#, arr# #) -> case payloadLoop arr# 0# s2# of
|
|
|
606
|
+ s3# -> case unsafeFreezeArray# arr# s3# of
|
|
|
607
|
+ (# s4#, segments# #) -> case newIORef (SegmentArray segments#) `unIO` s4# of
|
|
|
608
|
+ (# s5#, segmentsRef #) ->
|
|
|
609
|
+ (# s5#, PayloadArray nextPayloadIdx segmentsRef payloadGrowLock #)
|
|
|
610
|
+
|
|
428
|
611
|
tab <- IO $ \s1# ->
|
|
429
|
612
|
case newArray# numSegments# (panic "string_table") s1# of
|
|
430
|
613
|
(# s2#, arr# #) -> case loop arr# 0# s2# of
|
|
431
|
614
|
s3# -> case unsafeFreezeArray# arr# s3# of
|
|
432
|
615
|
(# s4#, segments# #) ->
|
|
433
|
|
- (# s4#, FastStringTable uid segments# #)
|
|
|
616
|
+ (# s4#, FastStringTable segments# payloadArray #)
|
|
434
|
617
|
|
|
435
|
618
|
-- use the support wired into the RTS to share this CAF among all images of
|
|
436
|
619
|
-- libHSghc
|
| ... |
... |
@@ -540,49 +723,69 @@ The procedure goes like this: |
|
540
|
723
|
* Otherwise, insert and return the string we created.
|
|
541
|
724
|
-}
|
|
542
|
725
|
|
|
543
|
|
-mkFastStringWith
|
|
544
|
|
- :: (Int -> IO FastString) -> ShortByteString -> IO FastString
|
|
545
|
|
-mkFastStringWith mk_fs sbs = do
|
|
546
|
|
- TableSegment lock _ buckets# <- readIORef segmentRef
|
|
547
|
|
- let idx# = hashToIndex# buckets# hash#
|
|
548
|
|
- bucket <- IO $ readArray# buckets# idx#
|
|
549
|
|
- case bucket_match bucket sbs of
|
|
550
|
|
- Just found -> return found
|
|
551
|
|
- Nothing -> do
|
|
552
|
|
- -- The withMVar below is not dupable. It can lead to deadlock if it is
|
|
553
|
|
- -- only run partially and putMVar is not called after takeMVar.
|
|
554
|
|
- noDuplicate
|
|
555
|
|
- n <- get_uid
|
|
556
|
|
- new_fs <- mk_fs n
|
|
557
|
|
- withMVar lock $ \_ -> insert new_fs
|
|
|
726
|
+mkFastStringWith :: ShortByteString -> IO FastString
|
|
|
727
|
+mkFastStringWith sbs = do
|
|
|
728
|
+ let len = SBS.length sbs
|
|
|
729
|
+ if len <= 7
|
|
|
730
|
+ then do
|
|
|
731
|
+ -- Inline strings: encode directly, no table lookup needed
|
|
|
732
|
+ return $ FastStringId (encodeInline sbs)
|
|
|
733
|
+ else do
|
|
|
734
|
+ -- Table strings: check for existing index, or create new one
|
|
|
735
|
+ let chars = utf8CountCharsShortByteString sbs
|
|
|
736
|
+ TableSegment lock _ buckets# <- readIORef segmentRef
|
|
|
737
|
+ let idx# = hashToIndex# buckets# hash#
|
|
|
738
|
+ bucket <- IO $ readArray# buckets# idx#
|
|
|
739
|
+ case bucket_match bucket sbs of
|
|
|
740
|
+ Just payloadIdx -> do
|
|
|
741
|
+ -- Found existing payload index, convert to FastString reference
|
|
|
742
|
+ return $ FastStringId (encodeTableRef chars payloadIdx)
|
|
|
743
|
+ Nothing -> do
|
|
|
744
|
+ -- Create new payload and insert into table
|
|
|
745
|
+ noDuplicate
|
|
|
746
|
+ withMVar lock $ \_ -> insert
|
|
558
|
747
|
where
|
|
559
|
|
- !(FastStringTable uid segments#) = stringTable
|
|
560
|
|
- get_uid = atomicFetchAddFastMut uid 1
|
|
|
748
|
+ !(FastStringTable segments# _) = stringTable
|
|
561
|
749
|
|
|
562
|
750
|
!(I# hash#) = hashStr sbs
|
|
563
|
751
|
(# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
|
|
564
|
|
- insert fs = do
|
|
565
|
|
- TableSegment _ counter buckets# <- maybeResizeSegment hashFastString segmentRef
|
|
|
752
|
+
|
|
|
753
|
+ -- Helper to hash an index for table operations
|
|
|
754
|
+ hashIndex :: Int -> Int
|
|
|
755
|
+ hashIndex idx = hashStr (payload_sbs (lookupPayloadByIndex idx))
|
|
|
756
|
+
|
|
|
757
|
+ insert :: IO FastString
|
|
|
758
|
+ insert = do
|
|
|
759
|
+ let chars = utf8CountCharsShortByteString sbs
|
|
|
760
|
+ payload = FastString { payload_sbs = sbs }
|
|
|
761
|
+ -- Allocate payload in global array and get its index
|
|
|
762
|
+ payloadIdx <- allocatePayload payload
|
|
|
763
|
+ TableSegment _ counter buckets# <- maybeResizeSegment hashIndex segmentRef
|
|
566
|
764
|
let idx# = hashToIndex# buckets# hash#
|
|
567
|
765
|
bucket <- IO $ readArray# buckets# idx#
|
|
568
|
766
|
case bucket_match bucket sbs of
|
|
569
|
|
- -- The FastString was added by another thread after previous read and
|
|
|
767
|
+ -- The payload was added by another thread after previous read and
|
|
570
|
768
|
-- before we acquired the write lock.
|
|
571
|
|
- Just found -> return found
|
|
|
769
|
+ Just existingIdx -> do
|
|
|
770
|
+ return $ FastStringId (encodeTableRef chars existingIdx)
|
|
572
|
771
|
Nothing -> do
|
|
|
772
|
+ -- Insert the payload index into the hash table
|
|
573
|
773
|
IO $ \s1# ->
|
|
574
|
|
- case writeArray# buckets# idx# (fs : bucket) s1# of
|
|
|
774
|
+ case writeArray# buckets# idx# (payloadIdx : bucket) s1# of
|
|
575
|
775
|
s2# -> (# s2#, () #)
|
|
576
|
776
|
_ <- atomicFetchAddFastMut counter 1
|
|
577
|
|
- return fs
|
|
|
777
|
+ return $ FastStringId (encodeTableRef chars payloadIdx)
|
|
578
|
778
|
|
|
579
|
|
-bucket_match :: [FastString] -> ShortByteString -> Maybe FastString
|
|
580
|
|
-bucket_match fs sbs = go fs
|
|
|
779
|
+-- | Match a ShortByteString against a bucket of payload indices
|
|
|
780
|
+-- Returns the matching index if found
|
|
|
781
|
+bucket_match :: [Int] -> ShortByteString -> Maybe Int
|
|
|
782
|
+bucket_match indices sbs = go indices
|
|
581
|
783
|
where go [] = Nothing
|
|
582
|
|
- go (fs@(FastString {n_chars = n_chars, fs_sbs=fs_sbs}) : ls)
|
|
583
|
|
- | length fs_sbs /= SBS.length fs_sbs
|
|
584
|
|
- | fs_sbs == sbs = Just fs
|
|
585
|
|
- | otherwise = go ls
|
|
|
784
|
+ go (idx : rest) =
|
|
|
785
|
+ let payload = lookupPayloadByIndex idx
|
|
|
786
|
+ in if payload_sbs payload == sbs
|
|
|
787
|
+ then Just idx
|
|
|
788
|
+ else go rest
|
|
586
|
789
|
-- bucket_match used to inline before changes to instance Eq ShortByteString
|
|
587
|
790
|
-- in bytestring-0.12, which made it slightly larger than inlining threshold.
|
|
588
|
791
|
-- Non-inlining causes a small, but measurable performance regression, so let's force it.
|
| ... |
... |
@@ -592,7 +795,8 @@ bucket_match fs sbs = go fs |
|
592
|
795
|
{-# INLINE mkNewFastZString #-}
|
|
593
|
796
|
|
|
594
|
797
|
mkNewFastZString :: FastString -> IO FastZString
|
|
595
|
|
-mkNewFastZString (FastString uniq _ sbs) = do
|
|
|
798
|
+mkNewFastZString fs = do
|
|
|
799
|
+ let sbs = fs_sbs fs
|
|
596
|
800
|
TableSegment lock _ buckets# <- readIORef segmentRef
|
|
597
|
801
|
let idx# = hashToIndex# buckets# hash#
|
|
598
|
802
|
bucket <- IO $ readArray# buckets# idx#
|
| ... |
... |
@@ -611,7 +815,7 @@ mkNewFastZString (FastString uniq _ sbs) = do |
|
611
|
815
|
|
|
612
|
816
|
-- FastString uniques are sequential, pass them through a linear
|
|
613
|
817
|
-- congruential generator to randomise
|
|
614
|
|
- !(I# hash#) = uniq*6364136223846793005 + 1
|
|
|
818
|
+ !(I# hash#) = uniq fs * 6364136223846793005 + 1
|
|
615
|
819
|
(# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
|
|
616
|
820
|
insert n fs = do
|
|
617
|
821
|
TableSegment _ counter buckets# <- maybeResizeSegment zStringHash segmentRef
|
| ... |
... |
@@ -642,7 +846,7 @@ mkFastStringBytes !ptr !len = |
|
642
|
846
|
-- idempotent.
|
|
643
|
847
|
unsafeDupablePerformIO $ do
|
|
644
|
848
|
sbs <- newSBSFromPtr ptr len
|
|
645
|
|
- mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
|
|
|
849
|
+ mkFastStringWith sbs
|
|
646
|
850
|
|
|
647
|
851
|
newSBSFromPtr :: Ptr a -> Int -> IO ShortByteString
|
|
648
|
852
|
newSBSFromPtr (Ptr src#) (I# len#) =
|
| ... |
... |
@@ -656,14 +860,13 @@ newSBSFromPtr (Ptr src#) (I# len#) = |
|
656
|
860
|
mkFastStringByteString :: ByteString -> FastString
|
|
657
|
861
|
mkFastStringByteString bs =
|
|
658
|
862
|
let sbs = SBS.toShort bs in
|
|
659
|
|
- inlinePerformIO $
|
|
660
|
|
- mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
|
|
|
863
|
+ inlinePerformIO $ mkFastStringWith sbs
|
|
661
|
864
|
|
|
662
|
865
|
-- | Create a 'FastString' from an existing 'ShortByteString' without
|
|
663
|
866
|
-- copying.
|
|
664
|
867
|
mkFastStringShortByteString :: ShortByteString -> FastString
|
|
665
|
868
|
mkFastStringShortByteString sbs =
|
|
666
|
|
- inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
|
|
|
869
|
+ inlinePerformIO $ mkFastStringWith sbs
|
|
667
|
870
|
|
|
668
|
871
|
-- | Creates a UTF-8 encoded 'FastString' from a 'String'
|
|
669
|
872
|
mkFastString :: String -> FastString
|
| ... |
... |
@@ -671,7 +874,7 @@ mkFastString :: String -> FastString |
|
671
|
874
|
mkFastString str =
|
|
672
|
875
|
inlinePerformIO $ do
|
|
673
|
876
|
let !sbs = utf8EncodeShortByteString str
|
|
674
|
|
- mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
|
|
|
877
|
+ mkFastStringWith sbs
|
|
675
|
878
|
|
|
676
|
879
|
-- The following rule is used to avoid polluting the non-reclaimable FastString
|
|
677
|
880
|
-- table with transient strings when we only want their encoding.
|
| ... |
... |
@@ -687,11 +890,6 @@ mkFastStringByteList str = mkFastStringShortByteString (SBS.pack str) |
|
687
|
890
|
mkZFastString :: ShortByteString -> FastZString
|
|
688
|
891
|
mkZFastString sbs = mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs))
|
|
689
|
892
|
|
|
690
|
|
-mkNewFastStringShortByteString :: ShortByteString -> Int -> IO FastString
|
|
691
|
|
-mkNewFastStringShortByteString sbs uid = do
|
|
692
|
|
- let chars = utf8CountCharsShortByteString sbs
|
|
693
|
|
- return (FastString uid chars sbs)
|
|
694
|
|
-
|
|
695
|
893
|
hashStr :: ShortByteString -> Int
|
|
696
|
894
|
-- produce a hash value between 0 & m (inclusive)
|
|
697
|
895
|
hashStr sbs@(SBS.SBS ba#) = loop 0# 0#
|
| ... |
... |
@@ -719,7 +917,7 @@ lengthFS fs = n_chars fs |
|
719
|
917
|
|
|
720
|
918
|
-- | Returns @True@ if the 'FastString' is empty
|
|
721
|
919
|
nullFS :: FastString -> Bool
|
|
722
|
|
-nullFS fs = SBS.null $ fs_sbs fs
|
|
|
920
|
+nullFS (FastStringId w) = w == 0xF800000000000000
|
|
723
|
921
|
|
|
724
|
922
|
-- | Lazily unpacks and decodes the FastString
|
|
725
|
923
|
unpackFS :: FastString -> String
|
| ... |
... |
@@ -755,12 +953,14 @@ uniqueOfFS :: FastString -> Int |
|
755
|
953
|
uniqueOfFS fs = uniq fs
|
|
756
|
954
|
|
|
757
|
955
|
nilFS :: FastString
|
|
758
|
|
-nilFS = mkFastString ""
|
|
|
956
|
+nilFS = FastStringId 0xF800000000000000 -- Empty inline string
|
|
759
|
957
|
|
|
760
|
958
|
-- -----------------------------------------------------------------------------
|
|
761
|
959
|
-- Stats
|
|
762
|
960
|
|
|
763
|
|
-getFastStringTable :: IO [[[FastString]]]
|
|
|
961
|
+-- | Get all FastString payloads from the table (for debugging/stats)
|
|
|
962
|
+-- Returns indices organized by segment and bucket
|
|
|
963
|
+getFastStringTable :: IO [[[Int]]]
|
|
764
|
964
|
getFastStringTable =
|
|
765
|
965
|
forM [0 .. numSegments - 1] $ \(I# i#) -> do
|
|
766
|
966
|
let (# segmentRef #) = indexArray# segments# i#
|
| ... |
... |
@@ -769,7 +969,7 @@ getFastStringTable = |
|
769
|
969
|
forM [0 .. bucketSize - 1] $ \(I# j#) ->
|
|
770
|
970
|
IO $ readArray# buckets# j#
|
|
771
|
971
|
where
|
|
772
|
|
- !(FastStringTable _ segments#) = stringTable
|
|
|
972
|
+ !(FastStringTable segments# _) = stringTable
|
|
773
|
973
|
|
|
774
|
974
|
getFastZStringTable :: IO [[[FastZString]]]
|
|
775
|
975
|
getFastZStringTable =
|