GitLab

...

@@ -104,6 +104,8 @@ module GHC.Data.FastString

104

         getFastStringTable,

105

         getFastZStringTable,

106

         getFastStringZEncCounter,

107

+        lookupPayloadByIndex,

108

+        FastStringPayload(..),

107

109

108

110

         -- * PtrStrings

109

111

         PtrString (..),

...

@@ -130,8 +132,10 @@ import GHC.Data.FastMutInt

130

132

 import Control.Concurrent.MVar

131

133

 import Control.DeepSeq

132

134

 import Control.Monad

135

+import Data.Bits

133

136

 import Data.ByteString (ByteString)

134

137

 import Data.ByteString.Short (ShortByteString)

138

+import Data.Word

135

139

 import qualified Data.ByteString          as BS

136

140

 import qualified Data.ByteString.Char8    as BSC

137

141

 import qualified Data.ByteString.Unsafe   as BS

...

@@ -171,9 +175,6 @@ fastZStringToByteString (FastZString bs) = bs

171

175

 unsafeMkByteString :: String -> ByteString

172

176

 unsafeMkByteString = BSC.pack

173

177

174

-hashFastString :: FastString -> Int

175

-hashFastString fs = hashStr $ fs_sbs fs

176

177

178

 -- -----------------------------------------------------------------------------

178

179

180

 newtype FastZString = FastZString ByteString

...

@@ -206,36 +207,98 @@ mkFastZStringString str = FastZString (BSC.pack str)

206

207

 All 'FastString's are stored in a global hashtable to support fast O(1)

207

208

 comparison.

208

209

-It is also associated with a lazy reference to the Z-encoding

210

-of this string which is used by the compiler internally.

211

210

-}

211

+-- | Payload for table-referenced FastStrings

212

 data FastStringPayload = FastString {

213

-      uniq    :: {-# UNPACK #-} !Int, -- unique id

214

-      n_chars :: {-# UNPACK #-} !Int, -- number of chars

215

-      fs_sbs  :: {-# UNPACK #-} !ShortByteString

213

+      payload_sbs :: {-# UNPACK #-} !ShortByteString

216

214

217

215

216

+{- Note [FastString Encoding]

217

+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

218

219

+   A FastString is uniformly represented as a single Word64 which serves as both

220

+   the unique identifier and the string representation (for short strings).

221

222

+   Inline strings (strings ≤ 7 bytes):

223

+   - Top 5 bits are always 11111 (0x1F at bit position 59-63)

224

+   - Next 3 bits encode the byte length (0-7)

225

+   - Remaining 56 bits contain the actual string bytes

226

+   - Top byte ranges: 0xF8-0xFF (248-255)

227

+   - "a" (0x61): 0xF900000000000061

228

229

+   Table-referenced strings (strings > 7 bytes):

230

+   - Top 16 bits encode the UTF-8 character count (2-63487, 0x0002-0xF7FF)

231

+   - Lower 48 bits encode an index into the FastStringPayload table

232

+   - Top byte ranges: 0x00-0xF7 (0-247)

233

+   - Top byte < 0xF8 to prevent ambiguity with inline encoding

234

+   - Maximum character count: 63487 (0xF7FF)

235

+   - Example: 10-char string at index 42: 0x000A00000000002A

236

237

+   Top byte value determines encoding type:

238

+   - 0xF8-0xFF: Inline string

239

+   - 0x00-0xF7: Table reference

240

+-}

218

241

 data FastString = FastStringId {-# UNPACK #-} !Word64

219

242

243

+encodeInline :: ShortByteString -> Word64

244

+encodeInline sbs =

245

+  let !len = SBS.length sbs

246

+      !lenBits = (0xf8 .|. fromIntegral len) `unsafeShiftL` 56

247

+      w8 :: Int -> Word64

248

+      w8 shift = fromIntegral (SBS.index sbs (shift `unsafeShiftR` 3)) `unsafeShiftL` shift

249

+  in case len of

250

+       0 -> lenBits

251

+       1 -> lenBits .|. w8 0

252

+       2 -> lenBits .|. w8 8 .|. w8 0

253

+       3 -> lenBits .|. w8 16 .|. w8 8 .|. w8 0

254

+       4 -> lenBits .|. w8 24 .|. w8 16 .|. w8 8 .|. w8 0

255

+       5 -> lenBits .|. w8 32 .|. w8 24 .|. w8 16 .|. w8 8 .|. w8 0

256

+       6 -> lenBits .|. w8 40 .|. w8 32 .|. w8 24 .|. w8 16 .|. w8 8 .|. w8 0

257

+       7 -> lenBits .|. w8 48 .|. w8 40 .|. w8 32 .|. w8 24 .|. w8 16 .|. w8 8 .|. w8 0

258

+       _ -> panic "encodeInline: string too long"

259

260

+encodeTableRef :: Int -> Int -> Word64

261

+encodeTableRef n_chars idx

262

+  | n_chars < 2 = panic "encodeTableRef: character count too small (< 2)"

263

+  | n_chars > 0xf7ff = panic "encodeTableRef: character count too large (> 63487)"

264

+  | otherwise =

265

+      let charBits = fromIntegral n_chars `Data.Bits.shiftL` 48

266

+          idxBits = fromIntegral idx .&. 0x0000ffffffffffff -- mask to 48 bits

267

+      in charBits .|. idxBits

268

220

269

 fs_sbs :: FastString -> ShortByteString

221

270

 fs_sbs (FastStringId w)

222

271

   | w .&. 0xf8000000_00000000 == 0xf8000000_00000000

223

272

   = let bs_len = (w .&. 0x07000000_00000000) `unsafeShiftR` 56

224

-        w8_64 x = wordToWord8# (word64ToWord# x)

225

-        go 0 _ = []

226

-        go n cur = wordToWord8# (word64ToWord# cur) : go (n-1) (cur `uncheckedShiftRL64#` 8#)

273

+        w8 :: Int -> Word8

274

+        w8 shift = fromIntegral ((w `unsafeShiftR` shift) .&. 0xFF)

227

275

     in case bs_len of

228

276

          0 -> SBS.empty

229

-         1 -> SBS.singleton $ wordToWord8# (word64ToWord# w)

230

-         2 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 8#) , w8_64 w]

231

-         3 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]

232

-         4 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 24#), w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]

233

-         5 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 32#), w8_64 (w `uncheckedShiftRL64#` 24#), w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]

234

-         6 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 40#), w8_64 (w `uncheckedShiftRL64#` 32#), w8_64 (w `uncheckedShiftRL64#` 24#), w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]

235

-         7 -> SBS.pack [ w8_64 (w `uncheckedShiftRL64#` 48#), w8_64 (w `uncheckedShiftRL64#` 40#), w8_64 (w `uncheckedShiftRL64#` 32#), w8_64 (w `uncheckedShiftRL64#` 24#), w8_64 (w `uncheckedShiftRL64#` 16#), w8_64 (w `uncheckedShiftRL64#` 8#), w8_64 w)]

236

-  | otherwise

277

+         1 -> SBS.singleton $ w8 0

278

+         2 -> SBS.pack [ w8 0, w8 8 ]

279

+         3 -> SBS.pack [ w8 0, w8 8, w8 16 ]

280

+         4 -> SBS.pack [ w8 0, w8 8, w8 16, w8 24 ]

281

+         5 -> SBS.pack [ w8 0, w8 8, w8 16, w8 24, w8 32 ]

282

+         6 -> SBS.pack [ w8 0, w8 8, w8 16, w8 24, w8 32, w8 40 ]

283

+         7 -> SBS.pack [ w8 0, w8 8, w8 16, w8 24, w8 32, w8 40, w8 48 ]

284

+         _ -> panic "fs_sbs: invalid inline string length"

285

+  | otherwise =

286

+    -- Table reference: need to extract index from lower 48 bits and look up

287

+    let idx = fromIntegral (w .&. 0x0000FFFFFFFFFFFF) :: Int

288

+        payload = lookupPayloadByIndex idx

289

+    in payload_sbs payload

290

237

291

 n_chars :: FastString -> Int

292

+n_chars (FastStringId w)

293

+  | w .&. 0xf8000000_00000000 == 0xf8000000_00000000 =

294

+    -- Inline string: count UTF-8 characters

295

+    utf8CountCharsShortByteString (fs_sbs (FastStringId w))

296

+  | otherwise =

297

+    -- Table reference: extract character count from top 16 bits

298

+    fromIntegral (w `unsafeShiftR` 48)

299

238

300

 uniq :: FastString -> Int

301

+uniq (FastStringId w) = fromIntegral w

239

302

240

303

 instance Eq FastString where

241

304

   f1 == f2  =  uniq f1 == uniq f2

...

@@ -326,23 +389,120 @@ and updates to multiple buckets with low synchronization overhead.

326

389

327

390

 See Note [Updating the FastString table] on how it's updated.

328

391

-}

392

+-- | A segment of the payload array

393

+data PayloadSegment = PayloadSegment

394

+  (MutableArray# RealWorld FastStringPayload)

395

396

+-- | Boxed wrapper for Array# to allow storage in IORef

397

+data SegmentArray = SegmentArray (Array# (IORef PayloadSegment))

398

399

+-- | Append-only array of FastStringPayload for O(1) lookup by index

400

+data PayloadArray = PayloadArray

401

+  {-# UNPACK #-} !FastMutInt  -- ^ Next free index

402

+  {-# UNPACK #-} !(IORef SegmentArray)  -- ^ Growable segmented array

403

+  {-# UNPACK #-} !(MVar ())  -- ^ Lock for growing the array

404

405

+-- Number of payloads per segment

406

+payloadSegmentSize :: Int

407

+payloadSegmentSize = 1024

408

409

+payloadSegmentBits :: Int

410

+payloadSegmentBits = 10  -- 2^10 = 1024

411

412

+initialPayloadSegments :: Int

413

+initialPayloadSegments = 256

414

415

+-- | Ensure a payload segment exists, growing the array if necessary

416

+-- Returns the segment IORef for the given segment index

417

+ensurePayloadSegment :: Int -> IO (IORef PayloadSegment)

418

+ensurePayloadSegment segmentIdx = do

419

+  let !(FastStringTable _ (PayloadArray _ segmentsRef growLock)) = stringTable

420

+      !(I# segmentIdx#) = segmentIdx

421

+  SegmentArray segments# <- readIORef segmentsRef

422

+  let currentSize# = sizeofArray# segments#

423

424

+  if isTrue# (segmentIdx# <# currentSize#)

425

+    then do

426

+      -- Segment exists, return it

427

+      let (# segmentRef #) = indexArray# segments# segmentIdx#

428

+      return segmentRef

429

+    else do

430

+      -- Need to grow the array

431

+      withMVar growLock $ \_ -> do

432

+        -- Re-check after acquiring lock (another thread might have grown it)

433

+        SegmentArray segments'# <- readIORef segmentsRef

434

+        let currentSize'# = sizeofArray# segments'#

435

+        if isTrue# (segmentIdx# <# currentSize'#)

436

+          then do

437

+            let (# segmentRef #) = indexArray# segments'# segmentIdx#

438

+            return segmentRef

439

+          else do

440

+            -- Double the size

441

+            let newSize# = currentSize'# *# 2#

442

+                !(I# payloadSegmentSize#) = payloadSegmentSize

443

+            SegmentArray newSegments# <- IO $ \s1# ->

444

+              case newArray# newSize# (panic "unallocated_segment") s1# of

445

+                (# s2#, arr# #) ->

446

+                  case copyArray# segments'# 0# arr# 0# currentSize'# s2# of

447

+                    s3# ->

448

+                      -- Allocate new segments

449

+                      let allocLoop i# s#

450

+                            | isTrue# (i# ==# newSize#) = s#

451

+                            | otherwise =

452

+                                case newArray# payloadSegmentSize# (panic "uninitialized payload") s# of

453

+                                  (# s'#, seg# #) -> case newIORef (PayloadSegment seg#) `unIO` s'# of

454

+                                    (# s''#, segRef #) -> case writeArray# arr# i# segRef s''# of

455

+                                      s'''# -> allocLoop (i# +# 1#) s'''#

456

+                      in case allocLoop currentSize'# s3# of

457

+                           s4# -> case unsafeFreezeArray# arr# s4# of

458

+                             (# s5#, frozen# #) -> (# s5#, SegmentArray frozen# #)

459

+            writeIORef segmentsRef (SegmentArray newSegments#)

460

+            let (# segmentRef #) = indexArray# newSegments# segmentIdx#

461

+            return segmentRef

462

463

+-- | Allocate a payload in the global payload array and return its index

464

+allocatePayload :: FastStringPayload -> IO Int

465

+allocatePayload payload = do

466

+  let !(FastStringTable _ (PayloadArray nextIdx _ _)) = stringTable

467

+  idx <- atomicFetchAddFastMut nextIdx 1

468

+  let !(I# idx#) = idx

469

+      !(I# payloadSegmentBits#) = payloadSegmentBits

470

+      !(I# payloadSegmentSize#) = payloadSegmentSize

471

+      segmentIdx = I# (idx# `uncheckedIShiftRL#` payloadSegmentBits#)

472

+      offsetInSegment# = idx# `andI#` (payloadSegmentSize# -# 1#)

473

+  segmentRef <- ensurePayloadSegment segmentIdx

474

+  PayloadSegment segment# <- readIORef segmentRef

475

+  IO $ \s# ->

476

+    case writeArray# segment# offsetInSegment# payload s# of

477

+      s'# -> (# s'#, () #)

478

+  return idx

479

480

+-- | Look up a FastStringPayload by its index

481

+-- The segment is guaranteed to exist because indices only come from allocatePayload

482

+lookupPayloadByIndex :: Int -> FastStringPayload

483

+lookupPayloadByIndex idx = inlinePerformIO $ do

484

+  let !(FastStringTable _ (PayloadArray _ segmentsRef _)) = stringTable

485

+      !(I# idx#) = idx

486

+      !(I# payloadSegmentBits#) = payloadSegmentBits

487

+      !(I# payloadSegmentSize#) = payloadSegmentSize

488

+      segmentIdx# = idx# `uncheckedIShiftRL#` payloadSegmentBits#

489

+      offsetInSegment# = idx# `andI#` (payloadSegmentSize# -# 1#)

490

+  SegmentArray segments# <- readIORef segmentsRef

491

+  let (# segmentRef #) = indexArray# segments# segmentIdx#

492

+  PayloadSegment segment# <- readIORef segmentRef

493

+  IO $ readArray# segment# offsetInSegment#

494

329

495

 data FastStringTable = FastStringTable

330

-  {-# UNPACK #-} !FastMutInt

331

-  -- ^ The unique ID counter shared with all buckets

332

-  --

333

-  -- We unpack the 'FastMutInt' counter as it is always consumed strictly.

334

-  -- We mark this as 'NOUNPACK' as this 'FastMutInt' is retained by a thunk

335

-  -- in 'mkFastStringWith' and needs to be boxed any way.

336

-  -- If this is unpacked, then we box this single 'FastMutInt' once for each

337

-  -- allocated FastString.

338

496

   (Array# (IORef FastStringTableSegment)) -- ^  concurrent segments

497

+  !PayloadArray  -- ^ Global payload array

339

498

340

499

 data TableSegment a = TableSegment

341

500

   {-# UNPACK #-} !(MVar ())  -- the lock for write in each segment

342

501

   {-# UNPACK #-} !FastMutInt -- the number of elements

343

502

   (MutableArray# RealWorld [a]) -- buckets in this segment

344

503

345

-type FastStringTableSegment = TableSegment FastString

504

+-- Hash table now stores indices into the payload array

505

+type FastStringTableSegment = TableSegment Int

346

506

347

507

 data FastZStringTable = FastZStringTable

348

508

   {-# UNPACK #-} !FastMutInt

...

@@ -415,6 +575,9 @@ stringTable :: FastStringTable

415

575

 stringTable = unsafePerformIO $ do

416

576

   let !(I# numSegments#) = numSegments

417

577

       !(I# initialNumBuckets#) = initialNumBuckets

578

+      !(I# payloadSegmentSize#) = payloadSegmentSize

579

+      !(I# numPayloadSegments#) = initialPayloadSegments

580

418

581

       loop a# i# s1#

419

582

         | isTrue# (i# ==# numSegments#) = s1#

420

583

         | otherwise = case newMVar () `unIO` s1# of

...

@@ -424,13 +587,33 @@ stringTable = unsafePerformIO $ do

424

587

                     (TableSegment lock counter buckets#) `unIO` s4# of

425

588

                   (# s5#, segment #) -> case writeArray# a# i# segment s5# of

426

589

                     s6# -> loop a# (i# +# 1#) s6#

427

-  uid <- newFastMutInt 603979776 -- ord '$' * 0x01000000

590

591

+      -- Initialize payload array segments

592

+      payloadLoop a# i# s1#

593

+        | isTrue# (i# ==# numPayloadSegments#) = s1#

594

+        | otherwise = case newArray# payloadSegmentSize#

595

+                           (panic "uninitialized payload") s1# of

596

+            (# s2#, segment# #) -> case newIORef (PayloadSegment segment#) `unIO` s2# of

597

+              (# s3#, segmentRef #) -> case writeArray# a# i# segmentRef s3# of

598

+                s4# -> payloadLoop a# (i# +# 1#) s4#

599

600

+  nextPayloadIdx <- newFastMutInt 0

601

+  payloadGrowLock <- newMVar ()

602

603

+  payloadArray <- IO $ \s1# ->

604

+    case newArray# numPayloadSegments# (panic "payload_array") s1# of

605

+      (# s2#, arr# #) -> case payloadLoop arr# 0# s2# of

606

+        s3# -> case unsafeFreezeArray# arr# s3# of

607

+          (# s4#, segments# #) -> case newIORef (SegmentArray segments#) `unIO` s4# of

608

+            (# s5#, segmentsRef #) ->

609

+              (# s5#, PayloadArray nextPayloadIdx segmentsRef payloadGrowLock #)

610

428

611

   tab <- IO $ \s1# ->

429

612

     case newArray# numSegments# (panic "string_table") s1# of

430

613

       (# s2#, arr# #) -> case loop arr# 0# s2# of

431

614

         s3# -> case unsafeFreezeArray# arr# s3# of

432

615

           (# s4#, segments# #) ->

433

-            (# s4#, FastStringTable uid segments# #)

616

+            (# s4#, FastStringTable segments# payloadArray #)

434

617

435

618

   -- use the support wired into the RTS to share this CAF among all images of

436

619

   -- libHSghc

...

@@ -540,49 +723,69 @@ The procedure goes like this:

540

723

    * Otherwise, insert and return the string we created.

541

724

-}

542

725

543

-mkFastStringWith

544

-    :: (Int -> IO FastString) -> ShortByteString -> IO FastString

545

-mkFastStringWith mk_fs sbs = do

546

-  TableSegment lock _ buckets# <- readIORef segmentRef

547

-  let idx# = hashToIndex# buckets# hash#

548

-  bucket <- IO $ readArray# buckets# idx#

549

-  case bucket_match bucket sbs of

550

-    Just found -> return found

551

-    Nothing -> do

552

-      -- The withMVar below is not dupable. It can lead to deadlock if it is

553

-      -- only run partially and putMVar is not called after takeMVar.

554

-      noDuplicate

555

-      n <- get_uid

556

-      new_fs <- mk_fs n

557

-      withMVar lock $ \_ -> insert new_fs

726

+mkFastStringWith :: ShortByteString -> IO FastString

727

+mkFastStringWith sbs = do

728

+  let len = SBS.length sbs

729

+  if len <= 7

730

+    then do

731

+      -- Inline strings: encode directly, no table lookup needed

732

+      return $ FastStringId (encodeInline sbs)

733

+    else do

734

+      -- Table strings: check for existing index, or create new one

735

+      let chars = utf8CountCharsShortByteString sbs

736

+      TableSegment lock _ buckets# <- readIORef segmentRef

737

+      let idx# = hashToIndex# buckets# hash#

738

+      bucket <- IO $ readArray# buckets# idx#

739

+      case bucket_match bucket sbs of

740

+        Just payloadIdx -> do

741

+          -- Found existing payload index, convert to FastString reference

742

+          return $ FastStringId (encodeTableRef chars payloadIdx)

743

+        Nothing -> do

744

+          -- Create new payload and insert into table

745

+          noDuplicate

746

+          withMVar lock $ \_ -> insert

558

747

   where

559

-    !(FastStringTable uid segments#) = stringTable

560

-    get_uid = atomicFetchAddFastMut uid 1

748

+    !(FastStringTable segments# _) = stringTable

561

749

562

750

     !(I# hash#) = hashStr sbs

563

751

     (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)

564

-    insert fs = do

565

-      TableSegment _ counter buckets# <- maybeResizeSegment hashFastString segmentRef

752

753

+    -- Helper to hash an index for table operations

754

+    hashIndex :: Int -> Int

755

+    hashIndex idx = hashStr (payload_sbs (lookupPayloadByIndex idx))

756

757

+    insert :: IO FastString

758

+    insert = do

759

+      let chars = utf8CountCharsShortByteString sbs

760

+          payload = FastString { payload_sbs = sbs }

761

+      -- Allocate payload in global array and get its index

762

+      payloadIdx <- allocatePayload payload

763

+      TableSegment _ counter buckets# <- maybeResizeSegment hashIndex segmentRef

566

764

       let idx# = hashToIndex# buckets# hash#

567

765

       bucket <- IO $ readArray# buckets# idx#

568

766

       case bucket_match bucket sbs of

569

-        -- The FastString was added by another thread after previous read and

767

+        -- The payload was added by another thread after previous read and

570

768

         -- before we acquired the write lock.

571

-        Just found -> return found

769

+        Just existingIdx -> do

770

+          return $ FastStringId (encodeTableRef chars existingIdx)

572

771

         Nothing -> do

772

+          -- Insert the payload index into the hash table

573

773

           IO $ \s1# ->

574

-            case writeArray# buckets# idx# (fs : bucket) s1# of

774

+            case writeArray# buckets# idx# (payloadIdx : bucket) s1# of

575

775

               s2# -> (# s2#, () #)

576

776

           _ <- atomicFetchAddFastMut counter 1

577

-          return fs

777

+          return $ FastStringId (encodeTableRef chars payloadIdx)

578

778

579

-bucket_match :: [FastString] -> ShortByteString -> Maybe FastString

580

-bucket_match fs sbs = go fs

779

+-- | Match a ShortByteString against a bucket of payload indices

780

+-- Returns the matching index if found

781

+bucket_match :: [Int] -> ShortByteString -> Maybe Int

782

+bucket_match indices sbs = go indices

581

783

   where go [] = Nothing

582

-        go (fs@(FastString {n_chars = n_chars, fs_sbs=fs_sbs}) : ls)

583

-          | length fs_sbs /= SBS.length fs_sbs

584

-          | fs_sbs == sbs = Just fs

585

-          | otherwise     = go ls

784

+        go (idx : rest) =

785

+          let payload = lookupPayloadByIndex idx

786

+          in if payload_sbs payload == sbs

787

+             then Just idx

788

+             else go rest

586

789

 -- bucket_match used to inline before changes to instance Eq ShortByteString

587

790

 -- in bytestring-0.12, which made it slightly larger than inlining threshold.

588

791

 -- Non-inlining causes a small, but measurable performance regression, so let's force it.

...

@@ -592,7 +795,8 @@ bucket_match fs sbs = go fs

592

795

 {-# INLINE mkNewFastZString #-}

593

796

594

797

 mkNewFastZString :: FastString -> IO FastZString

595

-mkNewFastZString (FastString uniq _ sbs) = do

798

+mkNewFastZString fs = do

799

+  let sbs = fs_sbs fs

596

800

   TableSegment lock _ buckets# <- readIORef segmentRef

597

801

   let idx# = hashToIndex# buckets# hash#

598

802

   bucket <- IO $ readArray# buckets# idx#

...

@@ -611,7 +815,7 @@ mkNewFastZString (FastString uniq _ sbs) = do

611

815

612

816

     -- FastString uniques are sequential, pass them through a linear

613

817

     -- congruential generator to randomise

614

-    !(I# hash#) = uniq*6364136223846793005 + 1

818

+    !(I# hash#) = uniq fs * 6364136223846793005 + 1

615

819

     (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)

616

820

     insert n fs = do

617

821

       TableSegment _ counter buckets# <- maybeResizeSegment zStringHash segmentRef

...

@@ -642,7 +846,7 @@ mkFastStringBytes !ptr !len =

642

846

     -- idempotent.

643

847

     unsafeDupablePerformIO $ do

644

848

         sbs <- newSBSFromPtr ptr len

645

-        mkFastStringWith (mkNewFastStringShortByteString sbs) sbs

849

+        mkFastStringWith sbs

646

850

647

851

 newSBSFromPtr :: Ptr a -> Int -> IO ShortByteString

648

852

 newSBSFromPtr (Ptr src#) (I# len#) =

...

@@ -656,14 +860,13 @@ newSBSFromPtr (Ptr src#) (I# len#) =

656

860

 mkFastStringByteString :: ByteString -> FastString

657

861

 mkFastStringByteString bs =

658

862

   let sbs = SBS.toShort bs in

659

-  inlinePerformIO $

660

-      mkFastStringWith (mkNewFastStringShortByteString sbs) sbs

863

+  inlinePerformIO $ mkFastStringWith sbs

661

864

662

865

 -- | Create a 'FastString' from an existing 'ShortByteString' without

663

866

 -- copying.

664

867

 mkFastStringShortByteString :: ShortByteString -> FastString

665

868

 mkFastStringShortByteString sbs =

666

-  inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs

869

+  inlinePerformIO $ mkFastStringWith sbs

667

870

668

871

 -- | Creates a UTF-8 encoded 'FastString' from a 'String'

669

872

 mkFastString :: String -> FastString

...

@@ -671,7 +874,7 @@ mkFastString :: String -> FastString

671

874

 mkFastString str =

672

875

   inlinePerformIO $ do

673

876

     let !sbs = utf8EncodeShortByteString str

674

-    mkFastStringWith (mkNewFastStringShortByteString sbs) sbs

877

+    mkFastStringWith sbs

675

878

676

879

 -- The following rule is used to avoid polluting the non-reclaimable FastString

677

880

 -- table with transient strings when we only want their encoding.

...

@@ -687,11 +890,6 @@ mkFastStringByteList str = mkFastStringShortByteString (SBS.pack str)

687

890

 mkZFastString :: ShortByteString -> FastZString

688

891

 mkZFastString sbs = mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs))

689

892

690

-mkNewFastStringShortByteString :: ShortByteString -> Int -> IO FastString

691

-mkNewFastStringShortByteString sbs uid = do

692

-  let chars = utf8CountCharsShortByteString sbs

693

-  return (FastString uid chars sbs)

694

695

893

 hashStr  :: ShortByteString -> Int

696

894

  -- produce a hash value between 0 & m (inclusive)

697

895

 hashStr sbs@(SBS.SBS ba#) = loop 0# 0#

...

@@ -719,7 +917,7 @@ lengthFS fs = n_chars fs

719

917

720

918

 -- | Returns @True@ if the 'FastString' is empty

721

919

 nullFS :: FastString -> Bool

722

-nullFS fs = SBS.null $ fs_sbs fs

920

+nullFS (FastStringId w) = w == 0xF800000000000000

723

921

724

922

 -- | Lazily unpacks and decodes the FastString

725

923

 unpackFS :: FastString -> String

...

@@ -755,12 +953,14 @@ uniqueOfFS :: FastString -> Int

755

953

 uniqueOfFS fs = uniq fs

756

954

757

955

 nilFS :: FastString

758

-nilFS = mkFastString ""

956

+nilFS = FastStringId 0xF800000000000000  -- Empty inline string

759

957

760

958

 -- -----------------------------------------------------------------------------

761

959

 -- Stats

762

960

763

-getFastStringTable :: IO [[[FastString]]]

961

+-- | Get all FastString payloads from the table (for debugging/stats)

962

+-- Returns indices organized by segment and bucket

963

+getFastStringTable :: IO [[[Int]]]

764

964

 getFastStringTable =

765

965

   forM [0 .. numSegments - 1] $ \(I# i#) -> do

766

966

     let (# segmentRef #) = indexArray# segments# i#

...

@@ -769,7 +969,7 @@ getFastStringTable =

769

969

     forM [0 .. bucketSize - 1] $ \(I# j#) ->

770

970

       IO $ readArray# buckets# j#

771

971

   where

772

-    !(FastStringTable _ segments#) = stringTable

972

+    !(FastStringTable segments# _) = stringTable

773

973

774

974

 getFastZStringTable :: IO [[[FastZString]]]

775

975

 getFastZStringTable =

Zubin pushed to branch wip/inline-fs at Glasgow Haskell Compiler / GHC

Commits:

2 changed files:

Changes:

@@ -413,7 +413,7 @@ dumpFinalStats logger = do
      fzss <- getFastZStringTable
      let ppr_table         = fmap ppr_segment (fss `zip` [0..])
          ppr_segment (s,n) = hang (text "Segment" <+> int n) 2 (vcat (fmap ppr_bucket (s `zip` [0..])))
 -        ppr_bucket  (b,n) = hang (text "Bucket" <+> int n) 2 (vcat (fmap ftext b))
 +        ppr_bucket  (b,n) = hang (text "Bucket" <+> int n) 2 (vcat (fmap (ftext . mkFastStringShortByteString . payload_sbs . lookupPayloadByIndex) b))
      putDumpFileMaybe logger Opt_D_dump_faststrings "FastStrings" FormatText (vcat ppr_table)
      let ppr_table'         = fmap ppr_segment' (fzss `zip` [0..])
          ppr_segment' (s,n) = hang (text "Segment" <+> int n) 2 (vcat (fmap ppr_bucket' (s `zip` [0..])))