Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC

Commits:

23 changed files:

Changes:

  • compiler/GHC/CmmToAsm/X86/CodeGen.hs
    ... ... @@ -376,7 +376,7 @@ stmtToInstrs bid stmt = do
    376 376
           --We try to arrange blocks such that the likely branch is the fallthrough
    
    377 377
           --in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
    
    378 378
           CmmCondBranch arg true false _ -> genCondBranch bid true false arg
    
    379
    -      CmmSwitch arg ids -> genSwitch arg ids
    
    379
    +      CmmSwitch arg ids -> genSwitch arg ids bid
    
    380 380
           CmmCall { cml_target = arg
    
    381 381
                   , cml_args_regs = gregs } -> genJump arg (jumpRegs platform gregs)
    
    382 382
           _ ->
    
    ... ... @@ -489,13 +489,6 @@ is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
    489 489
       where i64 = fromIntegral i :: Int64
    
    490 490
     
    
    491 491
     
    
    492
    --- | Convert a BlockId to some CmmStatic data
    
    493
    -jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
    
    494
    -jumpTableEntry config Nothing = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
    
    495
    -jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    
    496
    -    where blockLabel = blockLbl blockid
    
    497
    -
    
    498
    -
    
    499 492
     -- -----------------------------------------------------------------------------
    
    500 493
     -- General things for putting together code sequences
    
    501 494
     
    
    ... ... @@ -5375,11 +5368,52 @@ index (1),
    5375 5368
         indexExpr    = UU_Conv(indexOffset); // == 1::I64
    
    5376 5369
     
    
    5377 5370
     See #21186.
    
    5378
    --}
    
    5379 5371
     
    
    5380
    -genSwitch :: CmmExpr -> SwitchTargets -> NatM InstrBlock
    
    5372
    +Note [Jump tables]
    
    5373
    +~~~~~~~~~~~~~~~~~~
    
    5374
    +The x86 backend has a virtual JMP_TBL instruction which payload can be used to
    
    5375
    +generate both the jump instruction and the jump table contents. `genSwitch` is
    
    5376
    +responsible for generating these JMP_TBL instructions.
    
    5377
    +
    
    5378
    +Depending on `-fPIC` flag and on the architecture, we generate the following
    
    5379
    +jump table variants:
    
    5380
    +
    
    5381
    +  | Variant |  Arch  | Table's contents                       | Reference to the table |
    
    5382
    +  |---------|--------|----------------------------------------|------------------------|
    
    5383
    +  |     PIC |  Both  | Relative offset: target_lbl - base_lbl | PIC                    |
    
    5384
    +  | Non-PIC | 64-bit | Absolute: target_lbl                   | Non-PIC (rip-relative) |
    
    5385
    +  | Non-PIC | 32-bit | Absolute: target_lbl                   | Non-PIC (absolute)     |
    
    5386
    +
    
    5387
    +For the PIC variant, we store relative entries (`target_lbl - base_lbl`) in the
    
    5388
    +jump table. Using absolute entries with PIC would require target_lbl symbols to
    
    5389
    +be resolved at link time, hence to be global labels (currently they are local
    
    5390
    +labels).
    
    5391
    +
    
    5392
    +We use the block_id of the code containing the jump as `base_lbl`. It ensures
    
    5393
    +that target_lbl and base_lbl are close enough to each others, avoiding
    
    5394
    +overflows.
    
    5395
    +
    
    5396
    +Historical note: in the past we used the table label `table_lbl` as base_lbl. It
    
    5397
    +allowed the jumping code to only compute one global address (table_lbl) both to
    
    5398
    +read the table and to compute the target address. However:
    
    5381 5399
     
    
    5382
    -genSwitch expr targets = do
    
    5400
    + * the table could be too far from the jump and on Windows which only
    
    5401
    +   has 32-bit relative relocations (IMAGE_REL_AMD64_REL64 doesn't exist),
    
    5402
    +   `dest_lbl - table_lbl` overflowed (see #24016)
    
    5403
    +
    
    5404
    + * Mac OS X/x86-64 linker was unable to handle `.quad L1 - L0`
    
    5405
    +   relocations if L0 wasn't preceded by a non-anonymous label in its
    
    5406
    +   section (which was the case with table_lbl). Hence we used to put the
    
    5407
    +   jump table in the .text section in this case.
    
    5408
    +
    
    5409
    +
    
    5410
    +-}
    
    5411
    +
    
    5412
    +-- | Generate a JMP_TBL instruction
    
    5413
    +--
    
    5414
    +-- See Note [Jump tables]
    
    5415
    +genSwitch :: CmmExpr -> SwitchTargets -> BlockId -> NatM InstrBlock
    
    5416
    +genSwitch expr targets bid = do
    
    5383 5417
       config <- getConfig
    
    5384 5418
       let platform = ncgPlatform config
    
    5385 5419
           expr_w = cmmExprWidth platform expr
    
    ... ... @@ -5390,79 +5424,76 @@ genSwitch expr targets = do
    5390 5424
           indexExpr = CmmMachOp
    
    5391 5425
             (MO_UU_Conv expr_w (platformWordWidth platform))
    
    5392 5426
             [indexExpr0]
    
    5393
    -  if ncgPIC config
    
    5394
    -  then do
    
    5395
    -        (reg,e_code) <- getNonClobberedReg indexExpr
    
    5396
    -           -- getNonClobberedReg because it needs to survive across t_code
    
    5397
    -        lbl <- getNewLabelNat
    
    5398
    -        let is32bit = target32Bit platform
    
    5399
    -            os = platformOS platform
    
    5400
    -            -- Might want to use .rodata.<function we're in> instead, but as
    
    5401
    -            -- long as it's something unique it'll work out since the
    
    5402
    -            -- references to the jump table are in the appropriate section.
    
    5403
    -            rosection = case os of
    
    5404
    -              -- on Mac OS X/x86_64, put the jump table in the text section to
    
    5405
    -              -- work around a limitation of the linker.
    
    5406
    -              -- ld64 is unable to handle the relocations for
    
    5407
    -              --     .quad L1 - L0
    
    5408
    -              -- if L0 is not preceded by a non-anonymous label in its section.
    
    5409
    -              OSDarwin | not is32bit -> Section Text lbl
    
    5410
    -              _ -> Section ReadOnlyData lbl
    
    5411
    -        dynRef <- cmmMakeDynamicReference config DataReference lbl
    
    5412
    -        (tableReg,t_code) <- getSomeReg $ dynRef
    
    5413
    -        let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
    
    5414
    -                                       (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0))
    
    5415
    -
    
    5416
    -        return $ e_code `appOL` t_code `appOL` toOL [
    
    5417
    -                                ADD (intFormat (platformWordWidth platform)) op (OpReg tableReg),
    
    5418
    -                                JMP_TBL (OpReg tableReg) ids rosection lbl
    
    5419
    -                       ]
    
    5420
    -  else do
    
    5421
    -        (reg,e_code) <- getSomeReg indexExpr
    
    5422
    -        lbl <- getNewLabelNat
    
    5423
    -        let is32bit = target32Bit platform
    
    5424
    -        if is32bit
    
    5425
    -          then let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (platformWordSizeInBytes platform)) (ImmCLbl lbl))
    
    5426
    -                   jmp_code = JMP_TBL op ids (Section ReadOnlyData lbl) lbl
    
    5427
    -               in return $ e_code `appOL` unitOL jmp_code
    
    5428
    -          else do
    
    5427
    +
    
    5428
    +      (offset, blockIds) = switchTargetsToTable targets
    
    5429
    +      ids = map (fmap DestBlockId) blockIds
    
    5430
    +
    
    5431
    +      is32bit = target32Bit platform
    
    5432
    +      fmt = archWordFormat is32bit
    
    5433
    +
    
    5434
    +  table_lbl <- getNewLabelNat
    
    5435
    +  let bid_lbl = blockLbl bid
    
    5436
    +  let table_section = Section ReadOnlyData table_lbl
    
    5437
    +
    
    5438
    +  -- see Note [Jump tables] for a description of the following 3 variants.
    
    5439
    +  if
    
    5440
    +    | ncgPIC config -> do
    
    5441
    +      -- PIC support: store relative offsets in the jump table to allow the code
    
    5442
    +      -- to be relocated without updating the table. The table itself and the
    
    5443
    +      -- block label used to make the relative labels absolute are read in a PIC
    
    5444
    +      -- way (via cmmMakeDynamicReference).
    
    5445
    +      (reg,e_code) <- getNonClobberedReg indexExpr -- getNonClobberedReg because it needs to survive across t_code and j_code
    
    5446
    +      (tableReg,t_code) <- getNonClobberedReg =<< cmmMakeDynamicReference config DataReference table_lbl
    
    5447
    +      (targetReg,j_code) <- getSomeReg =<< cmmMakeDynamicReference config DataReference bid_lbl
    
    5448
    +      pure $ e_code `appOL` t_code `appOL` j_code `appOL` toOL
    
    5449
    +            [ ADD fmt (OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0)))
    
    5450
    +                      (OpReg targetReg)
    
    5451
    +            , JMP_TBL (OpReg targetReg) ids table_section table_lbl (Just bid_lbl)
    
    5452
    +            ]
    
    5453
    +
    
    5454
    +    | not is32bit -> do
    
    5455
    +      -- 64-bit non-PIC code
    
    5456
    +      (reg,e_code) <- getSomeReg indexExpr
    
    5457
    +      tableReg <- getNewRegNat (intFormat (platformWordWidth platform))
    
    5458
    +      targetReg <- getNewRegNat (intFormat (platformWordWidth platform))
    
    5459
    +      pure $ e_code `appOL` toOL
    
    5429 5460
                 -- See Note [%rip-relative addressing on x86-64].
    
    5430
    -            tableReg <- getNewRegNat (intFormat (platformWordWidth platform))
    
    5431
    -            targetReg <- getNewRegNat (intFormat (platformWordWidth platform))
    
    5432
    -            let op = OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0))
    
    5433
    -                fmt = archWordFormat is32bit
    
    5434
    -                code = e_code `appOL` toOL
    
    5435
    -                    [ LEA fmt (OpAddr (AddrBaseIndex EABaseRip EAIndexNone (ImmCLbl lbl))) (OpReg tableReg)
    
    5436
    -                    , MOV fmt op (OpReg targetReg)
    
    5437
    -                    , JMP_TBL (OpReg targetReg) ids (Section ReadOnlyData lbl) lbl
    
    5438
    -                    ]
    
    5439
    -            return code
    
    5440
    -  where
    
    5441
    -    (offset, blockIds) = switchTargetsToTable targets
    
    5442
    -    ids = map (fmap DestBlockId) blockIds
    
    5461
    +            [ LEA fmt (OpAddr (AddrBaseIndex EABaseRip EAIndexNone (ImmCLbl table_lbl))) (OpReg tableReg)
    
    5462
    +            , MOV fmt (OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0)))
    
    5463
    +                      (OpReg targetReg)
    
    5464
    +            , JMP_TBL (OpReg targetReg) ids table_section table_lbl Nothing
    
    5465
    +            ]
    
    5466
    +
    
    5467
    +    | otherwise -> do
    
    5468
    +      -- 32-bit non-PIC code is a straightforward jump to &table[entry].
    
    5469
    +      (reg,e_code) <- getSomeReg indexExpr
    
    5470
    +      pure $ e_code `appOL` unitOL
    
    5471
    +            ( JMP_TBL (OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (platformWordSizeInBytes platform)) (ImmCLbl table_lbl)))
    
    5472
    +                      ids table_section table_lbl Nothing
    
    5473
    +            )
    
    5443 5474
     
    
    5444 5475
     generateJumpTableForInstr :: NCGConfig -> Instr -> Maybe (NatCmmDecl (Alignment, RawCmmStatics) Instr)
    
    5445
    -generateJumpTableForInstr config (JMP_TBL _ ids section lbl)
    
    5446
    -    = let getBlockId (DestBlockId id) = id
    
    5447
    -          getBlockId _ = panic "Non-Label target in Jump Table"
    
    5448
    -          blockIds = map (fmap getBlockId) ids
    
    5449
    -      in Just (createJumpTable config blockIds section lbl)
    
    5450
    -generateJumpTableForInstr _ _ = Nothing
    
    5451
    -
    
    5452
    -createJumpTable :: NCGConfig -> [Maybe BlockId] -> Section -> CLabel
    
    5453
    -                -> GenCmmDecl (Alignment, RawCmmStatics) h g
    
    5454
    -createJumpTable config ids section lbl
    
    5455
    -    = let jumpTable
    
    5456
    -            | ncgPIC config =
    
    5457
    -                  let ww = ncgWordWidth config
    
    5458
    -                      jumpTableEntryRel Nothing
    
    5459
    -                          = CmmStaticLit (CmmInt 0 ww)
    
    5460
    -                      jumpTableEntryRel (Just blockid)
    
    5461
    -                          = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 ww)
    
    5462
    -                          where blockLabel = blockLbl blockid
    
    5463
    -                  in map jumpTableEntryRel ids
    
    5464
    -            | otherwise = map (jumpTableEntry config) ids
    
    5465
    -      in CmmData section (mkAlignment 1, CmmStaticsRaw lbl jumpTable)
    
    5476
    +generateJumpTableForInstr config = \case
    
    5477
    +  JMP_TBL _ ids section table_lbl mrel_lbl ->
    
    5478
    +    let getBlockId (DestBlockId id) = id
    
    5479
    +        getBlockId _ = panic "Non-Label target in Jump Table"
    
    5480
    +        block_ids = map (fmap getBlockId) ids
    
    5481
    +
    
    5482
    +        jumpTable = case mrel_lbl of
    
    5483
    +          Nothing      -> map mk_absolute block_ids           -- absolute entries
    
    5484
    +          Just rel_lbl -> map (mk_relative rel_lbl) block_ids -- offsets relative to rel_lbl
    
    5485
    +
    
    5486
    +        mk_absolute = \case
    
    5487
    +          Nothing      -> CmmStaticLit (CmmInt 0 (ncgWordWidth config))
    
    5488
    +          Just blockid -> CmmStaticLit (CmmLabel (blockLbl blockid))
    
    5489
    +
    
    5490
    +        mk_relative rel_lbl = \case
    
    5491
    +          Nothing      -> CmmStaticLit (CmmInt 0 (ncgWordWidth config))
    
    5492
    +          Just blockid -> CmmStaticLit (CmmLabelDiffOff (blockLbl blockid) rel_lbl 0 (ncgWordWidth config))
    
    5493
    +
    
    5494
    +    in Just (CmmData section (mkAlignment 1, CmmStaticsRaw table_lbl jumpTable))
    
    5495
    +
    
    5496
    +  _ -> Nothing
    
    5466 5497
     
    
    5467 5498
     extractUnwindPoints :: [Instr] -> [UnwindPoint]
    
    5468 5499
     extractUnwindPoints instrs =
    

  • compiler/GHC/CmmToAsm/X86/Instr.hs
    ... ... @@ -252,6 +252,7 @@ data Instr
    252 252
                           [Maybe JumpDest] -- Targets of the jump table
    
    253 253
                           Section   -- Data section jump table should be put in
    
    254 254
                           CLabel    -- Label of jump table
    
    255
    +                      !(Maybe CLabel) -- Label used to compute relative offsets. Otherwise we store absolute addresses.
    
    255 256
             -- | X86 call instruction
    
    256 257
             | CALL        (Either Imm Reg) -- ^ Jump target
    
    257 258
                           [RegWithFormat]  -- ^ Arguments (required for register allocation)
    
    ... ... @@ -486,7 +487,7 @@ regUsageOfInstr platform instr
    486 487
         JXX    _ _          -> mkRU [] []
    
    487 488
         JXX_GBL _ _         -> mkRU [] []
    
    488 489
         JMP     op regs     -> mkRU (use_R addrFmt op regs) []
    
    489
    -    JMP_TBL op _ _ _    -> mkRU (use_R addrFmt op []) []
    
    490
    +    JMP_TBL op _ _ _ _  -> mkRU (use_R addrFmt op []) []
    
    490 491
         CALL (Left _)  params   -> mkRU params (map mkFmt $ callClobberedRegs platform)
    
    491 492
         CALL (Right reg) params -> mkRU (mk addrFmt reg:params) (map mkFmt $ callClobberedRegs platform)
    
    492 493
         CLTD   fmt          -> mkRU [mk fmt eax] [mk fmt edx]
    
    ... ... @@ -812,7 +813,7 @@ patchRegsOfInstr platform instr env
    812 813
         POP  fmt op          -> patch1 (POP  fmt) op
    
    813 814
         SETCC cond op        -> patch1 (SETCC cond) op
    
    814 815
         JMP op regs          -> JMP (patchOp op) regs
    
    815
    -    JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl
    
    816
    +    JMP_TBL op ids s tl jl -> JMP_TBL (patchOp op) ids s tl jl
    
    816 817
     
    
    817 818
         FMA3 fmt perm var x1 x2 x3 -> patch3 (FMA3 fmt perm var) x1 x2 x3
    
    818 819
     
    
    ... ... @@ -1016,9 +1017,9 @@ isJumpishInstr instr
    1016 1017
     canFallthroughTo :: Instr -> BlockId -> Bool
    
    1017 1018
     canFallthroughTo insn bid
    
    1018 1019
       = case insn of
    
    1019
    -    JXX _ target          -> bid == target
    
    1020
    -    JMP_TBL _ targets _ _ -> all isTargetBid targets
    
    1021
    -    _                     -> False
    
    1020
    +    JXX _ target            -> bid == target
    
    1021
    +    JMP_TBL _ targets _ _ _ -> all isTargetBid targets
    
    1022
    +    _                       -> False
    
    1022 1023
       where
    
    1023 1024
         isTargetBid target = case target of
    
    1024 1025
           Nothing                      -> True
    
    ... ... @@ -1031,9 +1032,9 @@ jumpDestsOfInstr
    1031 1032
     
    
    1032 1033
     jumpDestsOfInstr insn
    
    1033 1034
       = case insn of
    
    1034
    -        JXX _ id        -> [id]
    
    1035
    -        JMP_TBL _ ids _ _ -> [id | Just (DestBlockId id) <- ids]
    
    1036
    -        _               -> []
    
    1035
    +        JXX _ id            -> [id]
    
    1036
    +        JMP_TBL _ ids _ _ _ -> [id | Just (DestBlockId id) <- ids]
    
    1037
    +        _                   -> []
    
    1037 1038
     
    
    1038 1039
     
    
    1039 1040
     patchJumpInstr
    
    ... ... @@ -1042,8 +1043,8 @@ patchJumpInstr
    1042 1043
     patchJumpInstr insn patchF
    
    1043 1044
       = case insn of
    
    1044 1045
             JXX cc id       -> JXX cc (patchF id)
    
    1045
    -        JMP_TBL op ids section lbl
    
    1046
    -          -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section lbl
    
    1046
    +        JMP_TBL op ids section table_lbl rel_lbl
    
    1047
    +          -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section table_lbl rel_lbl
    
    1047 1048
             _               -> insn
    
    1048 1049
         where
    
    1049 1050
             patchJumpDest f (DestBlockId id) = DestBlockId (f id)
    
    ... ... @@ -1504,14 +1505,14 @@ shortcutJump fn insn = shortcutJump' fn (setEmpty :: LabelSet) insn
    1504 1505
                 Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
    
    1505 1506
                 Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
    
    1506 1507
             where seen' = setInsert id seen
    
    1507
    -    shortcutJump' fn _ (JMP_TBL addr blocks section tblId) =
    
    1508
    +    shortcutJump' fn _ (JMP_TBL addr blocks section table_lbl rel_lbl) =
    
    1508 1509
             let updateBlock (Just (DestBlockId bid))  =
    
    1509 1510
                     case fn bid of
    
    1510 1511
                         Nothing   -> Just (DestBlockId bid )
    
    1511 1512
                         Just dest -> Just dest
    
    1512 1513
                 updateBlock dest = dest
    
    1513 1514
                 blocks' = map updateBlock blocks
    
    1514
    -        in  JMP_TBL addr blocks' section tblId
    
    1515
    +        in  JMP_TBL addr blocks' section table_lbl rel_lbl
    
    1515 1516
         shortcutJump' _ _ other = other
    
    1516 1517
     
    
    1517 1518
     -- Here because it knows about JumpDest
    

  • compiler/GHC/CmmToAsm/X86/Ppr.hs
    ... ... @@ -895,7 +895,7 @@ pprInstr platform i = case i of
    895 895
        JMP op _
    
    896 896
           -> line $ text "\tjmp *" <> pprOperand platform (archWordFormat (target32Bit platform)) op
    
    897 897
     
    
    898
    -   JMP_TBL op _ _ _
    
    898
    +   JMP_TBL op _ _ _ _
    
    899 899
           -> pprInstr platform (JMP op [])
    
    900 900
     
    
    901 901
        CALL (Left imm) _
    

  • compiler/GHC/Core/Opt/Arity.hs
    ... ... @@ -2993,12 +2993,12 @@ pushCoValArg co
    2993 2993
         Pair tyL tyR = coercionKind co
    
    2994 2994
     
    
    2995 2995
     pushCoercionIntoLambda
    
    2996
    -    :: HasDebugCallStack => Subst -> InVar -> InExpr -> OutCoercionR -> Maybe (OutVar, OutExpr)
    
    2996
    +    :: HasDebugCallStack => InScopeSet -> Var -> CoreExpr -> CoercionR -> Maybe (Var, CoreExpr)
    
    2997 2997
     -- This implements the Push rule from the paper on coercions
    
    2998 2998
     --    (\x. e) |> co
    
    2999 2999
     -- ===>
    
    3000 3000
     --    (\x'. e |> co')
    
    3001
    -pushCoercionIntoLambda subst x e co
    
    3001
    +pushCoercionIntoLambda in_scope x e co
    
    3002 3002
         | assert (not (isTyVar x) && not (isCoVar x)) True
    
    3003 3003
         , Pair s1s2 t1t2 <- coercionKind co
    
    3004 3004
         , Just {}              <- splitFunTy_maybe s1s2
    
    ... ... @@ -3011,9 +3011,9 @@ pushCoercionIntoLambda subst x e co
    3011 3011
               -- Should we optimize the coercions here?
    
    3012 3012
               -- Otherwise they might not match too well
    
    3013 3013
               x' = x `setIdType` t1 `setIdMult` w1
    
    3014
    -          in_scope' = substInScopeSet subst `extendInScopeSet` x'
    
    3014
    +          in_scope' = in_scope `extendInScopeSet` x'
    
    3015 3015
               subst' =
    
    3016
    -            extendIdSubst (setInScope subst in_scope')
    
    3016
    +            extendIdSubst (setInScope emptySubst in_scope')
    
    3017 3017
                   x
    
    3018 3018
                   (mkCast (Var x') (mkSymCo co1))
    
    3019 3019
                 -- We substitute x' for x, except we need to preserve types.
    

  • compiler/GHC/Core/SimpleOpt.hs
    ... ... @@ -393,12 +393,19 @@ simple_app env e0@(Lam {}) as0@(_:_)
    393 393
           = wrapLet mb_pr $ do_beta env'' body as
    
    394 394
           where (env', b') = subst_opt_bndr env b
    
    395 395
     
    
    396
    -    do_beta env e@(Lam b body) as@(CastIt co:rest)
    
    397
    -      -- See Note [Desugaring unlifted newtypes]
    
    396
    +    -- See Note [Eliminate casts in function position]
    
    397
    +    do_beta env e@(Lam b _) as@(CastIt out_co:rest)
    
    398 398
           | isNonCoVarId b
    
    399
    -      , Just (b', body') <- pushCoercionIntoLambda (soe_subst env) b body co
    
    399
    +      -- Optimise the inner lambda to make it an 'OutExpr', which makes it
    
    400
    +      -- possible to call 'pushCoercionIntoLambda' with the 'OutCoercion' 'co'.
    
    401
    +      -- This is kind of horrible, as for nested casted lambdas with a big body,
    
    402
    +      -- we will repeatedly optimise the body (once for each binder). However,
    
    403
    +      -- we need to do this to avoid mixing 'InExpr' and 'OutExpr', or two
    
    404
    +      -- 'InExpr' with different environments (getting this wrong caused #26588 & #26589.)
    
    405
    +      , Lam out_b out_body <- simple_app env e []
    
    406
    +      , Just (b', body') <- pushCoercionIntoLambda (soeInScope env) out_b out_body out_co
    
    400 407
           = do_beta (soeZapSubst env) (Lam b' body') rest
    
    401
    -        -- soeZapSubst: pushCoercionIntoLambda applies the substitution
    
    408
    +        -- soeZapSubst: we've already optimised everything (the lambda and 'rest') by now.
    
    402 409
           | otherwise
    
    403 410
           = rebuild_app env (simple_opt_expr env e) as
    
    404 411
     
    
    ... ... @@ -511,7 +518,31 @@ TL;DR: To avoid the rest of the compiler pipeline seeing these bad lambas, we
    511 518
     rely on the simple optimiser to both inline the newtype unfolding and
    
    512 519
     subsequently deal with the resulting lambdas (either beta-reducing them
    
    513 520
     altogether or pushing coercions into them so that they satisfy the
    
    514
    -representation-polymorphism invariants).
    
    521
    +representation-polymorphism invariants). See Note [Eliminate casts in function position].
    
    522
    +
    
    523
    +[Alternative approach] (GHC ticket #26608)
    
    524
    +
    
    525
    +  We could instead, in the typechecker, emit a special form (a new constructor
    
    526
    +  of XXExprGhcTc) for instantiations of representation-polymorphic unlifted
    
    527
    +  newtypes (whether applied to a value argument or not):
    
    528
    +
    
    529
    +    UnliftedNT :: DataCon -> [Type] -> Coercion -> XXExprGhcTc
    
    530
    +
    
    531
    +  where "UnliftedNT nt_con [ty1, ...] co" represents the expression:
    
    532
    +
    
    533
    +    ( nt_con @ty1 ... ) |> co
    
    534
    +
    
    535
    +  The desugarer would then turn these AST nodes into appropriate Core, doing
    
    536
    +  what the simple optimiser does today:
    
    537
    +    - inline the compulsory unfolding of the newtype constructor
    
    538
    +    - apply it to its type arguments and beta reduce
    
    539
    +    - push the coercion into the resulting lambda
    
    540
    +
    
    541
    +  This would have several advantages:
    
    542
    +    - the desugarer would never produce "invalid" Core that needs to be
    
    543
    +      tidied up by the simple optimiser,
    
    544
    +    - the ugly and inefficient implementation described in
    
    545
    +      Note [Eliminate casts in function position] could be removed.
    
    515 546
     
    
    516 547
     Wrinkle [Unlifted newtypes with wrappers]
    
    517 548
     
    
    ... ... @@ -717,50 +748,49 @@ rhss here.
    717 748
     
    
    718 749
     Note [Eliminate casts in function position]
    
    719 750
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    720
    -Consider the following program:
    
    751
    +Due to the current implementation strategy for representation-polymorphic
    
    752
    +unlifted newtypes, as described in Note [Desugaring unlifted newtypes], we rely
    
    753
    +on the simple optimiser to push coercions into lambdas, such as in the following
    
    754
    +example:
    
    721 755
     
    
    722 756
       type R :: Type -> RuntimeRep
    
    723
    -  type family R a where { R Float = FloatRep; R Double = DoubleRep }
    
    724
    -  type F :: forall (a :: Type) -> TYPE (R a)
    
    725
    -  type family F a where { F Float = Float#  ; F Double = Double# }
    
    757
    +  type family R a where { R Int = IntRep }
    
    758
    +  type F :: forall a -> TYPE (R a)
    
    759
    +  type family F a where { F Int = Int# }
    
    726 760
     
    
    727
    -  type N :: forall (a :: Type) -> TYPE (R a)
    
    728 761
       newtype N a = MkN (F a)
    
    729 762
     
    
    730
    -As MkN is a newtype, its unfolding is a lambda which wraps its argument
    
    731
    -in a cast:
    
    732
    -
    
    733
    -  MkN :: forall (a :: Type). F a -> N a
    
    734
    -  MkN = /\a \(x::F a). x |> co_ax
    
    735
    -    -- recall that F a :: TYPE (R a)
    
    736
    -
    
    737
    -This is a representation-polymorphic lambda, in which the binder has an unknown
    
    738
    -representation (R a). We can't compile such a lambda on its own, but we can
    
    739
    -compile instantiations, such as `MkN @Float` or `MkN @Double`.
    
    763
    +Now, an instantiated occurrence of 'MkN', such as 'MkN @Int' (whether applied
    
    764
    +to a value argument or not) will lead, after inlining the compulsory unfolding
    
    765
    +of 'MkN', to a lambda fo the form:
    
    740 766
     
    
    741
    -Our strategy to avoid running afoul of the representation-polymorphism
    
    742
    -invariants of Note [Representation polymorphism invariants] in GHC.Core is thus:
    
    767
    +  ( \ ( x :: F Int ) -> body ) |> co
    
    743 768
     
    
    744
    -  1. Give the newtype a compulsory unfolding (it has no binding, as we can't
    
    745
    -     define lambdas with representation-polymorphic value binders in source Haskell).
    
    746
    -  2. Rely on the optimiser to beta-reduce away any representation-polymorphic
    
    747
    -     value binders.
    
    769
    +    where
    
    770
    +      co :: ( F Int -> res ) ~# ( Int# -> res )
    
    748 771
     
    
    749
    -For example, consider the application
    
    772
    +The problem is that we now have a lambda abstraction whose binder does not have a
    
    773
    +fixed RuntimeRep in the sense of Note [Fixed RuntimeRep] in GHC.Tc.Utils.Concrete.
    
    750 774
     
    
    751
    -    MkN @Float 34.0#
    
    775
    +However, if we use 'pushCoercionIntoLambda', we end up with:
    
    752 776
     
    
    753
    -After inlining MkN we'll get
    
    777
    +  ( \ ( x' :: Int# ) -> body' )
    
    754 778
     
    
    755
    -   ((/\a \(x:F a). x |> co_ax) @Float) |> co 34#
    
    779
    +which satisfies the representation-polymorphism invariants of
    
    780
    +Note [Representation polymorphism invariants] in GHC.Core.
    
    756 781
     
    
    757
    -where co :: (F Float -> N Float) ~ (Float# ~ N Float)
    
    782
    +In conclusion:
    
    758 783
     
    
    759
    -But to actually beta-reduce that lambda, we need to push the 'co'
    
    760
    -inside the `\x` with pushCoercionIntoLambda.  Hence the extra
    
    761
    -equation for Cast-of-Lam in simple_app.
    
    784
    +  1. The simple optimiser must push casts into lambdas.
    
    785
    +  2. It must also deal with a situation such as (MkN @Int) |> co, where we first
    
    786
    +     inline the compulsory unfolding of N. This means the simple optimiser must
    
    787
    +     "peel off" the casts and optimise the inner expression first, to determine
    
    788
    +     whether it is a lambda abstraction or not.
    
    762 789
     
    
    763
    -This is regrettably delicate.
    
    790
    +This is regrettably delicate. If we could make sure the typechecker/desugarer
    
    791
    +did not produce these bad lambdas in the first place (as described in
    
    792
    +[Alternative approach] in Note [Desugaring unlifted newtypes]), we could
    
    793
    +get rid of this ugly logic.
    
    764 794
     
    
    765 795
     Note [Preserve join-binding arity]
    
    766 796
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    ... ... @@ -1673,7 +1703,7 @@ exprIsLambda_maybe ise@(ISE in_scope_set _) (Cast casted_e co)
    1673 1703
         -- this implies that x is not in scope in gamma (makes this code simpler)
    
    1674 1704
         , not (isTyVar x) && not (isCoVar x)
    
    1675 1705
         , assert (not $ x `elemVarSet` tyCoVarsOfCo co) True
    
    1676
    -    , Just (x',e') <- pushCoercionIntoLambda (mkEmptySubst in_scope_set) x e co
    
    1706
    +    , Just (x',e') <- pushCoercionIntoLambda in_scope_set x e co
    
    1677 1707
         , let res = Just (x',e',ts)
    
    1678 1708
         = --pprTrace "exprIsLambda_maybe:Cast" (vcat [ppr casted_e,ppr co,ppr res)])
    
    1679 1709
           res
    

  • compiler/GHC/Driver/DynFlags.hs
    ... ... @@ -1268,6 +1268,7 @@ optLevelFlags -- see Note [Documenting optimisation flags]
    1268 1268
         , ([1,2],   Opt_CfgBlocklayout)      -- Experimental
    
    1269 1269
     
    
    1270 1270
         , ([1,2],   Opt_Specialise)
    
    1271
    +    , ([1,2],   Opt_PolymorphicSpecialisation)  -- Now on by default (#23559)
    
    1271 1272
         , ([1,2],   Opt_CrossModuleSpecialise)
    
    1272 1273
         , ([1,2],   Opt_InlineGenerics)
    
    1273 1274
         , ([1,2],   Opt_Strictness)
    

  • compiler/GHC/Driver/Flags.hs
    ... ... @@ -909,6 +909,7 @@ optimisationFlags = EnumSet.fromList
    909 909
        , Opt_SpecialiseAggressively
    
    910 910
        , Opt_CrossModuleSpecialise
    
    911 911
        , Opt_StaticArgumentTransformation
    
    912
    +   , Opt_PolymorphicSpecialisation
    
    912 913
        , Opt_CSE
    
    913 914
        , Opt_StgCSE
    
    914 915
        , Opt_StgLiftLams
    

  • compiler/GHC/Tc/Gen/App.hs
    ... ... @@ -749,13 +749,13 @@ tcInstFun do_ql inst_final (tc_fun, fun_ctxt) fun_sigma rn_args
    749 749
         go1 _pos acc fun_ty []
    
    750 750
            | XExpr (ConLikeTc (RealDataCon dc)) <- tc_fun
    
    751 751
            , isNewDataCon dc
    
    752
    -       , [Scaled _ arg_ty] <- dataConOrigArgTys dc
    
    752
    +       , [Scaled _ orig_arg_ty] <- dataConOrigArgTys dc
    
    753 753
            , n_val_args == 0
    
    754 754
            -- If we're dealing with an unsaturated representation-polymorphic
    
    755 755
            -- UnliftedNewype, then perform a representation-polymorphism check.
    
    756 756
            -- See Note [Representation-polymorphism checks for unsaturated unlifted newtypes]
    
    757 757
            -- in GHC.Tc.Utils.Concrete.
    
    758
    -       , not $ typeHasFixedRuntimeRep arg_ty
    
    758
    +       , not $ typeHasFixedRuntimeRep orig_arg_ty
    
    759 759
            = do { (wrap_co, arg_ty, res_ty) <-
    
    760 760
                       matchActualFunTy (FRRRepPolyUnliftedNewtype dc)
    
    761 761
                         (Just $ HsExprTcThing tc_fun)
    

  • docs/users_guide/exts/rank_polymorphism.rst
    ... ... @@ -195,7 +195,7 @@ For example: ::
    195 195
       g3c :: Int -> forall x y. y -> x -> x
    
    196 196
     
    
    197 197
       f4 :: (Int -> forall a. (Eq a, Show a) => a -> a) -> Bool
    
    198
    -  g4 ::  Int -> forall x. (Show x, Eq x) => x -> x) -> Bool
    
    198
    +  g4 ::  Int -> forall x. (Show x, Eq x) => x -> x
    
    199 199
     
    
    200 200
     Then the application ``f3 g3a`` is well-typed, because ``g3a`` has a type that matches the type
    
    201 201
     expected by ``f3``.  But ``f3 g3b`` is not well typed, because the foralls are in different places.
    

  • docs/users_guide/exts/type_families.rst
    ... ... @@ -680,7 +680,7 @@ thus: ::
    680 680
     When doing so, we (optionally) may drop the "``family``" keyword.
    
    681 681
     
    
    682 682
     The type parameters must all be type variables, of course, and some (but
    
    683
    -not necessarily all) of then can be the class parameters. Each class
    
    683
    +not necessarily all) of them can be the class parameters. Each class
    
    684 684
     parameter may only be used at most once per associated type, but some
    
    685 685
     may be omitted and they may be in an order other than in the class head.
    
    686 686
     Hence, the following contrived example is admissible: ::
    

  • docs/users_guide/phases.rst
    ... ... @@ -770,10 +770,9 @@ Options affecting code generation
    770 770
         :type: dynamic
    
    771 771
         :category: codegen
    
    772 772
     
    
    773
    -    Generate position-independent code (code that can be put into shared
    
    774
    -    libraries). This currently works on Linux x86 and x86-64. On
    
    775
    -    Windows, position-independent code is never used so the flag is a
    
    776
    -    no-op on that platform.
    
    773
    +    Generate position-independent code (PIC). This code can be put into shared
    
    774
    +    libraries and is sometimes required by operating systems, e.g. systems using
    
    775
    +    Address Space Layout Randomization (ASLR).
    
    777 776
     
    
    778 777
     .. ghc-flag:: -fexternal-dynamic-refs
    
    779 778
         :shortdesc: Generate code for linking against dynamic libraries
    
    ... ... @@ -790,9 +789,7 @@ Options affecting code generation
    790 789
         :category: codegen
    
    791 790
     
    
    792 791
         Generate code in such a way to be linkable into a position-independent
    
    793
    -    executable This currently works on Linux x86 and x86-64. On Windows,
    
    794
    -    position-independent code is never used so the flag is a no-op on that
    
    795
    -    platform. To link the final executable use :ghc-flag:`-pie`.
    
    792
    +    executable. To link the final executable use :ghc-flag:`-pie`.
    
    796 793
     
    
    797 794
     .. ghc-flag:: -dynamic
    
    798 795
         :shortdesc: Build dynamically-linked object files and executables
    

  • docs/users_guide/using-optimisation.rst
    ... ... @@ -1325,10 +1325,7 @@ as such you shouldn't need to set any of them explicitly. A flag
    1325 1325
         :reverse: -fno-polymorphic-specialisation
    
    1326 1326
         :category:
    
    1327 1327
     
    
    1328
    -    :default: off
    
    1329
    -
    
    1330
    -    Warning, this feature is highly experimental and may lead to incorrect runtime
    
    1331
    -    results. Use at your own risk (:ghc-ticket:`23469`, :ghc-ticket:`23109`, :ghc-ticket:`21229`, :ghc-ticket:`23445`).
    
    1328
    +    :default: on
    
    1332 1329
     
    
    1333 1330
         Enable specialisation of function calls to known dictionaries with free type variables.
    
    1334 1331
         The created specialisation will abstract over the type variables free in the dictionary.
    

  • rts/eventlog/EventLog.c
    ... ... @@ -491,13 +491,7 @@ endEventLogging(void)
    491 491
     
    
    492 492
         eventlog_enabled = false;
    
    493 493
     
    
    494
    -    // Flush all events remaining in the buffers.
    
    495
    -    //
    
    496
    -    // N.B. Don't flush if shutting down: this was done in
    
    497
    -    // finishCapEventLogging and the capabilities have already been freed.
    
    498
    -    if (getSchedState() != SCHED_SHUTTING_DOWN) {
    
    499
    -        flushEventLog(NULL);
    
    500
    -    }
    
    494
    +    flushEventLog(NULL);
    
    501 495
     
    
    502 496
         ACQUIRE_LOCK(&eventBufMutex);
    
    503 497
     
    
    ... ... @@ -1626,15 +1620,24 @@ void flushEventLog(Capability **cap USED_IF_THREADS)
    1626 1620
             return;
    
    1627 1621
         }
    
    1628 1622
     
    
    1623
    +    // N.B. Don't flush if shutting down: this was done in
    
    1624
    +    // finishCapEventLogging and the capabilities have already been freed.
    
    1625
    +    // This can also race against the shutdown if the flush is triggered by the
    
    1626
    +    // ticker thread. (#26573)
    
    1627
    +    if (getSchedState() == SCHED_SHUTTING_DOWN) {
    
    1628
    +      return;
    
    1629
    +    }
    
    1630
    +
    
    1629 1631
         ACQUIRE_LOCK(&eventBufMutex);
    
    1630 1632
         printAndClearEventBuf(&eventBuf);
    
    1631 1633
         RELEASE_LOCK(&eventBufMutex);
    
    1632 1634
     
    
    1633 1635
     #if defined(THREADED_RTS)
    
    1634
    -    Task *task = getMyTask();
    
    1636
    +    Task *task = newBoundTask();
    
    1635 1637
         stopAllCapabilitiesWith(cap, task, SYNC_FLUSH_EVENT_LOG);
    
    1636 1638
         flushAllCapsEventsBufs();
    
    1637 1639
         releaseAllCapabilities(getNumCapabilities(), cap ? *cap : NULL, task);
    
    1640
    +    exitMyTask();
    
    1638 1641
     #else
    
    1639 1642
         flushLocalEventsBuf(getCapability(0));
    
    1640 1643
     #endif
    

  • rts/linker/PEi386.c
    ... ... @@ -552,7 +552,12 @@ static int compare_path(StgWord key1, StgWord key2)
    552 552
     
    
    553 553
     static void addLoadedDll(LoadedDllCache *cache, const pathchar *dll_name, HINSTANCE instance)
    
    554 554
     {
    
    555
    -    insertHashTable_(cache->hash, (StgWord) dll_name, instance, hash_path);
    
    555
    +    // dll_name might be deallocated, we need to copy it to have a stable reference to the contents
    
    556
    +    // See #26613
    
    557
    +    size_t size = wcslen(dll_name) + 1;
    
    558
    +    pathchar* dll_name_copy = stgMallocBytes(size * sizeof(pathchar), "addLoadedDll");
    
    559
    +    wcsncpy(dll_name_copy, dll_name, size);
    
    560
    +    insertHashTable_(cache->hash, (StgWord) dll_name_copy, instance, hash_path);
    
    556 561
     }
    
    557 562
     
    
    558 563
     static HINSTANCE isDllLoaded(const LoadedDllCache *cache, const pathchar *dll_name)
    

  • testsuite/tests/codeGen/should_run/T24016.hs
    1
    +module Main (main) where
    
    2
    +
    
    3
    +data Command
    
    4
    +  = Command1
    
    5
    +  | Command2
    
    6
    +  | Command3
    
    7
    +  | Command4
    
    8
    +  | Command5
    
    9
    +  | Command6 -- Commenting this line works with -fPIC, uncommenting leads to a crash.
    
    10
    +
    
    11
    +main :: IO ()
    
    12
    +main = do
    
    13
    +  let x = case cmd of
    
    14
    +           Command1 -> 1 :: Int
    
    15
    +           Command2 -> 2
    
    16
    +           Command3 -> 3
    
    17
    +           Command4 -> 4
    
    18
    +           Command5 -> 5
    
    19
    +           Command6 -> 6
    
    20
    +  putStrLn (show x)
    
    21
    +
    
    22
    +{-# NOINLINE cmd #-}
    
    23
    +cmd :: Command
    
    24
    +cmd = Command6

  • testsuite/tests/codeGen/should_run/T24016.stdout
    1
    +6

  • testsuite/tests/codeGen/should_run/all.T
    ... ... @@ -257,3 +257,4 @@ test('CCallConv', [req_c], compile_and_run, ['CCallConv_c.c'])
    257 257
     test('T25364', normal, compile_and_run, [''])
    
    258 258
     test('T26061', normal, compile_and_run, [''])
    
    259 259
     test('T26537', normal, compile_and_run, ['-O2 -fregs-graph'])
    
    260
    +test('T24016', normal, compile_and_run, ['-O1 -fPIC'])

  • testsuite/tests/rts/all.T
    ... ... @@ -2,6 +2,11 @@ test('testblockalloc',
    2 2
          [c_src, only_ways(['normal','threaded1']), extra_run_opts('+RTS -I0')],
    
    3 3
          compile_and_run, [''])
    
    4 4
     
    
    5
    +test('numeric_version_eventlog_flush',
    
    6
    +     [ignore_stdout, req_ghc_with_threaded_rts],
    
    7
    +     run_command,
    
    8
    +     ['{compiler} --numeric-version +RTS -l --eventlog-flush-interval=1 -RTS'])
    
    9
    +
    
    5 10
     test('testmblockalloc',
    
    6 11
          [c_src, only_ways(['normal','threaded1']), extra_run_opts('+RTS -I0 -xr0.125T'),
    
    7 12
           when(arch('wasm32'), skip)], # MBlocks can't be freed on wasm32, see Note [Megablock allocator on wasm] in rts
    

  • testsuite/tests/simplCore/should_compile/T26588.hs
    1
    +module T26588 ( getOptionSettingFromText ) where
    
    2
    +
    
    3
    +import           Control.Applicative ( Const(..) )
    
    4
    +import           Data.Map (Map)
    
    5
    +import qualified Data.Map.Strict as Map
    
    6
    +
    
    7
    +------------------------------------------------------------------------
    
    8
    +-- ConfigState
    
    9
    +
    
    10
    +data ConfigLeaf
    
    11
    +data ConfigTrie = ConfigTrie !(Maybe ConfigLeaf) !ConfigMap
    
    12
    +
    
    13
    +type ConfigMap = Map Int ConfigTrie
    
    14
    +
    
    15
    +freshLeaf :: [Int] -> ConfigLeaf -> ConfigTrie
    
    16
    +freshLeaf [] l     = ConfigTrie (Just l) mempty
    
    17
    +freshLeaf (a:as) l = ConfigTrie Nothing (Map.singleton a (freshLeaf as l))
    
    18
    +
    
    19
    +adjustConfigTrie :: Functor t => [Int] -> (Maybe ConfigLeaf -> t (Maybe ConfigLeaf)) -> Maybe (ConfigTrie) -> t (Maybe ConfigTrie)
    
    20
    +adjustConfigTrie     as f Nothing                 = fmap (freshLeaf as) <$> f Nothing
    
    21
    +adjustConfigTrie (a:as) f (Just (ConfigTrie x m)) = Just . ConfigTrie x <$> adjustConfigMap a as f m
    
    22
    +adjustConfigTrie     [] f (Just (ConfigTrie x m)) = g <$> f x
    
    23
    +  where g Nothing | Map.null m = Nothing
    
    24
    +        g x' = Just (ConfigTrie x' m)
    
    25
    +
    
    26
    +adjustConfigMap :: Functor t => Int -> [Int] -> (Maybe ConfigLeaf -> t (Maybe ConfigLeaf)) -> ConfigMap -> t ConfigMap
    
    27
    +adjustConfigMap a as f = Map.alterF (adjustConfigTrie as f) a
    
    28
    +
    
    29
    +getOptionSettingFromText :: Int -> [Int] -> ConfigMap -> IO ()
    
    30
    +getOptionSettingFromText p ps = getConst . adjustConfigMap p ps f
    
    31
    +  where
    
    32
    +    f _ = Const (return ())

  • testsuite/tests/simplCore/should_compile/T26589.hs
    1
    +module T26589 ( executeTest ) where
    
    2
    +
    
    3
    +-- base
    
    4
    +import Data.Coerce ( coerce )
    
    5
    +import Data.Foldable ( foldMap )
    
    6
    +
    
    7
    +--------------------------------------------------------------------------------
    
    8
    +
    
    9
    +newtype Traversal f = Traversal { getTraversal :: f () }
    
    10
    +
    
    11
    +instance Applicative f => Semigroup (Traversal f) where
    
    12
    +  Traversal f1 <> Traversal f2 = Traversal $ f1 *> f2
    
    13
    +instance Applicative f => Monoid (Traversal f) where
    
    14
    +  mempty = Traversal $ pure ()
    
    15
    +
    
    16
    +newtype Seq a = Seq (FingerTree (Elem a))
    
    17
    +newtype Elem a = Elem { getElem :: a }
    
    18
    +
    
    19
    +data FingerTree a
    
    20
    +    = EmptyT
    
    21
    +    | Deep !a (FingerTree a) !a
    
    22
    +
    
    23
    +executeTest :: Seq () -> IO ()
    
    24
    +executeTest fins = destroyResources
    
    25
    +  where
    
    26
    +    destroyResources :: IO ()
    
    27
    +    destroyResources =
    
    28
    +      getTraversal $
    
    29
    +        flip foldMap1 fins $ \ _ ->
    
    30
    +          Traversal $ return ()
    
    31
    +
    
    32
    +foldMap1 :: forall m a. Monoid m => (a -> m) -> Seq a -> m
    
    33
    +foldMap1 = coerce (foldMap2 :: (Elem a -> m) -> FingerTree (Elem a) -> m)
    
    34
    +
    
    35
    +foldMap2 :: Monoid m => (Elem a -> m) -> FingerTree (Elem a) -> m
    
    36
    +foldMap2 _ EmptyT = mempty
    
    37
    +foldMap2 f' (Deep pr' m' sf') = f' pr' <> foldMapTree f' m' <> f' sf'
    
    38
    +      where
    
    39
    +        foldMapTree :: Monoid m => (a -> m) -> FingerTree a -> m
    
    40
    +        foldMapTree _ EmptyT = mempty
    
    41
    +        foldMapTree f (Deep pr m sf) =
    
    42
    +            f pr <>
    
    43
    +            foldMapTree f m <>
    
    44
    +            f sf

  • testsuite/tests/simplCore/should_compile/T8331.stderr
    1 1
     
    
    2 2
     ==================== Tidy Core rules ====================
    
    3
    +"SPEC $c*> @(ST s) @_"
    
    4
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    5
    +      $fApplicativeReaderT_$c*> @(ST s) @r $dApplicative
    
    6
    +      = ($fApplicativeReaderT2 @s @r)
    
    7
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
    
    8
    +                <ReaderT r (ST s) a>_R
    
    9
    +                ->_R <ReaderT r (ST s) b>_R
    
    10
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <b>_R)
    
    11
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <b>_N)
    
    12
    +                :: Coercible
    
    13
    +                     (forall a b.
    
    14
    +                      ReaderT r (ST s) a -> ReaderT r (ST s) b -> r -> STRep s b)
    
    15
    +                     (forall a b.
    
    16
    +                      ReaderT r (ST s) a -> ReaderT r (ST s) b -> ReaderT r (ST s) b))
    
    17
    +"SPEC $c<$ @(ST s) @_"
    
    18
    +    forall (@s) (@r) ($dFunctor :: Functor (ST s)).
    
    19
    +      $fFunctorReaderT_$c<$ @(ST s) @r $dFunctor
    
    20
    +      = ($fApplicativeReaderT6 @s @r)
    
    21
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
    
    22
    +                <a>_R
    
    23
    +                ->_R <ReaderT r (ST s) b>_R
    
    24
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
    
    25
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
    
    26
    +                :: Coercible
    
    27
    +                     (forall a b. a -> ReaderT r (ST s) b -> r -> STRep s a)
    
    28
    +                     (forall a b. a -> ReaderT r (ST s) b -> ReaderT r (ST s) a))
    
    29
    +"SPEC $c<* @(ST s) @_"
    
    30
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    31
    +      $fApplicativeReaderT_$c<* @(ST s) @r $dApplicative
    
    32
    +      = ($fApplicativeReaderT1 @s @r)
    
    33
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
    
    34
    +                <ReaderT r (ST s) a>_R
    
    35
    +                ->_R <ReaderT r (ST s) b>_R
    
    36
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
    
    37
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
    
    38
    +                :: Coercible
    
    39
    +                     (forall a b.
    
    40
    +                      ReaderT r (ST s) a -> ReaderT r (ST s) b -> r -> STRep s a)
    
    41
    +                     (forall a b.
    
    42
    +                      ReaderT r (ST s) a -> ReaderT r (ST s) b -> ReaderT r (ST s) a))
    
    43
    +"SPEC $c<*> @(ST s) @_"
    
    44
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    45
    +      $fApplicativeReaderT9 @(ST s) @r $dApplicative
    
    46
    +      = ($fApplicativeReaderT4 @s @r)
    
    47
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
    
    48
    +                <ReaderT r (ST s) (a -> b)>_R
    
    49
    +                ->_R <ReaderT r (ST s) a>_R
    
    50
    +                ->_R <r>_R
    
    51
    +                ->_R Sym (N:ST <s>_N <b>_R)
    
    52
    +                :: Coercible
    
    53
    +                     (forall a b.
    
    54
    +                      ReaderT r (ST s) (a -> b) -> ReaderT r (ST s) a -> r -> STRep s b)
    
    55
    +                     (forall a b.
    
    56
    +                      ReaderT r (ST s) (a -> b) -> ReaderT r (ST s) a -> r -> ST s b))
    
    57
    +"SPEC $c>> @(ST s) @_"
    
    58
    +    forall (@s) (@r) ($dMonad :: Monad (ST s)).
    
    59
    +      $fMonadReaderT1 @(ST s) @r $dMonad
    
    60
    +      = $fMonadAbstractIOSTReaderT_$s$c>> @s @r
    
    61
    +"SPEC $c>>= @(ST s) @_"
    
    62
    +    forall (@s) (@r) ($dMonad :: Monad (ST s)).
    
    63
    +      $fMonadReaderT2 @(ST s) @r $dMonad
    
    64
    +      = ($fMonadAbstractIOSTReaderT2 @s @r)
    
    65
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
    
    66
    +                <ReaderT r (ST s) a>_R
    
    67
    +                ->_R <a -> ReaderT r (ST s) b>_R
    
    68
    +                ->_R <r>_R
    
    69
    +                ->_R Sym (N:ST <s>_N <b>_R)
    
    70
    +                :: Coercible
    
    71
    +                     (forall a b.
    
    72
    +                      ReaderT r (ST s) a -> (a -> ReaderT r (ST s) b) -> r -> STRep s b)
    
    73
    +                     (forall a b.
    
    74
    +                      ReaderT r (ST s) a -> (a -> ReaderT r (ST s) b) -> r -> ST s b))
    
    75
    +"SPEC $cfmap @(ST s) @_"
    
    76
    +    forall (@s) (@r) ($dFunctor :: Functor (ST s)).
    
    77
    +      $fFunctorReaderT_$cfmap @(ST s) @r $dFunctor
    
    78
    +      = ($fApplicativeReaderT7 @s @r)
    
    79
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
    
    80
    +                <a -> b>_R
    
    81
    +                ->_R <ReaderT r (ST s) a>_R
    
    82
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <b>_R)
    
    83
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <b>_N)
    
    84
    +                :: Coercible
    
    85
    +                     (forall a b. (a -> b) -> ReaderT r (ST s) a -> r -> STRep s b)
    
    86
    +                     (forall a b. (a -> b) -> ReaderT r (ST s) a -> ReaderT r (ST s) b))
    
    87
    +"SPEC $cliftA2 @(ST s) @_"
    
    88
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    89
    +      $fApplicativeReaderT_$cliftA2 @(ST s) @r $dApplicative
    
    90
    +      = ($fApplicativeReaderT3 @s @r)
    
    91
    +        `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N) (c ::~ <*>_N).
    
    92
    +                <a -> b -> c>_R
    
    93
    +                ->_R <ReaderT r (ST s) a>_R
    
    94
    +                ->_R <ReaderT r (ST s) b>_R
    
    95
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <c>_R)
    
    96
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <c>_N)
    
    97
    +                :: Coercible
    
    98
    +                     (forall a b c.
    
    99
    +                      (a -> b -> c)
    
    100
    +                      -> ReaderT r (ST s) a -> ReaderT r (ST s) b -> r -> STRep s c)
    
    101
    +                     (forall a b c.
    
    102
    +                      (a -> b -> c)
    
    103
    +                      -> ReaderT r (ST s) a -> ReaderT r (ST s) b -> ReaderT r (ST s) c))
    
    104
    +"SPEC $cp1Applicative @(ST s) @_"
    
    105
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    106
    +      $fApplicativeReaderT_$cp1Applicative @(ST s) @r $dApplicative
    
    107
    +      = $fApplicativeReaderT_$s$fFunctorReaderT @s @r
    
    108
    +"SPEC $cp1Monad @(ST s) @_"
    
    109
    +    forall (@s) (@r) ($dMonad :: Monad (ST s)).
    
    110
    +      $fMonadReaderT_$cp1Monad @(ST s) @r $dMonad
    
    111
    +      = $fApplicativeReaderT_$s$fApplicativeReaderT @s @r
    
    112
    +"SPEC $cpure @(ST s) @_"
    
    113
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    114
    +      $fApplicativeReaderT_$cpure @(ST s) @r $dApplicative
    
    115
    +      = ($fApplicativeReaderT5 @s @r)
    
    116
    +        `cast` (forall (a ::~ <*>_N).
    
    117
    +                <a>_R
    
    118
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
    
    119
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
    
    120
    +                :: Coercible
    
    121
    +                     (forall a. a -> r -> STRep s a)
    
    122
    +                     (forall a. a -> ReaderT r (ST s) a))
    
    123
    +"SPEC $creturn @(ST s) @_"
    
    124
    +    forall (@s) (@r) ($dMonad :: Monad (ST s)).
    
    125
    +      $fMonadReaderT_$creturn @(ST s) @r $dMonad
    
    126
    +      = ($fApplicativeReaderT5 @s @r)
    
    127
    +        `cast` (forall (a ::~ <*>_N).
    
    128
    +                <a>_R
    
    129
    +                ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
    
    130
    +                     ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
    
    131
    +                :: Coercible
    
    132
    +                     (forall a. a -> r -> STRep s a)
    
    133
    +                     (forall a. a -> ReaderT r (ST s) a))
    
    134
    +"SPEC $fApplicativeReaderT @(ST s) @_"
    
    135
    +    forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
    
    136
    +      $fApplicativeReaderT @(ST s) @r $dApplicative
    
    137
    +      = $fApplicativeReaderT_$s$fApplicativeReaderT @s @r
    
    138
    +"SPEC $fFunctorReaderT @(ST s) @_"
    
    139
    +    forall (@s) (@r) ($dFunctor :: Functor (ST s)).
    
    140
    +      $fFunctorReaderT @(ST s) @r $dFunctor
    
    141
    +      = $fApplicativeReaderT_$s$fFunctorReaderT @s @r
    
    142
    +"SPEC $fMonadReaderT @(ST s) @_"
    
    143
    +    forall (@s) (@r) ($dMonad :: Monad (ST s)).
    
    144
    +      $fMonadReaderT @(ST s) @r $dMonad
    
    145
    +      = $fMonadAbstractIOSTReaderT_$s$fMonadReaderT @s @r
    
    3 146
     "USPEC useAbstractMonad @(ReaderT Int (ST s))"
    
    4 147
         forall (@s)
    
    5 148
                ($dMonadAbstractIOST :: MonadAbstractIOST (ReaderT Int (ST s))).
    

  • testsuite/tests/simplCore/should_compile/all.T
    ... ... @@ -544,6 +544,9 @@ test('T25883b', normal, compile_grep_core, [''])
    544 544
     test('T25883c', normal, compile_grep_core, [''])
    
    545 545
     test('T25883d', [extra_files(['T25883d_import.hs'])], multimod_compile_filter, ['T25883d', '-O -ddump-simpl -dno-typeable-binds -dsuppress-all -dsuppress-uniques', r'grep -e "y ="'])
    
    546 546
     
    
    547
    +test('T26588', normal, compile, ['-package containers -O'])
    
    548
    +test('T26589', normal, compile, ['-O'])
    
    549
    +
    
    547 550
     test('T25976', [grep_errmsg('Dead Code')], compile, ['-O -ddump-simpl -dsuppress-uniques -dno-typeable-binds'])
    
    548 551
     
    
    549 552
     test('T25965', normal, compile, ['-O'])
    

  • utils/check-exact/ExactPrint.hs
    ... ... @@ -19,6 +19,13 @@
    19 19
     {-# LANGUAGE UndecidableInstances  #-} -- For the (StmtLR GhcPs GhcPs (LocatedA (body GhcPs))) ExactPrint instance
    
    20 20
     {-# OPTIONS_GHC -Wno-incomplete-uni-patterns -Wno-incomplete-record-updates #-}
    
    21 21
     
    
    22
    +-- We switch off specialisation in this module. Otherwise we get lots of functions
    
    23
    +-- specialised on lots of (GHC syntax tree) data types.  Compilation time allocation
    
    24
    +-- (at least with -fpolymorphic-specialisation; see !15058) blows up from 17G to 108G.
    
    25
    +-- Bad! ExactPrint is not a performance-critical module so it's not worth taking the
    
    26
    +-- largely-fruitless hit in compile time.
    
    27
    +{-# OPTIONS_GHC -fno-specialise #-}
    
    28
    +
    
    22 29
     module ExactPrint
    
    23 30
       (
    
    24 31
         ExactPrint(..)