Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC
Commits:
-
98fa0d36
by Simon Hengel at 2025-11-27T17:54:57-05:00
-
5b97e5ce
by Simon Hengel at 2025-11-27T17:55:37-05:00
-
fa2aaa00
by Simon Peyton Jones at 2025-11-27T17:56:18-05:00
-
1fd25987
by Simon Peyton Jones at 2025-11-27T17:56:18-05:00
-
b7fe7445
by Matthew Pickering at 2025-11-27T17:56:59-05:00
-
1d4a1229
by sheaf at 2025-11-27T17:58:02-05:00
-
a987c4d9
by Sylvain Henry at 2025-11-28T12:04:55-05:00
-
f3ccdc29
by Zubin Duggal at 2025-11-28T12:04:58-05:00
23 changed files:
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/CmmToAsm/X86/Ppr.hs
- compiler/GHC/Core/Opt/Arity.hs
- compiler/GHC/Core/SimpleOpt.hs
- compiler/GHC/Driver/DynFlags.hs
- compiler/GHC/Driver/Flags.hs
- compiler/GHC/Tc/Gen/App.hs
- docs/users_guide/exts/rank_polymorphism.rst
- docs/users_guide/exts/type_families.rst
- docs/users_guide/phases.rst
- docs/users_guide/using-optimisation.rst
- rts/eventlog/EventLog.c
- rts/linker/PEi386.c
- + testsuite/tests/codeGen/should_run/T24016.hs
- + testsuite/tests/codeGen/should_run/T24016.stdout
- testsuite/tests/codeGen/should_run/all.T
- testsuite/tests/rts/all.T
- + testsuite/tests/simplCore/should_compile/T26588.hs
- + testsuite/tests/simplCore/should_compile/T26589.hs
- testsuite/tests/simplCore/should_compile/T8331.stderr
- testsuite/tests/simplCore/should_compile/all.T
- utils/check-exact/ExactPrint.hs
Changes:
| ... | ... | @@ -376,7 +376,7 @@ stmtToInstrs bid stmt = do |
| 376 | 376 | --We try to arrange blocks such that the likely branch is the fallthrough
|
| 377 | 377 | --in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
|
| 378 | 378 | CmmCondBranch arg true false _ -> genCondBranch bid true false arg
|
| 379 | - CmmSwitch arg ids -> genSwitch arg ids
|
|
| 379 | + CmmSwitch arg ids -> genSwitch arg ids bid
|
|
| 380 | 380 | CmmCall { cml_target = arg
|
| 381 | 381 | , cml_args_regs = gregs } -> genJump arg (jumpRegs platform gregs)
|
| 382 | 382 | _ ->
|
| ... | ... | @@ -489,13 +489,6 @@ is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000 |
| 489 | 489 | where i64 = fromIntegral i :: Int64
|
| 490 | 490 | |
| 491 | 491 | |
| 492 | --- | Convert a BlockId to some CmmStatic data
|
|
| 493 | -jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
|
|
| 494 | -jumpTableEntry config Nothing = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
|
|
| 495 | -jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
|
|
| 496 | - where blockLabel = blockLbl blockid
|
|
| 497 | - |
|
| 498 | - |
|
| 499 | 492 | -- -----------------------------------------------------------------------------
|
| 500 | 493 | -- General things for putting together code sequences
|
| 501 | 494 | |
| ... | ... | @@ -5375,11 +5368,52 @@ index (1), |
| 5375 | 5368 | indexExpr = UU_Conv(indexOffset); // == 1::I64
|
| 5376 | 5369 | |
| 5377 | 5370 | See #21186.
|
| 5378 | --}
|
|
| 5379 | 5371 | |
| 5380 | -genSwitch :: CmmExpr -> SwitchTargets -> NatM InstrBlock
|
|
| 5372 | +Note [Jump tables]
|
|
| 5373 | +~~~~~~~~~~~~~~~~~~
|
|
| 5374 | +The x86 backend has a virtual JMP_TBL instruction which payload can be used to
|
|
| 5375 | +generate both the jump instruction and the jump table contents. `genSwitch` is
|
|
| 5376 | +responsible for generating these JMP_TBL instructions.
|
|
| 5377 | + |
|
| 5378 | +Depending on `-fPIC` flag and on the architecture, we generate the following
|
|
| 5379 | +jump table variants:
|
|
| 5380 | + |
|
| 5381 | + | Variant | Arch | Table's contents | Reference to the table |
|
|
| 5382 | + |---------|--------|----------------------------------------|------------------------|
|
|
| 5383 | + | PIC | Both | Relative offset: target_lbl - base_lbl | PIC |
|
|
| 5384 | + | Non-PIC | 64-bit | Absolute: target_lbl | Non-PIC (rip-relative) |
|
|
| 5385 | + | Non-PIC | 32-bit | Absolute: target_lbl | Non-PIC (absolute) |
|
|
| 5386 | + |
|
| 5387 | +For the PIC variant, we store relative entries (`target_lbl - base_lbl`) in the
|
|
| 5388 | +jump table. Using absolute entries with PIC would require target_lbl symbols to
|
|
| 5389 | +be resolved at link time, hence to be global labels (currently they are local
|
|
| 5390 | +labels).
|
|
| 5391 | + |
|
| 5392 | +We use the block_id of the code containing the jump as `base_lbl`. It ensures
|
|
| 5393 | +that target_lbl and base_lbl are close enough to each others, avoiding
|
|
| 5394 | +overflows.
|
|
| 5395 | + |
|
| 5396 | +Historical note: in the past we used the table label `table_lbl` as base_lbl. It
|
|
| 5397 | +allowed the jumping code to only compute one global address (table_lbl) both to
|
|
| 5398 | +read the table and to compute the target address. However:
|
|
| 5381 | 5399 | |
| 5382 | -genSwitch expr targets = do
|
|
| 5400 | + * the table could be too far from the jump and on Windows which only
|
|
| 5401 | + has 32-bit relative relocations (IMAGE_REL_AMD64_REL64 doesn't exist),
|
|
| 5402 | + `dest_lbl - table_lbl` overflowed (see #24016)
|
|
| 5403 | + |
|
| 5404 | + * Mac OS X/x86-64 linker was unable to handle `.quad L1 - L0`
|
|
| 5405 | + relocations if L0 wasn't preceded by a non-anonymous label in its
|
|
| 5406 | + section (which was the case with table_lbl). Hence we used to put the
|
|
| 5407 | + jump table in the .text section in this case.
|
|
| 5408 | + |
|
| 5409 | + |
|
| 5410 | +-}
|
|
| 5411 | + |
|
| 5412 | +-- | Generate a JMP_TBL instruction
|
|
| 5413 | +--
|
|
| 5414 | +-- See Note [Jump tables]
|
|
| 5415 | +genSwitch :: CmmExpr -> SwitchTargets -> BlockId -> NatM InstrBlock
|
|
| 5416 | +genSwitch expr targets bid = do
|
|
| 5383 | 5417 | config <- getConfig
|
| 5384 | 5418 | let platform = ncgPlatform config
|
| 5385 | 5419 | expr_w = cmmExprWidth platform expr
|
| ... | ... | @@ -5390,79 +5424,76 @@ genSwitch expr targets = do |
| 5390 | 5424 | indexExpr = CmmMachOp
|
| 5391 | 5425 | (MO_UU_Conv expr_w (platformWordWidth platform))
|
| 5392 | 5426 | [indexExpr0]
|
| 5393 | - if ncgPIC config
|
|
| 5394 | - then do
|
|
| 5395 | - (reg,e_code) <- getNonClobberedReg indexExpr
|
|
| 5396 | - -- getNonClobberedReg because it needs to survive across t_code
|
|
| 5397 | - lbl <- getNewLabelNat
|
|
| 5398 | - let is32bit = target32Bit platform
|
|
| 5399 | - os = platformOS platform
|
|
| 5400 | - -- Might want to use .rodata.<function we're in> instead, but as
|
|
| 5401 | - -- long as it's something unique it'll work out since the
|
|
| 5402 | - -- references to the jump table are in the appropriate section.
|
|
| 5403 | - rosection = case os of
|
|
| 5404 | - -- on Mac OS X/x86_64, put the jump table in the text section to
|
|
| 5405 | - -- work around a limitation of the linker.
|
|
| 5406 | - -- ld64 is unable to handle the relocations for
|
|
| 5407 | - -- .quad L1 - L0
|
|
| 5408 | - -- if L0 is not preceded by a non-anonymous label in its section.
|
|
| 5409 | - OSDarwin | not is32bit -> Section Text lbl
|
|
| 5410 | - _ -> Section ReadOnlyData lbl
|
|
| 5411 | - dynRef <- cmmMakeDynamicReference config DataReference lbl
|
|
| 5412 | - (tableReg,t_code) <- getSomeReg $ dynRef
|
|
| 5413 | - let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
|
|
| 5414 | - (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0))
|
|
| 5415 | - |
|
| 5416 | - return $ e_code `appOL` t_code `appOL` toOL [
|
|
| 5417 | - ADD (intFormat (platformWordWidth platform)) op (OpReg tableReg),
|
|
| 5418 | - JMP_TBL (OpReg tableReg) ids rosection lbl
|
|
| 5419 | - ]
|
|
| 5420 | - else do
|
|
| 5421 | - (reg,e_code) <- getSomeReg indexExpr
|
|
| 5422 | - lbl <- getNewLabelNat
|
|
| 5423 | - let is32bit = target32Bit platform
|
|
| 5424 | - if is32bit
|
|
| 5425 | - then let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (platformWordSizeInBytes platform)) (ImmCLbl lbl))
|
|
| 5426 | - jmp_code = JMP_TBL op ids (Section ReadOnlyData lbl) lbl
|
|
| 5427 | - in return $ e_code `appOL` unitOL jmp_code
|
|
| 5428 | - else do
|
|
| 5427 | + |
|
| 5428 | + (offset, blockIds) = switchTargetsToTable targets
|
|
| 5429 | + ids = map (fmap DestBlockId) blockIds
|
|
| 5430 | + |
|
| 5431 | + is32bit = target32Bit platform
|
|
| 5432 | + fmt = archWordFormat is32bit
|
|
| 5433 | + |
|
| 5434 | + table_lbl <- getNewLabelNat
|
|
| 5435 | + let bid_lbl = blockLbl bid
|
|
| 5436 | + let table_section = Section ReadOnlyData table_lbl
|
|
| 5437 | + |
|
| 5438 | + -- see Note [Jump tables] for a description of the following 3 variants.
|
|
| 5439 | + if
|
|
| 5440 | + | ncgPIC config -> do
|
|
| 5441 | + -- PIC support: store relative offsets in the jump table to allow the code
|
|
| 5442 | + -- to be relocated without updating the table. The table itself and the
|
|
| 5443 | + -- block label used to make the relative labels absolute are read in a PIC
|
|
| 5444 | + -- way (via cmmMakeDynamicReference).
|
|
| 5445 | + (reg,e_code) <- getNonClobberedReg indexExpr -- getNonClobberedReg because it needs to survive across t_code and j_code
|
|
| 5446 | + (tableReg,t_code) <- getNonClobberedReg =<< cmmMakeDynamicReference config DataReference table_lbl
|
|
| 5447 | + (targetReg,j_code) <- getSomeReg =<< cmmMakeDynamicReference config DataReference bid_lbl
|
|
| 5448 | + pure $ e_code `appOL` t_code `appOL` j_code `appOL` toOL
|
|
| 5449 | + [ ADD fmt (OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0)))
|
|
| 5450 | + (OpReg targetReg)
|
|
| 5451 | + , JMP_TBL (OpReg targetReg) ids table_section table_lbl (Just bid_lbl)
|
|
| 5452 | + ]
|
|
| 5453 | + |
|
| 5454 | + | not is32bit -> do
|
|
| 5455 | + -- 64-bit non-PIC code
|
|
| 5456 | + (reg,e_code) <- getSomeReg indexExpr
|
|
| 5457 | + tableReg <- getNewRegNat (intFormat (platformWordWidth platform))
|
|
| 5458 | + targetReg <- getNewRegNat (intFormat (platformWordWidth platform))
|
|
| 5459 | + pure $ e_code `appOL` toOL
|
|
| 5429 | 5460 | -- See Note [%rip-relative addressing on x86-64].
|
| 5430 | - tableReg <- getNewRegNat (intFormat (platformWordWidth platform))
|
|
| 5431 | - targetReg <- getNewRegNat (intFormat (platformWordWidth platform))
|
|
| 5432 | - let op = OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0))
|
|
| 5433 | - fmt = archWordFormat is32bit
|
|
| 5434 | - code = e_code `appOL` toOL
|
|
| 5435 | - [ LEA fmt (OpAddr (AddrBaseIndex EABaseRip EAIndexNone (ImmCLbl lbl))) (OpReg tableReg)
|
|
| 5436 | - , MOV fmt op (OpReg targetReg)
|
|
| 5437 | - , JMP_TBL (OpReg targetReg) ids (Section ReadOnlyData lbl) lbl
|
|
| 5438 | - ]
|
|
| 5439 | - return code
|
|
| 5440 | - where
|
|
| 5441 | - (offset, blockIds) = switchTargetsToTable targets
|
|
| 5442 | - ids = map (fmap DestBlockId) blockIds
|
|
| 5461 | + [ LEA fmt (OpAddr (AddrBaseIndex EABaseRip EAIndexNone (ImmCLbl table_lbl))) (OpReg tableReg)
|
|
| 5462 | + , MOV fmt (OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0)))
|
|
| 5463 | + (OpReg targetReg)
|
|
| 5464 | + , JMP_TBL (OpReg targetReg) ids table_section table_lbl Nothing
|
|
| 5465 | + ]
|
|
| 5466 | + |
|
| 5467 | + | otherwise -> do
|
|
| 5468 | + -- 32-bit non-PIC code is a straightforward jump to &table[entry].
|
|
| 5469 | + (reg,e_code) <- getSomeReg indexExpr
|
|
| 5470 | + pure $ e_code `appOL` unitOL
|
|
| 5471 | + ( JMP_TBL (OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (platformWordSizeInBytes platform)) (ImmCLbl table_lbl)))
|
|
| 5472 | + ids table_section table_lbl Nothing
|
|
| 5473 | + )
|
|
| 5443 | 5474 | |
| 5444 | 5475 | generateJumpTableForInstr :: NCGConfig -> Instr -> Maybe (NatCmmDecl (Alignment, RawCmmStatics) Instr)
|
| 5445 | -generateJumpTableForInstr config (JMP_TBL _ ids section lbl)
|
|
| 5446 | - = let getBlockId (DestBlockId id) = id
|
|
| 5447 | - getBlockId _ = panic "Non-Label target in Jump Table"
|
|
| 5448 | - blockIds = map (fmap getBlockId) ids
|
|
| 5449 | - in Just (createJumpTable config blockIds section lbl)
|
|
| 5450 | -generateJumpTableForInstr _ _ = Nothing
|
|
| 5451 | - |
|
| 5452 | -createJumpTable :: NCGConfig -> [Maybe BlockId] -> Section -> CLabel
|
|
| 5453 | - -> GenCmmDecl (Alignment, RawCmmStatics) h g
|
|
| 5454 | -createJumpTable config ids section lbl
|
|
| 5455 | - = let jumpTable
|
|
| 5456 | - | ncgPIC config =
|
|
| 5457 | - let ww = ncgWordWidth config
|
|
| 5458 | - jumpTableEntryRel Nothing
|
|
| 5459 | - = CmmStaticLit (CmmInt 0 ww)
|
|
| 5460 | - jumpTableEntryRel (Just blockid)
|
|
| 5461 | - = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 ww)
|
|
| 5462 | - where blockLabel = blockLbl blockid
|
|
| 5463 | - in map jumpTableEntryRel ids
|
|
| 5464 | - | otherwise = map (jumpTableEntry config) ids
|
|
| 5465 | - in CmmData section (mkAlignment 1, CmmStaticsRaw lbl jumpTable)
|
|
| 5476 | +generateJumpTableForInstr config = \case
|
|
| 5477 | + JMP_TBL _ ids section table_lbl mrel_lbl ->
|
|
| 5478 | + let getBlockId (DestBlockId id) = id
|
|
| 5479 | + getBlockId _ = panic "Non-Label target in Jump Table"
|
|
| 5480 | + block_ids = map (fmap getBlockId) ids
|
|
| 5481 | + |
|
| 5482 | + jumpTable = case mrel_lbl of
|
|
| 5483 | + Nothing -> map mk_absolute block_ids -- absolute entries
|
|
| 5484 | + Just rel_lbl -> map (mk_relative rel_lbl) block_ids -- offsets relative to rel_lbl
|
|
| 5485 | + |
|
| 5486 | + mk_absolute = \case
|
|
| 5487 | + Nothing -> CmmStaticLit (CmmInt 0 (ncgWordWidth config))
|
|
| 5488 | + Just blockid -> CmmStaticLit (CmmLabel (blockLbl blockid))
|
|
| 5489 | + |
|
| 5490 | + mk_relative rel_lbl = \case
|
|
| 5491 | + Nothing -> CmmStaticLit (CmmInt 0 (ncgWordWidth config))
|
|
| 5492 | + Just blockid -> CmmStaticLit (CmmLabelDiffOff (blockLbl blockid) rel_lbl 0 (ncgWordWidth config))
|
|
| 5493 | + |
|
| 5494 | + in Just (CmmData section (mkAlignment 1, CmmStaticsRaw table_lbl jumpTable))
|
|
| 5495 | + |
|
| 5496 | + _ -> Nothing
|
|
| 5466 | 5497 | |
| 5467 | 5498 | extractUnwindPoints :: [Instr] -> [UnwindPoint]
|
| 5468 | 5499 | extractUnwindPoints instrs =
|
| ... | ... | @@ -252,6 +252,7 @@ data Instr |
| 252 | 252 | [Maybe JumpDest] -- Targets of the jump table
|
| 253 | 253 | Section -- Data section jump table should be put in
|
| 254 | 254 | CLabel -- Label of jump table
|
| 255 | + !(Maybe CLabel) -- Label used to compute relative offsets. Otherwise we store absolute addresses.
|
|
| 255 | 256 | -- | X86 call instruction
|
| 256 | 257 | | CALL (Either Imm Reg) -- ^ Jump target
|
| 257 | 258 | [RegWithFormat] -- ^ Arguments (required for register allocation)
|
| ... | ... | @@ -486,7 +487,7 @@ regUsageOfInstr platform instr |
| 486 | 487 | JXX _ _ -> mkRU [] []
|
| 487 | 488 | JXX_GBL _ _ -> mkRU [] []
|
| 488 | 489 | JMP op regs -> mkRU (use_R addrFmt op regs) []
|
| 489 | - JMP_TBL op _ _ _ -> mkRU (use_R addrFmt op []) []
|
|
| 490 | + JMP_TBL op _ _ _ _ -> mkRU (use_R addrFmt op []) []
|
|
| 490 | 491 | CALL (Left _) params -> mkRU params (map mkFmt $ callClobberedRegs platform)
|
| 491 | 492 | CALL (Right reg) params -> mkRU (mk addrFmt reg:params) (map mkFmt $ callClobberedRegs platform)
|
| 492 | 493 | CLTD fmt -> mkRU [mk fmt eax] [mk fmt edx]
|
| ... | ... | @@ -812,7 +813,7 @@ patchRegsOfInstr platform instr env |
| 812 | 813 | POP fmt op -> patch1 (POP fmt) op
|
| 813 | 814 | SETCC cond op -> patch1 (SETCC cond) op
|
| 814 | 815 | JMP op regs -> JMP (patchOp op) regs
|
| 815 | - JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl
|
|
| 816 | + JMP_TBL op ids s tl jl -> JMP_TBL (patchOp op) ids s tl jl
|
|
| 816 | 817 | |
| 817 | 818 | FMA3 fmt perm var x1 x2 x3 -> patch3 (FMA3 fmt perm var) x1 x2 x3
|
| 818 | 819 | |
| ... | ... | @@ -1016,9 +1017,9 @@ isJumpishInstr instr |
| 1016 | 1017 | canFallthroughTo :: Instr -> BlockId -> Bool
|
| 1017 | 1018 | canFallthroughTo insn bid
|
| 1018 | 1019 | = case insn of
|
| 1019 | - JXX _ target -> bid == target
|
|
| 1020 | - JMP_TBL _ targets _ _ -> all isTargetBid targets
|
|
| 1021 | - _ -> False
|
|
| 1020 | + JXX _ target -> bid == target
|
|
| 1021 | + JMP_TBL _ targets _ _ _ -> all isTargetBid targets
|
|
| 1022 | + _ -> False
|
|
| 1022 | 1023 | where
|
| 1023 | 1024 | isTargetBid target = case target of
|
| 1024 | 1025 | Nothing -> True
|
| ... | ... | @@ -1031,9 +1032,9 @@ jumpDestsOfInstr |
| 1031 | 1032 | |
| 1032 | 1033 | jumpDestsOfInstr insn
|
| 1033 | 1034 | = case insn of
|
| 1034 | - JXX _ id -> [id]
|
|
| 1035 | - JMP_TBL _ ids _ _ -> [id | Just (DestBlockId id) <- ids]
|
|
| 1036 | - _ -> []
|
|
| 1035 | + JXX _ id -> [id]
|
|
| 1036 | + JMP_TBL _ ids _ _ _ -> [id | Just (DestBlockId id) <- ids]
|
|
| 1037 | + _ -> []
|
|
| 1037 | 1038 | |
| 1038 | 1039 | |
| 1039 | 1040 | patchJumpInstr
|
| ... | ... | @@ -1042,8 +1043,8 @@ patchJumpInstr |
| 1042 | 1043 | patchJumpInstr insn patchF
|
| 1043 | 1044 | = case insn of
|
| 1044 | 1045 | JXX cc id -> JXX cc (patchF id)
|
| 1045 | - JMP_TBL op ids section lbl
|
|
| 1046 | - -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section lbl
|
|
| 1046 | + JMP_TBL op ids section table_lbl rel_lbl
|
|
| 1047 | + -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section table_lbl rel_lbl
|
|
| 1047 | 1048 | _ -> insn
|
| 1048 | 1049 | where
|
| 1049 | 1050 | patchJumpDest f (DestBlockId id) = DestBlockId (f id)
|
| ... | ... | @@ -1504,14 +1505,14 @@ shortcutJump fn insn = shortcutJump' fn (setEmpty :: LabelSet) insn |
| 1504 | 1505 | Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
|
| 1505 | 1506 | Just (DestImm imm) -> shortcutJump' fn seen' (JXX_GBL cc imm)
|
| 1506 | 1507 | where seen' = setInsert id seen
|
| 1507 | - shortcutJump' fn _ (JMP_TBL addr blocks section tblId) =
|
|
| 1508 | + shortcutJump' fn _ (JMP_TBL addr blocks section table_lbl rel_lbl) =
|
|
| 1508 | 1509 | let updateBlock (Just (DestBlockId bid)) =
|
| 1509 | 1510 | case fn bid of
|
| 1510 | 1511 | Nothing -> Just (DestBlockId bid )
|
| 1511 | 1512 | Just dest -> Just dest
|
| 1512 | 1513 | updateBlock dest = dest
|
| 1513 | 1514 | blocks' = map updateBlock blocks
|
| 1514 | - in JMP_TBL addr blocks' section tblId
|
|
| 1515 | + in JMP_TBL addr blocks' section table_lbl rel_lbl
|
|
| 1515 | 1516 | shortcutJump' _ _ other = other
|
| 1516 | 1517 | |
| 1517 | 1518 | -- Here because it knows about JumpDest
|
| ... | ... | @@ -895,7 +895,7 @@ pprInstr platform i = case i of |
| 895 | 895 | JMP op _
|
| 896 | 896 | -> line $ text "\tjmp *" <> pprOperand platform (archWordFormat (target32Bit platform)) op
|
| 897 | 897 | |
| 898 | - JMP_TBL op _ _ _
|
|
| 898 | + JMP_TBL op _ _ _ _
|
|
| 899 | 899 | -> pprInstr platform (JMP op [])
|
| 900 | 900 | |
| 901 | 901 | CALL (Left imm) _
|
| ... | ... | @@ -2993,12 +2993,12 @@ pushCoValArg co |
| 2993 | 2993 | Pair tyL tyR = coercionKind co
|
| 2994 | 2994 | |
| 2995 | 2995 | pushCoercionIntoLambda
|
| 2996 | - :: HasDebugCallStack => Subst -> InVar -> InExpr -> OutCoercionR -> Maybe (OutVar, OutExpr)
|
|
| 2996 | + :: HasDebugCallStack => InScopeSet -> Var -> CoreExpr -> CoercionR -> Maybe (Var, CoreExpr)
|
|
| 2997 | 2997 | -- This implements the Push rule from the paper on coercions
|
| 2998 | 2998 | -- (\x. e) |> co
|
| 2999 | 2999 | -- ===>
|
| 3000 | 3000 | -- (\x'. e |> co')
|
| 3001 | -pushCoercionIntoLambda subst x e co
|
|
| 3001 | +pushCoercionIntoLambda in_scope x e co
|
|
| 3002 | 3002 | | assert (not (isTyVar x) && not (isCoVar x)) True
|
| 3003 | 3003 | , Pair s1s2 t1t2 <- coercionKind co
|
| 3004 | 3004 | , Just {} <- splitFunTy_maybe s1s2
|
| ... | ... | @@ -3011,9 +3011,9 @@ pushCoercionIntoLambda subst x e co |
| 3011 | 3011 | -- Should we optimize the coercions here?
|
| 3012 | 3012 | -- Otherwise they might not match too well
|
| 3013 | 3013 | x' = x `setIdType` t1 `setIdMult` w1
|
| 3014 | - in_scope' = substInScopeSet subst `extendInScopeSet` x'
|
|
| 3014 | + in_scope' = in_scope `extendInScopeSet` x'
|
|
| 3015 | 3015 | subst' =
|
| 3016 | - extendIdSubst (setInScope subst in_scope')
|
|
| 3016 | + extendIdSubst (setInScope emptySubst in_scope')
|
|
| 3017 | 3017 | x
|
| 3018 | 3018 | (mkCast (Var x') (mkSymCo co1))
|
| 3019 | 3019 | -- We substitute x' for x, except we need to preserve types.
|
| ... | ... | @@ -393,12 +393,19 @@ simple_app env e0@(Lam {}) as0@(_:_) |
| 393 | 393 | = wrapLet mb_pr $ do_beta env'' body as
|
| 394 | 394 | where (env', b') = subst_opt_bndr env b
|
| 395 | 395 | |
| 396 | - do_beta env e@(Lam b body) as@(CastIt co:rest)
|
|
| 397 | - -- See Note [Desugaring unlifted newtypes]
|
|
| 396 | + -- See Note [Eliminate casts in function position]
|
|
| 397 | + do_beta env e@(Lam b _) as@(CastIt out_co:rest)
|
|
| 398 | 398 | | isNonCoVarId b
|
| 399 | - , Just (b', body') <- pushCoercionIntoLambda (soe_subst env) b body co
|
|
| 399 | + -- Optimise the inner lambda to make it an 'OutExpr', which makes it
|
|
| 400 | + -- possible to call 'pushCoercionIntoLambda' with the 'OutCoercion' 'co'.
|
|
| 401 | + -- This is kind of horrible, as for nested casted lambdas with a big body,
|
|
| 402 | + -- we will repeatedly optimise the body (once for each binder). However,
|
|
| 403 | + -- we need to do this to avoid mixing 'InExpr' and 'OutExpr', or two
|
|
| 404 | + -- 'InExpr' with different environments (getting this wrong caused #26588 & #26589.)
|
|
| 405 | + , Lam out_b out_body <- simple_app env e []
|
|
| 406 | + , Just (b', body') <- pushCoercionIntoLambda (soeInScope env) out_b out_body out_co
|
|
| 400 | 407 | = do_beta (soeZapSubst env) (Lam b' body') rest
|
| 401 | - -- soeZapSubst: pushCoercionIntoLambda applies the substitution
|
|
| 408 | + -- soeZapSubst: we've already optimised everything (the lambda and 'rest') by now.
|
|
| 402 | 409 | | otherwise
|
| 403 | 410 | = rebuild_app env (simple_opt_expr env e) as
|
| 404 | 411 | |
| ... | ... | @@ -511,7 +518,31 @@ TL;DR: To avoid the rest of the compiler pipeline seeing these bad lambas, we |
| 511 | 518 | rely on the simple optimiser to both inline the newtype unfolding and
|
| 512 | 519 | subsequently deal with the resulting lambdas (either beta-reducing them
|
| 513 | 520 | altogether or pushing coercions into them so that they satisfy the
|
| 514 | -representation-polymorphism invariants).
|
|
| 521 | +representation-polymorphism invariants). See Note [Eliminate casts in function position].
|
|
| 522 | + |
|
| 523 | +[Alternative approach] (GHC ticket #26608)
|
|
| 524 | + |
|
| 525 | + We could instead, in the typechecker, emit a special form (a new constructor
|
|
| 526 | + of XXExprGhcTc) for instantiations of representation-polymorphic unlifted
|
|
| 527 | + newtypes (whether applied to a value argument or not):
|
|
| 528 | + |
|
| 529 | + UnliftedNT :: DataCon -> [Type] -> Coercion -> XXExprGhcTc
|
|
| 530 | + |
|
| 531 | + where "UnliftedNT nt_con [ty1, ...] co" represents the expression:
|
|
| 532 | + |
|
| 533 | + ( nt_con @ty1 ... ) |> co
|
|
| 534 | + |
|
| 535 | + The desugarer would then turn these AST nodes into appropriate Core, doing
|
|
| 536 | + what the simple optimiser does today:
|
|
| 537 | + - inline the compulsory unfolding of the newtype constructor
|
|
| 538 | + - apply it to its type arguments and beta reduce
|
|
| 539 | + - push the coercion into the resulting lambda
|
|
| 540 | + |
|
| 541 | + This would have several advantages:
|
|
| 542 | + - the desugarer would never produce "invalid" Core that needs to be
|
|
| 543 | + tidied up by the simple optimiser,
|
|
| 544 | + - the ugly and inefficient implementation described in
|
|
| 545 | + Note [Eliminate casts in function position] could be removed.
|
|
| 515 | 546 | |
| 516 | 547 | Wrinkle [Unlifted newtypes with wrappers]
|
| 517 | 548 | |
| ... | ... | @@ -717,50 +748,49 @@ rhss here. |
| 717 | 748 | |
| 718 | 749 | Note [Eliminate casts in function position]
|
| 719 | 750 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 720 | -Consider the following program:
|
|
| 751 | +Due to the current implementation strategy for representation-polymorphic
|
|
| 752 | +unlifted newtypes, as described in Note [Desugaring unlifted newtypes], we rely
|
|
| 753 | +on the simple optimiser to push coercions into lambdas, such as in the following
|
|
| 754 | +example:
|
|
| 721 | 755 | |
| 722 | 756 | type R :: Type -> RuntimeRep
|
| 723 | - type family R a where { R Float = FloatRep; R Double = DoubleRep }
|
|
| 724 | - type F :: forall (a :: Type) -> TYPE (R a)
|
|
| 725 | - type family F a where { F Float = Float# ; F Double = Double# }
|
|
| 757 | + type family R a where { R Int = IntRep }
|
|
| 758 | + type F :: forall a -> TYPE (R a)
|
|
| 759 | + type family F a where { F Int = Int# }
|
|
| 726 | 760 | |
| 727 | - type N :: forall (a :: Type) -> TYPE (R a)
|
|
| 728 | 761 | newtype N a = MkN (F a)
|
| 729 | 762 | |
| 730 | -As MkN is a newtype, its unfolding is a lambda which wraps its argument
|
|
| 731 | -in a cast:
|
|
| 732 | - |
|
| 733 | - MkN :: forall (a :: Type). F a -> N a
|
|
| 734 | - MkN = /\a \(x::F a). x |> co_ax
|
|
| 735 | - -- recall that F a :: TYPE (R a)
|
|
| 736 | - |
|
| 737 | -This is a representation-polymorphic lambda, in which the binder has an unknown
|
|
| 738 | -representation (R a). We can't compile such a lambda on its own, but we can
|
|
| 739 | -compile instantiations, such as `MkN @Float` or `MkN @Double`.
|
|
| 763 | +Now, an instantiated occurrence of 'MkN', such as 'MkN @Int' (whether applied
|
|
| 764 | +to a value argument or not) will lead, after inlining the compulsory unfolding
|
|
| 765 | +of 'MkN', to a lambda fo the form:
|
|
| 740 | 766 | |
| 741 | -Our strategy to avoid running afoul of the representation-polymorphism
|
|
| 742 | -invariants of Note [Representation polymorphism invariants] in GHC.Core is thus:
|
|
| 767 | + ( \ ( x :: F Int ) -> body ) |> co
|
|
| 743 | 768 | |
| 744 | - 1. Give the newtype a compulsory unfolding (it has no binding, as we can't
|
|
| 745 | - define lambdas with representation-polymorphic value binders in source Haskell).
|
|
| 746 | - 2. Rely on the optimiser to beta-reduce away any representation-polymorphic
|
|
| 747 | - value binders.
|
|
| 769 | + where
|
|
| 770 | + co :: ( F Int -> res ) ~# ( Int# -> res )
|
|
| 748 | 771 | |
| 749 | -For example, consider the application
|
|
| 772 | +The problem is that we now have a lambda abstraction whose binder does not have a
|
|
| 773 | +fixed RuntimeRep in the sense of Note [Fixed RuntimeRep] in GHC.Tc.Utils.Concrete.
|
|
| 750 | 774 | |
| 751 | - MkN @Float 34.0#
|
|
| 775 | +However, if we use 'pushCoercionIntoLambda', we end up with:
|
|
| 752 | 776 | |
| 753 | -After inlining MkN we'll get
|
|
| 777 | + ( \ ( x' :: Int# ) -> body' )
|
|
| 754 | 778 | |
| 755 | - ((/\a \(x:F a). x |> co_ax) @Float) |> co 34#
|
|
| 779 | +which satisfies the representation-polymorphism invariants of
|
|
| 780 | +Note [Representation polymorphism invariants] in GHC.Core.
|
|
| 756 | 781 | |
| 757 | -where co :: (F Float -> N Float) ~ (Float# ~ N Float)
|
|
| 782 | +In conclusion:
|
|
| 758 | 783 | |
| 759 | -But to actually beta-reduce that lambda, we need to push the 'co'
|
|
| 760 | -inside the `\x` with pushCoercionIntoLambda. Hence the extra
|
|
| 761 | -equation for Cast-of-Lam in simple_app.
|
|
| 784 | + 1. The simple optimiser must push casts into lambdas.
|
|
| 785 | + 2. It must also deal with a situation such as (MkN @Int) |> co, where we first
|
|
| 786 | + inline the compulsory unfolding of N. This means the simple optimiser must
|
|
| 787 | + "peel off" the casts and optimise the inner expression first, to determine
|
|
| 788 | + whether it is a lambda abstraction or not.
|
|
| 762 | 789 | |
| 763 | -This is regrettably delicate.
|
|
| 790 | +This is regrettably delicate. If we could make sure the typechecker/desugarer
|
|
| 791 | +did not produce these bad lambdas in the first place (as described in
|
|
| 792 | +[Alternative approach] in Note [Desugaring unlifted newtypes]), we could
|
|
| 793 | +get rid of this ugly logic.
|
|
| 764 | 794 | |
| 765 | 795 | Note [Preserve join-binding arity]
|
| 766 | 796 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| ... | ... | @@ -1673,7 +1703,7 @@ exprIsLambda_maybe ise@(ISE in_scope_set _) (Cast casted_e co) |
| 1673 | 1703 | -- this implies that x is not in scope in gamma (makes this code simpler)
|
| 1674 | 1704 | , not (isTyVar x) && not (isCoVar x)
|
| 1675 | 1705 | , assert (not $ x `elemVarSet` tyCoVarsOfCo co) True
|
| 1676 | - , Just (x',e') <- pushCoercionIntoLambda (mkEmptySubst in_scope_set) x e co
|
|
| 1706 | + , Just (x',e') <- pushCoercionIntoLambda in_scope_set x e co
|
|
| 1677 | 1707 | , let res = Just (x',e',ts)
|
| 1678 | 1708 | = --pprTrace "exprIsLambda_maybe:Cast" (vcat [ppr casted_e,ppr co,ppr res)])
|
| 1679 | 1709 | res
|
| ... | ... | @@ -1268,6 +1268,7 @@ optLevelFlags -- see Note [Documenting optimisation flags] |
| 1268 | 1268 | , ([1,2], Opt_CfgBlocklayout) -- Experimental
|
| 1269 | 1269 | |
| 1270 | 1270 | , ([1,2], Opt_Specialise)
|
| 1271 | + , ([1,2], Opt_PolymorphicSpecialisation) -- Now on by default (#23559)
|
|
| 1271 | 1272 | , ([1,2], Opt_CrossModuleSpecialise)
|
| 1272 | 1273 | , ([1,2], Opt_InlineGenerics)
|
| 1273 | 1274 | , ([1,2], Opt_Strictness)
|
| ... | ... | @@ -909,6 +909,7 @@ optimisationFlags = EnumSet.fromList |
| 909 | 909 | , Opt_SpecialiseAggressively
|
| 910 | 910 | , Opt_CrossModuleSpecialise
|
| 911 | 911 | , Opt_StaticArgumentTransformation
|
| 912 | + , Opt_PolymorphicSpecialisation
|
|
| 912 | 913 | , Opt_CSE
|
| 913 | 914 | , Opt_StgCSE
|
| 914 | 915 | , Opt_StgLiftLams
|
| ... | ... | @@ -749,13 +749,13 @@ tcInstFun do_ql inst_final (tc_fun, fun_ctxt) fun_sigma rn_args |
| 749 | 749 | go1 _pos acc fun_ty []
|
| 750 | 750 | | XExpr (ConLikeTc (RealDataCon dc)) <- tc_fun
|
| 751 | 751 | , isNewDataCon dc
|
| 752 | - , [Scaled _ arg_ty] <- dataConOrigArgTys dc
|
|
| 752 | + , [Scaled _ orig_arg_ty] <- dataConOrigArgTys dc
|
|
| 753 | 753 | , n_val_args == 0
|
| 754 | 754 | -- If we're dealing with an unsaturated representation-polymorphic
|
| 755 | 755 | -- UnliftedNewype, then perform a representation-polymorphism check.
|
| 756 | 756 | -- See Note [Representation-polymorphism checks for unsaturated unlifted newtypes]
|
| 757 | 757 | -- in GHC.Tc.Utils.Concrete.
|
| 758 | - , not $ typeHasFixedRuntimeRep arg_ty
|
|
| 758 | + , not $ typeHasFixedRuntimeRep orig_arg_ty
|
|
| 759 | 759 | = do { (wrap_co, arg_ty, res_ty) <-
|
| 760 | 760 | matchActualFunTy (FRRRepPolyUnliftedNewtype dc)
|
| 761 | 761 | (Just $ HsExprTcThing tc_fun)
|
| ... | ... | @@ -195,7 +195,7 @@ For example: :: |
| 195 | 195 | g3c :: Int -> forall x y. y -> x -> x
|
| 196 | 196 | |
| 197 | 197 | f4 :: (Int -> forall a. (Eq a, Show a) => a -> a) -> Bool
|
| 198 | - g4 :: Int -> forall x. (Show x, Eq x) => x -> x) -> Bool
|
|
| 198 | + g4 :: Int -> forall x. (Show x, Eq x) => x -> x
|
|
| 199 | 199 | |
| 200 | 200 | Then the application ``f3 g3a`` is well-typed, because ``g3a`` has a type that matches the type
|
| 201 | 201 | expected by ``f3``. But ``f3 g3b`` is not well typed, because the foralls are in different places.
|
| ... | ... | @@ -680,7 +680,7 @@ thus: :: |
| 680 | 680 | When doing so, we (optionally) may drop the "``family``" keyword.
|
| 681 | 681 | |
| 682 | 682 | The type parameters must all be type variables, of course, and some (but
|
| 683 | -not necessarily all) of then can be the class parameters. Each class
|
|
| 683 | +not necessarily all) of them can be the class parameters. Each class
|
|
| 684 | 684 | parameter may only be used at most once per associated type, but some
|
| 685 | 685 | may be omitted and they may be in an order other than in the class head.
|
| 686 | 686 | Hence, the following contrived example is admissible: ::
|
| ... | ... | @@ -770,10 +770,9 @@ Options affecting code generation |
| 770 | 770 | :type: dynamic
|
| 771 | 771 | :category: codegen
|
| 772 | 772 | |
| 773 | - Generate position-independent code (code that can be put into shared
|
|
| 774 | - libraries). This currently works on Linux x86 and x86-64. On
|
|
| 775 | - Windows, position-independent code is never used so the flag is a
|
|
| 776 | - no-op on that platform.
|
|
| 773 | + Generate position-independent code (PIC). This code can be put into shared
|
|
| 774 | + libraries and is sometimes required by operating systems, e.g. systems using
|
|
| 775 | + Address Space Layout Randomization (ASLR).
|
|
| 777 | 776 | |
| 778 | 777 | .. ghc-flag:: -fexternal-dynamic-refs
|
| 779 | 778 | :shortdesc: Generate code for linking against dynamic libraries
|
| ... | ... | @@ -790,9 +789,7 @@ Options affecting code generation |
| 790 | 789 | :category: codegen
|
| 791 | 790 | |
| 792 | 791 | Generate code in such a way to be linkable into a position-independent
|
| 793 | - executable This currently works on Linux x86 and x86-64. On Windows,
|
|
| 794 | - position-independent code is never used so the flag is a no-op on that
|
|
| 795 | - platform. To link the final executable use :ghc-flag:`-pie`.
|
|
| 792 | + executable. To link the final executable use :ghc-flag:`-pie`.
|
|
| 796 | 793 | |
| 797 | 794 | .. ghc-flag:: -dynamic
|
| 798 | 795 | :shortdesc: Build dynamically-linked object files and executables
|
| ... | ... | @@ -1325,10 +1325,7 @@ as such you shouldn't need to set any of them explicitly. A flag |
| 1325 | 1325 | :reverse: -fno-polymorphic-specialisation
|
| 1326 | 1326 | :category:
|
| 1327 | 1327 | |
| 1328 | - :default: off
|
|
| 1329 | - |
|
| 1330 | - Warning, this feature is highly experimental and may lead to incorrect runtime
|
|
| 1331 | - results. Use at your own risk (:ghc-ticket:`23469`, :ghc-ticket:`23109`, :ghc-ticket:`21229`, :ghc-ticket:`23445`).
|
|
| 1328 | + :default: on
|
|
| 1332 | 1329 | |
| 1333 | 1330 | Enable specialisation of function calls to known dictionaries with free type variables.
|
| 1334 | 1331 | The created specialisation will abstract over the type variables free in the dictionary.
|
| ... | ... | @@ -491,13 +491,7 @@ endEventLogging(void) |
| 491 | 491 | |
| 492 | 492 | eventlog_enabled = false;
|
| 493 | 493 | |
| 494 | - // Flush all events remaining in the buffers.
|
|
| 495 | - //
|
|
| 496 | - // N.B. Don't flush if shutting down: this was done in
|
|
| 497 | - // finishCapEventLogging and the capabilities have already been freed.
|
|
| 498 | - if (getSchedState() != SCHED_SHUTTING_DOWN) {
|
|
| 499 | - flushEventLog(NULL);
|
|
| 500 | - }
|
|
| 494 | + flushEventLog(NULL);
|
|
| 501 | 495 | |
| 502 | 496 | ACQUIRE_LOCK(&eventBufMutex);
|
| 503 | 497 | |
| ... | ... | @@ -1626,15 +1620,24 @@ void flushEventLog(Capability **cap USED_IF_THREADS) |
| 1626 | 1620 | return;
|
| 1627 | 1621 | }
|
| 1628 | 1622 | |
| 1623 | + // N.B. Don't flush if shutting down: this was done in
|
|
| 1624 | + // finishCapEventLogging and the capabilities have already been freed.
|
|
| 1625 | + // This can also race against the shutdown if the flush is triggered by the
|
|
| 1626 | + // ticker thread. (#26573)
|
|
| 1627 | + if (getSchedState() == SCHED_SHUTTING_DOWN) {
|
|
| 1628 | + return;
|
|
| 1629 | + }
|
|
| 1630 | + |
|
| 1629 | 1631 | ACQUIRE_LOCK(&eventBufMutex);
|
| 1630 | 1632 | printAndClearEventBuf(&eventBuf);
|
| 1631 | 1633 | RELEASE_LOCK(&eventBufMutex);
|
| 1632 | 1634 | |
| 1633 | 1635 | #if defined(THREADED_RTS)
|
| 1634 | - Task *task = getMyTask();
|
|
| 1636 | + Task *task = newBoundTask();
|
|
| 1635 | 1637 | stopAllCapabilitiesWith(cap, task, SYNC_FLUSH_EVENT_LOG);
|
| 1636 | 1638 | flushAllCapsEventsBufs();
|
| 1637 | 1639 | releaseAllCapabilities(getNumCapabilities(), cap ? *cap : NULL, task);
|
| 1640 | + exitMyTask();
|
|
| 1638 | 1641 | #else
|
| 1639 | 1642 | flushLocalEventsBuf(getCapability(0));
|
| 1640 | 1643 | #endif
|
| ... | ... | @@ -552,7 +552,12 @@ static int compare_path(StgWord key1, StgWord key2) |
| 552 | 552 | |
| 553 | 553 | static void addLoadedDll(LoadedDllCache *cache, const pathchar *dll_name, HINSTANCE instance)
|
| 554 | 554 | {
|
| 555 | - insertHashTable_(cache->hash, (StgWord) dll_name, instance, hash_path);
|
|
| 555 | + // dll_name might be deallocated, we need to copy it to have a stable reference to the contents
|
|
| 556 | + // See #26613
|
|
| 557 | + size_t size = wcslen(dll_name) + 1;
|
|
| 558 | + pathchar* dll_name_copy = stgMallocBytes(size * sizeof(pathchar), "addLoadedDll");
|
|
| 559 | + wcsncpy(dll_name_copy, dll_name, size);
|
|
| 560 | + insertHashTable_(cache->hash, (StgWord) dll_name_copy, instance, hash_path);
|
|
| 556 | 561 | }
|
| 557 | 562 | |
| 558 | 563 | static HINSTANCE isDllLoaded(const LoadedDllCache *cache, const pathchar *dll_name)
|
| 1 | +module Main (main) where
|
|
| 2 | + |
|
| 3 | +data Command
|
|
| 4 | + = Command1
|
|
| 5 | + | Command2
|
|
| 6 | + | Command3
|
|
| 7 | + | Command4
|
|
| 8 | + | Command5
|
|
| 9 | + | Command6 -- Commenting this line works with -fPIC, uncommenting leads to a crash.
|
|
| 10 | + |
|
| 11 | +main :: IO ()
|
|
| 12 | +main = do
|
|
| 13 | + let x = case cmd of
|
|
| 14 | + Command1 -> 1 :: Int
|
|
| 15 | + Command2 -> 2
|
|
| 16 | + Command3 -> 3
|
|
| 17 | + Command4 -> 4
|
|
| 18 | + Command5 -> 5
|
|
| 19 | + Command6 -> 6
|
|
| 20 | + putStrLn (show x)
|
|
| 21 | + |
|
| 22 | +{-# NOINLINE cmd #-}
|
|
| 23 | +cmd :: Command
|
|
| 24 | +cmd = Command6 |
| 1 | +6 |
| ... | ... | @@ -257,3 +257,4 @@ test('CCallConv', [req_c], compile_and_run, ['CCallConv_c.c']) |
| 257 | 257 | test('T25364', normal, compile_and_run, [''])
|
| 258 | 258 | test('T26061', normal, compile_and_run, [''])
|
| 259 | 259 | test('T26537', normal, compile_and_run, ['-O2 -fregs-graph'])
|
| 260 | +test('T24016', normal, compile_and_run, ['-O1 -fPIC']) |
| ... | ... | @@ -2,6 +2,11 @@ test('testblockalloc', |
| 2 | 2 | [c_src, only_ways(['normal','threaded1']), extra_run_opts('+RTS -I0')],
|
| 3 | 3 | compile_and_run, [''])
|
| 4 | 4 | |
| 5 | +test('numeric_version_eventlog_flush',
|
|
| 6 | + [ignore_stdout, req_ghc_with_threaded_rts],
|
|
| 7 | + run_command,
|
|
| 8 | + ['{compiler} --numeric-version +RTS -l --eventlog-flush-interval=1 -RTS'])
|
|
| 9 | + |
|
| 5 | 10 | test('testmblockalloc',
|
| 6 | 11 | [c_src, only_ways(['normal','threaded1']), extra_run_opts('+RTS -I0 -xr0.125T'),
|
| 7 | 12 | when(arch('wasm32'), skip)], # MBlocks can't be freed on wasm32, see Note [Megablock allocator on wasm] in rts
|
| 1 | +module T26588 ( getOptionSettingFromText ) where
|
|
| 2 | + |
|
| 3 | +import Control.Applicative ( Const(..) )
|
|
| 4 | +import Data.Map (Map)
|
|
| 5 | +import qualified Data.Map.Strict as Map
|
|
| 6 | + |
|
| 7 | +------------------------------------------------------------------------
|
|
| 8 | +-- ConfigState
|
|
| 9 | + |
|
| 10 | +data ConfigLeaf
|
|
| 11 | +data ConfigTrie = ConfigTrie !(Maybe ConfigLeaf) !ConfigMap
|
|
| 12 | + |
|
| 13 | +type ConfigMap = Map Int ConfigTrie
|
|
| 14 | + |
|
| 15 | +freshLeaf :: [Int] -> ConfigLeaf -> ConfigTrie
|
|
| 16 | +freshLeaf [] l = ConfigTrie (Just l) mempty
|
|
| 17 | +freshLeaf (a:as) l = ConfigTrie Nothing (Map.singleton a (freshLeaf as l))
|
|
| 18 | + |
|
| 19 | +adjustConfigTrie :: Functor t => [Int] -> (Maybe ConfigLeaf -> t (Maybe ConfigLeaf)) -> Maybe (ConfigTrie) -> t (Maybe ConfigTrie)
|
|
| 20 | +adjustConfigTrie as f Nothing = fmap (freshLeaf as) <$> f Nothing
|
|
| 21 | +adjustConfigTrie (a:as) f (Just (ConfigTrie x m)) = Just . ConfigTrie x <$> adjustConfigMap a as f m
|
|
| 22 | +adjustConfigTrie [] f (Just (ConfigTrie x m)) = g <$> f x
|
|
| 23 | + where g Nothing | Map.null m = Nothing
|
|
| 24 | + g x' = Just (ConfigTrie x' m)
|
|
| 25 | + |
|
| 26 | +adjustConfigMap :: Functor t => Int -> [Int] -> (Maybe ConfigLeaf -> t (Maybe ConfigLeaf)) -> ConfigMap -> t ConfigMap
|
|
| 27 | +adjustConfigMap a as f = Map.alterF (adjustConfigTrie as f) a
|
|
| 28 | + |
|
| 29 | +getOptionSettingFromText :: Int -> [Int] -> ConfigMap -> IO ()
|
|
| 30 | +getOptionSettingFromText p ps = getConst . adjustConfigMap p ps f
|
|
| 31 | + where
|
|
| 32 | + f _ = Const (return ()) |
| 1 | +module T26589 ( executeTest ) where
|
|
| 2 | + |
|
| 3 | +-- base
|
|
| 4 | +import Data.Coerce ( coerce )
|
|
| 5 | +import Data.Foldable ( foldMap )
|
|
| 6 | + |
|
| 7 | +--------------------------------------------------------------------------------
|
|
| 8 | + |
|
| 9 | +newtype Traversal f = Traversal { getTraversal :: f () }
|
|
| 10 | + |
|
| 11 | +instance Applicative f => Semigroup (Traversal f) where
|
|
| 12 | + Traversal f1 <> Traversal f2 = Traversal $ f1 *> f2
|
|
| 13 | +instance Applicative f => Monoid (Traversal f) where
|
|
| 14 | + mempty = Traversal $ pure ()
|
|
| 15 | + |
|
| 16 | +newtype Seq a = Seq (FingerTree (Elem a))
|
|
| 17 | +newtype Elem a = Elem { getElem :: a }
|
|
| 18 | + |
|
| 19 | +data FingerTree a
|
|
| 20 | + = EmptyT
|
|
| 21 | + | Deep !a (FingerTree a) !a
|
|
| 22 | + |
|
| 23 | +executeTest :: Seq () -> IO ()
|
|
| 24 | +executeTest fins = destroyResources
|
|
| 25 | + where
|
|
| 26 | + destroyResources :: IO ()
|
|
| 27 | + destroyResources =
|
|
| 28 | + getTraversal $
|
|
| 29 | + flip foldMap1 fins $ \ _ ->
|
|
| 30 | + Traversal $ return ()
|
|
| 31 | + |
|
| 32 | +foldMap1 :: forall m a. Monoid m => (a -> m) -> Seq a -> m
|
|
| 33 | +foldMap1 = coerce (foldMap2 :: (Elem a -> m) -> FingerTree (Elem a) -> m)
|
|
| 34 | + |
|
| 35 | +foldMap2 :: Monoid m => (Elem a -> m) -> FingerTree (Elem a) -> m
|
|
| 36 | +foldMap2 _ EmptyT = mempty
|
|
| 37 | +foldMap2 f' (Deep pr' m' sf') = f' pr' <> foldMapTree f' m' <> f' sf'
|
|
| 38 | + where
|
|
| 39 | + foldMapTree :: Monoid m => (a -> m) -> FingerTree a -> m
|
|
| 40 | + foldMapTree _ EmptyT = mempty
|
|
| 41 | + foldMapTree f (Deep pr m sf) =
|
|
| 42 | + f pr <>
|
|
| 43 | + foldMapTree f m <>
|
|
| 44 | + f sf |
| 1 | 1 | |
| 2 | 2 | ==================== Tidy Core rules ====================
|
| 3 | +"SPEC $c*> @(ST s) @_"
|
|
| 4 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 5 | + $fApplicativeReaderT_$c*> @(ST s) @r $dApplicative
|
|
| 6 | + = ($fApplicativeReaderT2 @s @r)
|
|
| 7 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
|
|
| 8 | + <ReaderT r (ST s) a>_R
|
|
| 9 | + ->_R <ReaderT r (ST s) b>_R
|
|
| 10 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <b>_R)
|
|
| 11 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <b>_N)
|
|
| 12 | + :: Coercible
|
|
| 13 | + (forall a b.
|
|
| 14 | + ReaderT r (ST s) a -> ReaderT r (ST s) b -> r -> STRep s b)
|
|
| 15 | + (forall a b.
|
|
| 16 | + ReaderT r (ST s) a -> ReaderT r (ST s) b -> ReaderT r (ST s) b))
|
|
| 17 | +"SPEC $c<$ @(ST s) @_"
|
|
| 18 | + forall (@s) (@r) ($dFunctor :: Functor (ST s)).
|
|
| 19 | + $fFunctorReaderT_$c<$ @(ST s) @r $dFunctor
|
|
| 20 | + = ($fApplicativeReaderT6 @s @r)
|
|
| 21 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
|
|
| 22 | + <a>_R
|
|
| 23 | + ->_R <ReaderT r (ST s) b>_R
|
|
| 24 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
|
|
| 25 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
|
|
| 26 | + :: Coercible
|
|
| 27 | + (forall a b. a -> ReaderT r (ST s) b -> r -> STRep s a)
|
|
| 28 | + (forall a b. a -> ReaderT r (ST s) b -> ReaderT r (ST s) a))
|
|
| 29 | +"SPEC $c<* @(ST s) @_"
|
|
| 30 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 31 | + $fApplicativeReaderT_$c<* @(ST s) @r $dApplicative
|
|
| 32 | + = ($fApplicativeReaderT1 @s @r)
|
|
| 33 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
|
|
| 34 | + <ReaderT r (ST s) a>_R
|
|
| 35 | + ->_R <ReaderT r (ST s) b>_R
|
|
| 36 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
|
|
| 37 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
|
|
| 38 | + :: Coercible
|
|
| 39 | + (forall a b.
|
|
| 40 | + ReaderT r (ST s) a -> ReaderT r (ST s) b -> r -> STRep s a)
|
|
| 41 | + (forall a b.
|
|
| 42 | + ReaderT r (ST s) a -> ReaderT r (ST s) b -> ReaderT r (ST s) a))
|
|
| 43 | +"SPEC $c<*> @(ST s) @_"
|
|
| 44 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 45 | + $fApplicativeReaderT9 @(ST s) @r $dApplicative
|
|
| 46 | + = ($fApplicativeReaderT4 @s @r)
|
|
| 47 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
|
|
| 48 | + <ReaderT r (ST s) (a -> b)>_R
|
|
| 49 | + ->_R <ReaderT r (ST s) a>_R
|
|
| 50 | + ->_R <r>_R
|
|
| 51 | + ->_R Sym (N:ST <s>_N <b>_R)
|
|
| 52 | + :: Coercible
|
|
| 53 | + (forall a b.
|
|
| 54 | + ReaderT r (ST s) (a -> b) -> ReaderT r (ST s) a -> r -> STRep s b)
|
|
| 55 | + (forall a b.
|
|
| 56 | + ReaderT r (ST s) (a -> b) -> ReaderT r (ST s) a -> r -> ST s b))
|
|
| 57 | +"SPEC $c>> @(ST s) @_"
|
|
| 58 | + forall (@s) (@r) ($dMonad :: Monad (ST s)).
|
|
| 59 | + $fMonadReaderT1 @(ST s) @r $dMonad
|
|
| 60 | + = $fMonadAbstractIOSTReaderT_$s$c>> @s @r
|
|
| 61 | +"SPEC $c>>= @(ST s) @_"
|
|
| 62 | + forall (@s) (@r) ($dMonad :: Monad (ST s)).
|
|
| 63 | + $fMonadReaderT2 @(ST s) @r $dMonad
|
|
| 64 | + = ($fMonadAbstractIOSTReaderT2 @s @r)
|
|
| 65 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
|
|
| 66 | + <ReaderT r (ST s) a>_R
|
|
| 67 | + ->_R <a -> ReaderT r (ST s) b>_R
|
|
| 68 | + ->_R <r>_R
|
|
| 69 | + ->_R Sym (N:ST <s>_N <b>_R)
|
|
| 70 | + :: Coercible
|
|
| 71 | + (forall a b.
|
|
| 72 | + ReaderT r (ST s) a -> (a -> ReaderT r (ST s) b) -> r -> STRep s b)
|
|
| 73 | + (forall a b.
|
|
| 74 | + ReaderT r (ST s) a -> (a -> ReaderT r (ST s) b) -> r -> ST s b))
|
|
| 75 | +"SPEC $cfmap @(ST s) @_"
|
|
| 76 | + forall (@s) (@r) ($dFunctor :: Functor (ST s)).
|
|
| 77 | + $fFunctorReaderT_$cfmap @(ST s) @r $dFunctor
|
|
| 78 | + = ($fApplicativeReaderT7 @s @r)
|
|
| 79 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N).
|
|
| 80 | + <a -> b>_R
|
|
| 81 | + ->_R <ReaderT r (ST s) a>_R
|
|
| 82 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <b>_R)
|
|
| 83 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <b>_N)
|
|
| 84 | + :: Coercible
|
|
| 85 | + (forall a b. (a -> b) -> ReaderT r (ST s) a -> r -> STRep s b)
|
|
| 86 | + (forall a b. (a -> b) -> ReaderT r (ST s) a -> ReaderT r (ST s) b))
|
|
| 87 | +"SPEC $cliftA2 @(ST s) @_"
|
|
| 88 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 89 | + $fApplicativeReaderT_$cliftA2 @(ST s) @r $dApplicative
|
|
| 90 | + = ($fApplicativeReaderT3 @s @r)
|
|
| 91 | + `cast` (forall (a ::~ <*>_N) (b ::~ <*>_N) (c ::~ <*>_N).
|
|
| 92 | + <a -> b -> c>_R
|
|
| 93 | + ->_R <ReaderT r (ST s) a>_R
|
|
| 94 | + ->_R <ReaderT r (ST s) b>_R
|
|
| 95 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <c>_R)
|
|
| 96 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <c>_N)
|
|
| 97 | + :: Coercible
|
|
| 98 | + (forall a b c.
|
|
| 99 | + (a -> b -> c)
|
|
| 100 | + -> ReaderT r (ST s) a -> ReaderT r (ST s) b -> r -> STRep s c)
|
|
| 101 | + (forall a b c.
|
|
| 102 | + (a -> b -> c)
|
|
| 103 | + -> ReaderT r (ST s) a -> ReaderT r (ST s) b -> ReaderT r (ST s) c))
|
|
| 104 | +"SPEC $cp1Applicative @(ST s) @_"
|
|
| 105 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 106 | + $fApplicativeReaderT_$cp1Applicative @(ST s) @r $dApplicative
|
|
| 107 | + = $fApplicativeReaderT_$s$fFunctorReaderT @s @r
|
|
| 108 | +"SPEC $cp1Monad @(ST s) @_"
|
|
| 109 | + forall (@s) (@r) ($dMonad :: Monad (ST s)).
|
|
| 110 | + $fMonadReaderT_$cp1Monad @(ST s) @r $dMonad
|
|
| 111 | + = $fApplicativeReaderT_$s$fApplicativeReaderT @s @r
|
|
| 112 | +"SPEC $cpure @(ST s) @_"
|
|
| 113 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 114 | + $fApplicativeReaderT_$cpure @(ST s) @r $dApplicative
|
|
| 115 | + = ($fApplicativeReaderT5 @s @r)
|
|
| 116 | + `cast` (forall (a ::~ <*>_N).
|
|
| 117 | + <a>_R
|
|
| 118 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
|
|
| 119 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
|
|
| 120 | + :: Coercible
|
|
| 121 | + (forall a. a -> r -> STRep s a)
|
|
| 122 | + (forall a. a -> ReaderT r (ST s) a))
|
|
| 123 | +"SPEC $creturn @(ST s) @_"
|
|
| 124 | + forall (@s) (@r) ($dMonad :: Monad (ST s)).
|
|
| 125 | + $fMonadReaderT_$creturn @(ST s) @r $dMonad
|
|
| 126 | + = ($fApplicativeReaderT5 @s @r)
|
|
| 127 | + `cast` (forall (a ::~ <*>_N).
|
|
| 128 | + <a>_R
|
|
| 129 | + ->_R <r>_R ->_R Sym (N:ST <s>_N <a>_R)
|
|
| 130 | + ; Sym (N:ReaderT <*>_N <r>_R <ST s>_R <a>_N)
|
|
| 131 | + :: Coercible
|
|
| 132 | + (forall a. a -> r -> STRep s a)
|
|
| 133 | + (forall a. a -> ReaderT r (ST s) a))
|
|
| 134 | +"SPEC $fApplicativeReaderT @(ST s) @_"
|
|
| 135 | + forall (@s) (@r) ($dApplicative :: Applicative (ST s)).
|
|
| 136 | + $fApplicativeReaderT @(ST s) @r $dApplicative
|
|
| 137 | + = $fApplicativeReaderT_$s$fApplicativeReaderT @s @r
|
|
| 138 | +"SPEC $fFunctorReaderT @(ST s) @_"
|
|
| 139 | + forall (@s) (@r) ($dFunctor :: Functor (ST s)).
|
|
| 140 | + $fFunctorReaderT @(ST s) @r $dFunctor
|
|
| 141 | + = $fApplicativeReaderT_$s$fFunctorReaderT @s @r
|
|
| 142 | +"SPEC $fMonadReaderT @(ST s) @_"
|
|
| 143 | + forall (@s) (@r) ($dMonad :: Monad (ST s)).
|
|
| 144 | + $fMonadReaderT @(ST s) @r $dMonad
|
|
| 145 | + = $fMonadAbstractIOSTReaderT_$s$fMonadReaderT @s @r
|
|
| 3 | 146 | "USPEC useAbstractMonad @(ReaderT Int (ST s))"
|
| 4 | 147 | forall (@s)
|
| 5 | 148 | ($dMonadAbstractIOST :: MonadAbstractIOST (ReaderT Int (ST s))).
|
| ... | ... | @@ -544,6 +544,9 @@ test('T25883b', normal, compile_grep_core, ['']) |
| 544 | 544 | test('T25883c', normal, compile_grep_core, [''])
|
| 545 | 545 | test('T25883d', [extra_files(['T25883d_import.hs'])], multimod_compile_filter, ['T25883d', '-O -ddump-simpl -dno-typeable-binds -dsuppress-all -dsuppress-uniques', r'grep -e "y ="'])
|
| 546 | 546 | |
| 547 | +test('T26588', normal, compile, ['-package containers -O'])
|
|
| 548 | +test('T26589', normal, compile, ['-O'])
|
|
| 549 | + |
|
| 547 | 550 | test('T25976', [grep_errmsg('Dead Code')], compile, ['-O -ddump-simpl -dsuppress-uniques -dno-typeable-binds'])
|
| 548 | 551 | |
| 549 | 552 | test('T25965', normal, compile, ['-O'])
|
| ... | ... | @@ -19,6 +19,13 @@ |
| 19 | 19 | {-# LANGUAGE UndecidableInstances #-} -- For the (StmtLR GhcPs GhcPs (LocatedA (body GhcPs))) ExactPrint instance
|
| 20 | 20 | {-# OPTIONS_GHC -Wno-incomplete-uni-patterns -Wno-incomplete-record-updates #-}
|
| 21 | 21 | |
| 22 | +-- We switch off specialisation in this module. Otherwise we get lots of functions
|
|
| 23 | +-- specialised on lots of (GHC syntax tree) data types. Compilation time allocation
|
|
| 24 | +-- (at least with -fpolymorphic-specialisation; see !15058) blows up from 17G to 108G.
|
|
| 25 | +-- Bad! ExactPrint is not a performance-critical module so it's not worth taking the
|
|
| 26 | +-- largely-fruitless hit in compile time.
|
|
| 27 | +{-# OPTIONS_GHC -fno-specialise #-}
|
|
| 28 | + |
|
| 22 | 29 | module ExactPrint
|
| 23 | 30 | (
|
| 24 | 31 | ExactPrint(..)
|