[Git][ghc/ghc][wip/T26519] 3 commits: Don't re-use stack slots for growing registers
Peter Trommler pushed to branch wip/T26519 at Glasgow Haskell Compiler / GHC Commits: 023c301c by sheaf at 2026-01-13T04:57:30-05:00 Don't re-use stack slots for growing registers This commit avoids re-using a stack slot for a register that has grown but already had a stack slot. For example, suppose we have stack slot assigments %v1 :: FF64 |-> StackSlot 0 %v2 :: FF64 |-> StackSlot 1 Later, we start using %v1 at a larger format (e.g. F64x2) and we need to spill it again. Then we **must not** use StackSlot 0, as a spill at format F64x2 would clobber the data in StackSlot 1. This can cause some fragmentation of the `StackMap`, but that's probably OK. Fixes #26668 - - - - - d0966e64 by fendor at 2026-01-13T04:58:11-05:00 Remove `traceId` from ghc-pkg executable - - - - - b324fd55 by Peter Trommler at 2026-01-13T13:28:52+01:00 PPC NCG: Fix shift right MO code The shift amount in shift right [arithmetic] MOs is machine word width. Therefore remove unnecessary zero- or sign-extending of shift amount. It looks harmless to extend the shift amount argument because the shift right instruction uses only the seven lowest bits (i. e. mod 128). But now we have a conversion operation from a smaller type to word width around a memory load at word width. The types are not matching up but there is no check done in CodeGen. The necessary conversion from word width down to the smaller width would be translated into a no-op on PowerPC anyway. So all seems harmless if it was not for a small optimisation in getRegister'. In getRegister' a load instruction with the smaller width of the conversion operation was generated. This loaded the most significant bits of the word in memory on a big-endian platform. These bits were zero and hence shift right was used with shift amount zero and not one as required in test Sized. Fixes #26519 - - - - - 4 changed files: - compiler/GHC/CmmToAsm/PPC/CodeGen.hs - compiler/GHC/CmmToAsm/Reg/Linear.hs - compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs - utils/ghc-pkg/Main.hs Changes: ===================================== compiler/GHC/CmmToAsm/PPC/CodeGen.hs ===================================== @@ -467,48 +467,26 @@ getRegister' _ platform (CmmLoad mem pk _) return (Any II64 code) -- catch simple cases of zero- or sign-extended load -getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _ _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) - -getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _ _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) - -getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _ _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr)) - -getRegister' _ _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _ _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr)) - --- Note: there is no Load Byte Arithmetic instruction, so no signed case here - -getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _ _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr)) - -getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _ _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr)) - -getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _ _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr)) - -getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _ _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr)) - -getRegister' _ _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _ _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr)) - -getRegister' _ _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _ _]) = do - -- lwa is DS-form. See Note [Power instruction format] - Amode addr addr_code <- getAmode DS mem - return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr)) +getRegister' _ _ (CmmMachOp (MO_UU_Conv src tgt) [CmmLoad mem pk _]) + | src < tgt + , cmmTypeFormat pk == intFormat src = loadZeroExpand mem pk tgt + +getRegister' _ _ (CmmMachOp (MO_XX_Conv src tgt) [CmmLoad mem pk _]) + | src < tgt + , cmmTypeFormat pk == intFormat src = loadZeroExpand mem pk tgt + + -- XXX: This is ugly, refactor +getRegister' _ _ (CmmMachOp (MO_SS_Conv src tgt) [CmmLoad mem pk _]) + -- Note: there is no Load Byte Arithmetic instruction + | cmmTypeFormat pk /= II8 + , src < tgt = do + let format = cmmTypeFormat pk + -- lwa is DS-form. See Note [Power instruction format] + let form = if format >= II32 then DS else D + Amode addr addr_code <- getAmode form mem + let code dst = assert (format == intFormat src) + $ addr_code `snocOL` LA format dst addr + return (Any (intFormat tgt) code) getRegister' config platform (CmmMachOp (MO_RelaxedRead w) [e]) = getRegister' config platform (CmmLoad e (cmmBits w) NaturallyAligned) @@ -789,6 +767,12 @@ extendSExpr from to x = CmmMachOp (MO_SS_Conv from to) [x] extendUExpr :: Width -> Width -> CmmExpr -> CmmExpr extendUExpr from to x = CmmMachOp (MO_UU_Conv from to) [x] +loadZeroExpand :: CmmExpr -> CmmType -> Width -> NatM Register +loadZeroExpand mem pk tgt = do + Amode addr addr_code <- getAmode D mem + let code dst = addr_code `snocOL` LD (cmmTypeFormat pk) dst addr + return (Any (intFormat tgt) code) + -- ----------------------------------------------------------------------------- -- The 'Amode' type: Memory addressing modes passed up the tree. @@ -2448,8 +2432,8 @@ srCode width sgn instr x y = do let op_len = max W32 width extend = if sgn then extendSExpr else extendUExpr (src1, code1) <- getSomeReg (extend width op_len x) - (src2, code2) <- getSomeReg (extendUExpr width op_len y) - -- Note: Shift amount `y` is unsigned + (src2, code2) <- getSomeReg y + let code dst = code1 `appOL` code2 `snocOL` instr (intFormat op_len) dst src1 (RIReg src2) return (Any (intFormat width) code) ===================================== compiler/GHC/CmmToAsm/Reg/Linear.hs ===================================== @@ -406,7 +406,7 @@ linearRA block_live block_id = go [] [] -- | Do allocation for a single instruction. raInsn :: OutputableRegConstraint freeRegs instr - => BlockMap Regs -- ^ map of what vregs are love on entry to each block. + => BlockMap Regs -- ^ map of what vregs are live on entry to each block. -> [instr] -- ^ accumulator for instructions already processed. -> BlockId -- ^ the id of the current block, for debugging -> LiveInstr instr -- ^ the instr to have its regs allocated, with liveness info. ===================================== compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs ===================================== @@ -37,7 +37,11 @@ data StackMap -- See Note [UniqFM and the register allocator] -- | Assignment of vregs to stack slots. - , stackMapAssignment :: UniqFM Unique StackSlot } + -- + -- We record not just the slot, but also how many stack slots the vreg + -- takes up, in order to avoid re-using a stack slot for a register + -- that has grown but already had a stack slot (#26668). + , stackMapAssignment :: UniqFM Unique (StackSlot, Int) } -- | An empty stack map, with all slots available. @@ -50,14 +54,19 @@ emptyStackMap = StackMap 0 emptyUFM -- getStackSlotFor :: StackMap -> Format -> Unique -> (StackMap, Int) -getStackSlotFor fs@(StackMap _ reserved) _fmt regUnique - | Just slot <- lookupUFM reserved regUnique = (fs, slot) - -getStackSlotFor (StackMap freeSlot reserved) fmt regUnique = - let - nbSlots = (formatInBytes fmt + 7) `div` 8 - in - (StackMap (freeSlot+nbSlots) (addToUFM reserved regUnique freeSlot), freeSlot) +getStackSlotFor fs@(StackMap freeSlot reserved) fmt regUnique + -- The register already has a stack slot; try to re-use it. + | Just (slot, nbSlots) <- lookupUFM reserved regUnique + -- Make sure the slot is big enough for this format, in case the register + -- has grown (#26668). + , nbNeededSlots <= nbSlots + = (fs, slot) + | otherwise + = (StackMap (freeSlot+nbNeededSlots) (addToUFM reserved regUnique (freeSlot, nbNeededSlots)), freeSlot) + -- NB: this can create fragmentation if a register keeps growing. + -- That's probably OK, as this is only happens very rarely. + where + !nbNeededSlots = (formatInBytes fmt + 7) `div` 8 -- | Return the number of stack slots that were allocated getStackUse :: StackMap -> Int ===================================== utils/ghc-pkg/Main.hs ===================================== @@ -23,7 +23,6 @@ module Main (main) where -import Debug.Trace import qualified GHC.Unit.Database as GhcPkg import GHC.Unit.Database hiding (mkMungePathUrl) import GHC.HandleEncoding @@ -1634,7 +1633,7 @@ listPackages verbosity my_flags mPackageName mModuleName = do simplePackageList :: [Flag] -> [InstalledPackageInfo] -> IO () simplePackageList my_flags pkgs = do let showPkg :: InstalledPackageInfo -> String - showPkg | FlagShowUnitIds `elem` my_flags = traceId . display . installedUnitId + showPkg | FlagShowUnitIds `elem` my_flags = display . installedUnitId | FlagNamesOnly `elem` my_flags = display . mungedName . mungedId | otherwise = display . mungedId strs = map showPkg pkgs View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/b79c71f3b6b88c22cb75c1437b07346... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/b79c71f3b6b88c22cb75c1437b07346... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Peter Trommler (@trommler)