GitLab

recursion-ninja pushed to branch wip/fix-26109 at Glasgow Haskell Compiler / GHC

Commits:

9fbadea8

by Recursion Ninja at 2025-08-06T21:10:52-04:00

Resolving issues #20645 and #26109

Correctly sign extending and casting smaller bit width types for LLVM operations:

  - bitReverse8#
  - bitReverse16#
  - bitReverse32#
  - byteSwap16#
  - byteSwap32#
  - pdep8#
  - pdep16#
  - pext8#
  - pext16#

2 changed files:

compiler/GHC/CmmToLlvm/CodeGen.hs
testsuite/tests/numeric/should_run/foundation.hs

Changes:

compiler/GHC/CmmToLlvm/CodeGen.hs

@@ -230,23 +230,25 @@ genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
      statement $ Expr $ Call StdCall fptr (argVars' ++ argSuffix) []
    | otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt)
 --- Handle PopCnt, Clz, Ctz, and BSwap that need to only convert arg
 --- and return types
 -genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
 -    genCallSimpleCast w t dsts args
+-
 -genCall t@(PrimTarget (MO_Pdep w)) dsts args =
 -    genCallSimpleCast2 w t dsts args
 -genCall t@(PrimTarget (MO_Pext w)) dsts args =
 -    genCallSimpleCast2 w t dsts args
 -genCall t@(PrimTarget (MO_Clz w)) dsts args =
 -    genCallSimpleCast w t dsts args
 -genCall t@(PrimTarget (MO_Ctz w)) dsts args =
 -    genCallSimpleCast w t dsts args
 -genCall t@(PrimTarget (MO_BSwap w)) dsts args =
 -    genCallSimpleCast w t dsts args
 -genCall t@(PrimTarget (MO_BRev w)) dsts args =
 -    genCallSimpleCast w t dsts args
 +-- Handle PopCnt, Clz, Ctz, BRev, and BSwap that need to only convert arg and return types
 +genCall (PrimTarget op@(MO_PopCnt w)) [dst] args =
 +    genCallSimpleCast w op dst args
 +genCall (PrimTarget op@(MO_Clz w)) [dst] args =
 +    genCallSimpleCast w op dst args
 +genCall (PrimTarget op@(MO_Ctz w)) [dst] args =
 +    genCallSimpleCast w op dst args
 +genCall (PrimTarget op@(MO_BRev w)) [dst] args =
 +    genCallSimpleCast w op dst args
 +genCall (PrimTarget op@(MO_BSwap w)) [dst] args =
 +    genCallSimpleCast w op dst args
++
 +-- Handle Pdep and Pext that (may) require using a type with a larger bit-width
 +-- than the specified but width. This register width-extension is particualarly
 +-- necessary for W8 and W16.
 +genCall (PrimTarget op@(MO_Pdep w)) [dst] args =
 +    genCallCastWithMinWidthOf W32 w op dst args
 +genCall (PrimTarget op@(MO_Pext w)) [dst] args =
 +    genCallCastWithMinWidthOf W32 w op dst args
  genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $ do
      addrVar <- exprToVarW addr
@@ -640,63 +642,35 @@ genCallExtract _ _ _ _ =
  -- since GHC only really has i32 and i64 types and things like Word8 are backed
  -- by an i32 and just present a logical i8 range. So we must handle conversions
  -- from i32 to i8 explicitly as LLVM is strict about types.
 -genCallSimpleCast :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
 -              -> LlvmM StmtData
 -genCallSimpleCast w t@(PrimTarget op) [dst] args = do
 -    let width = widthToLlvmInt w
 -        dstTy = cmmToLlvmType $ localRegType dst
+-
 -    fname                       <- cmmPrimOpFunctions op
 -    (fptr, _, top3)             <- getInstrinct fname width [width]
+-
 -    (dstV, _dst_ty)             <- getCmmReg (CmmLocal dst)
+-
 -    let (_, arg_hints) = foreignTargetHints t
 -    let args_hints = zip args arg_hints
 -    (argsV, stmts2, top2)       <- arg_vars args_hints ([], nilOL, [])
 -    (argsV', stmts4)            <- castVars Signed $ zip argsV [width]
 -    (retV, s1)                  <- doExpr width $ Call StdCall fptr argsV' []
 -    (retVs', stmts5)            <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
 -    let retV'                    = singletonPanic "genCallSimpleCast" retVs'
 -    let s2                       = Store retV' dstV Nothing []
+-
 -    let stmts = stmts2 `appOL` stmts4 `snocOL`
 -                s1 `appOL` stmts5 `snocOL` s2
 -    return (stmts, top2 ++ top3)
 -genCallSimpleCast _ _ dsts _ =
 -    panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts")
+-
 --- Handle simple function call that only need simple type casting, of the form:
 ---   truncate arg >>= \a -> call(a) >>= zext
 ---
 --- since GHC only really has i32 and i64 types and things like Word8 are backed
 --- by an i32 and just present a logical i8 range. So we must handle conversions
 --- from i32 to i8 explicitly as LLVM is strict about types.
 -genCallSimpleCast2 :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
 -              -> LlvmM StmtData
 -genCallSimpleCast2 w t@(PrimTarget op) [dst] args = do
 -    let width = widthToLlvmInt w
 -        dstTy = cmmToLlvmType $ localRegType dst
+-
 -    fname                       <- cmmPrimOpFunctions op
 -    (fptr, _, top3)             <- getInstrinct fname width (const width <$> args)
+-
 -    (dstV, _dst_ty)             <- getCmmReg (CmmLocal dst)
+-
 -    let (_, arg_hints) = foreignTargetHints t
 -    let args_hints = zip args arg_hints
 -    (argsV, stmts2, top2)       <- arg_vars args_hints ([], nilOL, [])
 -    (argsV', stmts4)            <- castVars Signed $ zip argsV (const width <$> argsV)
 -    (retV, s1)                  <- doExpr width $ Call StdCall fptr argsV' []
 -    (retVs', stmts5)             <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
 -    let retV'                    = singletonPanic "genCallSimpleCast2" retVs'
 -    let s2                       = Store retV' dstV Nothing []
+-
 -    let stmts = stmts2 `appOL` stmts4 `snocOL`
 -                s1 `appOL` stmts5 `snocOL` s2
 +genCallSimpleCast :: Width -> CallishMachOp -> CmmFormal -> [CmmActual]
 +                  -> LlvmM StmtData
 +genCallSimpleCast w = genCallCastWithMinWidthOf w w
++
 +-- Handle extension case that the element should be extend to a larger bit-width
 +-- for the operation and subsequently truncated, of the form:
 +--   extend arg >>= \a -> call(a) >>= truncate
 +genCallCastWithMinWidthOf :: Width -> Width -> CallishMachOp -> CmmFormal
 +                          -> [CmmActual] -> LlvmM StmtData
 +genCallCastWithMinWidthOf minW specW op dst args = do
 +    let width   = widthToLlvmInt $ max minW specW
 +        argsW   = const width <$> args
 +        dstType = cmmToLlvmType $ localRegType dst
 +        signage = cmmPrimOpRetValSignage op
++
 +    fname                 <- cmmPrimOpFunctions op
 +    (fptr, _, top3)       <- getInstrinct fname width argsW
 +    (dstV, _dst_ty)       <- getCmmReg (CmmLocal dst)
 +    let (_, arg_hints)     = foreignTargetHints $ PrimTarget op
 +    let args_hints         = zip args arg_hints
 +    (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
 +    (argsV', stmts4)      <- castVars signage $ zip argsV argsW
 +    (retV, s1)            <- doExpr width $ Call StdCall fptr argsV' []
 +    (retV', stmts5)       <- castVar signage retV dstType
 +    let s2                 = Store retV' dstV Nothing []
++
 +    let stmts = stmts2 `appOL` stmts4 `snocOL` s1 `snocOL`
 +                stmts5 `snocOL` s2
      return (stmts, top2 ++ top3)
 -genCallSimpleCast2 _ _ dsts _ =
 -    panic ("genCallSimpleCast2: " ++ show (length dsts) ++ " dsts")
  -- | Create a function pointer from a target.
  getFunPtrW :: (LMString -> LlvmType) -> ForeignTarget
@@ -811,11 +785,39 @@ castVar signage v t | getVarType v == t
              Signed      -> LM_Sext
              Unsigned    -> LM_Zext
+-
  cmmPrimOpRetValSignage :: CallishMachOp -> Signage
  cmmPrimOpRetValSignage mop = case mop of
      MO_Pdep _   -> Unsigned
      MO_Pext _   -> Unsigned
 +    -- If the result of a Bit-Reverse is treated as signed,
 +    -- an positive input can result in an negative output, i.e.:
 +    --
 +    --   identity(0x03) = 0x03 = 00000011
 +    --   breverse(0x03) = 0xC0 = 11000000
 +    --
 +    -- Now if an extension is performed after the operation to
 +    -- promote a smaller bit-width value into a larger bit-width
 +    -- type, it is expected that the /bit-wise/ operations will
 +    -- not be treated /numerically/ as signed.
 +    --
 +    -- To illustrate the difference, consider how a signed extension
 +    -- for the type i16 to i32 differs for out values above:
 +    --   ext_zeroed(i32, breverse(0x03)) = 0x00C0 = 0000000011000000
 +    --   ext_signed(i32, breverse(0x03)) = 0xFFC0 = 1111111111000000
 +    --
 +    -- Here we can see that the former output is the expected result
 +    -- of a bit-wise operation which needs to be promoted to a larger
 +    -- bit-width type. The latter output is not desirable when we must
 +    -- constraining a value into a range of i16 within an i32 type.
 +    --
 +    -- Hence we always treat the "signage" as unsigned for Bit-Reverse!
 +    MO_BRev _   -> Unsigned
++
 +    -- The same reasoning applied to Bit-Reverse above applies to Byte-Swap;
 +    -- we do not want to sign extend a number whose sign may have changed!
 +    MO_BSwap _  -> Unsigned
++
 +    -- All other cases, default to preserving the numeric sign when extending.
      _           -> Signed
  -- | Decide what C function to use to implement a CallishMachOp
@@ -954,8 +956,8 @@ cmmPrimOpFunctions mop = do
            W256 -> fsLit "llvm.x86.bmi.pdep.256"
            W512 -> fsLit "llvm.x86.bmi.pdep.512"
        | otherwise -> case w of
 -          W8   -> fsLit "hs_pdep8"
 -          W16  -> fsLit "hs_pdep16"
 +          W8   -> fsLit "hs_pdep32"
 +          W16  -> fsLit "hs_pdep32"
            W32  -> fsLit "hs_pdep32"
            W64  -> fsLit "hs_pdep64"
            W128 -> fsLit "hs_pdep128"
@@ -971,8 +973,8 @@ cmmPrimOpFunctions mop = do
            W256 -> fsLit "llvm.x86.bmi.pext.256"
            W512 -> fsLit "llvm.x86.bmi.pext.512"
        | otherwise -> case w of
 -          W8   -> fsLit "hs_pext8"
 -          W16  -> fsLit "hs_pext16"
 +          W8   -> fsLit "hs_pext32"
 +          W16  -> fsLit "hs_pext32"
            W32  -> fsLit "hs_pext32"
            W64  -> fsLit "hs_pext64"
            W128 -> fsLit "hs_pext128"

testsuite/tests/numeric/should_run/foundation.hs

@@ -24,6 +24,7 @@ module Main
      ( main
      ) where
 +import Data.Bits (Bits((.&.), bit))
  import Data.Word
  import Data.Int
  import GHC.Natural
@@ -655,8 +656,8 @@ testPrimops = Group "primop"
    , testPrimop "ctz32#" Primop.ctz32# Wrapper.ctz32#
    , testPrimop "ctz64#" Primop.ctz64# Wrapper.ctz64#
    , testPrimop "ctz#" Primop.ctz# Wrapper.ctz#
 -  , testPrimop "byteSwap16#" Primop.byteSwap16# Wrapper.byteSwap16#
 -  , testPrimop "byteSwap32#" Primop.byteSwap32# Wrapper.byteSwap32#
 +  , testPrimop "byteSwap16#" (16 `LowerBitsAreDefined` Primop.byteSwap16#) (16 `LowerBitsAreDefined` Wrapper.byteSwap16#)
 +  , testPrimop "byteSwap32#" (32 `LowerBitsAreDefined` Primop.byteSwap32#) (32 `LowerBitsAreDefined` Wrapper.byteSwap32#)
    , testPrimop "byteSwap64#" Primop.byteSwap64# Wrapper.byteSwap64#
    , testPrimop "byteSwap#" Primop.byteSwap# Wrapper.byteSwap#
    , testPrimop "bitReverse8#" Primop.bitReverse8# Wrapper.bitReverse8#
@@ -672,6 +673,34 @@ testPrimops = Group "primop"
    , testPrimop "narrow32Word#" Primop.narrow32Word# Wrapper.narrow32Word#
+   ]
 +-- | A special data-type for representing functions where,
 +-- since only some number of the lower bits are defined,
 +-- testing for strict equality in the undefined upper bits is not appropriate!
 +-- Without using this data-type, false-positive failures will be reported
 +-- when the undefined bit regions do not match, even though the equality of bits
 +-- in this undefined region has no bearing on correctness.
 +data LowerBitsAreDefined =
 +    LowerBitsAreDefined
 +    { definedLowerWidth :: Word
 +    -- ^ The (strictly-non-negative) number of least-significant bits
 +    -- for which the attached function is defined.
 +    , undefinedBehavior :: (Word# -> Word#)
 +    -- ^ Function with undefined behavior for some of its most significant bits.
 +    }
++
 +instance TestPrimop LowerBitsAreDefined where
 +  testPrimop s l r = Property s $ \ (uWord#-> x0) ->
 +    let -- Create a mask to unset all bits in the undefined area,
 +        -- leaving set bits only in the area of defined behavior.
 +        -- Since the upper bits are undefined,
 +        -- if the function defines behavior for the lower N bits,
 +        -- then /only/ the lower N bits are preserved,
 +        -- and the upper WORDSIZE - N bits are discarded.
 +        mask = bit (fromEnum (definedLowerWidth r)) - 1
 +        valL = wWord# (undefinedBehavior l x0) .&. mask
 +        valR = wWord# (undefinedBehavior r x0) .&. mask
 +    in  valL === valR
++
  instance TestPrimop (Char# -> Char# -> Int#) where
    testPrimop s l r = Property s $ \ (uChar#-> x0) (uChar#-> x1) -> wInt# (l x0 x1) === wInt# (r x0 x1)