
recursion-ninja pushed to branch wip/fix-26109 at Glasgow Haskell Compiler / GHC Commits: 9fbadea8 by Recursion Ninja at 2025-08-06T21:10:52-04:00 Resolving issues #20645 and #26109 Correctly sign extending and casting smaller bit width types for LLVM operations: - bitReverse8# - bitReverse16# - bitReverse32# - byteSwap16# - byteSwap32# - pdep8# - pdep16# - pext8# - pext16# - - - - - 2 changed files: - compiler/GHC/CmmToLlvm/CodeGen.hs - testsuite/tests/numeric/should_run/foundation.hs Changes: ===================================== compiler/GHC/CmmToLlvm/CodeGen.hs ===================================== @@ -230,23 +230,25 @@ genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args statement $ Expr $ Call StdCall fptr (argVars' ++ argSuffix) [] | otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt) --- Handle PopCnt, Clz, Ctz, and BSwap that need to only convert arg --- and return types -genCall t@(PrimTarget (MO_PopCnt w)) dsts args = - genCallSimpleCast w t dsts args - -genCall t@(PrimTarget (MO_Pdep w)) dsts args = - genCallSimpleCast2 w t dsts args -genCall t@(PrimTarget (MO_Pext w)) dsts args = - genCallSimpleCast2 w t dsts args -genCall t@(PrimTarget (MO_Clz w)) dsts args = - genCallSimpleCast w t dsts args -genCall t@(PrimTarget (MO_Ctz w)) dsts args = - genCallSimpleCast w t dsts args -genCall t@(PrimTarget (MO_BSwap w)) dsts args = - genCallSimpleCast w t dsts args -genCall t@(PrimTarget (MO_BRev w)) dsts args = - genCallSimpleCast w t dsts args +-- Handle PopCnt, Clz, Ctz, BRev, and BSwap that need to only convert arg and return types +genCall (PrimTarget op@(MO_PopCnt w)) [dst] args = + genCallSimpleCast w op dst args +genCall (PrimTarget op@(MO_Clz w)) [dst] args = + genCallSimpleCast w op dst args +genCall (PrimTarget op@(MO_Ctz w)) [dst] args = + genCallSimpleCast w op dst args +genCall (PrimTarget op@(MO_BRev w)) [dst] args = + genCallSimpleCast w op dst args +genCall (PrimTarget op@(MO_BSwap w)) [dst] args = + genCallSimpleCast w op dst args + +-- Handle Pdep and Pext that (may) require using a type with a larger bit-width +-- than the specified but width. This register width-extension is particualarly +-- necessary for W8 and W16. +genCall (PrimTarget op@(MO_Pdep w)) [dst] args = + genCallCastWithMinWidthOf W32 w op dst args +genCall (PrimTarget op@(MO_Pext w)) [dst] args = + genCallCastWithMinWidthOf W32 w op dst args genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $ do addrVar <- exprToVarW addr @@ -640,63 +642,35 @@ genCallExtract _ _ _ _ = -- since GHC only really has i32 and i64 types and things like Word8 are backed -- by an i32 and just present a logical i8 range. So we must handle conversions -- from i32 to i8 explicitly as LLVM is strict about types. -genCallSimpleCast :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual] - -> LlvmM StmtData -genCallSimpleCast w t@(PrimTarget op) [dst] args = do - let width = widthToLlvmInt w - dstTy = cmmToLlvmType $ localRegType dst - - fname <- cmmPrimOpFunctions op - (fptr, _, top3) <- getInstrinct fname width [width] - - (dstV, _dst_ty) <- getCmmReg (CmmLocal dst) - - let (_, arg_hints) = foreignTargetHints t - let args_hints = zip args arg_hints - (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, []) - (argsV', stmts4) <- castVars Signed $ zip argsV [width] - (retV, s1) <- doExpr width $ Call StdCall fptr argsV' [] - (retVs', stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)] - let retV' = singletonPanic "genCallSimpleCast" retVs' - let s2 = Store retV' dstV Nothing [] - - let stmts = stmts2 `appOL` stmts4 `snocOL` - s1 `appOL` stmts5 `snocOL` s2 - return (stmts, top2 ++ top3) -genCallSimpleCast _ _ dsts _ = - panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts") - --- Handle simple function call that only need simple type casting, of the form: --- truncate arg >>= \a -> call(a) >>= zext --- --- since GHC only really has i32 and i64 types and things like Word8 are backed --- by an i32 and just present a logical i8 range. So we must handle conversions --- from i32 to i8 explicitly as LLVM is strict about types. -genCallSimpleCast2 :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual] - -> LlvmM StmtData -genCallSimpleCast2 w t@(PrimTarget op) [dst] args = do - let width = widthToLlvmInt w - dstTy = cmmToLlvmType $ localRegType dst - - fname <- cmmPrimOpFunctions op - (fptr, _, top3) <- getInstrinct fname width (const width <$> args) - - (dstV, _dst_ty) <- getCmmReg (CmmLocal dst) - - let (_, arg_hints) = foreignTargetHints t - let args_hints = zip args arg_hints - (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, []) - (argsV', stmts4) <- castVars Signed $ zip argsV (const width <$> argsV) - (retV, s1) <- doExpr width $ Call StdCall fptr argsV' [] - (retVs', stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)] - let retV' = singletonPanic "genCallSimpleCast2" retVs' - let s2 = Store retV' dstV Nothing [] - - let stmts = stmts2 `appOL` stmts4 `snocOL` - s1 `appOL` stmts5 `snocOL` s2 +genCallSimpleCast :: Width -> CallishMachOp -> CmmFormal -> [CmmActual] + -> LlvmM StmtData +genCallSimpleCast w = genCallCastWithMinWidthOf w w + +-- Handle extension case that the element should be extend to a larger bit-width +-- for the operation and subsequently truncated, of the form: +-- extend arg >>= \a -> call(a) >>= truncate +genCallCastWithMinWidthOf :: Width -> Width -> CallishMachOp -> CmmFormal + -> [CmmActual] -> LlvmM StmtData +genCallCastWithMinWidthOf minW specW op dst args = do + let width = widthToLlvmInt $ max minW specW + argsW = const width <$> args + dstType = cmmToLlvmType $ localRegType dst + signage = cmmPrimOpRetValSignage op + + fname <- cmmPrimOpFunctions op + (fptr, _, top3) <- getInstrinct fname width argsW + (dstV, _dst_ty) <- getCmmReg (CmmLocal dst) + let (_, arg_hints) = foreignTargetHints $ PrimTarget op + let args_hints = zip args arg_hints + (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, []) + (argsV', stmts4) <- castVars signage $ zip argsV argsW + (retV, s1) <- doExpr width $ Call StdCall fptr argsV' [] + (retV', stmts5) <- castVar signage retV dstType + let s2 = Store retV' dstV Nothing [] + + let stmts = stmts2 `appOL` stmts4 `snocOL` s1 `snocOL` + stmts5 `snocOL` s2 return (stmts, top2 ++ top3) -genCallSimpleCast2 _ _ dsts _ = - panic ("genCallSimpleCast2: " ++ show (length dsts) ++ " dsts") -- | Create a function pointer from a target. getFunPtrW :: (LMString -> LlvmType) -> ForeignTarget @@ -811,11 +785,39 @@ castVar signage v t | getVarType v == t Signed -> LM_Sext Unsigned -> LM_Zext - cmmPrimOpRetValSignage :: CallishMachOp -> Signage cmmPrimOpRetValSignage mop = case mop of MO_Pdep _ -> Unsigned MO_Pext _ -> Unsigned + -- If the result of a Bit-Reverse is treated as signed, + -- an positive input can result in an negative output, i.e.: + -- + -- identity(0x03) = 0x03 = 00000011 + -- breverse(0x03) = 0xC0 = 11000000 + -- + -- Now if an extension is performed after the operation to + -- promote a smaller bit-width value into a larger bit-width + -- type, it is expected that the /bit-wise/ operations will + -- not be treated /numerically/ as signed. + -- + -- To illustrate the difference, consider how a signed extension + -- for the type i16 to i32 differs for out values above: + -- ext_zeroed(i32, breverse(0x03)) = 0x00C0 = 0000000011000000 + -- ext_signed(i32, breverse(0x03)) = 0xFFC0 = 1111111111000000 + -- + -- Here we can see that the former output is the expected result + -- of a bit-wise operation which needs to be promoted to a larger + -- bit-width type. The latter output is not desirable when we must + -- constraining a value into a range of i16 within an i32 type. + -- + -- Hence we always treat the "signage" as unsigned for Bit-Reverse! + MO_BRev _ -> Unsigned + + -- The same reasoning applied to Bit-Reverse above applies to Byte-Swap; + -- we do not want to sign extend a number whose sign may have changed! + MO_BSwap _ -> Unsigned + + -- All other cases, default to preserving the numeric sign when extending. _ -> Signed -- | Decide what C function to use to implement a CallishMachOp @@ -954,8 +956,8 @@ cmmPrimOpFunctions mop = do W256 -> fsLit "llvm.x86.bmi.pdep.256" W512 -> fsLit "llvm.x86.bmi.pdep.512" | otherwise -> case w of - W8 -> fsLit "hs_pdep8" - W16 -> fsLit "hs_pdep16" + W8 -> fsLit "hs_pdep32" + W16 -> fsLit "hs_pdep32" W32 -> fsLit "hs_pdep32" W64 -> fsLit "hs_pdep64" W128 -> fsLit "hs_pdep128" @@ -971,8 +973,8 @@ cmmPrimOpFunctions mop = do W256 -> fsLit "llvm.x86.bmi.pext.256" W512 -> fsLit "llvm.x86.bmi.pext.512" | otherwise -> case w of - W8 -> fsLit "hs_pext8" - W16 -> fsLit "hs_pext16" + W8 -> fsLit "hs_pext32" + W16 -> fsLit "hs_pext32" W32 -> fsLit "hs_pext32" W64 -> fsLit "hs_pext64" W128 -> fsLit "hs_pext128" ===================================== testsuite/tests/numeric/should_run/foundation.hs ===================================== @@ -24,6 +24,7 @@ module Main ( main ) where +import Data.Bits (Bits((.&.), bit)) import Data.Word import Data.Int import GHC.Natural @@ -655,8 +656,8 @@ testPrimops = Group "primop" , testPrimop "ctz32#" Primop.ctz32# Wrapper.ctz32# , testPrimop "ctz64#" Primop.ctz64# Wrapper.ctz64# , testPrimop "ctz#" Primop.ctz# Wrapper.ctz# - , testPrimop "byteSwap16#" Primop.byteSwap16# Wrapper.byteSwap16# - , testPrimop "byteSwap32#" Primop.byteSwap32# Wrapper.byteSwap32# + , testPrimop "byteSwap16#" (16 `LowerBitsAreDefined` Primop.byteSwap16#) (16 `LowerBitsAreDefined` Wrapper.byteSwap16#) + , testPrimop "byteSwap32#" (32 `LowerBitsAreDefined` Primop.byteSwap32#) (32 `LowerBitsAreDefined` Wrapper.byteSwap32#) , testPrimop "byteSwap64#" Primop.byteSwap64# Wrapper.byteSwap64# , testPrimop "byteSwap#" Primop.byteSwap# Wrapper.byteSwap# , testPrimop "bitReverse8#" Primop.bitReverse8# Wrapper.bitReverse8# @@ -672,6 +673,34 @@ testPrimops = Group "primop" , testPrimop "narrow32Word#" Primop.narrow32Word# Wrapper.narrow32Word# ] +-- | A special data-type for representing functions where, +-- since only some number of the lower bits are defined, +-- testing for strict equality in the undefined upper bits is not appropriate! +-- Without using this data-type, false-positive failures will be reported +-- when the undefined bit regions do not match, even though the equality of bits +-- in this undefined region has no bearing on correctness. +data LowerBitsAreDefined = + LowerBitsAreDefined + { definedLowerWidth :: Word + -- ^ The (strictly-non-negative) number of least-significant bits + -- for which the attached function is defined. + , undefinedBehavior :: (Word# -> Word#) + -- ^ Function with undefined behavior for some of its most significant bits. + } + +instance TestPrimop LowerBitsAreDefined where + testPrimop s l r = Property s $ \ (uWord#-> x0) -> + let -- Create a mask to unset all bits in the undefined area, + -- leaving set bits only in the area of defined behavior. + -- Since the upper bits are undefined, + -- if the function defines behavior for the lower N bits, + -- then /only/ the lower N bits are preserved, + -- and the upper WORDSIZE - N bits are discarded. + mask = bit (fromEnum (definedLowerWidth r)) - 1 + valL = wWord# (undefinedBehavior l x0) .&. mask + valR = wWord# (undefinedBehavior r x0) .&. mask + in valL === valR + instance TestPrimop (Char# -> Char# -> Int#) where testPrimop s l r = Property s $ \ (uChar#-> x0) (uChar#-> x1) -> wInt# (l x0 x1) === wInt# (r x0 x1) View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9fbadea8fb900c08c812a29716c3c400... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9fbadea8fb900c08c812a29716c3c400... You're receiving this email because of your account on gitlab.haskell.org.