[Git][ghc/ghc][wip/fix-26065] Correcting LLVM linking of Intel BMI intrinsics pdep{8,16} and pext{8,16}.

Andreas Klebinger pushed to branch wip/fix-26065 at Glasgow Haskell Compiler / GHC Commits: d97f54f2 by Alex Washburn at 2025-09-11T13:38:05+02:00 Correcting LLVM linking of Intel BMI intrinsics pdep{8,16} and pext{8,16}. This patch fixes #26065. The LLVM interface does not expose bindings to: - llvm.x86.bmi.pdep.8 - llvm.x86.bmi.pdep.16 - llvm.x86.bmi.pext.8 - llvm.x86.bmi.pext.16 So calls are instead made to llvm.x86.bmi.{pdep,pext}.32 in these cases, with pre/post-operation truncation to constrain the logical value range. - - - - - 4 changed files: - compiler/GHC/CmmToLlvm/CodeGen.hs - + testsuite/tests/llvm/should_run/T26065.hs - + testsuite/tests/llvm/should_run/T26065.stdout - testsuite/tests/llvm/should_run/all.T Changes: ===================================== compiler/GHC/CmmToLlvm/CodeGen.hs ===================================== @@ -240,12 +240,25 @@ genCall (PrimTarget op@(MO_BRev w)) [dst] args = genCallSimpleCast w op dst args genCall (PrimTarget op@(MO_BSwap w)) [dst] args = genCallSimpleCast w op dst args -genCall (PrimTarget op@(MO_Pdep w)) [dst] args = - genCallSimpleCast w op dst args -genCall (PrimTarget op@(MO_Pext w)) [dst] args = - genCallSimpleCast w op dst args genCall (PrimTarget op@(MO_PopCnt w)) [dst] args = genCallSimpleCast w op dst args +{- Note [LLVM PDep/PExt intrinsics] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Since x86 PDep/PExt instructions only exist for 32/64 bit widths +we use the 32bit variant to compute the 8/16bit primops. +To do so we extend/truncate the argument/result around the +call. +-} +genCall (PrimTarget op@(MO_Pdep w)) [dst] args = do + cfg <- getConfig + if llvmCgBmiVersion cfg >= Just BMI2 + then genCallMinimumTruncationCast W32 w op dst args + else genCallSimpleCast w op dst args +genCall (PrimTarget op@(MO_Pext w)) [dst] args = do + cfg <- getConfig + if llvmCgBmiVersion cfg >= Just BMI2 + then genCallMinimumTruncationCast W32 w op dst args + else genCallSimpleCast w op dst args genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $ do addrVar <- exprToVarW addr @@ -641,8 +654,15 @@ genCallExtract _ _ _ _ = -- from i32 to i8 explicitly as LLVM is strict about types. genCallSimpleCast :: Width -> CallishMachOp -> CmmFormal -> [CmmActual] -> LlvmM StmtData -genCallSimpleCast specW op dst args = do - let width = widthToLlvmInt specW +genCallSimpleCast w = genCallMinimumTruncationCast w w + +-- Given the minimum machine bit-width to use and the logical bit-width of the +-- value range, perform a type-cast truncation and extension before and after the +-- specified operation, respectively. +genCallMinimumTruncationCast :: Width -> Width -> CallishMachOp -> CmmFormal + -> [CmmActual] -> LlvmM StmtData +genCallMinimumTruncationCast minW specW op dst args = do + let width = widthToLlvmInt $ max minW specW argsW = const width <$> args dstType = cmmToLlvmType $ localRegType dst signage = cmmPrimOpRetValSignage op @@ -945,9 +965,10 @@ cmmPrimOpFunctions mop = do W256 -> fsLit "llvm.cttz.i256" W512 -> fsLit "llvm.cttz.i512" MO_Pdep w + -- See Note [LLVM PDep/PExt intrinsics] | isBmi2Enabled -> case w of - W8 -> fsLit "llvm.x86.bmi.pdep.8" - W16 -> fsLit "llvm.x86.bmi.pdep.16" + W8 -> fsLit "llvm.x86.bmi.pdep.32" + W16 -> fsLit "llvm.x86.bmi.pdep.32" W32 -> fsLit "llvm.x86.bmi.pdep.32" W64 -> fsLit "llvm.x86.bmi.pdep.64" W128 -> fsLit "llvm.x86.bmi.pdep.128" @@ -963,8 +984,9 @@ cmmPrimOpFunctions mop = do W512 -> fsLit "hs_pdep512" MO_Pext w | isBmi2Enabled -> case w of - W8 -> fsLit "llvm.x86.bmi.pext.8" - W16 -> fsLit "llvm.x86.bmi.pext.16" + -- See Note [LLVM PDep/PExt intrinsics] + W8 -> fsLit "llvm.x86.bmi.pext.32" + W16 -> fsLit "llvm.x86.bmi.pext.32" W32 -> fsLit "llvm.x86.bmi.pext.32" W64 -> fsLit "llvm.x86.bmi.pext.64" W128 -> fsLit "llvm.x86.bmi.pext.128" ===================================== testsuite/tests/llvm/should_run/T26065.hs ===================================== @@ -0,0 +1,68 @@ +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE ScopedTypeVariables #-} +import Data.Char (toUpper) +import GHC.Exts +import GHC.Word +import Numeric (showHex) + +pdep8 :: Word8 -> Word8 -> Word8 +pdep8 (W8# a) (W8# b) = W8# (wordToWord8# (pdep8# (word8ToWord# a) (word8ToWord# b))) +{-# NOINLINE pdep8 #-} + +pdep16 :: Word16 -> Word16 -> Word16 +pdep16 (W16# a) (W16# b) = W16# (wordToWord16# (pdep16# (word16ToWord# a) (word16ToWord# b))) +{-# NOINLINE pdep16 #-} + +pdep32 :: Word32 -> Word32 -> Word32 +pdep32 (W32# a) (W32# b) = W32# (wordToWord32# (pdep32# (word32ToWord# a) (word32ToWord# b))) +{-# NOINLINE pdep32 #-} + +pdep64 :: Word64 -> Word64 -> Word64 +pdep64 (W64# a) (W64# b) = W64# (pdep64# a b) +{-# NOINLINE pdep64 #-} + +pext8 :: Word8 -> Word8 -> Word8 +pext8 (W8# a) (W8# b) = W8# (wordToWord8# (pext8# (word8ToWord# a) (word8ToWord# b))) +{-# NOINLINE pext8 #-} + +pext16 :: Word16 -> Word16 -> Word16 +pext16 (W16# a) (W16# b) = W16# (wordToWord16# (pext16# (word16ToWord# a) (word16ToWord# b))) +{-# NOINLINE pext16 #-} + +pext32 :: Word32 -> Word32 -> Word32 +pext32 (W32# a) (W32# b) = W32# (wordToWord32# (pext32# (word32ToWord# a) (word32ToWord# b))) +{-# NOINLINE pext32 #-} + +pext64 :: Word64 -> Word64 -> Word64 +pext64 (W64# a) (W64# b) = W64# (pext64# a b) +{-# NOINLINE pext64 #-} + +valueSource :: Integral i => i +valueSource = fromInteger 0xA7F7A7F7A7F7A7F7 + +valueMask :: Integral i => i +valueMask = fromInteger 0x5555555555555555 + +printIntrinsicCall :: forall i. Integral i => String -> (i -> i -> i) -> IO () +printIntrinsicCall label f = + let op1 = valueSource + op2 = valueMask + pad s = + let hex :: Integral a => a -> String + hex = flip showHex "" + str = toUpper <$> hex s + len = length $ hex (maxBound :: Word64) + n = length str + in "0x" <> replicate (len - n) '0' <> str + in putStrLn $ unwords [ label, pad op1, pad op2, "=", pad (f op1 op2) ] + +main :: IO () +main = do + printIntrinsicCall "pdep8 " pdep8 + printIntrinsicCall "pdep16" pdep16 + printIntrinsicCall "pdep32" pdep32 + printIntrinsicCall "pdep64" pdep64 + printIntrinsicCall "pext8 " pext8 + printIntrinsicCall "pext16" pext16 + printIntrinsicCall "pext32" pext32 + printIntrinsicCall "pext64" pext64 ===================================== testsuite/tests/llvm/should_run/T26065.stdout ===================================== @@ -0,0 +1,8 @@ +pdep8 0x00000000000000F7 0x0000000000000055 = 0x0000000000000015 +pdep16 0x000000000000A7F7 0x0000000000005555 = 0x0000000000005515 +pdep32 0x00000000A7F7A7F7 0x0000000055555555 = 0x0000000044155515 +pdep64 0xA7F7A7F7A7F7A7F7 0x5555555555555555 = 0x4415551544155515 +pext8 0x00000000000000F7 0x0000000000000055 = 0x000000000000000F +pext16 0x000000000000A7F7 0x0000000000005555 = 0x000000000000003F +pext32 0x00000000A7F7A7F7 0x0000000055555555 = 0x0000000000003F3F +pext64 0xA7F7A7F7A7F7A7F7 0x5555555555555555 = 0x000000003F3F3F3F ===================================== testsuite/tests/llvm/should_run/all.T ===================================== @@ -18,3 +18,8 @@ test('T22033', [normal, normalise_errmsg_fun(ignore_llvm_and_vortex)], compile_a test('T25730', [req_c, unless(arch('x86_64'), skip), normalise_errmsg_fun(ignore_llvm_and_vortex)], compile_and_run, ['T25730C.c']) # T25730C.c contains Intel instrinsics, so only run this test on x86 test('T20645', [normal, normalise_errmsg_fun(ignore_llvm_and_vortex), when(have_llvm(), extra_ways(["optllvm"]))], compile_and_run, ['']) +# T26065.c tests LLVM linking of Intel instrinsics, so only run this test on x86 +test('T26065', [normal, normalise_errmsg_fun(ignore_llvm_and_vortex), when(have_llvm(), extra_ways(["optllvm"])), + unless((arch('x86_64') or arch('i386')) and have_cpu_feature('bmi2'),skip)], + compile_and_run, ['-mbmi2']) + View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d97f54f22d60acb8bb869f589676f697... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d97f54f22d60acb8bb869f589676f697... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Andreas Klebinger (@AndreasK)