Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
b22777d4 by ARATA Mizuki at 2025-11-11T11:49:44-05:00
LLVM backend: Pass the +evex512 attribute to LLVM 18+ if -mavx512f is set
The newer LLVM requires the +evex512 attribute to enable use of ZMM registers.
LLVM exhibits a backward-compatible behavior if the cpu is `x86-64`, but not if `penryn`.
Therefore, on macOS, where the cpu is set to `penryn`, we need to explicitly pass +evex512.
Fixes #26410
- - - - -
7 changed files:
- compiler/GHC/Driver/Pipeline/Execute.hs
- + testsuite/tests/simd/should_run/T26410_ffi.hs
- + testsuite/tests/simd/should_run/T26410_ffi.stdout
- + testsuite/tests/simd/should_run/T26410_ffi_c.c
- + testsuite/tests/simd/should_run/T26410_prim.hs
- + testsuite/tests/simd/should_run/T26410_prim.stdout
- testsuite/tests/simd/should_run/all.T
Changes:
=====================================
compiler/GHC/Driver/Pipeline/Execute.hs
=====================================
@@ -42,6 +42,7 @@ import GHC.Utils.Logger
import GHC.Utils.TmpFs
import GHC.Platform
import Data.List (intercalate, isInfixOf)
+import qualified Data.List.NonEmpty as NE
import GHC.Unit.Env
import GHC.Utils.Error
import Data.Maybe
@@ -69,6 +70,7 @@ import GHC.Platform.Ways
import GHC.Runtime.Loader (initializePlugins)
import GHC.Driver.LlvmConfigCache (readLlvmConfigCache)
import GHC.CmmToLlvm.Config (LlvmTarget (..), LlvmConfig (..))
+import GHC.CmmToLlvm.Version.Type (LlvmVersion (..))
import {-# SOURCE #-} GHC.Driver.Pipeline (compileForeign, compileEmptyStub)
import GHC.Settings
import System.IO
@@ -229,8 +231,9 @@ runLlvmLlcPhase pipe_env hsc_env input_fn = do
1 -> "-O1"
_ -> "-O2"
- defaultOptions = map GHC.SysTools.Option . concatMap words . snd
- $ unzip (llvmOptions llvm_config dflags)
+ llvm_version <- figureLlvmVersion logger dflags
+ let defaultOptions = map GHC.SysTools.Option . concatMap words . snd
+ $ unzip (llvmOptions llvm_config llvm_version dflags)
optFlag = if null (getOpts dflags opt_lc)
then map GHC.SysTools.Option $ words llvmOpts
else []
@@ -265,8 +268,9 @@ runLlvmOptPhase pipe_env hsc_env input_fn = do
Nothing -> panic ("runPhase LlvmOpt: llvm-passes file "
++ "is missing passes for level "
++ show optIdx)
- defaultOptions = map GHC.SysTools.Option . concat . fmap words . fst
- $ unzip (llvmOptions llvm_config dflags)
+ llvm_version <- figureLlvmVersion logger dflags
+ let defaultOptions = map GHC.SysTools.Option . concat . fmap words . fst
+ $ unzip (llvmOptions llvm_config llvm_version dflags)
-- don't specify anything if user has specified commands. We do this
-- for opt but not llc since opt is very specifically for optimisation
@@ -964,9 +968,10 @@ getOutputFilename logger tmpfs stop_phase output basename dflags next_phase mayb
-- | LLVM Options. These are flags to be passed to opt and llc, to ensure
-- consistency we list them in pairs, so that they form groups.
llvmOptions :: LlvmConfig
+ -> Maybe LlvmVersion
-> DynFlags
-> [(String, String)] -- ^ pairs of (opt, llc) arguments
-llvmOptions llvm_config dflags =
+llvmOptions llvm_config llvm_version dflags =
[("-relocation-model=" ++ rmodel
,"-relocation-model=" ++ rmodel) | not (null rmodel)]
@@ -1006,6 +1011,10 @@ llvmOptions llvm_config dflags =
++ ["+sse2" | isSse2Enabled platform ]
++ ["+sse" | isSseEnabled platform ]
++ ["+avx512f" | isAvx512fEnabled dflags ]
+ ++ ["+evex512" | isAvx512fEnabled dflags
+ , maybe False (>= LlvmVersion (18 NE.:| [])) llvm_version ]
+ -- +evex512 is recognized by LLVM 18 or newer and needed on macOS (#26410).
+ -- It may become deprecated in a future LLVM version, though.
++ ["+avx2" | isAvx2Enabled dflags ]
++ ["+avx" | isAvxEnabled dflags ]
++ ["+avx512cd"| isAvx512cdEnabled dflags ]
=====================================
testsuite/tests/simd/should_run/T26410_ffi.hs
=====================================
@@ -0,0 +1,40 @@
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedFFITypes #-}
+import GHC.Exts
+import GHC.Int
+
+foreign import ccall unsafe "minInt64X8"
+ minInt64X8# :: Int64X8# -> Int64X8# -> Int64X8#
+
+data Int64X8 = Int64X8# Int64X8#
+
+minInt64X8 :: Int64X8 -> Int64X8 -> Int64X8
+minInt64X8 (Int64X8# a) (Int64X8# b) = Int64X8# (minInt64X8# a b)
+{-# NOINLINE minInt64X8 #-}
+
+broadcastInt64X8 :: Int64 -> Int64X8
+broadcastInt64X8 (I64# x) = Int64X8# (broadcastInt64X8# x)
+
+packInt64X8 :: (Int64, Int64, Int64, Int64, Int64, Int64, Int64, Int64) -> Int64X8
+packInt64X8 (I64# x0, I64# x1, I64# x2, I64# x3, I64# x4, I64# x5, I64# x6, I64# x7) = Int64X8# (packInt64X8# (# x0, x1, x2, x3, x4, x5, x6, x7 #))
+
+unpackInt64X8 :: Int64X8 -> (Int64, Int64, Int64, Int64, Int64, Int64, Int64, Int64)
+unpackInt64X8 (Int64X8# a) = case unpackInt64X8# a of
+ (# x0, x1, x2, x3, x4, x5, x6, x7 #) -> (I64# x0, I64# x1, I64# x2, I64# x3, I64# x4, I64# x5, I64# x6, I64# x7)
+
+-- You can check the assembly code for this function to see if ZMM registers are used
+plusInt64X8 :: Int64X8 -> Int64X8 -> Int64X8
+plusInt64X8 (Int64X8# a) (Int64X8# b) = Int64X8# (plusInt64X8# a b)
+
+main :: IO ()
+main = do
+ let a = broadcastInt64X8 3
+ b = packInt64X8 (1, 2, 3, 4, 5, 6, 7, 8)
+ c = minInt64X8 a b
+ print $ unpackInt64X8 c
+ let d = packInt64X8 (-1, -2, -3, -4, -5, -6, -7, -8)
+ e = broadcastInt64X8 (-3)
+ f = minInt64X8 d e
+ print $ unpackInt64X8 f
+ print $ unpackInt64X8 (plusInt64X8 a b)
=====================================
testsuite/tests/simd/should_run/T26410_ffi.stdout
=====================================
@@ -0,0 +1,3 @@
+(1,2,3,3,3,3,3,3)
+(-3,-3,-3,-4,-5,-6,-7,-8)
+(4,5,6,7,8,9,10,11)
=====================================
testsuite/tests/simd/should_run/T26410_ffi_c.c
=====================================
@@ -0,0 +1,6 @@
+#include