Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
-
b22777d4
by ARATA Mizuki at 2025-11-11T11:49:44-05:00
7 changed files:
- compiler/GHC/Driver/Pipeline/Execute.hs
- + testsuite/tests/simd/should_run/T26410_ffi.hs
- + testsuite/tests/simd/should_run/T26410_ffi.stdout
- + testsuite/tests/simd/should_run/T26410_ffi_c.c
- + testsuite/tests/simd/should_run/T26410_prim.hs
- + testsuite/tests/simd/should_run/T26410_prim.stdout
- testsuite/tests/simd/should_run/all.T
Changes:
| ... | ... | @@ -42,6 +42,7 @@ import GHC.Utils.Logger |
| 42 | 42 | import GHC.Utils.TmpFs
|
| 43 | 43 | import GHC.Platform
|
| 44 | 44 | import Data.List (intercalate, isInfixOf)
|
| 45 | +import qualified Data.List.NonEmpty as NE
|
|
| 45 | 46 | import GHC.Unit.Env
|
| 46 | 47 | import GHC.Utils.Error
|
| 47 | 48 | import Data.Maybe
|
| ... | ... | @@ -69,6 +70,7 @@ import GHC.Platform.Ways |
| 69 | 70 | import GHC.Runtime.Loader (initializePlugins)
|
| 70 | 71 | import GHC.Driver.LlvmConfigCache (readLlvmConfigCache)
|
| 71 | 72 | import GHC.CmmToLlvm.Config (LlvmTarget (..), LlvmConfig (..))
|
| 73 | +import GHC.CmmToLlvm.Version.Type (LlvmVersion (..))
|
|
| 72 | 74 | import {-# SOURCE #-} GHC.Driver.Pipeline (compileForeign, compileEmptyStub)
|
| 73 | 75 | import GHC.Settings
|
| 74 | 76 | import System.IO
|
| ... | ... | @@ -229,8 +231,9 @@ runLlvmLlcPhase pipe_env hsc_env input_fn = do |
| 229 | 231 | 1 -> "-O1"
|
| 230 | 232 | _ -> "-O2"
|
| 231 | 233 | |
| 232 | - defaultOptions = map GHC.SysTools.Option . concatMap words . snd
|
|
| 233 | - $ unzip (llvmOptions llvm_config dflags)
|
|
| 234 | + llvm_version <- figureLlvmVersion logger dflags
|
|
| 235 | + let defaultOptions = map GHC.SysTools.Option . concatMap words . snd
|
|
| 236 | + $ unzip (llvmOptions llvm_config llvm_version dflags)
|
|
| 234 | 237 | optFlag = if null (getOpts dflags opt_lc)
|
| 235 | 238 | then map GHC.SysTools.Option $ words llvmOpts
|
| 236 | 239 | else []
|
| ... | ... | @@ -265,8 +268,9 @@ runLlvmOptPhase pipe_env hsc_env input_fn = do |
| 265 | 268 | Nothing -> panic ("runPhase LlvmOpt: llvm-passes file "
|
| 266 | 269 | ++ "is missing passes for level "
|
| 267 | 270 | ++ show optIdx)
|
| 268 | - defaultOptions = map GHC.SysTools.Option . concat . fmap words . fst
|
|
| 269 | - $ unzip (llvmOptions llvm_config dflags)
|
|
| 271 | + llvm_version <- figureLlvmVersion logger dflags
|
|
| 272 | + let defaultOptions = map GHC.SysTools.Option . concat . fmap words . fst
|
|
| 273 | + $ unzip (llvmOptions llvm_config llvm_version dflags)
|
|
| 270 | 274 | |
| 271 | 275 | -- don't specify anything if user has specified commands. We do this
|
| 272 | 276 | -- for opt but not llc since opt is very specifically for optimisation
|
| ... | ... | @@ -964,9 +968,10 @@ getOutputFilename logger tmpfs stop_phase output basename dflags next_phase mayb |
| 964 | 968 | -- | LLVM Options. These are flags to be passed to opt and llc, to ensure
|
| 965 | 969 | -- consistency we list them in pairs, so that they form groups.
|
| 966 | 970 | llvmOptions :: LlvmConfig
|
| 971 | + -> Maybe LlvmVersion
|
|
| 967 | 972 | -> DynFlags
|
| 968 | 973 | -> [(String, String)] -- ^ pairs of (opt, llc) arguments
|
| 969 | -llvmOptions llvm_config dflags =
|
|
| 974 | +llvmOptions llvm_config llvm_version dflags =
|
|
| 970 | 975 | [("-relocation-model=" ++ rmodel
|
| 971 | 976 | ,"-relocation-model=" ++ rmodel) | not (null rmodel)]
|
| 972 | 977 | |
| ... | ... | @@ -1006,6 +1011,10 @@ llvmOptions llvm_config dflags = |
| 1006 | 1011 | ++ ["+sse2" | isSse2Enabled platform ]
|
| 1007 | 1012 | ++ ["+sse" | isSseEnabled platform ]
|
| 1008 | 1013 | ++ ["+avx512f" | isAvx512fEnabled dflags ]
|
| 1014 | + ++ ["+evex512" | isAvx512fEnabled dflags
|
|
| 1015 | + , maybe False (>= LlvmVersion (18 NE.:| [])) llvm_version ]
|
|
| 1016 | + -- +evex512 is recognized by LLVM 18 or newer and needed on macOS (#26410).
|
|
| 1017 | + -- It may become deprecated in a future LLVM version, though.
|
|
| 1009 | 1018 | ++ ["+avx2" | isAvx2Enabled dflags ]
|
| 1010 | 1019 | ++ ["+avx" | isAvxEnabled dflags ]
|
| 1011 | 1020 | ++ ["+avx512cd"| isAvx512cdEnabled dflags ]
|
| 1 | +{-# LANGUAGE MagicHash #-}
|
|
| 2 | +{-# LANGUAGE UnboxedTuples #-}
|
|
| 3 | +{-# LANGUAGE UnliftedFFITypes #-}
|
|
| 4 | +import GHC.Exts
|
|
| 5 | +import GHC.Int
|
|
| 6 | + |
|
| 7 | +foreign import ccall unsafe "minInt64X8"
|
|
| 8 | + minInt64X8# :: Int64X8# -> Int64X8# -> Int64X8#
|
|
| 9 | + |
|
| 10 | +data Int64X8 = Int64X8# Int64X8#
|
|
| 11 | + |
|
| 12 | +minInt64X8 :: Int64X8 -> Int64X8 -> Int64X8
|
|
| 13 | +minInt64X8 (Int64X8# a) (Int64X8# b) = Int64X8# (minInt64X8# a b)
|
|
| 14 | +{-# NOINLINE minInt64X8 #-}
|
|
| 15 | + |
|
| 16 | +broadcastInt64X8 :: Int64 -> Int64X8
|
|
| 17 | +broadcastInt64X8 (I64# x) = Int64X8# (broadcastInt64X8# x)
|
|
| 18 | + |
|
| 19 | +packInt64X8 :: (Int64, Int64, Int64, Int64, Int64, Int64, Int64, Int64) -> Int64X8
|
|
| 20 | +packInt64X8 (I64# x0, I64# x1, I64# x2, I64# x3, I64# x4, I64# x5, I64# x6, I64# x7) = Int64X8# (packInt64X8# (# x0, x1, x2, x3, x4, x5, x6, x7 #))
|
|
| 21 | + |
|
| 22 | +unpackInt64X8 :: Int64X8 -> (Int64, Int64, Int64, Int64, Int64, Int64, Int64, Int64)
|
|
| 23 | +unpackInt64X8 (Int64X8# a) = case unpackInt64X8# a of
|
|
| 24 | + (# x0, x1, x2, x3, x4, x5, x6, x7 #) -> (I64# x0, I64# x1, I64# x2, I64# x3, I64# x4, I64# x5, I64# x6, I64# x7)
|
|
| 25 | + |
|
| 26 | +-- You can check the assembly code for this function to see if ZMM registers are used
|
|
| 27 | +plusInt64X8 :: Int64X8 -> Int64X8 -> Int64X8
|
|
| 28 | +plusInt64X8 (Int64X8# a) (Int64X8# b) = Int64X8# (plusInt64X8# a b)
|
|
| 29 | + |
|
| 30 | +main :: IO ()
|
|
| 31 | +main = do
|
|
| 32 | + let a = broadcastInt64X8 3
|
|
| 33 | + b = packInt64X8 (1, 2, 3, 4, 5, 6, 7, 8)
|
|
| 34 | + c = minInt64X8 a b
|
|
| 35 | + print $ unpackInt64X8 c
|
|
| 36 | + let d = packInt64X8 (-1, -2, -3, -4, -5, -6, -7, -8)
|
|
| 37 | + e = broadcastInt64X8 (-3)
|
|
| 38 | + f = minInt64X8 d e
|
|
| 39 | + print $ unpackInt64X8 f
|
|
| 40 | + print $ unpackInt64X8 (plusInt64X8 a b) |
| 1 | +(1,2,3,3,3,3,3,3)
|
|
| 2 | +(-3,-3,-3,-4,-5,-6,-7,-8)
|
|
| 3 | +(4,5,6,7,8,9,10,11) |
| 1 | +#include <x86intrin.h>
|
|
| 2 | + |
|
| 3 | +__m512i minInt64X8(__m512i a, __m512i b)
|
|
| 4 | +{
|
|
| 5 | + return _mm512_min_epi64(a, b);
|
|
| 6 | +} |
| 1 | +{-# LANGUAGE MagicHash #-}
|
|
| 2 | +{-# LANGUAGE UnboxedTuples #-}
|
|
| 3 | +{-# LANGUAGE UnliftedFFITypes #-}
|
|
| 4 | +import GHC.Exts
|
|
| 5 | +import GHC.Int
|
|
| 6 | +import GHC.Prim (minInt64X8#)
|
|
| 7 | + |
|
| 8 | +data Int64X8 = Int64X8# Int64X8#
|
|
| 9 | + |
|
| 10 | +minInt64X8 :: Int64X8 -> Int64X8 -> Int64X8
|
|
| 11 | +minInt64X8 (Int64X8# a) (Int64X8# b) = Int64X8# (minInt64X8# a b)
|
|
| 12 | +{-# NOINLINE minInt64X8 #-}
|
|
| 13 | + |
|
| 14 | +broadcastInt64X8 :: Int64 -> Int64X8
|
|
| 15 | +broadcastInt64X8 (I64# x) = Int64X8# (broadcastInt64X8# x)
|
|
| 16 | + |
|
| 17 | +packInt64X8 :: (Int64, Int64, Int64, Int64, Int64, Int64, Int64, Int64) -> Int64X8
|
|
| 18 | +packInt64X8 (I64# x0, I64# x1, I64# x2, I64# x3, I64# x4, I64# x5, I64# x6, I64# x7) = Int64X8# (packInt64X8# (# x0, x1, x2, x3, x4, x5, x6, x7 #))
|
|
| 19 | + |
|
| 20 | +unpackInt64X8 :: Int64X8 -> (Int64, Int64, Int64, Int64, Int64, Int64, Int64, Int64)
|
|
| 21 | +unpackInt64X8 (Int64X8# a) = case unpackInt64X8# a of
|
|
| 22 | + (# x0, x1, x2, x3, x4, x5, x6, x7 #) -> (I64# x0, I64# x1, I64# x2, I64# x3, I64# x4, I64# x5, I64# x6, I64# x7)
|
|
| 23 | + |
|
| 24 | +-- You can check the assembly code for this function to see if ZMM registers are used
|
|
| 25 | +plusInt64X8 :: Int64X8 -> Int64X8 -> Int64X8
|
|
| 26 | +plusInt64X8 (Int64X8# a) (Int64X8# b) = Int64X8# (plusInt64X8# a b)
|
|
| 27 | + |
|
| 28 | +main :: IO ()
|
|
| 29 | +main = do
|
|
| 30 | + let a = broadcastInt64X8 3
|
|
| 31 | + b = packInt64X8 (1, 2, 3, 4, 5, 6, 7, 8)
|
|
| 32 | + c = minInt64X8 a b
|
|
| 33 | + print $ unpackInt64X8 c
|
|
| 34 | + let d = packInt64X8 (-1, -2, -3, -4, -5, -6, -7, -8)
|
|
| 35 | + e = broadcastInt64X8 (-3)
|
|
| 36 | + f = minInt64X8 d e
|
|
| 37 | + print $ unpackInt64X8 f
|
|
| 38 | + print $ unpackInt64X8 (plusInt64X8 a b) |
| 1 | +(1,2,3,3,3,3,3,3)
|
|
| 2 | +(-3,-3,-3,-4,-5,-6,-7,-8)
|
|
| 3 | +(4,5,6,7,8,9,10,11) |
| ... | ... | @@ -164,3 +164,19 @@ test('T25062_V64' |
| 164 | 164 | test('T25169', [], compile_and_run, [''])
|
| 165 | 165 | test('T25455', [], compile_and_run, [''])
|
| 166 | 166 | test('T25486', [], compile_and_run, [''])
|
| 167 | + |
|
| 168 | +test('T26410_ffi'
|
|
| 169 | + , [ only_ways(llvm_ways) # SIMD NCG TODO: support 512-bit wide vectors
|
|
| 170 | + , unless(arch('x86_64') and have_cpu_feature('avx512f'), skip)
|
|
| 171 | + , extra_hc_opts('-mavx512f -optc -mavx512f -optlc -mcpu=penryn')
|
|
| 172 | + ]
|
|
| 173 | + , compile_and_run if have_cpu_feature('avx512f') else compile
|
|
| 174 | + , ['T26410_ffi_c.c'])
|
|
| 175 | + |
|
| 176 | +test('T26410_prim'
|
|
| 177 | + , [ only_ways(llvm_ways) # SIMD NCG TODO: support 512-bit wide vectors
|
|
| 178 | + , unless(arch('x86_64') and have_cpu_feature('avx512f'), skip)
|
|
| 179 | + , extra_hc_opts('-mavx512f -optlc -mcpu=penryn')
|
|
| 180 | + ]
|
|
| 181 | + , compile_and_run if have_cpu_feature('avx512f') else compile
|
|
| 182 | + , ['']) |