Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC Commits: 4157160f by Cheng Shao at 2026-02-13T06:27:04-05:00 ci: remove unused hlint-ghc-and-base job definition This patch removes the unused `hlint-ghc-and-base` job definition, it's never run since !9806. Note that hadrian lint rules still work locally, so anyone that wishes to run hlint on the codebase can continue to do so in their local worktree. - - - - - 039f1977 by Cheng Shao at 2026-02-13T06:27:47-05:00 wasm: use import.meta.main for proper distinction of nodejs main modules This patch uses `import.meta.main` for proper distinction of nodejs main modules, especially when the main module might be installed as a symlink. Fixes #26916. - - - - - e110ac41 by ARATA Mizuki at 2026-02-14T22:26:42+09:00 Support more x86 extensions: AVX-512 {BW,DQ,VL} and GFNI Also, mark AVX-512 ER and PF as deprecated. AVX-512 instructions can be used for certain 64-bit integer vector operations. GFNI can be used to implement bitReverse (currently not used by NCG, but LLVM may use it). Closes #26406 Addresses #26509 - - - - - c43a02ff by Ian-Woo Kim at 2026-02-14T20:52:28-05:00 determinism: Sort Usages by fingerprint to ensure consistent ordering In some situations it has been observed that the ordering of usages can be non-deterministic in parallel builds. Therefore, to be on the safe side, we perform a sort on the usages field before writing them to the interface. 
Fixes #26877 - - - - - 27 changed files: - .gitlab-ci.yml - compiler/GHC/CmmToAsm/Config.hs - compiler/GHC/CmmToAsm/X86/CodeGen.hs - compiler/GHC/CmmToAsm/X86/Instr.hs - compiler/GHC/CmmToAsm/X86/Ppr.hs - compiler/GHC/Driver/Config/CmmToAsm.hs - compiler/GHC/Driver/DynFlags.hs - compiler/GHC/Driver/Pipeline/Execute.hs - compiler/GHC/Driver/Session.hs - compiler/GHC/HsToCore/Usage.hs - compiler/GHC/SysTools/Cpp.hs - compiler/GHC/Unit/Module/Deps.hs - docs/users_guide/9.16.1-notes.rst - docs/users_guide/phases.rst - docs/users_guide/using.rst - testsuite/driver/cpu_features.py - testsuite/tests/codeGen/should_gen_asm/all.T - + testsuite/tests/codeGen/should_gen_asm/avx512-int64-minmax.asm - + testsuite/tests/codeGen/should_gen_asm/avx512-int64-minmax.hs - + testsuite/tests/codeGen/should_gen_asm/avx512-int64-mul.asm - + testsuite/tests/codeGen/should_gen_asm/avx512-int64-mul.hs - + testsuite/tests/codeGen/should_gen_asm/avx512-word64-minmax.asm - + testsuite/tests/codeGen/should_gen_asm/avx512-word64-minmax.hs - testsuite/tests/driver/recomp016/recomp016.stdout - testsuite/tests/simd/should_run/all.T - utils/jsffi/dyld.mjs - utils/jsffi/post-link.mjs Changes: ===================================== .gitlab-ci.yml ===================================== @@ -387,20 +387,6 @@ lint-submods-branch: paths: - cabal-cache -# Disabled due to #22830 -.hlint-ghc-and-base: - extends: .lint-params - image: "registry.gitlab.haskell.org/ghc/ci-images/linters:$DOCKER_REV" - variables: - BUILD_FLAVOUR: default - script: - - .gitlab/ci.sh setup - - .gitlab/ci.sh configure - - .gitlab/ci.sh run_hadrian lint:ghc-internal - - .gitlab/ci.sh run_hadrian lint:ghc-experimental - - .gitlab/ci.sh run_hadrian lint:base - - .gitlab/ci.sh run_hadrian lint:compiler - ############################################################ # GHC-in-GHCi (Hadrian) ############################################################ ===================================== compiler/GHC/CmmToAsm/Config.hs 
===================================== @@ -31,6 +31,9 @@ data NCGConfig = NCGConfig , ncgDoConstantFolding :: !Bool -- ^ Perform CMM constant folding , ncgSseAvxVersion :: Maybe SseAvxVersion -- ^ (x86) SSE and AVX instructions , ncgAvx512fEnabled :: !Bool + , ncgAvx512vlEnabled :: !Bool + , ncgAvx512bwEnabled :: !Bool + , ncgAvx512dqEnabled :: !Bool , ncgBmiVersion :: Maybe BmiVersion -- ^ (x86) BMI instructions , ncgDumpRegAllocStages :: !Bool , ncgDumpAsmStats :: !Bool ===================================== compiler/GHC/CmmToAsm/X86/CodeGen.hs ===================================== @@ -134,6 +134,12 @@ avx2Enabled = do config <- getConfig return (ncgSseAvxVersion config >= Just AVX2) +avx512vlEnabled :: NatM Bool +avx512vlEnabled = ncgAvx512vlEnabled <$> getConfig + +avx512dqEnabled :: NatM Bool +avx512dqEnabled = ncgAvx512dqEnabled <$> getConfig + cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl (Alignment, RawCmmStatics) Instr] @@ -1314,6 +1320,8 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps sse4_1 <- sse4_1Enabled sse4_2 <- sse4_2Enabled avx <- avxEnabled + avx512vl <- avx512vlEnabled + avx512dq <- avx512dqEnabled case mop of MO_F_Eq _ -> condFltReg is32Bit EQQ x y MO_F_Ne _ -> condFltReg is32Bit NE x y @@ -1432,57 +1440,76 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps MO_V_Sub l w | l * widthInBits w == 128 -> vector_int_op_sse PSUB l w x y | otherwise -> needLlvm mop MO_V_Mul 16 W8 -> vector_int8x16_mul_sse2 x y - MO_V_Mul l@8 w@W16 -> vector_int_op_sse PMULL l w x y -- PMULLW (SSE2) - MO_V_Mul l@4 w@W32 | sse4_1 -> vector_int_op_sse PMULL l w x y -- PMULLD (SSE4.1) + MO_V_Mul l@8 w@W16 | avx -> vector_int_op_avx VPMULL l w x y -- VPMULLW (AVX) + | otherwise -> vector_int_op_sse PMULL l w x y -- PMULLW (SSE2) + MO_V_Mul l@4 w@W32 | avx -> vector_int_op_avx VPMULL l w x y -- VPMULLD (AVX) + | sse4_1 -> vector_int_op_sse PMULL l w x y -- PMULLD (SSE4.1) | otherwise -> vector_int32x4_mul_sse2 x y - 
MO_V_Mul 2 W64 -> vector_int64x2_mul_sse2 x y + MO_V_Mul l@2 w@W64 | avx512dq && avx512vl -> vector_int_op_avx VPMULL l w x y -- VPMULLQ (AVX512DQ+VL) + | otherwise -> vector_int64x2_mul_sse2 x y MO_V_Mul {} -> needLlvm mop MO_VU_Min l@16 w@W8 - -> vector_int_op_sse (MINMAX Min (IntVecMinMax False)) l w x y -- PMINUB (SSE2) + | avx -> vector_int_op_avx (VMINMAX Min (IntVecMinMax False)) l w x y -- VPMINUB (AVX) + | otherwise -> vector_int_op_sse (MINMAX Min (IntVecMinMax False)) l w x y -- PMINUB (SSE2) MO_VU_Min l@8 w@W16 + | avx -> vector_int_op_avx (VMINMAX Min (IntVecMinMax False)) l w x y -- VPMINUW (AVX) | sse4_1 -> vector_int_op_sse (MINMAX Min (IntVecMinMax False)) l w x y -- PMINUW (SSE4.1) | otherwise -> vector_word_minmax_sse Min l w x y MO_VU_Min l@4 w@W32 + | avx -> vector_int_op_avx (VMINMAX Min (IntVecMinMax False)) l w x y -- VPMINUD (AVX) | sse4_1 -> vector_int_op_sse (MINMAX Min (IntVecMinMax False)) l w x y -- PMINUD (SSE4.1) | otherwise -> vector_word_minmax_sse Min l w x y MO_VU_Min l@2 w@W64 + | avx512vl -> vector_int_op_avx (VMINMAX Min (IntVecMinMax False)) l w x y -- VPMINUQ (AVX512F+VL) | sse4_2 -> vector_word_minmax_sse Min l w x y -- PCMPGTQ requires SSE4.2 -- The SSE2 version is implemented as a C call (MO_W64X2_Min) MO_VU_Min {} -> needLlvm mop MO_VU_Max l@16 w@W8 - -> vector_int_op_sse (MINMAX Max (IntVecMinMax False)) l w x y -- PMAXUB (SSE2) + | avx -> vector_int_op_avx (VMINMAX Max (IntVecMinMax False)) l w x y -- VPMAXUB (AVX) + | otherwise -> vector_int_op_sse (MINMAX Max (IntVecMinMax False)) l w x y -- PMAXUB (SSE2) MO_VU_Max l@8 w@W16 + | avx -> vector_int_op_avx (VMINMAX Max (IntVecMinMax False)) l w x y -- VPMAXUW (AVX) | sse4_1 -> vector_int_op_sse (MINMAX Max (IntVecMinMax False)) l w x y -- PMAXUW (SSE4.1) | otherwise -> vector_word_minmax_sse Max l w x y MO_VU_Max l@4 w@W32 + | avx -> vector_int_op_avx (VMINMAX Max (IntVecMinMax False)) l w x y -- VPMAXUD (AVX) | sse4_1 -> vector_int_op_sse (MINMAX Max (IntVecMinMax 
False)) l w x y -- PMAXUD (SSE4.1) | otherwise -> vector_word_minmax_sse Max l w x y MO_VU_Max l@2 w@W64 + | avx512vl -> vector_int_op_avx (VMINMAX Max (IntVecMinMax False)) l w x y -- VPMAXUQ (AVX512F+VL) | sse4_2 -> vector_word_minmax_sse Max l w x y -- PCMPGTQ requires SSE4.2 -- The SSE2 version is implemented as a C call (MO_W64X2_Max) MO_VU_Max {} -> needLlvm mop MO_VS_Min l@16 w@W8 + | avx -> vector_int_op_avx (VMINMAX Min (IntVecMinMax True)) l w x y -- VPMINSB (AVX) | sse4_1 -> vector_int_op_sse (MINMAX Min (IntVecMinMax True)) l w x y -- PMINSB (SSE4.1) | otherwise -> vector_int_minmax_sse Min l w x y MO_VS_Min l@8 w@W16 - -> vector_int_op_sse (MINMAX Min (IntVecMinMax True)) l w x y -- PMINSW (SSE2) + | avx -> vector_int_op_avx (VMINMAX Min (IntVecMinMax True)) l w x y -- VPMINSW (AVX) + | otherwise -> vector_int_op_sse (MINMAX Min (IntVecMinMax True)) l w x y -- PMINSW (SSE2) MO_VS_Min l@4 w@W32 + | avx -> vector_int_op_avx (VMINMAX Min (IntVecMinMax True)) l w x y -- VPMINSD (AVX) | sse4_1 -> vector_int_op_sse (MINMAX Min (IntVecMinMax True)) l w x y -- PMINSD (SSE4.1) | otherwise -> vector_int_minmax_sse Min l w x y MO_VS_Min l@2 w@W64 + | avx512vl -> vector_int_op_avx (VMINMAX Min (IntVecMinMax True)) l w x y -- VPMINSQ (AVX512F+VL) | sse4_2 -> vector_int_minmax_sse Min l w x y -- PCMPGTQ requires SSE4.2 -- The SSE2 version is implemented as a C call (MO_I64X2_Min) MO_VS_Min {} -> needLlvm mop MO_VS_Max l@16 w@W8 + | avx -> vector_int_op_avx (VMINMAX Max (IntVecMinMax True)) l w x y -- VPMAXSB (AVX) | sse4_1 -> vector_int_op_sse (MINMAX Max (IntVecMinMax True)) l w x y -- PMAXSB (SSE4.1) | otherwise -> vector_int_minmax_sse Max l w x y MO_VS_Max l@8 w@W16 - -> vector_int_op_sse (MINMAX Max (IntVecMinMax True)) l w x y -- PMAXSW (SSE2) + | avx -> vector_int_op_avx (VMINMAX Max (IntVecMinMax True)) l w x y -- VPMAXSW (AVX) + | otherwise -> vector_int_op_sse (MINMAX Max (IntVecMinMax True)) l w x y -- PMAXSW (SSE2) MO_VS_Max l@4 w@W32 + | avx -> 
vector_int_op_avx (VMINMAX Max (IntVecMinMax True)) l w x y -- VPMAXSD (AVX) | sse4_1 -> vector_int_op_sse (MINMAX Max (IntVecMinMax True)) l w x y -- PMAXSD (SSE4.1) | otherwise -> vector_int_minmax_sse Max l w x y MO_VS_Max l@2 w@W64 + | avx512vl -> vector_int_op_avx (VMINMAX Max (IntVecMinMax True)) l w x y -- VPMAXSQ (AVX512F+VL) | sse4_2 -> vector_int_minmax_sse Max l w x y -- PCMPGTQ requires SSE4.2 -- The SSE2 version is implemented as a C call (MO_I64X2_Max) MO_VS_Max {} -> needLlvm mop @@ -1975,7 +2002,6 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps (PUNPCKLDQ format (OpReg tmpOdd1) dst) -- dst <- (dst[0],tmpOdd1[0],dst[1],tmpOdd1[1]) return (Any format code) - -- TODO: We could use `VPMULLQ` if AVX-512 or AVX10.1 is available. vector_int64x2_mul_sse2 :: CmmExpr -> CmmExpr -> NatM Register vector_int64x2_mul_sse2 expr1 expr2 = do -- implement 64 bit multiplication using 32-bit PMULUDQ multiplication instructions ===================================== compiler/GHC/CmmToAsm/X86/Instr.hs ===================================== @@ -338,6 +338,7 @@ data Instr | PADD Format Operand Reg | PSUB Format Operand Reg | PMULL Format Operand Reg + | VPMULL Format Operand Reg Reg | PMULUDQ Format Operand Reg -- SIMD compare @@ -601,6 +602,7 @@ regUsageOfInstr platform instr PADD fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] PSUB fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] PMULL fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] + VPMULL fmt s1 s2 dst -> mkRU (use_R fmt s1 [mk fmt s2]) [mk fmt dst] PMULUDQ fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] PCMPGT fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] @@ -912,6 +914,7 @@ patchRegsOfInstr platform instr env PADD fmt src dst -> PADD fmt (patchOp src) (env dst) PSUB fmt src dst -> PSUB fmt (patchOp src) (env dst) PMULL fmt src dst -> PMULL fmt (patchOp src) (env dst) + VPMULL fmt s1 s2 dst -> VPMULL fmt (patchOp s1) 
(env s2) (env dst) PMULUDQ fmt src dst -> PMULUDQ fmt (patchOp src) (env dst) PCMPGT fmt src dst -> PCMPGT fmt (patchOp src) (env dst) ===================================== compiler/GHC/CmmToAsm/X86/Ppr.hs ===================================== @@ -1012,6 +1012,8 @@ pprInstr platform i = case i of -> pprFormatOpReg (text "psub") format src dst PMULL format src dst -> pprFormatOpReg (text "pmull") format src dst + VPMULL format s1 s2 dst + -> pprFormatOpRegReg (text "vpmull") format s1 s2 dst PMULUDQ format src dst -> pprOpReg (text "pmuludq") format src dst PCMPGT format src dst @@ -1574,7 +1576,8 @@ pprInstr platform i = case i of pprMinMax wantV minOrMax mmTy fmt regs = line $ hcat ( instr : intersperse comma ( map ( pprOperand platform fmt ) regs ) ) where - instr = (if wantV then text "v" else empty) + instr = char '\t' + <> (if wantV then text "v" else empty) <> (case mmTy of { IntVecMinMax {} -> text "p"; FloatMinMax -> empty }) <> (case minOrMax of { Min -> text "min"; Max -> text "max" }) <> (case mmTy of { IntVecMinMax wantSigned -> if wantSigned then text "s" else text "u"; FloatMinMax -> empty }) ===================================== compiler/GHC/Driver/Config/CmmToAsm.hs ===================================== @@ -65,6 +65,9 @@ initNCGConfig dflags this_mod = NCGConfig ArchX86 -> v _ -> Nothing , ncgAvx512fEnabled = isAvx512fEnabled dflags + , ncgAvx512vlEnabled = isAvx512vlEnabled dflags + , ncgAvx512bwEnabled = isAvx512bwEnabled dflags + , ncgAvx512dqEnabled = isAvx512dqEnabled dflags , ncgLa664Enabled = isLa664Enabled dflags ===================================== compiler/GHC/Driver/DynFlags.hs ===================================== @@ -83,11 +83,15 @@ module GHC.Driver.DynFlags ( isSse4_2Enabled, isAvxEnabled, isAvx2Enabled, + isAvx512bwEnabled, isAvx512cdEnabled, + isAvx512dqEnabled, isAvx512erEnabled, isAvx512fEnabled, isAvx512pfEnabled, + isAvx512vlEnabled, isFmaEnabled, + isGfniEnabled, isBmiEnabled, isBmi2Enabled, -- For LoongArch platform @@ 
-454,12 +458,16 @@ data DynFlags = DynFlags { -- | Machine dependent flags (-m\<blah> stuff) sseAvxVersion :: Maybe SseAvxVersion, bmiVersion :: Maybe BmiVersion, - avx512cd :: Bool, -- Enable AVX-512 Conflict Detection Instructions. - avx512er :: Bool, -- Enable AVX-512 Exponential and Reciprocal Instructions. - avx512f :: Bool, -- Enable AVX-512 instructions. - avx512pf :: Bool, -- Enable AVX-512 PreFetch Instructions. + avx512bw :: Bool, -- ^ Enable AVX-512BW Instructions. + avx512cd :: Bool, -- ^ Enable AVX-512 Conflict Detection Instructions. + avx512dq :: Bool, -- ^ Enable AVX-512DQ Instructions. + avx512er :: Bool, -- ^ Enable AVX-512 Exponential and Reciprocal Instructions. + avx512f :: Bool, -- ^ Enable AVX-512 instructions. + avx512pf :: Bool, -- ^ Enable AVX-512 PreFetch Instructions. + avx512vl :: Bool, -- ^ Enable AVX-512VL Instructions. fma :: Bool, -- ^ Enable FMA instructions. - la664 :: Bool, -- Enable LA664 instructions + gfni :: Bool, -- ^ Enable GFNI Instructions. + la664 :: Bool, -- ^ Enable LA664 instructions -- Constants used to control the amount of optimization done. @@ -737,12 +745,16 @@ defaultDynFlags mySettings = interactivePrint = Nothing, sseAvxVersion = Nothing, bmiVersion = Nothing, + avx512bw = False, avx512cd = False, + avx512dq = False, avx512er = False, avx512f = False, avx512pf = False, + avx512vl = False, -- Use FMA by default on AArch64 fma = (platformArch . sTargetPlatform $ mySettings) == ArchAArch64, + gfni = False, -- For LoongArch, la464 is used by default. 
la664 = False, @@ -1616,18 +1628,27 @@ isAvxEnabled dflags = sseAvxVersion dflags >= Just AVX1 || (isX86 && fma dflags) isAvx2Enabled :: DynFlags -> Bool isAvx2Enabled dflags = sseAvxVersion dflags >= Just AVX2 || isAvx512fEnabled dflags +isAvx512bwEnabled :: DynFlags -> Bool +isAvx512bwEnabled dflags = avx512bw dflags + isAvx512cdEnabled :: DynFlags -> Bool isAvx512cdEnabled dflags = avx512cd dflags +isAvx512dqEnabled :: DynFlags -> Bool +isAvx512dqEnabled dflags = avx512dq dflags + isAvx512erEnabled :: DynFlags -> Bool isAvx512erEnabled dflags = avx512er dflags isAvx512fEnabled :: DynFlags -> Bool -isAvx512fEnabled dflags = avx512f dflags || avx512cd dflags || avx512er dflags || avx512pf dflags +isAvx512fEnabled dflags = avx512f dflags || avx512bw dflags || avx512cd dflags || avx512dq dflags || avx512er dflags || avx512pf dflags || avx512vl dflags isAvx512pfEnabled :: DynFlags -> Bool isAvx512pfEnabled dflags = avx512pf dflags +isAvx512vlEnabled :: DynFlags -> Bool +isAvx512vlEnabled dflags = avx512vl dflags + isFmaEnabled :: DynFlags -> Bool isFmaEnabled dflags = fma dflags || (isX86 && isAvx512fEnabled dflags) where @@ -1637,6 +1658,9 @@ isFmaEnabled dflags = fma dflags || (isX86 && isAvx512fEnabled dflags) ArchX86 -> True _ -> False +isGfniEnabled :: DynFlags -> Bool +isGfniEnabled dflags = gfni dflags + {- Note [Implications between X86 CPU feature flags] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Many X86 CPU feature flags (such as -mavx, -mfma or -msse4) imply other @@ -1649,7 +1673,7 @@ structures: together with other implications such as 3. FMA -> AVX - 4. AVX512{CD,ED,PF} -> AVX512F -> AVX2 + 4. AVX512{BW,CD,DQ,ER,PF,VL} -> AVX512F -> AVX2 We handle this as follows: ===================================== compiler/GHC/Driver/Pipeline/Execute.hs ===================================== @@ -984,13 +984,17 @@ llvmOptions llvm_config llvm_version dflags = -- It may become deprecated in a future LLVM version, though. 
++ ["+avx2" | isAvx2Enabled dflags ] ++ ["+avx" | isAvxEnabled dflags ] + ++ ["+avx512bw"| isAvx512bwEnabled dflags ] ++ ["+avx512cd"| isAvx512cdEnabled dflags ] + ++ ["+avx512dq"| isAvx512dqEnabled dflags ] ++ ["+avx512er"| isAvx512erEnabled dflags ] ++ ["+avx512pf"| isAvx512pfEnabled dflags ] - -- For Arch64 +fma is not a option (it's unconditionally available). + ++ ["+avx512vl"| isAvx512vlEnabled dflags ] + -- For AArch64 +fma is not a option (it's unconditionally available). ++ ["+fma" | isFmaEnabled dflags && (arch /= ArchAArch64) ] ++ ["+bmi" | isBmiEnabled dflags ] ++ ["+bmi2" | isBmi2Enabled dflags ] + ++ ["+gfni" | isGfniEnabled dflags ] abi :: String abi = case platformArch (targetPlatform dflags) of ===================================== compiler/GHC/Driver/Session.hs ===================================== @@ -212,11 +212,15 @@ module GHC.Driver.Session ( isBmi2Enabled, isAvxEnabled, isAvx2Enabled, + isAvx512bwEnabled, isAvx512cdEnabled, + isAvx512dqEnabled, isAvx512erEnabled, isAvx512fEnabled, isAvx512pfEnabled, + isAvx512vlEnabled, isFmaEnabled, + isGfniEnabled, -- LoongArch: ISA version: la664, la464(default) isLa664Enabled, @@ -1723,14 +1727,17 @@ dynamic_flags_deps = [ d { sseAvxVersion = max (Just AVX1) (sseAvxVersion d) })) , make_ord_flag defGhcFlag "mavx2" (noArg (\d -> d { sseAvxVersion = max (Just AVX2) (sseAvxVersion d) })) - , make_ord_flag defGhcFlag "mavx512cd" (noArg (\d -> - d { avx512cd = True })) - , make_ord_flag defGhcFlag "mavx512er" (noArg (\d -> - d { avx512er = True })) + , make_ord_flag defGhcFlag "mavx512bw" (noArg (\d -> d { avx512bw = True })) + , make_ord_flag defGhcFlag "mavx512cd" (noArg (\d -> d { avx512cd = True })) + , make_ord_flag defGhcFlag "mavx512dq" (noArg (\d -> d { avx512dq = True })) + , make_dep_flag defGhcFlag "mavx512er" (noArg (\d -> d { avx512er = True })) + "AVX-512ER was only available on Xeon Phi" , make_ord_flag defGhcFlag "mavx512f" (noArg (\d -> d { avx512f = True })) - , make_ord_flag defGhcFlag 
"mavx512pf" (noArg (\d -> - d { avx512pf = True })) + , make_dep_flag defGhcFlag "mavx512pf" (noArg (\d -> d { avx512pf = True })) + "AVX-512PF was only available on Xeon Phi" + , make_ord_flag defGhcFlag "mavx512vl" (noArg (\d -> d { avx512vl = True })) , make_ord_flag defGhcFlag "mfma" (noArg (\d -> d { fma = True })) + , make_ord_flag defGhcFlag "mgfni" (noArg (\d -> d { gfni = True })) , make_ord_flag defGhcFlag "mla664" (noArg (\d -> d { la664 = True })) ===================================== compiler/GHC/HsToCore/Usage.hs ===================================== @@ -36,7 +36,7 @@ import GHC.Data.Maybe import GHC.Data.FastString import Data.IORef -import Data.List (sortBy) +import Data.List (sortBy, sortOn) import Data.Map (Map) import qualified Data.Map as Map import qualified Data.Set as Set @@ -73,6 +73,8 @@ data UsageConfig = UsageConfig { uc_safe_implicit_imps_req :: !Bool -- ^ Are all implicit imports required to be safe for this Safe Haskell mode? } +-- | Build the list of 'Usage's that drives recompilation checking. +-- The resulting list is deterministically sorted (see 'usageFingerprint'). mkUsageInfo :: UsageConfig -> Plugins -> FinderCache -> UnitEnv -> Module -> ImportedMods -> [ImportUserSpec] -> NameSet -> [FilePath] -> [FilePath] -> [(Module, Fingerprint)] -> [Linkable] -> PkgsLoaded @@ -105,7 +107,10 @@ mkUsageInfo uc plugins fc unit_env } | (mod, hash) <- merged ] ++ object_usages - usages `seqList` return usages + + -- Sort all the Usages to ensure a deterministic ordering. + let sorted_usages = sortOn usageFingerprint usages + sorted_usages `seqList` return sorted_usages -- seq the list of Usages returned: occasionally these -- don't get evaluated for a while and we can end up hanging on to -- the entire collection of Ifaces. 
===================================== compiler/GHC/SysTools/Cpp.hs ===================================== @@ -165,10 +165,16 @@ doCpp logger tmpfs dflags unit_env opts input_fn output_fn = do let avx_defs = [ "-D__AVX__" | isAvxEnabled dflags ] ++ [ "-D__AVX2__" | isAvx2Enabled dflags ] ++ + [ "-D__AVX512BW__" | isAvx512bwEnabled dflags ] ++ [ "-D__AVX512CD__" | isAvx512cdEnabled dflags ] ++ + [ "-D__AVX512DQ__" | isAvx512dqEnabled dflags ] ++ [ "-D__AVX512ER__" | isAvx512erEnabled dflags ] ++ [ "-D__AVX512F__" | isAvx512fEnabled dflags ] ++ - [ "-D__AVX512PF__" | isAvx512pfEnabled dflags ] + [ "-D__AVX512PF__" | isAvx512pfEnabled dflags ] ++ + [ "-D__AVX512VL__" | isAvx512vlEnabled dflags ] + + let gfni_def = + [ "-D__GFNI__" | isGfniEnabled dflags ] backend_defs <- applyCDefs (backendCDefs $ backend dflags) logger dflags @@ -209,6 +215,7 @@ doCpp logger tmpfs dflags unit_env opts input_fn output_fn = do ++ map GHC.SysTools.Option sse_defs ++ map GHC.SysTools.Option fma_def ++ map GHC.SysTools.Option avx_defs + ++ map GHC.SysTools.Option gfni_def ++ map GHC.SysTools.Option io_manager_defs ++ mb_macro_include ++ line_pragmas ===================================== compiler/GHC/Unit/Module/Deps.hs ===================================== @@ -17,6 +17,7 @@ module GHC.Unit.Module.Deps , noDependencies , pprDeps , Usage (..) + , usageFingerprint , HomeModImport (..) , HomeModImportedAvails (..) , ImportAvails (..) @@ -492,6 +493,17 @@ instance Binary Usage where i -> error ("Binary.get(Usage): " ++ show i) +-- | Extract the distinguishing fingerprint carried by a particular 'Usage' +-- constructor. Every constructor stores a hash capturing the bit of state +-- that drives recompilation decisions, so we can sort on it directly. 
+usageFingerprint :: Usage -> Fingerprint +usageFingerprint UsagePackageModule{ usg_mod_hash = fp } = fp +usageFingerprint UsageHomeModule{ usg_mod_hash = fp } = fp +usageFingerprint UsageFile{ usg_file_hash = fp } = fp +usageFingerprint UsageDirectory{ usg_dir_hash = fp } = fp +usageFingerprint UsageHomeModuleInterface{ usg_iface_hash = fp } = fp +usageFingerprint UsageMergedRequirement{ usg_mod_hash = fp } = fp + -- | Records the imports that we depend on from a home module, -- for recompilation checking. -- ===================================== docs/users_guide/9.16.1-notes.rst ===================================== @@ -101,6 +101,9 @@ to See :ghc-ticket:`25345`. +- Add several options for x86 extensions: :ghc-flag:`-mavx512bw`, + :ghc-flag:`-mavx512dq`, :ghc-flag:`-mavx512vl`, and :ghc-flag:`-mgfni`. + GHCi ~~~~ ===================================== docs/users_guide/phases.rst ===================================== @@ -553,8 +553,10 @@ SIMD macros These are defined conditionally based on the SIMD flags used for compilation: - ``__SSE__``, ``__SSE2__``, ``__SSE4_2__``, ``__FMA__``, - ``__AVX__``, ``__AVX2__``, ``__AVX512CD__``, ``__AVX512ER__``, ``__AVX512F__``, ``__AVX512PF__``, + ``__SSE__``, ``__SSE2__``, ``__SSE3__``, ``__SSSE3__``, + ``__SSE4_1__``, ``__SSE4_2__``, ``__FMA__``, ``__AVX__``, ``__AVX2__``, + ``__AVX512BW__``, ``__AVX512CD__``, ``__AVX512DQ__``, ``__AVX512ER__``, + ``__AVX512F__``, ``__AVX512PF__``, ``__AVX512VL__``, ``__GFNI__`` .. _cpp-string-gaps: ===================================== docs/users_guide/using.rst ===================================== @@ -1601,7 +1601,7 @@ Some flags only make sense for particular target platforms. :implies: :ghc-flag:`-msse4.2` (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>` - or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX instructions. + or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX instructions. .. 
ghc-flag:: -mavx2 :shortdesc: (x86 only) Enable support for AVX2 SIMD extensions @@ -1611,47 +1611,84 @@ Some flags only make sense for particular target platforms. :implies: :ghc-flag:`-mavx` (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>` - or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX2 instructions. + or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX2 instructions. + +.. ghc-flag:: -mavx512bw + :shortdesc: (x86 only) Enable support for AVX-512BW SIMD extensions + :type: dynamic + :category: platform-options + + :since: 9.16.1 + :implies: :ghc-flag:`-mavx512f` + + (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>` + or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512BW instructions. .. ghc-flag:: -mavx512cd - :shortdesc: (x86 only) Enable support for AVX512-CD SIMD extensions + :shortdesc: (x86 only) Enable support for AVX-512CD SIMD extensions :type: dynamic :category: platform-options :implies: :ghc-flag:`-mavx512f` (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>` - or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-CD instructions. + or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512CD instructions. + +.. ghc-flag:: -mavx512dq + :shortdesc: (x86 only) Enable support for AVX-512DQ SIMD extensions + :type: dynamic + :category: platform-options + + :since: 9.16.1 + :implies: :ghc-flag:`-mavx512f` + + (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>` + or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512DQ instructions. .. 
ghc-flag:: -mavx512er - :shortdesc: (x86 only) Enable support for AVX512-ER SIMD extensions + :shortdesc: (x86 only, deprecated) Enable support for AVX-512ER SIMD extensions :type: dynamic :category: platform-options :implies: :ghc-flag:`-mavx512f` (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>` - or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-ER instructions. + or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512ER instructions. + + The AVX-512ER extension is deprecated and not supported by newer LLVM versions. .. ghc-flag:: -mavx512f - :shortdesc: (x86 only) Enable support for AVX512-F SIMD extensions + :shortdesc: (x86 only) Enable support for AVX-512F SIMD extensions :type: dynamic :category: platform-options :implies: :ghc-flag:`-mavx2`, :ghc-flag:`-mfma` (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>` - or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-F instructions. + or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512F instructions. .. ghc-flag:: -mavx512pf - :shortdesc: (x86 only) Enable support for AVX512-PF SIMD extensions + :shortdesc: (x86 only, deprecated) Enable support for AVX-512PF SIMD extensions :type: dynamic :category: platform-options :implies: :ghc-flag:`-mavx512f` (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>` - or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-PF instructions. + or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512PF instructions. + + The AVX-512PF extension is deprecated and not supported by newer LLVM versions. + +.. 
ghc-flag:: -mavx512vl + :shortdesc: (x86 only) Enable support for AVX-512VL SIMD extensions + :type: dynamic + :category: platform-options + + :since: 9.16.1 + :implies: :ghc-flag:`-mavx512f` + + (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>` + or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512VL instructions. .. ghc-flag:: -msse :shortdesc: (x86 only) Use SSE for floating-point operations @@ -1714,13 +1751,13 @@ Some flags only make sense for particular target platforms. or the :ref:`LLVM backend <llvm-code-gen>`). .. ghc-flag:: -msse4 - :shortdesc: (x86 only) Use SSE4 for floating-point operations + :shortdesc: (x86 only) Use SSE4.1 for floating-point operations :type: dynamic :category: platform-options :implies: :ghc-flag:`-mssse3` - (x86 only) Use the SSE4 instruction set to + (x86 only) Use the SSE4.1 instruction set to implement some floating point and bit operations(whether using the :ref:`native code generator <native-code-gen>` or the :ref:`LLVM backend <llvm-code-gen>`). @@ -1781,6 +1818,16 @@ Some flags only make sense for particular target platforms. multiply-add, which might perform non-IEEE-compliant software emulation on some platforms (depending on the implementation of the C standard library). +.. ghc-flag:: -mgfni + :shortdesc: (x86 only) Use GFNI for advanced bit manipulations + :type: dynamic + :category: platform-options + + :since: 9.16.1 + + (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>` + or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 GFNI instructions. + .. 
ghc-flag:: -mla664 :shortdesc: (LoongArch only) Used for new instructions for la664 uarch :type: dynamic ===================================== testsuite/driver/cpu_features.py ===================================== @@ -9,9 +9,9 @@ SUPPORTED_CPU_FEATURES = { # x86: 'sse', 'sse2', 'sse3', 'pni', 'ssse3', 'sse4_1', 'sse4_2', - 'avx', 'avx2', 'avx512f', + 'avx', 'avx2', 'avx512f', 'avx512vl', 'avx512bw', 'avx512dq', 'fma', - 'popcnt', 'bmi1', 'bmi2' + 'popcnt', 'bmi1', 'bmi2', 'gfni', } cpu_feature_cache = None ===================================== testsuite/tests/codeGen/should_gen_asm/all.T ===================================== @@ -17,3 +17,9 @@ test('msse-option-order', [unless(arch('x86_64') or arch('i386'), skip), when(unregisterised(), skip)], compile_grep_asm, ['hs', False, '-msse4.2 -msse2']) test('mavx-should-enable-popcnt', [unless(arch('x86_64') or arch('i386'), skip), when(unregisterised(), skip)], compile_grep_asm, ['hs', False, '-mavx']) +test('avx512-int64-mul', [unless(arch('x86_64') or arch('i386'), skip), + when(unregisterised(), skip)], compile_grep_asm, ['hs', True, '-mavx512dq -mavx512vl']) +test('avx512-int64-minmax', [unless(arch('x86_64') or arch('i386'), skip), + when(unregisterised(), skip)], compile_grep_asm, ['hs', True, '-mavx512vl']) +test('avx512-word64-minmax', [unless(arch('x86_64') or arch('i386'), skip), + when(unregisterised(), skip)], compile_grep_asm, ['hs', True, '-mavx512vl']) ===================================== testsuite/tests/codeGen/should_gen_asm/avx512-int64-minmax.asm ===================================== @@ -0,0 +1,2 @@ +vpminsq +vpmaxsq ===================================== testsuite/tests/codeGen/should_gen_asm/avx512-int64-minmax.hs ===================================== @@ -0,0 +1,27 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ExtendedLiterals #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +import GHC.Exts +import GHC.Prim +import GHC.Int + +{-# NOINLINE f #-} +f :: Int64X2# -> Int64X2# -> Int64X2# 
-> Int64X2# +f x y z = minInt64X2# x (plusInt64X2# y z) + +{-# NOINLINE g #-} +g :: Int64X2# -> Int64X2# -> Int64X2# -> Int64X2# +g x y z = maxInt64X2# x (plusInt64X2# y z) + +main :: IO () +main = do + let !x = packInt64X2# (# 1#Int64, 10#Int64 #) + !y = packInt64X2# (# 4#Int64, 2#Int64 #) + !z = broadcastInt64X2# 5#Int64 + !w = f x y z + (# w0, w1 #) = unpackInt64X2# w + !v = g x y z + (# v0, v1 #) = unpackInt64X2# v + print (I64# w0, I64# w1) + print (I64# v0, I64# v1) ===================================== testsuite/tests/codeGen/should_gen_asm/avx512-int64-mul.asm ===================================== @@ -0,0 +1 @@ +vpmullq ===================================== testsuite/tests/codeGen/should_gen_asm/avx512-int64-mul.hs ===================================== @@ -0,0 +1,19 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ExtendedLiterals #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +import GHC.Exts +import GHC.Int + +{-# NOINLINE f #-} +f :: Int64X2# -> Int64X2# -> Int64X2# -> Int64X2# +f x y z = timesInt64X2# x (plusInt64X2# y z) + +main :: IO () +main = do + let !x = packInt64X2# (# 1#Int64, 3#Int64 #) + !y = packInt64X2# (# 4#Int64, 2#Int64 #) + !z = broadcastInt64X2# 5#Int64 + !w = f x y z + (# w0, w1 #) = unpackInt64X2# w + print (I64# w0, I64# w1) ===================================== testsuite/tests/codeGen/should_gen_asm/avx512-word64-minmax.asm ===================================== @@ -0,0 +1,2 @@ +vpminuq +vpmaxuq ===================================== testsuite/tests/codeGen/should_gen_asm/avx512-word64-minmax.hs ===================================== @@ -0,0 +1,27 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ExtendedLiterals #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +import GHC.Exts +import GHC.Prim +import GHC.Word + +{-# NOINLINE f #-} +f :: Word64X2# -> Word64X2# -> Word64X2# -> Word64X2# +f x y z = minWord64X2# x (plusWord64X2# y z) + +{-# NOINLINE g #-} +g :: Word64X2# -> Word64X2# -> Word64X2# -> Word64X2# 
+g x y z = maxWord64X2# x (plusWord64X2# y z) + +main :: IO () +main = do + let !x = packWord64X2# (# 1#Word64, 10#Word64 #) + !y = packWord64X2# (# 4#Word64, 2#Word64 #) + !z = broadcastWord64X2# 5#Word64 + !w = f x y z + (# w0, w1 #) = unpackWord64X2# w + !v = g x y z + (# v0, v1 #) = unpackWord64X2# v + print (W64# w0, W64# w1) + print (W64# v0, W64# v1) ===================================== testsuite/tests/driver/recomp016/recomp016.stdout ===================================== @@ -9,4 +9,4 @@ second run [2 of 5] Compiling B ( B.hs, B.o ) [Source file changed] [3 of 5] Compiling C ( C.hs, C.o ) [B changed] [4 of 5] Compiling D ( D.hs, D.o ) [C changed] -[5 of 5] Compiling E ( E.hs, E.o ) [B changed] +[5 of 5] Compiling E ( E.hs, E.o ) [D changed] ===================================== testsuite/tests/simd/should_run/all.T ===================================== @@ -66,6 +66,9 @@ setTestOpts( , when(have_cpu_feature('avx'), extra_hc_opts('-mavx')) , when(have_cpu_feature('avx2'), extra_hc_opts('-mavx2')) , when(have_cpu_feature('avx512f'), extra_hc_opts('-mavx512f')) + , when(have_cpu_feature('avx512vl'), extra_hc_opts('-mavx512vl')) + , when(have_cpu_feature('avx512bw'), extra_hc_opts('-mavx512bw')) + , when(have_cpu_feature('avx512dq'), extra_hc_opts('-mavx512dq')) ]) test('simd000', [], compile_and_run, ['']) ===================================== utils/jsffi/dyld.mjs ===================================== @@ -1470,7 +1470,7 @@ async function nodeMain({ searchDirs, mainSoPath, outFd, inFd, args }) { ); } -const isNodeMain = isNode && import.meta.filename === process.argv[1]; +const isNodeMain = isNode && import.meta.main; // node iserv as invoked by // GHC.Runtime.Interpreter.Wasm.spawnWasmInterp ===================================== utils/jsffi/post-link.mjs ===================================== @@ -119,7 +119,7 @@ function isMain() { return false; } - return import.meta.filename === process.argv[1]; + return import.meta.main; } async function main() { View it on 
GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/12f48505d0222691b783f2054936054... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/12f48505d0222691b783f2054936054... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Marge Bot (@marge-bot)