Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC Commits: 6ef22fa0 by IC Rainbow at 2025-10-26T18:23:01-04:00 Add SIMD primops for bitwise logical operations This adds 128-bit wide and/or/xor instructions for the X86 NCG, with both SSE and AVX encodings. ``` andFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# -- andps / vandps andDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# -- andpd / vandpd andInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# -- pand / vpand ``` The new primops are available on ARM when using the LLVM backend. Tests added: - simd015 (floats and doubles) - simd016 (integers) - simd017 (words) Fixes #26417 - - - - - 26 changed files: - compiler/GHC/Builtin/primops.txt.pp - compiler/GHC/Cmm/MachOp.hs - compiler/GHC/CmmToAsm/AArch64/CodeGen.hs - compiler/GHC/CmmToAsm/X86/CodeGen.hs - compiler/GHC/CmmToAsm/X86/Instr.hs - compiler/GHC/CmmToAsm/X86/Ppr.hs - compiler/GHC/CmmToC.hs - compiler/GHC/CmmToLlvm/CodeGen.hs - compiler/GHC/StgToCmm/Prim.hs - compiler/GHC/StgToJS/Prim.hs - docs/users_guide/9.16.1-notes.rst - libraries/base/src/GHC/Base.hs - libraries/base/src/GHC/Exts.hs - libraries/ghc-experimental/CHANGELOG.md - libraries/ghc-prim/changelog.md - testsuite/tests/interface-stability/ghc-experimental-exports.stdout - testsuite/tests/interface-stability/ghc-experimental-exports.stdout-mingw32 - testsuite/tests/interface-stability/ghc-prim-exports.stdout - testsuite/tests/interface-stability/ghc-prim-exports.stdout-mingw32 - testsuite/tests/simd/should_run/all.T - + testsuite/tests/simd/should_run/simd015.hs - + testsuite/tests/simd/should_run/simd015.stdout - + testsuite/tests/simd/should_run/simd016.hs - + testsuite/tests/simd/should_run/simd016.stdout - + testsuite/tests/simd/should_run/simd017.hs - + testsuite/tests/simd/should_run/simd017.stdout Changes: ===================================== compiler/GHC/Builtin/primops.txt.pp ===================================== @@ -4391,6 +4391,24 @@ primop VecMaxOp "max#" GenPrimOp with vector = 
ALL_VECTOR_TYPES +primop VecAndOp "and#" GenPrimOp + VECTOR -> VECTOR -> VECTOR + {Bit-wise AND of two vectors.} + with + vector = ALL_VECTOR_TYPES + +primop VecOrOp "or#" GenPrimOp + VECTOR -> VECTOR -> VECTOR + {Bit-wise OR of two vectors.} + with + vector = ALL_VECTOR_TYPES + +primop VecXorOp "xor#" GenPrimOp + VECTOR -> VECTOR -> VECTOR + {Bit-wise XOR of two vectors.} + with + vector = ALL_VECTOR_TYPES + ------------------------------------------------------------------------ section "Prefetch" ===================================== compiler/GHC/Cmm/MachOp.hs ===================================== @@ -196,6 +196,14 @@ data MachOp | MO_VF_Min Length Width | MO_VF_Max Length Width + -- Bitwise vector operations + | MO_V_And Length Width + | MO_V_Or Length Width + | MO_V_Xor Length Width + | MO_VF_And Length Width + | MO_VF_Or Length Width + | MO_VF_Xor Length Width + -- | An atomic read with no memory ordering. Address must -- be naturally aligned. | MO_RelaxedRead Width @@ -507,6 +515,14 @@ machOpResultType platform mop tys = MO_V_Sub l w -> cmmVec l (cmmBits w) MO_V_Mul l w -> cmmVec l (cmmBits w) + MO_V_And l w -> cmmVec l (cmmBits w) + MO_V_Or l w -> cmmVec l (cmmBits w) + MO_V_Xor l w -> cmmVec l (cmmBits w) + + MO_VF_And l w -> cmmVec l (cmmBits w) + MO_VF_Or l w -> cmmVec l (cmmBits w) + MO_VF_Xor l w -> cmmVec l (cmmBits w) + MO_VS_Neg l w -> cmmVec l (cmmBits w) MO_VS_Min l w -> cmmVec l (cmmBits w) MO_VS_Max l w -> cmmVec l (cmmBits w) @@ -636,6 +652,13 @@ machOpArgReps platform op = MO_VF_Min l w -> [vecwidth l w, vecwidth l w] MO_VF_Max l w -> [vecwidth l w, vecwidth l w] + MO_V_And l w -> [vecwidth l w, vecwidth l w] + MO_V_Or l w -> [vecwidth l w, vecwidth l w] + MO_V_Xor l w -> [vecwidth l w, vecwidth l w] + MO_VF_And l w -> [vecwidth l w, vecwidth l w] + MO_VF_Or l w -> [vecwidth l w, vecwidth l w] + MO_VF_Xor l w -> [vecwidth l w, vecwidth l w] + MO_RelaxedRead _ -> [wordWidth platform] MO_AlignmentCheck _ w -> [w] where 
===================================== compiler/GHC/CmmToAsm/AArch64/CodeGen.hs ===================================== @@ -832,6 +832,9 @@ getRegister' config plat expr MO_V_Add {} -> notUnary MO_V_Sub {} -> notUnary MO_V_Mul {} -> notUnary + MO_V_And {} -> notUnary + MO_V_Or {} -> notUnary + MO_V_Xor {} -> notUnary MO_VS_Neg {} -> notUnary MO_V_Shuffle {} -> notUnary MO_VF_Shuffle {} -> notUnary @@ -841,6 +844,9 @@ getRegister' config plat expr MO_VF_Sub {} -> notUnary MO_VF_Mul {} -> notUnary MO_VF_Quot {} -> notUnary + MO_VF_And {} -> notUnary + MO_VF_Or {} -> notUnary + MO_VF_Xor {} -> notUnary MO_Add {} -> notUnary MO_Sub {} -> notUnary @@ -1221,6 +1227,12 @@ getRegister' config plat expr MO_V_Add {} -> vectorsNeedLlvm MO_V_Sub {} -> vectorsNeedLlvm MO_V_Mul {} -> vectorsNeedLlvm + MO_V_And {} -> vectorsNeedLlvm + MO_V_Or {} -> vectorsNeedLlvm + MO_V_Xor {} -> vectorsNeedLlvm + MO_VF_And {} -> vectorsNeedLlvm + MO_VF_Or {} -> vectorsNeedLlvm + MO_VF_Xor {} -> vectorsNeedLlvm MO_VS_Neg {} -> vectorsNeedLlvm MO_VF_Extract {} -> vectorsNeedLlvm MO_VF_Add {} -> vectorsNeedLlvm ===================================== compiler/GHC/CmmToAsm/X86/CodeGen.hs ===================================== @@ -1137,6 +1137,13 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps MO_VF_Min {} -> incorrectOperands MO_VF_Max {} -> incorrectOperands + MO_V_And {} -> incorrectOperands + MO_V_Or {} -> incorrectOperands + MO_V_Xor {} -> incorrectOperands + MO_VF_And {} -> incorrectOperands + MO_VF_Or {} -> incorrectOperands + MO_VF_Xor {} -> incorrectOperands + MO_VF_Extract {} -> incorrectOperands MO_VF_Add {} -> incorrectOperands MO_VF_Sub {} -> incorrectOperands @@ -1404,6 +1411,20 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps MO_VF_Max l w | avx -> vector_float_op_avx (VMINMAX Max FloatMinMax) l w x y | otherwise -> vector_float_op_sse (MINMAX Max FloatMinMax) l w x y + MO_V_And l w | avx -> vector_int_op_avx VPAND l w x y + | 
otherwise -> vector_int_op_sse PAND l w x y + MO_V_Or l w | avx -> vector_int_op_avx VPOR l w x y + | otherwise -> vector_int_op_sse POR l w x y + MO_V_Xor l w | avx -> vector_int_op_avx VPXOR l w x y + | otherwise -> vector_int_op_sse PXOR l w x y + + MO_VF_And l w | avx -> vector_float_op_avx VAND l w x y + | otherwise -> vector_float_op_sse (\fmt op2 -> AND fmt op2 . OpReg) l w x y + MO_VF_Or l w | avx -> vector_float_op_avx VOR l w x y + | otherwise -> vector_float_op_sse (\fmt op2 -> OR fmt op2 . OpReg) l w x y + MO_VF_Xor l w | avx -> vector_float_op_avx VXOR l w x y + | otherwise -> vector_float_op_sse (\fmt op2 -> XOR fmt op2 . OpReg) l w x y + -- SIMD NCG TODO: 256/512-bit integer vector operations MO_V_Shuffle 16 W8 is | not is32Bit -> vector_shuffle_int8x16 sse4_1 x y is MO_V_Shuffle 8 W16 is -> vector_shuffle_int16x8 sse4_1 x y is @@ -1680,6 +1701,21 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps ----------------------- -- Vector operations--- + vector_int_op_avx :: (Format -> Operand -> Reg -> Reg -> Instr) + -> Length + -> Width + -> CmmExpr + -> CmmExpr + -> NatM Register + vector_int_op_avx instr l w = vector_op_avx_reg (\fmt -> instr fmt . 
OpReg) format + where format = case w of + W8 -> VecFormat l FmtInt8 + W16 -> VecFormat l FmtInt16 + W32 -> VecFormat l FmtInt32 + W64 -> VecFormat l FmtInt64 + _ -> pprPanic "Integer AVX vector operation not supported at this width" + (text "width:" <+> ppr w) + vector_float_op_avx :: (Format -> Operand -> Reg -> Reg -> Instr) -> Length -> Width @@ -3157,7 +3193,7 @@ getRegister' platform is32Bit (CmmLit lit) = do | avx = if float_or_floatvec then unitOL (VXOR fmt (OpReg dst) dst dst) - else unitOL (VPXOR fmt dst dst dst) + else unitOL (VPXOR fmt (OpReg dst) dst dst) | otherwise = if float_or_floatvec then unitOL (XOR fmt (OpReg dst) (OpReg dst)) ===================================== compiler/GHC/CmmToAsm/X86/Instr.hs ===================================== @@ -175,11 +175,13 @@ data Instr | AND Format Operand Operand | OR Format Operand Operand | XOR Format Operand Operand - -- | AVX bitwise logical XOR operation - | VXOR Format Operand Reg Reg | NOT Format Operand | NEGI Format Operand -- NEG instruction (name clash with Cond) | BSWAP Format Reg + -- Vector bitwise logical operations + | VAND Format Operand Reg Reg + | VOR Format Operand Reg Reg + | VXOR Format Operand Reg Reg -- Shifts (amount may be immediate or %cl only) | SHL Format Operand{-amount-} Operand @@ -318,10 +320,12 @@ data Instr -- logic operations | PXOR Format Operand Reg - | VPXOR Format Reg Reg Reg + | VPXOR Format Operand Reg Reg | PAND Format Operand Reg | PANDN Format Operand Reg + | VPAND Format Operand Reg Reg | POR Format Operand Reg + | VPOR Format Operand Reg Reg -- Arithmetic | VADD Format Operand Reg Reg @@ -444,8 +448,14 @@ regUsageOfInstr platform instr IDIV fmt op -> mkRU (mk fmt eax:mk fmt edx:use_R fmt op []) [mk fmt eax, mk fmt edx] ADD_CC fmt src dst -> usageRM fmt src dst SUB_CC fmt src dst -> usageRM fmt src dst + AND fmt src dst -> usageRM fmt src dst + VAND fmt src1 src2 dst + -> mkRU (use_R fmt src1 [mk fmt src2]) [mk fmt dst] + OR fmt src dst -> usageRM fmt src dst + VOR 
fmt src1 src2 dst + -> mkRU (use_R fmt src1 [mk fmt src2]) [mk fmt dst] XOR fmt (OpReg src) (OpReg dst) | src == dst @@ -500,6 +510,8 @@ regUsageOfInstr platform instr LOCATION{} -> noUsage UNWIND{} -> noUsage DELTA _ -> noUsage + LDATA{} -> noUsage + NEWBLOCK{} -> noUsage POPCNT fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst] LZCNT fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst] @@ -525,7 +537,7 @@ regUsageOfInstr platform instr VPBROADCAST sFmt vFmt src dst -> mkRU (use_R sFmt src []) [mk vFmt dst] VEXTRACT fmt _off src dst -> usageRW fmt (OpReg src) dst INSERTPS fmt (ImmInt off) src dst - -> mkRU ((use_R fmt src []) ++ [mk fmt dst | not doesNotReadDst]) [mk fmt dst] + -> mkRU (use_R fmt src [mk fmt dst | not doesNotReadDst]) [mk fmt dst] where -- Compute whether the instruction reads the destination register or not. -- Immediate bits: ss_dd_zzzz s = src pos, d = dst pos, z = zeroed components. @@ -534,7 +546,7 @@ regUsageOfInstr platform instr -- are being zeroed. where pos = ( off `shiftR` 4 ) .&. 
0b11 INSERTPS fmt _off src dst - -> mkRU ((use_R fmt src []) ++ [mk fmt dst]) [mk fmt dst] + -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] VINSERTPS fmt _imm src2 src1 dst -> mkRU (use_R fmt src2 [mk fmt src1]) [mk fmt dst] PINSR sFmt vFmt _off src dst @@ -550,26 +562,30 @@ regUsageOfInstr platform instr VMOVDQU fmt src dst -> usageRW fmt src dst VMOV_MERGE fmt src2 src1 dst -> mkRU [mk fmt src1, mk fmt src2] [mk fmt dst] - PXOR fmt (OpReg src) dst - | src == dst + PXOR fmt src dst + | OpReg src_reg <- src + , src_reg == dst -> mkRU [] [mk fmt dst] | otherwise - -> mkRU [mk fmt src, mk fmt dst] [mk fmt dst] + -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] VPXOR fmt s1 s2 dst - | s1 == s2, s1 == dst + | OpReg s1_reg <- s1 + , s1_reg == s2, s1_reg == dst -> mkRU [] [mk fmt dst] | otherwise - -> mkRU [mk fmt s1, mk fmt s2] [mk fmt dst] + -> mkRU (use_R fmt s1 [mk fmt s2]) [mk fmt dst] PAND fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] PANDN fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] + VPAND fmt s1 s2 dst -> mkRU (use_R fmt s1 [mk fmt s2]) [mk fmt dst] POR fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] + VPOR fmt s1 s2 dst -> mkRU (use_R fmt s1 [mk fmt s2]) [mk fmt dst] - VADD fmt s1 s2 dst -> mkRU ((use_R fmt s1 []) ++ [mk fmt s2]) [mk fmt dst] - VSUB fmt s1 s2 dst -> mkRU ((use_R fmt s1 []) ++ [mk fmt s2]) [mk fmt dst] - VMUL fmt s1 s2 dst -> mkRU ((use_R fmt s1 []) ++ [mk fmt s2]) [mk fmt dst] - VDIV fmt s1 s2 dst -> mkRU ((use_R fmt s1 []) ++ [mk fmt s2]) [mk fmt dst] + VADD fmt s1 s2 dst -> mkRU (use_R fmt s1 [mk fmt s2]) [mk fmt dst] + VSUB fmt s1 s2 dst -> mkRU (use_R fmt s1 [mk fmt s2]) [mk fmt dst] + VMUL fmt s1 s2 dst -> mkRU (use_R fmt s1 [mk fmt s2]) [mk fmt dst] + VDIV fmt s1 s2 dst -> mkRU (use_R fmt s1 [mk fmt s2]) [mk fmt dst] PADD fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] PSUB fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] PMULL fmt src dst -> mkRU (use_R fmt 
src [mk fmt dst]) [mk fmt dst] @@ -651,7 +667,6 @@ regUsageOfInstr platform instr -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] VMINMAX _ _ fmt src1 src2 dst -> mkRU (use_R fmt src1 [mk fmt src2]) [mk fmt dst] - _other -> panic "regUsage: unrecognised instr" where -- # Definitions @@ -779,6 +794,8 @@ patchRegsOfInstr platform instr env AND fmt src dst -> patch2 (AND fmt) src dst OR fmt src dst -> patch2 (OR fmt) src dst XOR fmt src dst -> patch2 (XOR fmt) src dst + VAND fmt src1 src2 dst -> VAND fmt (patchOp src1) (env src2) (env dst) + VOR fmt src1 src2 dst -> VOR fmt (patchOp src1) (env src2) (env dst) VXOR fmt src1 src2 dst -> VXOR fmt (patchOp src1) (env src2) (env dst) NOT fmt op -> patch1 (NOT fmt) op BSWAP fmt reg -> BSWAP fmt (env reg) @@ -868,11 +885,13 @@ patchRegsOfInstr platform instr env VMOVDQU fmt src dst -> VMOVDQU fmt (patchOp src) (patchOp dst) VMOV_MERGE fmt src2 src1 dst -> VMOV_MERGE fmt (env src2) (env src1) (env dst) - PXOR fmt src dst -> PXOR fmt (patchOp src) (env dst) - VPXOR fmt s1 s2 dst -> VPXOR fmt (env s1) (env s2) (env dst) - PAND fmt src dst -> PAND fmt (patchOp src) (env dst) + PXOR fmt src dst -> PXOR fmt (patchOp src) (env dst) + VPXOR fmt s1 s2 dst -> VPXOR fmt (patchOp s1) (env s2) (env dst) + PAND fmt src dst -> PAND fmt (patchOp src) (env dst) + VPAND fmt s1 s2 dst -> VPAND fmt (patchOp s1) (env s2) (env dst) PANDN fmt src dst -> PANDN fmt (patchOp src) (env dst) - POR fmt src dst -> POR fmt (patchOp src) (env dst) + POR fmt src dst -> POR fmt (patchOp src) (env dst) + VPOR fmt s1 s2 dst -> VPOR fmt (patchOp s1) (env s2) (env dst) VADD fmt s1 s2 dst -> VADD fmt (patchOp s1) (env s2) (env dst) VSUB fmt s1 s2 dst -> VSUB fmt (patchOp s1) (env s2) (env dst) ===================================== compiler/GHC/CmmToAsm/X86/Ppr.hs ===================================== @@ -737,9 +737,15 @@ pprInstr platform i = case i of AND format src dst -> pprFormatOpOp (text "and") format src dst + VAND format src1 src2 dst + -> 
pprFormatOpRegReg (text "vand") format src1 src2 dst + OR format src dst -> pprFormatOpOp (text "or") format src dst + VOR format src1 src2 dst + -> pprFormatOpRegReg (text "vor") format src1 src2 dst + XOR FF32 src dst -> pprOpOp (text "xorps") FF32 src dst @@ -753,7 +759,7 @@ pprInstr platform i = case i of -> pprFormatOpOp (text "xor") format src dst VXOR fmt src1 src2 dst - -> pprVxor fmt src1 src2 dst + -> pprVXor fmt src1 src2 dst POPCNT format src dst -> pprOpOp (text "popcnt") format src (OpReg dst) @@ -1036,13 +1042,17 @@ pprInstr platform i = case i of PXOR format src dst -> pprPXor (text "pxor") format src dst VPXOR format s1 s2 dst - -> pprXor (text "vpxor") format s1 s2 dst + -> pprVXor format s1 s2 dst PAND format src dst -> pprOpReg (text "pand") format src dst + VPAND format s1 s2 dst + -> pprOpRegReg (text "vpand") format s1 s2 dst PANDN format src dst -> pprOpReg (text "pandn") format src dst POR format src dst -> pprOpReg (text "por") format src dst + VPOR format s1 s2 dst + -> pprOpRegReg (text "vpor") format s1 s2 dst VEXTRACT format offset from to -> pprFormatImmRegOp (text "vextract") format offset from to INSERTPS format offset addr dst @@ -1299,6 +1309,16 @@ pprInstr platform i = case i of pprReg platform (archWordFormat (target32Bit platform)) reg ] + pprOpRegReg :: Line doc -> Format -> Operand -> Reg -> Reg -> doc + pprOpRegReg name format op1 reg2 reg3 + = line $ hcat [ + pprMnemonic_ name, + pprOperand platform format op1, + comma, + pprReg platform (archWordFormat (target32Bit platform)) reg2, + comma, + pprReg platform (archWordFormat (target32Bit platform)) reg3 + ] pprFormatOpReg :: Line doc -> Format -> Operand -> Reg -> doc pprFormatOpReg name format op1 reg2 @@ -1397,17 +1417,6 @@ pprInstr platform i = case i of pprReg platform vectorFormat dst ] - pprXor :: Line doc -> Format -> Reg -> Reg -> Reg -> doc - pprXor name format reg1 reg2 reg3 - = line $ hcat [ - pprGenMnemonic name format, - pprReg platform format reg1, - comma, - 
pprReg platform format reg2, - comma, - pprReg platform format reg3 - ] - pprPXor :: Line doc -> Format -> Operand -> Reg -> doc pprPXor name format src dst = line $ hcat [ @@ -1417,8 +1426,8 @@ pprInstr platform i = case i of pprReg platform format dst ] - pprVxor :: Format -> Operand -> Reg -> Reg -> doc - pprVxor fmt src1 src2 dst + pprVXor :: Format -> Operand -> Reg -> Reg -> doc + pprVXor fmt src1 src2 dst = line $ hcat [ pprGenMnemonic mem fmt, pprOperand platform fmt src1, @@ -1433,7 +1442,8 @@ pprInstr platform i = case i of FF64 -> text "vxorpd" VecFormat _ FmtFloat -> text "vxorps" VecFormat _ FmtDouble -> text "vxorpd" - _ -> pprPanic "GHC.CmmToAsm.X86.Ppr.pprVxor: element type must be Float or Double" + VecFormat _ _ints -> text "vpxor" + _ -> pprPanic "GHC.CmmToAsm.X86.Ppr.pprVXor: unexpected format" (ppr fmt) pprInsert :: Line doc -> Format -> Imm -> Operand -> Reg -> doc ===================================== compiler/GHC/CmmToC.hs ===================================== @@ -873,6 +873,31 @@ pprMachOp_for_C platform mop = case mop of (text "MO_V_Mul") (panic $ "PprC.pprMachOp_for_C: MO_V_Mul" ++ "unsupported by the unregisterised backend") + MO_V_And {} -> pprTrace "offending mop:" + (text "MO_V_And") + (panic $ "PprC.pprMachOp_for_C: MO_V_And" + ++ "unsupported by the unregisterised backend") + MO_V_Or {} -> pprTrace "offending mop:" + (text "MO_V_Or") + (panic $ "PprC.pprMachOp_for_C: MO_V_Or" + ++ "unsupported by the unregisterised backend") + MO_V_Xor {} -> pprTrace "offending mop:" + (text "MO_V_Xor") + (panic $ "PprC.pprMachOp_for_C: MO_V_Xor" + ++ "unsupported by the unregisterised backend") + MO_VF_And {} -> pprTrace "offending mop:" + (text "MO_VF_And") + (panic $ "PprC.pprMachOp_for_C: MO_VF_And" + ++ "unsupported by the unregisterised backend") + MO_VF_Or {} -> pprTrace "offending mop:" + (text "MO_VF_Or") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Or" + ++ "unsupported by the unregisterised backend") + MO_VF_Xor {} -> pprTrace "offending mop:" 
+ (text "MO_VF_Xor") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Xor" + ++ "unsupported by the unregisterised backend") + MO_VS_Neg {} -> pprTrace "offending mop:" (text "MO_VS_Neg") (panic $ "PprC.pprMachOp_for_C: MO_VS_Neg" ===================================== compiler/GHC/CmmToLlvm/CodeGen.hs ===================================== @@ -1593,6 +1593,14 @@ genMachOp _ op [x] = case op of MO_VF_Min _ _ -> panicOp MO_VF_Max _ _ -> panicOp + MO_V_And {} -> panicOp + MO_V_Or {} -> panicOp + MO_V_Xor {} -> panicOp + + MO_VF_And {} -> panicOp + MO_VF_Or {} -> panicOp + MO_VF_Xor {} -> panicOp + where negate ty v2 negOp = do (vx, stmts, top) <- exprToVar x @@ -1754,11 +1762,19 @@ genMachOp_slow opt op [x, y] = case op of MO_V_Sub l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Sub MO_V_Mul l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Mul + MO_V_And l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_And + MO_V_Or l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Or + MO_V_Xor l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Xor + MO_VF_Add l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd MO_VF_Sub l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub MO_VF_Mul l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FMul MO_VF_Quot l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FDiv + MO_VF_And l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_And + MO_VF_Or l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Or + MO_VF_Xor l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Xor + MO_Not _ -> panicOp MO_S_Neg _ -> panicOp MO_F_Neg _ -> panicOp ===================================== compiler/GHC/StgToCmm/Prim.hs ===================================== @@ -1558,6 +1558,16 @@ emitPrimOp cfg primop = | not allowIntWord64X2MinMax -> opCallish MO_W64X2_Max (VecMaxOp WordVec n w) -> opTranslate (MO_VU_Max n w) + -- Vector bitwise instructions + -- On 
floats, ANDPS-like + (VecAndOp FloatVec n w) -> opTranslate (MO_VF_And n w) + (VecOrOp FloatVec n w) -> opTranslate (MO_VF_Or n w) + (VecXorOp FloatVec n w) -> opTranslate (MO_VF_Xor n w) + -- On integer, PAND-like + (VecAndOp _ n w) -> opTranslate (MO_V_And n w) + (VecOrOp _ n w) -> opTranslate (MO_V_Or n w) + (VecXorOp _ n w) -> opTranslate (MO_V_Xor n w) + -- Vector FMA instructions VecFMAdd _ n w -> fmaOp FMAdd n w VecFMSub _ n w -> fmaOp FMSub n w ===================================== compiler/GHC/StgToJS/Prim.hs ===================================== @@ -1211,6 +1211,9 @@ genPrim prof bound ty op = case op of VecShuffleOp _ _ _ -> unhandledPrimop op VecMinOp {} -> unhandledPrimop op VecMaxOp {} -> unhandledPrimop op + VecAndOp {} -> unhandledPrimop op + VecOrOp {} -> unhandledPrimop op + VecXorOp {} -> unhandledPrimop op PrefetchByteArrayOp3 -> noOp PrefetchMutableByteArrayOp3 -> noOp ===================================== docs/users_guide/9.16.1-notes.rst ===================================== @@ -85,6 +85,8 @@ Cmm ``ghc-experimental`` library ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- New SIMD primops for bitwise logical operations on 128-bit wide vectors. 
+ ``template-haskell`` library ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ===================================== libraries/base/src/GHC/Base.hs ===================================== @@ -270,6 +270,97 @@ import GHC.Prim hiding , minWord8X16# , minWord8X32# , minWord8X64# + -- Don't re-export vector logical primops + , andDoubleX2# + , andDoubleX4# + , andDoubleX8# + , andFloatX16# + , andFloatX4# + , andFloatX8# + , andInt16X16# + , andInt16X32# + , andInt16X8# + , andInt32X16# + , andInt32X4# + , andInt32X8# + , andInt64X2# + , andInt64X4# + , andInt64X8# + , andInt8X16# + , andInt8X32# + , andInt8X64# + , andWord16X16# + , andWord16X32# + , andWord16X8# + , andWord32X16# + , andWord32X4# + , andWord32X8# + , andWord64X2# + , andWord64X4# + , andWord64X8# + , andWord8X16# + , andWord8X32# + , andWord8X64# + , orDoubleX2# + , orDoubleX4# + , orDoubleX8# + , orFloatX16# + , orFloatX4# + , orFloatX8# + , orInt16X16# + , orInt16X32# + , orInt16X8# + , orInt32X16# + , orInt32X4# + , orInt32X8# + , orInt64X2# + , orInt64X4# + , orInt64X8# + , orInt8X16# + , orInt8X32# + , orInt8X64# + , orWord16X16# + , orWord16X32# + , orWord16X8# + , orWord32X16# + , orWord32X4# + , orWord32X8# + , orWord64X2# + , orWord64X4# + , orWord64X8# + , orWord8X16# + , orWord8X32# + , orWord8X64# + , xorDoubleX2# + , xorDoubleX4# + , xorDoubleX8# + , xorFloatX16# + , xorFloatX4# + , xorFloatX8# + , xorInt16X16# + , xorInt16X32# + , xorInt16X8# + , xorInt32X16# + , xorInt32X4# + , xorInt32X8# + , xorInt64X2# + , xorInt64X4# + , xorInt64X8# + , xorInt8X16# + , xorInt8X32# + , xorInt8X64# + , xorWord16X16# + , xorWord16X32# + , xorWord16X8# + , xorWord32X16# + , xorWord32X4# + , xorWord32X8# + , xorWord64X2# + , xorWord64X4# + , xorWord64X8# + , xorWord8X16# + , xorWord8X32# + , xorWord8X64# ) import GHC.Prim.Ext ===================================== libraries/base/src/GHC/Exts.hs ===================================== @@ -245,6 +245,97 @@ import GHC.Prim hiding , minWord8X16# , minWord8X32# , minWord8X64# + -- 
Don't re-export vector logical primops + , andDoubleX2# + , andDoubleX4# + , andDoubleX8# + , andFloatX16# + , andFloatX4# + , andFloatX8# + , andInt16X16# + , andInt16X32# + , andInt16X8# + , andInt32X16# + , andInt32X4# + , andInt32X8# + , andInt64X2# + , andInt64X4# + , andInt64X8# + , andInt8X16# + , andInt8X32# + , andInt8X64# + , andWord16X16# + , andWord16X32# + , andWord16X8# + , andWord32X16# + , andWord32X4# + , andWord32X8# + , andWord64X2# + , andWord64X4# + , andWord64X8# + , andWord8X16# + , andWord8X32# + , andWord8X64# + , orDoubleX2# + , orDoubleX4# + , orDoubleX8# + , orFloatX16# + , orFloatX4# + , orFloatX8# + , orInt16X16# + , orInt16X32# + , orInt16X8# + , orInt32X16# + , orInt32X4# + , orInt32X8# + , orInt64X2# + , orInt64X4# + , orInt64X8# + , orInt8X16# + , orInt8X32# + , orInt8X64# + , orWord16X16# + , orWord16X32# + , orWord16X8# + , orWord32X16# + , orWord32X4# + , orWord32X8# + , orWord64X2# + , orWord64X4# + , orWord64X8# + , orWord8X16# + , orWord8X32# + , orWord8X64# + , xorDoubleX2# + , xorDoubleX4# + , xorDoubleX8# + , xorFloatX16# + , xorFloatX4# + , xorFloatX8# + , xorInt16X16# + , xorInt16X32# + , xorInt16X8# + , xorInt32X16# + , xorInt32X4# + , xorInt32X8# + , xorInt64X2# + , xorInt64X4# + , xorInt64X8# + , xorInt8X16# + , xorInt8X32# + , xorInt8X64# + , xorWord16X16# + , xorWord16X32# + , xorWord16X8# + , xorWord32X16# + , xorWord32X4# + , xorWord32X8# + , xorWord64X2# + , xorWord64X4# + , xorWord64X8# + , xorWord8X16# + , xorWord8X32# + , xorWord8X64# ) import GHC.Prim.Ext ===================================== libraries/ghc-experimental/CHANGELOG.md ===================================== @@ -1,5 +1,10 @@ # Revision history for ghc-experimental +## 9.1601.0 + +- New and/or/xor SIMD primops for bitwise logical operations, such as andDoubleX4#, orWord32X4#, xorInt8X16#, etc. + These are supported by the LLVM backend and by the X86_64 NCG backend (for the latter, only for 128-wide vectors). 
+ ## 0.1.0.0 -- YYYY-mm-dd * First version. Released on an unsuspecting world. ===================================== libraries/ghc-prim/changelog.md ===================================== @@ -1,3 +1,10 @@ +## 0.14.0 + +- Shipped with GHC 9.16.1 + +- New and/or/xor SIMD primops for bitwise logical operations, such as andDoubleX4#, orWord32X4#, xorInt8X16#, etc. + These are supported by the LLVM backend and by the X86_64 NCG backend (for the latter, only for 128-wide vectors). + ## 0.13.1 - Shipped with GHC 9.14.1 ===================================== testsuite/tests/interface-stability/ghc-experimental-exports.stdout ===================================== @@ -4747,10 +4747,40 @@ module GHC.PrimOps where addrToAny# :: forall {l :: Levity} (a :: TYPE (BoxedRep l)). Addr# -> (# a #) and# :: Word# -> Word# -> Word# and64# :: Word64# -> Word64# -> Word64# + andDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# + andDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# + andDoubleX8# :: DoubleX8# -> DoubleX8# -> DoubleX8# + andFloatX16# :: FloatX16# -> FloatX16# -> FloatX16# + andFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# + andFloatX8# :: FloatX8# -> FloatX8# -> FloatX8# andI# :: Int# -> Int# -> Int# + andInt16X16# :: Int16X16# -> Int16X16# -> Int16X16# + andInt16X32# :: Int16X32# -> Int16X32# -> Int16X32# + andInt16X8# :: Int16X8# -> Int16X8# -> Int16X8# + andInt32X16# :: Int32X16# -> Int32X16# -> Int32X16# + andInt32X4# :: Int32X4# -> Int32X4# -> Int32X4# + andInt32X8# :: Int32X8# -> Int32X8# -> Int32X8# + andInt64X2# :: Int64X2# -> Int64X2# -> Int64X2# + andInt64X4# :: Int64X4# -> Int64X4# -> Int64X4# + andInt64X8# :: Int64X8# -> Int64X8# -> Int64X8# + andInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# + andInt8X32# :: Int8X32# -> Int8X32# -> Int8X32# + andInt8X64# :: Int8X64# -> Int8X64# -> Int8X64# andWord16# :: Word16# -> Word16# -> Word16# + andWord16X16# :: Word16X16# -> Word16X16# -> Word16X16# + andWord16X32# :: Word16X32# -> Word16X32# -> Word16X32# + andWord16X8# :: 
Word16X8# -> Word16X8# -> Word16X8# andWord32# :: Word32# -> Word32# -> Word32# + andWord32X16# :: Word32X16# -> Word32X16# -> Word32X16# + andWord32X4# :: Word32X4# -> Word32X4# -> Word32X4# + andWord32X8# :: Word32X8# -> Word32X8# -> Word32X8# + andWord64X2# :: Word64X2# -> Word64X2# -> Word64X2# + andWord64X4# :: Word64X4# -> Word64X4# -> Word64X4# + andWord64X8# :: Word64X8# -> Word64X8# -> Word64X8# andWord8# :: Word8# -> Word8# -> Word8# + andWord8X16# :: Word8X16# -> Word8X16# -> Word8X16# + andWord8X32# :: Word8X32# -> Word8X32# -> Word8X32# + andWord8X64# :: Word8X64# -> Word8X64# -> Word8X64# annotateStack# :: forall {q :: RuntimeRep} b d (a :: TYPE q). b -> (State# d -> (# State# d, a #)) -> State# d -> (# State# d, a #) anyToAddr# :: forall a. a -> State# RealWorld -> (# State# RealWorld, Addr# #) asinDouble# :: Double# -> Double# @@ -5458,10 +5488,40 @@ module GHC.PrimOps where oneShot :: forall {q :: RuntimeRep} {r :: RuntimeRep} (a :: TYPE q) (b :: TYPE r). (a -> b) -> a -> b or# :: Word# -> Word# -> Word# or64# :: Word64# -> Word64# -> Word64# + orDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# + orDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# + orDoubleX8# :: DoubleX8# -> DoubleX8# -> DoubleX8# + orFloatX16# :: FloatX16# -> FloatX16# -> FloatX16# + orFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# + orFloatX8# :: FloatX8# -> FloatX8# -> FloatX8# orI# :: Int# -> Int# -> Int# + orInt16X16# :: Int16X16# -> Int16X16# -> Int16X16# + orInt16X32# :: Int16X32# -> Int16X32# -> Int16X32# + orInt16X8# :: Int16X8# -> Int16X8# -> Int16X8# + orInt32X16# :: Int32X16# -> Int32X16# -> Int32X16# + orInt32X4# :: Int32X4# -> Int32X4# -> Int32X4# + orInt32X8# :: Int32X8# -> Int32X8# -> Int32X8# + orInt64X2# :: Int64X2# -> Int64X2# -> Int64X2# + orInt64X4# :: Int64X4# -> Int64X4# -> Int64X4# + orInt64X8# :: Int64X8# -> Int64X8# -> Int64X8# + orInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# + orInt8X32# :: Int8X32# -> Int8X32# -> Int8X32# + orInt8X64# :: Int8X64# -> 
Int8X64# -> Int8X64# orWord16# :: Word16# -> Word16# -> Word16# + orWord16X16# :: Word16X16# -> Word16X16# -> Word16X16# + orWord16X32# :: Word16X32# -> Word16X32# -> Word16X32# + orWord16X8# :: Word16X8# -> Word16X8# -> Word16X8# orWord32# :: Word32# -> Word32# -> Word32# + orWord32X16# :: Word32X16# -> Word32X16# -> Word32X16# + orWord32X4# :: Word32X4# -> Word32X4# -> Word32X4# + orWord32X8# :: Word32X8# -> Word32X8# -> Word32X8# + orWord64X2# :: Word64X2# -> Word64X2# -> Word64X2# + orWord64X4# :: Word64X4# -> Word64X4# -> Word64X4# + orWord64X8# :: Word64X8# -> Word64X8# -> Word64X8# orWord8# :: Word8# -> Word8# -> Word8# + orWord8X16# :: Word8X16# -> Word8X16# -> Word8X16# + orWord8X32# :: Word8X32# -> Word8X32# -> Word8X32# + orWord8X64# :: Word8X64# -> Word8X64# -> Word8X64# ord# :: Char# -> Int# packDoubleX2# :: (# Double#, Double# #) -> DoubleX2# packDoubleX4# :: (# Double#, Double#, Double#, Double# #) -> DoubleX4# @@ -6271,10 +6331,40 @@ module GHC.PrimOps where writeWordOffAddr# :: forall d. 
Addr# -> Int# -> Word# -> State# d -> State# d xor# :: Word# -> Word# -> Word# xor64# :: Word64# -> Word64# -> Word64# + xorDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# + xorDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# + xorDoubleX8# :: DoubleX8# -> DoubleX8# -> DoubleX8# + xorFloatX16# :: FloatX16# -> FloatX16# -> FloatX16# + xorFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# + xorFloatX8# :: FloatX8# -> FloatX8# -> FloatX8# xorI# :: Int# -> Int# -> Int# + xorInt16X16# :: Int16X16# -> Int16X16# -> Int16X16# + xorInt16X32# :: Int16X32# -> Int16X32# -> Int16X32# + xorInt16X8# :: Int16X8# -> Int16X8# -> Int16X8# + xorInt32X16# :: Int32X16# -> Int32X16# -> Int32X16# + xorInt32X4# :: Int32X4# -> Int32X4# -> Int32X4# + xorInt32X8# :: Int32X8# -> Int32X8# -> Int32X8# + xorInt64X2# :: Int64X2# -> Int64X2# -> Int64X2# + xorInt64X4# :: Int64X4# -> Int64X4# -> Int64X4# + xorInt64X8# :: Int64X8# -> Int64X8# -> Int64X8# + xorInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# + xorInt8X32# :: Int8X32# -> Int8X32# -> Int8X32# + xorInt8X64# :: Int8X64# -> Int8X64# -> Int8X64# xorWord16# :: Word16# -> Word16# -> Word16# + xorWord16X16# :: Word16X16# -> Word16X16# -> Word16X16# + xorWord16X32# :: Word16X32# -> Word16X32# -> Word16X32# + xorWord16X8# :: Word16X8# -> Word16X8# -> Word16X8# xorWord32# :: Word32# -> Word32# -> Word32# + xorWord32X16# :: Word32X16# -> Word32X16# -> Word32X16# + xorWord32X4# :: Word32X4# -> Word32X4# -> Word32X4# + xorWord32X8# :: Word32X8# -> Word32X8# -> Word32X8# + xorWord64X2# :: Word64X2# -> Word64X2# -> Word64X2# + xorWord64X4# :: Word64X4# -> Word64X4# -> Word64X4# + xorWord64X8# :: Word64X8# -> Word64X8# -> Word64X8# xorWord8# :: Word8# -> Word8# -> Word8# + xorWord8X16# :: Word8X16# -> Word8X16# -> Word8X16# + xorWord8X32# :: Word8X32# -> Word8X32# -> Word8X32# + xorWord8X64# :: Word8X64# -> Word8X64# -> Word8X64# yield# :: State# RealWorld -> State# RealWorld type (~) :: forall k. 
k -> k -> Constraint class (a ~ b) => (~) a b ===================================== testsuite/tests/interface-stability/ghc-experimental-exports.stdout-mingw32 ===================================== @@ -4747,10 +4747,40 @@ module GHC.PrimOps where addrToAny# :: forall {l :: Levity} (a :: TYPE (BoxedRep l)). Addr# -> (# a #) and# :: Word# -> Word# -> Word# and64# :: Word64# -> Word64# -> Word64# + andDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# + andDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# + andDoubleX8# :: DoubleX8# -> DoubleX8# -> DoubleX8# + andFloatX16# :: FloatX16# -> FloatX16# -> FloatX16# + andFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# + andFloatX8# :: FloatX8# -> FloatX8# -> FloatX8# andI# :: Int# -> Int# -> Int# + andInt16X16# :: Int16X16# -> Int16X16# -> Int16X16# + andInt16X32# :: Int16X32# -> Int16X32# -> Int16X32# + andInt16X8# :: Int16X8# -> Int16X8# -> Int16X8# + andInt32X16# :: Int32X16# -> Int32X16# -> Int32X16# + andInt32X4# :: Int32X4# -> Int32X4# -> Int32X4# + andInt32X8# :: Int32X8# -> Int32X8# -> Int32X8# + andInt64X2# :: Int64X2# -> Int64X2# -> Int64X2# + andInt64X4# :: Int64X4# -> Int64X4# -> Int64X4# + andInt64X8# :: Int64X8# -> Int64X8# -> Int64X8# + andInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# + andInt8X32# :: Int8X32# -> Int8X32# -> Int8X32# + andInt8X64# :: Int8X64# -> Int8X64# -> Int8X64# andWord16# :: Word16# -> Word16# -> Word16# + andWord16X16# :: Word16X16# -> Word16X16# -> Word16X16# + andWord16X32# :: Word16X32# -> Word16X32# -> Word16X32# + andWord16X8# :: Word16X8# -> Word16X8# -> Word16X8# andWord32# :: Word32# -> Word32# -> Word32# + andWord32X16# :: Word32X16# -> Word32X16# -> Word32X16# + andWord32X4# :: Word32X4# -> Word32X4# -> Word32X4# + andWord32X8# :: Word32X8# -> Word32X8# -> Word32X8# + andWord64X2# :: Word64X2# -> Word64X2# -> Word64X2# + andWord64X4# :: Word64X4# -> Word64X4# -> Word64X4# + andWord64X8# :: Word64X8# -> Word64X8# -> Word64X8# andWord8# :: Word8# -> Word8# -> Word8# + andWord8X16# :: 
Word8X16# -> Word8X16# -> Word8X16# + andWord8X32# :: Word8X32# -> Word8X32# -> Word8X32# + andWord8X64# :: Word8X64# -> Word8X64# -> Word8X64# annotateStack# :: forall {q :: RuntimeRep} b d (a :: TYPE q). b -> (State# d -> (# State# d, a #)) -> State# d -> (# State# d, a #) anyToAddr# :: forall a. a -> State# RealWorld -> (# State# RealWorld, Addr# #) asinDouble# :: Double# -> Double# @@ -5461,10 +5491,40 @@ module GHC.PrimOps where oneShot :: forall {q :: RuntimeRep} {r :: RuntimeRep} (a :: TYPE q) (b :: TYPE r). (a -> b) -> a -> b or# :: Word# -> Word# -> Word# or64# :: Word64# -> Word64# -> Word64# + orDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# + orDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# + orDoubleX8# :: DoubleX8# -> DoubleX8# -> DoubleX8# + orFloatX16# :: FloatX16# -> FloatX16# -> FloatX16# + orFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# + orFloatX8# :: FloatX8# -> FloatX8# -> FloatX8# orI# :: Int# -> Int# -> Int# + orInt16X16# :: Int16X16# -> Int16X16# -> Int16X16# + orInt16X32# :: Int16X32# -> Int16X32# -> Int16X32# + orInt16X8# :: Int16X8# -> Int16X8# -> Int16X8# + orInt32X16# :: Int32X16# -> Int32X16# -> Int32X16# + orInt32X4# :: Int32X4# -> Int32X4# -> Int32X4# + orInt32X8# :: Int32X8# -> Int32X8# -> Int32X8# + orInt64X2# :: Int64X2# -> Int64X2# -> Int64X2# + orInt64X4# :: Int64X4# -> Int64X4# -> Int64X4# + orInt64X8# :: Int64X8# -> Int64X8# -> Int64X8# + orInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# + orInt8X32# :: Int8X32# -> Int8X32# -> Int8X32# + orInt8X64# :: Int8X64# -> Int8X64# -> Int8X64# orWord16# :: Word16# -> Word16# -> Word16# + orWord16X16# :: Word16X16# -> Word16X16# -> Word16X16# + orWord16X32# :: Word16X32# -> Word16X32# -> Word16X32# + orWord16X8# :: Word16X8# -> Word16X8# -> Word16X8# orWord32# :: Word32# -> Word32# -> Word32# + orWord32X16# :: Word32X16# -> Word32X16# -> Word32X16# + orWord32X4# :: Word32X4# -> Word32X4# -> Word32X4# + orWord32X8# :: Word32X8# -> Word32X8# -> Word32X8# + orWord64X2# :: Word64X2# -> 
Word64X2# -> Word64X2# + orWord64X4# :: Word64X4# -> Word64X4# -> Word64X4# + orWord64X8# :: Word64X8# -> Word64X8# -> Word64X8# orWord8# :: Word8# -> Word8# -> Word8# + orWord8X16# :: Word8X16# -> Word8X16# -> Word8X16# + orWord8X32# :: Word8X32# -> Word8X32# -> Word8X32# + orWord8X64# :: Word8X64# -> Word8X64# -> Word8X64# ord# :: Char# -> Int# packDoubleX2# :: (# Double#, Double# #) -> DoubleX2# packDoubleX4# :: (# Double#, Double#, Double#, Double# #) -> DoubleX4# @@ -6274,10 +6334,40 @@ module GHC.PrimOps where writeWordOffAddr# :: forall d. Addr# -> Int# -> Word# -> State# d -> State# d xor# :: Word# -> Word# -> Word# xor64# :: Word64# -> Word64# -> Word64# + xorDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# + xorDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# + xorDoubleX8# :: DoubleX8# -> DoubleX8# -> DoubleX8# + xorFloatX16# :: FloatX16# -> FloatX16# -> FloatX16# + xorFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# + xorFloatX8# :: FloatX8# -> FloatX8# -> FloatX8# xorI# :: Int# -> Int# -> Int# + xorInt16X16# :: Int16X16# -> Int16X16# -> Int16X16# + xorInt16X32# :: Int16X32# -> Int16X32# -> Int16X32# + xorInt16X8# :: Int16X8# -> Int16X8# -> Int16X8# + xorInt32X16# :: Int32X16# -> Int32X16# -> Int32X16# + xorInt32X4# :: Int32X4# -> Int32X4# -> Int32X4# + xorInt32X8# :: Int32X8# -> Int32X8# -> Int32X8# + xorInt64X2# :: Int64X2# -> Int64X2# -> Int64X2# + xorInt64X4# :: Int64X4# -> Int64X4# -> Int64X4# + xorInt64X8# :: Int64X8# -> Int64X8# -> Int64X8# + xorInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# + xorInt8X32# :: Int8X32# -> Int8X32# -> Int8X32# + xorInt8X64# :: Int8X64# -> Int8X64# -> Int8X64# xorWord16# :: Word16# -> Word16# -> Word16# + xorWord16X16# :: Word16X16# -> Word16X16# -> Word16X16# + xorWord16X32# :: Word16X32# -> Word16X32# -> Word16X32# + xorWord16X8# :: Word16X8# -> Word16X8# -> Word16X8# xorWord32# :: Word32# -> Word32# -> Word32# + xorWord32X16# :: Word32X16# -> Word32X16# -> Word32X16# + xorWord32X4# :: Word32X4# -> Word32X4# -> Word32X4# 
+ xorWord32X8# :: Word32X8# -> Word32X8# -> Word32X8# + xorWord64X2# :: Word64X2# -> Word64X2# -> Word64X2# + xorWord64X4# :: Word64X4# -> Word64X4# -> Word64X4# + xorWord64X8# :: Word64X8# -> Word64X8# -> Word64X8# xorWord8# :: Word8# -> Word8# -> Word8# + xorWord8X16# :: Word8X16# -> Word8X16# -> Word8X16# + xorWord8X32# :: Word8X32# -> Word8X32# -> Word8X32# + xorWord8X64# :: Word8X64# -> Word8X64# -> Word8X64# yield# :: State# RealWorld -> State# RealWorld type (~) :: forall k. k -> k -> Constraint class (a ~ b) => (~) a b ===================================== testsuite/tests/interface-stability/ghc-prim-exports.stdout ===================================== @@ -1423,10 +1423,40 @@ module GHC.Prim where addrToAny# :: forall {l :: GHC.Internal.Types.Levity} (a :: TYPE (GHC.Internal.Types.BoxedRep l)). Addr# -> (# a #) and# :: Word# -> Word# -> Word# and64# :: Word64# -> Word64# -> Word64# + andDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# + andDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# + andDoubleX8# :: DoubleX8# -> DoubleX8# -> DoubleX8# + andFloatX16# :: FloatX16# -> FloatX16# -> FloatX16# + andFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# + andFloatX8# :: FloatX8# -> FloatX8# -> FloatX8# andI# :: Int# -> Int# -> Int# + andInt16X16# :: Int16X16# -> Int16X16# -> Int16X16# + andInt16X32# :: Int16X32# -> Int16X32# -> Int16X32# + andInt16X8# :: Int16X8# -> Int16X8# -> Int16X8# + andInt32X16# :: Int32X16# -> Int32X16# -> Int32X16# + andInt32X4# :: Int32X4# -> Int32X4# -> Int32X4# + andInt32X8# :: Int32X8# -> Int32X8# -> Int32X8# + andInt64X2# :: Int64X2# -> Int64X2# -> Int64X2# + andInt64X4# :: Int64X4# -> Int64X4# -> Int64X4# + andInt64X8# :: Int64X8# -> Int64X8# -> Int64X8# + andInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# + andInt8X32# :: Int8X32# -> Int8X32# -> Int8X32# + andInt8X64# :: Int8X64# -> Int8X64# -> Int8X64# andWord16# :: Word16# -> Word16# -> Word16# + andWord16X16# :: Word16X16# -> Word16X16# -> Word16X16# + andWord16X32# :: Word16X32# -> 
Word16X32# -> Word16X32# + andWord16X8# :: Word16X8# -> Word16X8# -> Word16X8# andWord32# :: Word32# -> Word32# -> Word32# + andWord32X16# :: Word32X16# -> Word32X16# -> Word32X16# + andWord32X4# :: Word32X4# -> Word32X4# -> Word32X4# + andWord32X8# :: Word32X8# -> Word32X8# -> Word32X8# + andWord64X2# :: Word64X2# -> Word64X2# -> Word64X2# + andWord64X4# :: Word64X4# -> Word64X4# -> Word64X4# + andWord64X8# :: Word64X8# -> Word64X8# -> Word64X8# andWord8# :: Word8# -> Word8# -> Word8# + andWord8X16# :: Word8X16# -> Word8X16# -> Word8X16# + andWord8X32# :: Word8X32# -> Word8X32# -> Word8X32# + andWord8X64# :: Word8X64# -> Word8X64# -> Word8X64# annotateStack# :: forall {q :: GHC.Internal.Types.RuntimeRep} b d (a :: TYPE q). b -> (State# d -> (# State# d, a #)) -> State# d -> (# State# d, a #) anyToAddr# :: forall a. a -> State# RealWorld -> (# State# RealWorld, Addr# #) asinDouble# :: Double# -> Double# @@ -2111,10 +2141,40 @@ module GHC.Prim where numSparks# :: forall d. State# d -> (# State# d, Int# #) or# :: Word# -> Word# -> Word# or64# :: Word64# -> Word64# -> Word64# + orDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# + orDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# + orDoubleX8# :: DoubleX8# -> DoubleX8# -> DoubleX8# + orFloatX16# :: FloatX16# -> FloatX16# -> FloatX16# + orFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# + orFloatX8# :: FloatX8# -> FloatX8# -> FloatX8# orI# :: Int# -> Int# -> Int# + orInt16X16# :: Int16X16# -> Int16X16# -> Int16X16# + orInt16X32# :: Int16X32# -> Int16X32# -> Int16X32# + orInt16X8# :: Int16X8# -> Int16X8# -> Int16X8# + orInt32X16# :: Int32X16# -> Int32X16# -> Int32X16# + orInt32X4# :: Int32X4# -> Int32X4# -> Int32X4# + orInt32X8# :: Int32X8# -> Int32X8# -> Int32X8# + orInt64X2# :: Int64X2# -> Int64X2# -> Int64X2# + orInt64X4# :: Int64X4# -> Int64X4# -> Int64X4# + orInt64X8# :: Int64X8# -> Int64X8# -> Int64X8# + orInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# + orInt8X32# :: Int8X32# -> Int8X32# -> Int8X32# + orInt8X64# :: 
Int8X64# -> Int8X64# -> Int8X64# orWord16# :: Word16# -> Word16# -> Word16# + orWord16X16# :: Word16X16# -> Word16X16# -> Word16X16# + orWord16X32# :: Word16X32# -> Word16X32# -> Word16X32# + orWord16X8# :: Word16X8# -> Word16X8# -> Word16X8# orWord32# :: Word32# -> Word32# -> Word32# + orWord32X16# :: Word32X16# -> Word32X16# -> Word32X16# + orWord32X4# :: Word32X4# -> Word32X4# -> Word32X4# + orWord32X8# :: Word32X8# -> Word32X8# -> Word32X8# + orWord64X2# :: Word64X2# -> Word64X2# -> Word64X2# + orWord64X4# :: Word64X4# -> Word64X4# -> Word64X4# + orWord64X8# :: Word64X8# -> Word64X8# -> Word64X8# orWord8# :: Word8# -> Word8# -> Word8# + orWord8X16# :: Word8X16# -> Word8X16# -> Word8X16# + orWord8X32# :: Word8X32# -> Word8X32# -> Word8X32# + orWord8X64# :: Word8X64# -> Word8X64# -> Word8X64# ord# :: Char# -> Int# packDoubleX2# :: (# Double#, Double# #) -> DoubleX2# packDoubleX4# :: (# Double#, Double#, Double#, Double# #) -> DoubleX4# @@ -2886,10 +2946,40 @@ module GHC.Prim where writeWordOffAddr# :: forall d. 
Addr# -> Int# -> Word# -> State# d -> State# d xor# :: Word# -> Word# -> Word# xor64# :: Word64# -> Word64# -> Word64# + xorDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# + xorDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# + xorDoubleX8# :: DoubleX8# -> DoubleX8# -> DoubleX8# + xorFloatX16# :: FloatX16# -> FloatX16# -> FloatX16# + xorFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# + xorFloatX8# :: FloatX8# -> FloatX8# -> FloatX8# xorI# :: Int# -> Int# -> Int# + xorInt16X16# :: Int16X16# -> Int16X16# -> Int16X16# + xorInt16X32# :: Int16X32# -> Int16X32# -> Int16X32# + xorInt16X8# :: Int16X8# -> Int16X8# -> Int16X8# + xorInt32X16# :: Int32X16# -> Int32X16# -> Int32X16# + xorInt32X4# :: Int32X4# -> Int32X4# -> Int32X4# + xorInt32X8# :: Int32X8# -> Int32X8# -> Int32X8# + xorInt64X2# :: Int64X2# -> Int64X2# -> Int64X2# + xorInt64X4# :: Int64X4# -> Int64X4# -> Int64X4# + xorInt64X8# :: Int64X8# -> Int64X8# -> Int64X8# + xorInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# + xorInt8X32# :: Int8X32# -> Int8X32# -> Int8X32# + xorInt8X64# :: Int8X64# -> Int8X64# -> Int8X64# xorWord16# :: Word16# -> Word16# -> Word16# + xorWord16X16# :: Word16X16# -> Word16X16# -> Word16X16# + xorWord16X32# :: Word16X32# -> Word16X32# -> Word16X32# + xorWord16X8# :: Word16X8# -> Word16X8# -> Word16X8# xorWord32# :: Word32# -> Word32# -> Word32# + xorWord32X16# :: Word32X16# -> Word32X16# -> Word32X16# + xorWord32X4# :: Word32X4# -> Word32X4# -> Word32X4# + xorWord32X8# :: Word32X8# -> Word32X8# -> Word32X8# + xorWord64X2# :: Word64X2# -> Word64X2# -> Word64X2# + xorWord64X4# :: Word64X4# -> Word64X4# -> Word64X4# + xorWord64X8# :: Word64X8# -> Word64X8# -> Word64X8# xorWord8# :: Word8# -> Word8# -> Word8# + xorWord8X16# :: Word8X16# -> Word8X16# -> Word8X16# + xorWord8X32# :: Word8X32# -> Word8X32# -> Word8X32# + xorWord8X64# :: Word8X64# -> Word8X64# -> Word8X64# yield# :: State# RealWorld -> State# RealWorld module GHC.Prim.Exception where ===================================== 
testsuite/tests/interface-stability/ghc-prim-exports.stdout-mingw32 ===================================== @@ -1423,10 +1423,40 @@ module GHC.Prim where addrToAny# :: forall {l :: GHC.Internal.Types.Levity} (a :: TYPE (GHC.Internal.Types.BoxedRep l)). Addr# -> (# a #) and# :: Word# -> Word# -> Word# and64# :: Word64# -> Word64# -> Word64# + andDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# + andDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# + andDoubleX8# :: DoubleX8# -> DoubleX8# -> DoubleX8# + andFloatX16# :: FloatX16# -> FloatX16# -> FloatX16# + andFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# + andFloatX8# :: FloatX8# -> FloatX8# -> FloatX8# andI# :: Int# -> Int# -> Int# + andInt16X16# :: Int16X16# -> Int16X16# -> Int16X16# + andInt16X32# :: Int16X32# -> Int16X32# -> Int16X32# + andInt16X8# :: Int16X8# -> Int16X8# -> Int16X8# + andInt32X16# :: Int32X16# -> Int32X16# -> Int32X16# + andInt32X4# :: Int32X4# -> Int32X4# -> Int32X4# + andInt32X8# :: Int32X8# -> Int32X8# -> Int32X8# + andInt64X2# :: Int64X2# -> Int64X2# -> Int64X2# + andInt64X4# :: Int64X4# -> Int64X4# -> Int64X4# + andInt64X8# :: Int64X8# -> Int64X8# -> Int64X8# + andInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# + andInt8X32# :: Int8X32# -> Int8X32# -> Int8X32# + andInt8X64# :: Int8X64# -> Int8X64# -> Int8X64# andWord16# :: Word16# -> Word16# -> Word16# + andWord16X16# :: Word16X16# -> Word16X16# -> Word16X16# + andWord16X32# :: Word16X32# -> Word16X32# -> Word16X32# + andWord16X8# :: Word16X8# -> Word16X8# -> Word16X8# andWord32# :: Word32# -> Word32# -> Word32# + andWord32X16# :: Word32X16# -> Word32X16# -> Word32X16# + andWord32X4# :: Word32X4# -> Word32X4# -> Word32X4# + andWord32X8# :: Word32X8# -> Word32X8# -> Word32X8# + andWord64X2# :: Word64X2# -> Word64X2# -> Word64X2# + andWord64X4# :: Word64X4# -> Word64X4# -> Word64X4# + andWord64X8# :: Word64X8# -> Word64X8# -> Word64X8# andWord8# :: Word8# -> Word8# -> Word8# + andWord8X16# :: Word8X16# -> Word8X16# -> Word8X16# + andWord8X32# :: 
Word8X32# -> Word8X32# -> Word8X32# + andWord8X64# :: Word8X64# -> Word8X64# -> Word8X64# annotateStack# :: forall {q :: GHC.Internal.Types.RuntimeRep} b d (a :: TYPE q). b -> (State# d -> (# State# d, a #)) -> State# d -> (# State# d, a #) anyToAddr# :: forall a. a -> State# RealWorld -> (# State# RealWorld, Addr# #) asinDouble# :: Double# -> Double# @@ -2111,10 +2141,40 @@ module GHC.Prim where numSparks# :: forall d. State# d -> (# State# d, Int# #) or# :: Word# -> Word# -> Word# or64# :: Word64# -> Word64# -> Word64# + orDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# + orDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# + orDoubleX8# :: DoubleX8# -> DoubleX8# -> DoubleX8# + orFloatX16# :: FloatX16# -> FloatX16# -> FloatX16# + orFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# + orFloatX8# :: FloatX8# -> FloatX8# -> FloatX8# orI# :: Int# -> Int# -> Int# + orInt16X16# :: Int16X16# -> Int16X16# -> Int16X16# + orInt16X32# :: Int16X32# -> Int16X32# -> Int16X32# + orInt16X8# :: Int16X8# -> Int16X8# -> Int16X8# + orInt32X16# :: Int32X16# -> Int32X16# -> Int32X16# + orInt32X4# :: Int32X4# -> Int32X4# -> Int32X4# + orInt32X8# :: Int32X8# -> Int32X8# -> Int32X8# + orInt64X2# :: Int64X2# -> Int64X2# -> Int64X2# + orInt64X4# :: Int64X4# -> Int64X4# -> Int64X4# + orInt64X8# :: Int64X8# -> Int64X8# -> Int64X8# + orInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# + orInt8X32# :: Int8X32# -> Int8X32# -> Int8X32# + orInt8X64# :: Int8X64# -> Int8X64# -> Int8X64# orWord16# :: Word16# -> Word16# -> Word16# + orWord16X16# :: Word16X16# -> Word16X16# -> Word16X16# + orWord16X32# :: Word16X32# -> Word16X32# -> Word16X32# + orWord16X8# :: Word16X8# -> Word16X8# -> Word16X8# orWord32# :: Word32# -> Word32# -> Word32# + orWord32X16# :: Word32X16# -> Word32X16# -> Word32X16# + orWord32X4# :: Word32X4# -> Word32X4# -> Word32X4# + orWord32X8# :: Word32X8# -> Word32X8# -> Word32X8# + orWord64X2# :: Word64X2# -> Word64X2# -> Word64X2# + orWord64X4# :: Word64X4# -> Word64X4# -> Word64X4# + 
orWord64X8# :: Word64X8# -> Word64X8# -> Word64X8# orWord8# :: Word8# -> Word8# -> Word8# + orWord8X16# :: Word8X16# -> Word8X16# -> Word8X16# + orWord8X32# :: Word8X32# -> Word8X32# -> Word8X32# + orWord8X64# :: Word8X64# -> Word8X64# -> Word8X64# ord# :: Char# -> Int# packDoubleX2# :: (# Double#, Double# #) -> DoubleX2# packDoubleX4# :: (# Double#, Double#, Double#, Double# #) -> DoubleX4# @@ -2886,10 +2946,40 @@ module GHC.Prim where writeWordOffAddr# :: forall d. Addr# -> Int# -> Word# -> State# d -> State# d xor# :: Word# -> Word# -> Word# xor64# :: Word64# -> Word64# -> Word64# + xorDoubleX2# :: DoubleX2# -> DoubleX2# -> DoubleX2# + xorDoubleX4# :: DoubleX4# -> DoubleX4# -> DoubleX4# + xorDoubleX8# :: DoubleX8# -> DoubleX8# -> DoubleX8# + xorFloatX16# :: FloatX16# -> FloatX16# -> FloatX16# + xorFloatX4# :: FloatX4# -> FloatX4# -> FloatX4# + xorFloatX8# :: FloatX8# -> FloatX8# -> FloatX8# xorI# :: Int# -> Int# -> Int# + xorInt16X16# :: Int16X16# -> Int16X16# -> Int16X16# + xorInt16X32# :: Int16X32# -> Int16X32# -> Int16X32# + xorInt16X8# :: Int16X8# -> Int16X8# -> Int16X8# + xorInt32X16# :: Int32X16# -> Int32X16# -> Int32X16# + xorInt32X4# :: Int32X4# -> Int32X4# -> Int32X4# + xorInt32X8# :: Int32X8# -> Int32X8# -> Int32X8# + xorInt64X2# :: Int64X2# -> Int64X2# -> Int64X2# + xorInt64X4# :: Int64X4# -> Int64X4# -> Int64X4# + xorInt64X8# :: Int64X8# -> Int64X8# -> Int64X8# + xorInt8X16# :: Int8X16# -> Int8X16# -> Int8X16# + xorInt8X32# :: Int8X32# -> Int8X32# -> Int8X32# + xorInt8X64# :: Int8X64# -> Int8X64# -> Int8X64# xorWord16# :: Word16# -> Word16# -> Word16# + xorWord16X16# :: Word16X16# -> Word16X16# -> Word16X16# + xorWord16X32# :: Word16X32# -> Word16X32# -> Word16X32# + xorWord16X8# :: Word16X8# -> Word16X8# -> Word16X8# xorWord32# :: Word32# -> Word32# -> Word32# + xorWord32X16# :: Word32X16# -> Word32X16# -> Word32X16# + xorWord32X4# :: Word32X4# -> Word32X4# -> Word32X4# + xorWord32X8# :: Word32X8# -> Word32X8# -> Word32X8# + xorWord64X2# :: 
Word64X2# -> Word64X2# -> Word64X2# + xorWord64X4# :: Word64X4# -> Word64X4# -> Word64X4# + xorWord64X8# :: Word64X8# -> Word64X8# -> Word64X8# xorWord8# :: Word8# -> Word8# -> Word8# + xorWord8X16# :: Word8X16# -> Word8X16# -> Word8X16# + xorWord8X32# :: Word8X32# -> Word8X32# -> Word8X32# + xorWord8X64# :: Word8X64# -> Word8X64# -> Word8X64# yield# :: State# RealWorld -> State# RealWorld module GHC.Prim.Exception where ===================================== testsuite/tests/simd/should_run/all.T ===================================== @@ -92,6 +92,15 @@ test('simd014', # of the XMM4 register, which may not be mapped to a real machine # register on non-x86 architectures. compile_and_run, ['simd014Cmm.cmm']) +test('simd015', + [ when(have_llvm(), extra_ways(["optllvm"])) ], + compile_and_run, ['']) +test('simd016', + [ when(have_llvm(), extra_ways(["optllvm"])) ], + compile_and_run, ['']) +test('simd017', + [ when(have_llvm(), extra_ways(["optllvm"])) ], + compile_and_run, ['']) test('simd_insert', [], compile_and_run, ['']) test('simd_insert_array', [], compile_and_run, ['']) ===================================== testsuite/tests/simd/should_run/simd015.hs ===================================== @@ -0,0 +1,45 @@ +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE ExtendedLiterals #-} + +-- bitwise instructions on floating point vectors + +import GHC.Exts +import GHC.Int +import GHC.Prim + + +main :: IO () +main = do + putStrLn "DoubleX2#" + let + !d1 = packDoubleX2# (# 1.1##, 2.2## #) + !d2 = packDoubleX2# (# 0.0##, 2.2## #) + !d3 = packDoubleX2# (# -5.5##, 32.0## #) + !d4 = packDoubleX2# (# 5.5##, 128.0## #) + + case unpackDoubleX2# (andDoubleX2# d1 d2) of + (# a, b #) -> print (D# a, D# b) + case unpackDoubleX2# (andDoubleX2# d3 d4) of + (# c, d #) -> print (D# c, D# d) + case unpackDoubleX2# (orDoubleX2# d1 d2) of + (# a, b #) -> print (D# a, D# b) + case unpackDoubleX2# (orDoubleX2# d3 d4) of + (# c, d #) -> print (D# c, D# d) + case 
unpackDoubleX2# (xorDoubleX2# d1 d2) of + (# a, b #) -> print (D# a, D# b) + case unpackDoubleX2# (xorDoubleX2# d3 d4) of + (# c, d #) -> print (D# c, D# d) + + putStrLn "" + putStrLn "FloatX4#" + let + !f1 = packFloatX4# (# 1.1#, 2.2#, -5.5#, 128.0# #) + !f2 = packFloatX4# (# 0.0#, 2.2#, 5.5#, 32.0# #) + + case unpackFloatX4# (andFloatX4# f1 f2) of + (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d) + case unpackFloatX4# (orFloatX4# f1 f2) of + (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d) + case unpackFloatX4# (xorFloatX4# f1 f2) of + (# a, b, c, d #) -> print (F# a, F# b, F# c, F# d) ===================================== testsuite/tests/simd/should_run/simd015.stdout ===================================== @@ -0,0 +1,12 @@ +DoubleX2# +(0.0,2.2) +(5.5,32.0) +(1.1,2.2) +(-5.5,128.0) +(1.1,0.0) +(-0.0,4.450147717014403e-308) + +FloatX4# +(0.0,2.2,5.5,32.0) +(1.1,2.2,-5.5,128.0) +(1.1,0.0,-0.0,2.3509887e-38) ===================================== testsuite/tests/simd/should_run/simd016.hs ===================================== @@ -0,0 +1,115 @@ +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE ExtendedLiterals #-} + +-- bitwise instructions on signed integer vectors + +import GHC.Exts +import GHC.Int +import GHC.Prim + + +main :: IO () +main = do + putStrLn "Int64X2#" + let + !i64_1 = packInt64X2# (# 1#Int64, 2#Int64 #) + !i64_2 = packInt64X2# (# 0#Int64, 2#Int64 #) + !i64_3 = packInt64X2# (# -5#Int64, 128#Int64 #) + !i64_4 = packInt64X2# (# 5#Int64, 32#Int64 #) + + case unpackInt64X2# (andInt64X2# i64_1 i64_2) of + (# a, b #) -> print (I64# a, I64# b) + case unpackInt64X2# (andInt64X2# i64_3 i64_4) of + (# c, d #) -> print (I64# c, I64# d) + case unpackInt64X2# (orInt64X2# i64_1 i64_2) of + (# a, b #) -> print (I64# a, I64# b) + case unpackInt64X2# (orInt64X2# i64_3 i64_4) of + (# c, d #) -> print (I64# c, I64# d) + case unpackInt64X2# (xorInt64X2# i64_1 i64_2) of + (# a, b #) -> print (I64# a, I64# b) + case unpackInt64X2# (xorInt64X2# 
i64_3 i64_4) of + (# c, d #) -> print (I64# c, I64# d) + + putStrLn "" + putStrLn "Int32X4#" + let + !i32_1 = packInt32X4# (# 1#Int32, 2#Int32, -5#Int32, 128#Int32 #) + !i32_2 = packInt32X4# (# 0#Int32, 2#Int32, 5#Int32, 32#Int32 #) + + case unpackInt32X4# (andInt32X4# i32_1 i32_2) of + (# a, b, c, d #) -> print (I32# a, I32# b, I32# c, I32# d) + case unpackInt32X4# (orInt32X4# i32_1 i32_2) of + (# a, b, c, d #) -> print (I32# a, I32# b, I32# c, I32# d) + case unpackInt32X4# (xorInt32X4# i32_1 i32_2) of + (# a, b, c, d #) -> print (I32# a, I32# b, I32# c, I32# d) + + putStrLn "" + putStrLn "Int16X8#" + let + !i16_1 = packInt16X8# + (# 1#Int16, 2#Int16, -5#Int16, 128#Int16 + , 1#Int16, 2#Int16, -5#Int16, 128#Int16 + #) + !i16_2 = packInt16X8# + (# 0#Int16, 2#Int16, 5#Int16, 32#Int16 + , 0#Int16, 2#Int16, 5#Int16, 32#Int16 + #) + case unpackInt16X8# (andInt16X8# i16_1 i16_2) of + (# a, b, c, d, e, f, g, h #) -> + print + ( (I16# a, I16# b, I16# c, I16# d) + , (I16# e, I16# f, I16# g, I16# h) + ) + case unpackInt16X8# (orInt16X8# i16_1 i16_2) of + (# a, b, c, d, e, f, g, h #) -> + print + ( (I16# a, I16# b, I16# c, I16# d) + , (I16# e, I16# f, I16# g, I16# h) + ) + case unpackInt16X8# (xorInt16X8# i16_1 i16_2) of + (# a, b, c, d, e, f, g, h #) -> + print + ( (I16# a, I16# b, I16# c, I16# d) + , (I16# e, I16# f, I16# g, I16# h) + ) + + putStrLn "" + putStrLn "Int8X16#" + let + !i8_1 = packInt8X16# + (# 1#Int8, 2#Int8, -5#Int8, 128#Int8 + , 1#Int8, 2#Int8, -5#Int8, 128#Int8 + , 1#Int8, 2#Int8, -5#Int8, 128#Int8 + , 1#Int8, 2#Int8, -5#Int8, 128#Int8 + #) + !i8_2 = packInt8X16# + (# 0#Int8, 2#Int8, 5#Int8, 32#Int8 + , 0#Int8, 2#Int8, 5#Int8, 32#Int8 + , 0#Int8, 2#Int8, 5#Int8, 32#Int8 + , 0#Int8, 2#Int8, 5#Int8, 32#Int8 + #) + case unpackInt8X16# (andInt8X16# i8_1 i8_2) of + (# a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p #) -> + print + ( (I8# a, I8# b, I8# c, I8# d) + , (I8# e, I8# f, I8# g, I8# h) + , (I8# i, I8# j, I8# k, I8# l) + , (I8# m, I8# n, I8# o, I8# p) + ) 
+ case unpackInt8X16# (orInt8X16# i8_1 i8_2) of + (# a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p #) -> + print + ( (I8# a, I8# b, I8# c, I8# d) + , (I8# e, I8# f, I8# g, I8# h) + , (I8# i, I8# j, I8# k, I8# l) + , (I8# m, I8# n, I8# o, I8# p) + ) + case unpackInt8X16# (xorInt8X16# i8_1 i8_2) of + (# a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p #) -> + print + ( (I8# a, I8# b, I8# c, I8# d) + , (I8# e, I8# f, I8# g, I8# h) + , (I8# i, I8# j, I8# k, I8# l) + , (I8# m, I8# n, I8# o, I8# p) + ) ===================================== testsuite/tests/simd/should_run/simd016.stdout ===================================== @@ -0,0 +1,22 @@ +Int64X2# +(0,2) +(1,0) +(1,2) +(-1,160) +(1,0) +(-2,160) + +Int32X4# +(0,2,1,0) +(1,2,-1,160) +(1,0,-2,160) + +Int16X8# +((0,2,1,0),(0,2,1,0)) +((1,2,-1,160),(1,2,-1,160)) +((1,0,-2,160),(1,0,-2,160)) + +Int8X16# +((0,2,1,0),(0,2,1,0),(0,2,1,0),(0,2,1,0)) +((1,2,-1,-96),(1,2,-1,-96),(1,2,-1,-96),(1,2,-1,-96)) +((1,0,-2,-96),(1,0,-2,-96),(1,0,-2,-96),(1,0,-2,-96)) ===================================== testsuite/tests/simd/should_run/simd017.hs ===================================== @@ -0,0 +1,115 @@ +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE ExtendedLiterals #-} + +-- bitwise instructions on unsigned integer vectors + +import GHC.Exts +import GHC.Word +import GHC.Prim + + +main :: IO () +main = do + putStrLn "Word64X2#" + let + !w64_1 = packWord64X2# (# 1#Word64, 2#Word64 #) + !w64_2 = packWord64X2# (# 0#Word64, 2#Word64 #) + !w64_3 = packWord64X2# (# 18446744073709551615#Word64, 128#Word64 #) + !w64_4 = packWord64X2# (# 5#Word64, 32#Word64 #) + + case unpackWord64X2# (andWord64X2# w64_1 w64_2) of + (# a, b #) -> print (W64# a, W64# b) + case unpackWord64X2# (andWord64X2# w64_3 w64_4) of + (# c, d #) -> print (W64# c, W64# d) + case unpackWord64X2# (orWord64X2# w64_1 w64_2) of + (# a, b #) -> print (W64# a, W64# b) + case unpackWord64X2# (orWord64X2# w64_3 w64_4) of + (# c, d #) -> print (W64# c, W64# d) + 
case unpackWord64X2# (xorWord64X2# w64_1 w64_2) of + (# a, b #) -> print (W64# a, W64# b) + case unpackWord64X2# (xorWord64X2# w64_3 w64_4) of + (# c, d #) -> print (W64# c, W64# d) + + putStrLn "" + putStrLn "Word32X4#" + let + !w32_1 = packWord32X4# (# 1#Word32, 2#Word32, 4294967295#Word32, 128#Word32 #) + !w32_2 = packWord32X4# (# 0#Word32, 2#Word32, 5#Word32, 32#Word32 #) + + case unpackWord32X4# (andWord32X4# w32_1 w32_2) of + (# a, b, c, d #) -> print (W32# a, W32# b, W32# c, W32# d) + case unpackWord32X4# (orWord32X4# w32_1 w32_2) of + (# a, b, c, d #) -> print (W32# a, W32# b, W32# c, W32# d) + case unpackWord32X4# (xorWord32X4# w32_1 w32_2) of + (# a, b, c, d #) -> print (W32# a, W32# b, W32# c, W32# d) + + putStrLn "" + putStrLn "Word16X8#" + let + !w16_1 = packWord16X8# + (# 1#Word16, 2#Word16, 65535#Word16, 128#Word16 + , 1#Word16, 2#Word16, 65535#Word16, 128#Word16 + #) + !w16_2 = packWord16X8# + (# 0#Word16, 2#Word16, 5#Word16, 32#Word16 + , 0#Word16, 2#Word16, 5#Word16, 32#Word16 + #) + case unpackWord16X8# (andWord16X8# w16_1 w16_2) of + (# a, b, c, d, e, f, g, h #) -> + print + ( (W16# a, W16# b, W16# c, W16# d) + , (W16# e, W16# f, W16# g, W16# h) + ) + case unpackWord16X8# (orWord16X8# w16_1 w16_2) of + (# a, b, c, d, e, f, g, h #) -> + print + ( (W16# a, W16# b, W16# c, W16# d) + , (W16# e, W16# f, W16# g, W16# h) + ) + case unpackWord16X8# (xorWord16X8# w16_1 w16_2) of + (# a, b, c, d, e, f, g, h #) -> + print + ( (W16# a, W16# b, W16# c, W16# d) + , (W16# e, W16# f, W16# g, W16# h) + ) + + putStrLn "" + putStrLn "Word8X16#" + let + !w8_1 = packWord8X16# + (# 1#Word8, 2#Word8, 255#Word8, 128#Word8 + , 1#Word8, 2#Word8, 255#Word8, 128#Word8 + , 1#Word8, 2#Word8, 255#Word8, 128#Word8 + , 1#Word8, 2#Word8, 255#Word8, 128#Word8 + #) + !w8_2 = packWord8X16# + (# 0#Word8, 2#Word8, 5#Word8, 32#Word8 + , 0#Word8, 2#Word8, 5#Word8, 32#Word8 + , 0#Word8, 2#Word8, 5#Word8, 32#Word8 + , 0#Word8, 2#Word8, 5#Word8, 32#Word8 + #) + case unpackWord8X16# 
(andWord8X16# w8_1 w8_2) of + (# a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p #) -> + print + ( (W8# a, W8# b, W8# c, W8# d) + , (W8# e, W8# f, W8# g, W8# h) + , (W8# i, W8# j, W8# k, W8# l) + , (W8# m, W8# n, W8# o, W8# p) + ) + case unpackWord8X16# (orWord8X16# w8_1 w8_2) of + (# a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p #) -> + print + ( (W8# a, W8# b, W8# c, W8# d) + , (W8# e, W8# f, W8# g, W8# h) + , (W8# i, W8# j, W8# k, W8# l) + , (W8# m, W8# n, W8# o, W8# p) + ) + case unpackWord8X16# (xorWord8X16# w8_1 w8_2) of + (# a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p #) -> + print + ( (W8# a, W8# b, W8# c, W8# d) + , (W8# e, W8# f, W8# g, W8# h) + , (W8# i, W8# j, W8# k, W8# l) + , (W8# m, W8# n, W8# o, W8# p) + ) ===================================== testsuite/tests/simd/should_run/simd017.stdout ===================================== @@ -0,0 +1,22 @@ +Word64X2# +(0,2) +(5,0) +(1,2) +(18446744073709551615,160) +(1,0) +(18446744073709551610,160) + +Word32X4# +(0,2,5,0) +(1,2,4294967295,160) +(1,0,4294967290,160) + +Word16X8# +((0,2,5,0),(0,2,5,0)) +((1,2,65535,160),(1,2,65535,160)) +((1,0,65530,160),(1,0,65530,160)) + +Word8X16# +((0,2,5,0),(0,2,5,0),(0,2,5,0),(0,2,5,0)) +((1,2,255,160),(1,2,255,160),(1,2,255,160),(1,2,255,160)) +((1,0,250,160),(1,0,250,160),(1,0,250,160),(1,0,250,160)) View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6ef22fa0ba7c0a9284176e40fdc3135e... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6ef22fa0ba7c0a9284176e40fdc3135e... You're receiving this email because of your account on gitlab.haskell.org.