| ... |
... |
@@ -248,6 +248,14 @@ Since x86 PDep/PExt instructions only exist for 32/64 bit widths |
|
248
|
248
|
we use the 32bit variant to compute the 8/16bit primops.
|
|
249
|
249
|
To do so we extend/truncate the argument/result around the
|
|
250
|
250
|
call.
|
|
|
251
|
+
|
|
|
252
|
+Note that the 64-bit intrinsics (`llvm.x86.bmi.pdep.64` and
|
|
|
253
|
+`llvm.x86.bmi.pext.64`) are only legal on 64-bit x86 targets, not on
|
|
|
254
|
+i386. Therefore, on i386 we must fall back to the runtime helpers
|
|
|
255
|
+(`hs_pdep64`/`hs_pext64`) for the 64-bit primops.
|
|
|
256
|
+
|
|
|
257
|
+See https://github.com/llvm/llvm-project/issues/172857 for upstream
|
|
|
258
|
+discussion about portable pdep/pext intrinsics.
|
|
251
|
259
|
-}
|
|
252
|
260
|
genCall (PrimTarget op@(MO_Pdep w)) [dst] args = do
|
|
253
|
261
|
cfg <- getConfig
|
| ... |
... |
@@ -970,36 +978,34 @@ cmmPrimOpFunctions mop = do |
|
970
|
978
|
W8 -> fsLit "llvm.x86.bmi.pdep.32"
|
|
971
|
979
|
W16 -> fsLit "llvm.x86.bmi.pdep.32"
|
|
972
|
980
|
W32 -> fsLit "llvm.x86.bmi.pdep.32"
|
|
973
|
|
- W64 -> fsLit "llvm.x86.bmi.pdep.64"
|
|
974
|
|
- W128 -> fsLit "llvm.x86.bmi.pdep.128"
|
|
975
|
|
- W256 -> fsLit "llvm.x86.bmi.pdep.256"
|
|
976
|
|
- W512 -> fsLit "llvm.x86.bmi.pdep.512"
|
|
|
981
|
+ W64
|
|
|
982
|
+ | is32bit -> fsLit "hs_pdep64"
|
|
|
983
|
+ | otherwise -> fsLit "llvm.x86.bmi.pdep.64"
|
|
|
984
|
+ -- LLVM only provides x86 PDep/PExt intrinsics for 32/64 bits
|
|
|
985
|
+ _ -> unsupported
|
|
977
|
986
|
| otherwise -> case w of
|
|
978
|
987
|
W8 -> fsLit "hs_pdep8"
|
|
979
|
988
|
W16 -> fsLit "hs_pdep16"
|
|
980
|
989
|
W32 -> fsLit "hs_pdep32"
|
|
981
|
990
|
W64 -> fsLit "hs_pdep64"
|
|
982
|
|
- W128 -> fsLit "hs_pdep128"
|
|
983
|
|
- W256 -> fsLit "hs_pdep256"
|
|
984
|
|
- W512 -> fsLit "hs_pdep512"
|
|
|
991
|
+ _ -> unsupported
|
|
985
|
992
|
MO_Pext w
|
|
986
|
993
|
| isBmi2Enabled -> case w of
|
|
987
|
994
|
-- See Note [LLVM PDep/PExt intrinsics]
|
|
988
|
995
|
W8 -> fsLit "llvm.x86.bmi.pext.32"
|
|
989
|
996
|
W16 -> fsLit "llvm.x86.bmi.pext.32"
|
|
990
|
997
|
W32 -> fsLit "llvm.x86.bmi.pext.32"
|
|
991
|
|
- W64 -> fsLit "llvm.x86.bmi.pext.64"
|
|
992
|
|
- W128 -> fsLit "llvm.x86.bmi.pext.128"
|
|
993
|
|
- W256 -> fsLit "llvm.x86.bmi.pext.256"
|
|
994
|
|
- W512 -> fsLit "llvm.x86.bmi.pext.512"
|
|
|
998
|
+ W64
|
|
|
999
|
+ | is32bit -> fsLit "hs_pext64"
|
|
|
1000
|
+ | otherwise -> fsLit "llvm.x86.bmi.pext.64"
|
|
|
1001
|
+ -- LLVM only provides x86 PDep/PExt intrinsics for 32/64 bits
|
|
|
1002
|
+ _ -> unsupported
|
|
995
|
1003
|
| otherwise -> case w of
|
|
996
|
1004
|
W8 -> fsLit "hs_pext8"
|
|
997
|
1005
|
W16 -> fsLit "hs_pext16"
|
|
998
|
1006
|
W32 -> fsLit "hs_pext32"
|
|
999
|
1007
|
W64 -> fsLit "hs_pext64"
|
|
1000
|
|
- W128 -> fsLit "hs_pext128"
|
|
1001
|
|
- W256 -> fsLit "hs_pext256"
|
|
1002
|
|
- W512 -> fsLit "hs_pext512"
|
|
|
1008
|
+ _ -> unsupported
|
|
1003
|
1009
|
|
|
1004
|
1010
|
MO_AddIntC w -> case w of
|
|
1005
|
1011
|
W8 -> fsLit "llvm.sadd.with.overflow.i8"
|