... |
... |
@@ -230,23 +230,25 @@ genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args |
230
|
230
|
statement $ Expr $ Call StdCall fptr (argVars' ++ argSuffix) []
|
231
|
231
|
| otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt)
|
232
|
232
|
|
233
|
|
--- Handle PopCnt, Clz, Ctz, and BSwap that need to only convert arg
|
234
|
|
--- and return types
|
235
|
|
-genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
|
236
|
|
- genCallSimpleCast w t dsts args
|
237
|
|
-
|
238
|
|
-genCall t@(PrimTarget (MO_Pdep w)) dsts args =
|
239
|
|
- genCallSimpleCast2 w t dsts args
|
240
|
|
-genCall t@(PrimTarget (MO_Pext w)) dsts args =
|
241
|
|
- genCallSimpleCast2 w t dsts args
|
242
|
|
-genCall t@(PrimTarget (MO_Clz w)) dsts args =
|
243
|
|
- genCallSimpleCast w t dsts args
|
244
|
|
-genCall t@(PrimTarget (MO_Ctz w)) dsts args =
|
245
|
|
- genCallSimpleCast w t dsts args
|
246
|
|
-genCall t@(PrimTarget (MO_BSwap w)) dsts args =
|
247
|
|
- genCallSimpleCast w t dsts args
|
248
|
|
-genCall t@(PrimTarget (MO_BRev w)) dsts args =
|
249
|
|
- genCallSimpleCast w t dsts args
|
|
233
|
+-- Handle PopCnt, Clz, Ctz, BRev, and BSwap that need to only convert arg and return types
|
|
234
|
+genCall (PrimTarget op@(MO_PopCnt w)) [dst] args =
|
|
235
|
+ genCallSimpleCast w op dst args
|
|
236
|
+genCall (PrimTarget op@(MO_Clz w)) [dst] args =
|
|
237
|
+ genCallSimpleCast w op dst args
|
|
238
|
+genCall (PrimTarget op@(MO_Ctz w)) [dst] args =
|
|
239
|
+ genCallSimpleCast w op dst args
|
|
240
|
+genCall (PrimTarget op@(MO_BRev w)) [dst] args =
|
|
241
|
+ genCallSimpleCast w op dst args
|
|
242
|
+genCall (PrimTarget op@(MO_BSwap w)) [dst] args =
|
|
243
|
+ genCallSimpleCast w op dst args
|
|
244
|
+
|
|
245
|
+-- Handle Pdep and Pext that (may) require using a type with a larger bit-width
|
|
246
|
+-- than the specified but width. This register width-extension is particualarly
|
|
247
|
+-- necessary for W8 and W16.
|
|
248
|
+genCall (PrimTarget op@(MO_Pdep w)) [dst] args =
|
|
249
|
+ genCallCastWithMinWidthOf W32 w op dst args
|
|
250
|
+genCall (PrimTarget op@(MO_Pext w)) [dst] args =
|
|
251
|
+ genCallCastWithMinWidthOf W32 w op dst args
|
250
|
252
|
|
251
|
253
|
genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $ do
|
252
|
254
|
addrVar <- exprToVarW addr
|
... |
... |
@@ -640,63 +642,35 @@ genCallExtract _ _ _ _ = |
640
|
642
|
-- since GHC only really has i32 and i64 types and things like Word8 are backed
|
641
|
643
|
-- by an i32 and just present a logical i8 range. So we must handle conversions
|
642
|
644
|
-- from i32 to i8 explicitly as LLVM is strict about types.
|
643
|
|
-genCallSimpleCast :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
|
644
|
|
- -> LlvmM StmtData
|
645
|
|
-genCallSimpleCast w t@(PrimTarget op) [dst] args = do
|
646
|
|
- let width = widthToLlvmInt w
|
647
|
|
- dstTy = cmmToLlvmType $ localRegType dst
|
648
|
|
-
|
649
|
|
- fname <- cmmPrimOpFunctions op
|
650
|
|
- (fptr, _, top3) <- getInstrinct fname width [width]
|
651
|
|
-
|
652
|
|
- (dstV, _dst_ty) <- getCmmReg (CmmLocal dst)
|
653
|
|
-
|
654
|
|
- let (_, arg_hints) = foreignTargetHints t
|
655
|
|
- let args_hints = zip args arg_hints
|
656
|
|
- (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
|
657
|
|
- (argsV', stmts4) <- castVars Signed $ zip argsV [width]
|
658
|
|
- (retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
|
659
|
|
- (retVs', stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
|
660
|
|
- let retV' = singletonPanic "genCallSimpleCast" retVs'
|
661
|
|
- let s2 = Store retV' dstV Nothing []
|
662
|
|
-
|
663
|
|
- let stmts = stmts2 `appOL` stmts4 `snocOL`
|
664
|
|
- s1 `appOL` stmts5 `snocOL` s2
|
665
|
|
- return (stmts, top2 ++ top3)
|
666
|
|
-genCallSimpleCast _ _ dsts _ =
|
667
|
|
- panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts")
|
668
|
|
-
|
669
|
|
--- Handle simple function call that only need simple type casting, of the form:
|
670
|
|
--- truncate arg >>= \a -> call(a) >>= zext
|
671
|
|
---
|
672
|
|
--- since GHC only really has i32 and i64 types and things like Word8 are backed
|
673
|
|
--- by an i32 and just present a logical i8 range. So we must handle conversions
|
674
|
|
--- from i32 to i8 explicitly as LLVM is strict about types.
|
675
|
|
-genCallSimpleCast2 :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
|
676
|
|
- -> LlvmM StmtData
|
677
|
|
-genCallSimpleCast2 w t@(PrimTarget op) [dst] args = do
|
678
|
|
- let width = widthToLlvmInt w
|
679
|
|
- dstTy = cmmToLlvmType $ localRegType dst
|
680
|
|
-
|
681
|
|
- fname <- cmmPrimOpFunctions op
|
682
|
|
- (fptr, _, top3) <- getInstrinct fname width (const width <$> args)
|
683
|
|
-
|
684
|
|
- (dstV, _dst_ty) <- getCmmReg (CmmLocal dst)
|
685
|
|
-
|
686
|
|
- let (_, arg_hints) = foreignTargetHints t
|
687
|
|
- let args_hints = zip args arg_hints
|
688
|
|
- (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
|
689
|
|
- (argsV', stmts4) <- castVars Signed $ zip argsV (const width <$> argsV)
|
690
|
|
- (retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
|
691
|
|
- (retVs', stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
|
692
|
|
- let retV' = singletonPanic "genCallSimpleCast2" retVs'
|
693
|
|
- let s2 = Store retV' dstV Nothing []
|
694
|
|
-
|
695
|
|
- let stmts = stmts2 `appOL` stmts4 `snocOL`
|
696
|
|
- s1 `appOL` stmts5 `snocOL` s2
|
|
645
|
+genCallSimpleCast :: Width -> CallishMachOp -> CmmFormal -> [CmmActual]
|
|
646
|
+ -> LlvmM StmtData
|
|
647
|
+genCallSimpleCast w = genCallCastWithMinWidthOf w w
|
|
648
|
+
|
|
649
|
+-- Handle extension case that the element should be extend to a larger bit-width
|
|
650
|
+-- for the operation and subsequently truncated, of the form:
|
|
651
|
+-- extend arg >>= \a -> call(a) >>= truncate
|
|
652
|
+genCallCastWithMinWidthOf :: Width -> Width -> CallishMachOp -> CmmFormal
|
|
653
|
+ -> [CmmActual] -> LlvmM StmtData
|
|
654
|
+genCallCastWithMinWidthOf minW specW op dst args = do
|
|
655
|
+ let width = widthToLlvmInt $ max minW specW
|
|
656
|
+ argsW = const width <$> args
|
|
657
|
+ dstType = cmmToLlvmType $ localRegType dst
|
|
658
|
+ signage = cmmPrimOpRetValSignage op
|
|
659
|
+
|
|
660
|
+ fname <- cmmPrimOpFunctions op
|
|
661
|
+ (fptr, _, top3) <- getInstrinct fname width argsW
|
|
662
|
+ (dstV, _dst_ty) <- getCmmReg (CmmLocal dst)
|
|
663
|
+ let (_, arg_hints) = foreignTargetHints $ PrimTarget op
|
|
664
|
+ let args_hints = zip args arg_hints
|
|
665
|
+ (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
|
|
666
|
+ (argsV', stmts4) <- castVars signage $ zip argsV argsW
|
|
667
|
+ (retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
|
|
668
|
+ (retV', stmts5) <- castVar signage retV dstType
|
|
669
|
+ let s2 = Store retV' dstV Nothing []
|
|
670
|
+
|
|
671
|
+ let stmts = stmts2 `appOL` stmts4 `snocOL` s1 `snocOL`
|
|
672
|
+ stmts5 `snocOL` s2
|
697
|
673
|
return (stmts, top2 ++ top3)
|
698
|
|
-genCallSimpleCast2 _ _ dsts _ =
|
699
|
|
- panic ("genCallSimpleCast2: " ++ show (length dsts) ++ " dsts")
|
700
|
674
|
|
701
|
675
|
-- | Create a function pointer from a target.
|
702
|
676
|
getFunPtrW :: (LMString -> LlvmType) -> ForeignTarget
|
... |
... |
@@ -811,11 +785,39 @@ castVar signage v t | getVarType v == t |
811
|
785
|
Signed -> LM_Sext
|
812
|
786
|
Unsigned -> LM_Zext
|
813
|
787
|
|
814
|
|
-
|
815
|
788
|
cmmPrimOpRetValSignage :: CallishMachOp -> Signage
|
816
|
789
|
cmmPrimOpRetValSignage mop = case mop of
|
817
|
790
|
MO_Pdep _ -> Unsigned
|
818
|
791
|
MO_Pext _ -> Unsigned
|
|
792
|
+ -- If the result of a Bit-Reverse is treated as signed,
|
|
793
|
+ -- an positive input can result in an negative output, i.e.:
|
|
794
|
+ --
|
|
795
|
+ -- identity(0x03) = 0x03 = 00000011
|
|
796
|
+ -- breverse(0x03) = 0xC0 = 11000000
|
|
797
|
+ --
|
|
798
|
+ -- Now if an extension is performed after the operation to
|
|
799
|
+ -- promote a smaller bit-width value into a larger bit-width
|
|
800
|
+ -- type, it is expected that the /bit-wise/ operations will
|
|
801
|
+ -- not be treated /numerically/ as signed.
|
|
802
|
+ --
|
|
803
|
+ -- To illustrate the difference, consider how a signed extension
|
|
804
|
+ -- for the type i16 to i32 differs for out values above:
|
|
805
|
+ -- ext_zeroed(i32, breverse(0x03)) = 0x00C0 = 0000000011000000
|
|
806
|
+ -- ext_signed(i32, breverse(0x03)) = 0xFFC0 = 1111111111000000
|
|
807
|
+ --
|
|
808
|
+ -- Here we can see that the former output is the expected result
|
|
809
|
+ -- of a bit-wise operation which needs to be promoted to a larger
|
|
810
|
+ -- bit-width type. The latter output is not desirable when we must
|
|
811
|
+ -- constraining a value into a range of i16 within an i32 type.
|
|
812
|
+ --
|
|
813
|
+ -- Hence we always treat the "signage" as unsigned for Bit-Reverse!
|
|
814
|
+ MO_BRev _ -> Unsigned
|
|
815
|
+
|
|
816
|
+ -- The same reasoning applied to Bit-Reverse above applies to Byte-Swap;
|
|
817
|
+ -- we do not want to sign extend a number whose sign may have changed!
|
|
818
|
+ MO_BSwap _ -> Unsigned
|
|
819
|
+
|
|
820
|
+ -- All other cases, default to preserving the numeric sign when extending.
|
819
|
821
|
_ -> Signed
|
820
|
822
|
|
821
|
823
|
-- | Decide what C function to use to implement a CallishMachOp
|
... |
... |
@@ -954,8 +956,8 @@ cmmPrimOpFunctions mop = do |
954
|
956
|
W256 -> fsLit "llvm.x86.bmi.pdep.256"
|
955
|
957
|
W512 -> fsLit "llvm.x86.bmi.pdep.512"
|
956
|
958
|
| otherwise -> case w of
|
957
|
|
- W8 -> fsLit "hs_pdep8"
|
958
|
|
- W16 -> fsLit "hs_pdep16"
|
|
959
|
+ W8 -> fsLit "hs_pdep32"
|
|
960
|
+ W16 -> fsLit "hs_pdep32"
|
959
|
961
|
W32 -> fsLit "hs_pdep32"
|
960
|
962
|
W64 -> fsLit "hs_pdep64"
|
961
|
963
|
W128 -> fsLit "hs_pdep128"
|
... |
... |
@@ -971,8 +973,8 @@ cmmPrimOpFunctions mop = do |
971
|
973
|
W256 -> fsLit "llvm.x86.bmi.pext.256"
|
972
|
974
|
W512 -> fsLit "llvm.x86.bmi.pext.512"
|
973
|
975
|
| otherwise -> case w of
|
974
|
|
- W8 -> fsLit "hs_pext8"
|
975
|
|
- W16 -> fsLit "hs_pext16"
|
|
976
|
+ W8 -> fsLit "hs_pext32"
|
|
977
|
+ W16 -> fsLit "hs_pext32"
|
976
|
978
|
W32 -> fsLit "hs_pext32"
|
977
|
979
|
W64 -> fsLit "hs_pext64"
|
978
|
980
|
W128 -> fsLit "hs_pext128"
|