| ... |
... |
@@ -3,6 +3,7 @@ |
|
3
|
3
|
{-# LANGUAGE BangPatterns #-}
|
|
4
|
4
|
{-# LANGUAGE BinaryLiterals #-}
|
|
5
|
5
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
6
|
+{-# LANGUAGE MultiWayIf #-}
|
|
6
|
7
|
module GHC.CmmToAsm.LA64.CodeGen (
|
|
7
|
8
|
cmmTopCodeGen
|
|
8
|
9
|
, generateJumpTableForInstr
|
| ... |
... |
@@ -268,8 +269,10 @@ stmtToInstrs stmt = do |
|
268
|
269
|
config <- getConfig
|
|
269
|
270
|
platform <- getPlatform
|
|
270
|
271
|
case stmt of
|
|
271
|
|
- CmmUnsafeForeignCall target result_regs args
|
|
272
|
|
- -> genCCall target result_regs args
|
|
|
272
|
+ CmmUnsafeForeignCall target result_regs args ->
|
|
|
273
|
+ case target of
|
|
|
274
|
+ PrimTarget primOp -> genPrim primOp result_regs args
|
|
|
275
|
+ ForeignTarget addr conv -> genCCall addr conv result_regs args
|
|
273
|
276
|
|
|
274
|
277
|
CmmComment s -> return (unitOL (COMMENT (ftext s)))
|
|
275
|
278
|
CmmTick {} -> return nilOL
|
| ... |
... |
@@ -1631,6 +1634,319 @@ genCondBranch true false expr = do |
|
1631
|
1634
|
b2 <- genBranch false
|
|
1632
|
1635
|
return (b1 `appOL` b2)
|
|
1633
|
1636
|
|
|
|
1637
|
+genPrim
|
|
|
1638
|
+ :: CallishMachOp -- MachOp
|
|
|
1639
|
+ -> [CmmFormal] -- where to put the result
|
|
|
1640
|
+ -> [CmmActual] -- arguments (of mixed type)
|
|
|
1641
|
+ -> NatM InstrBlock
|
|
|
1642
|
+
|
|
|
1643
|
+genPrim MO_F32_Fabs [dst] [src] = genFloatAbs W32 dst src
|
|
|
1644
|
+genPrim MO_F64_Fabs [dst] [src] = genFloatAbs W64 dst src
|
|
|
1645
|
+genPrim MO_F32_Sqrt [dst] [src] = genFloatSqrt FF32 dst src
|
|
|
1646
|
+genPrim MO_F64_Sqrt [dst] [src] = genFloatSqrt FF64 dst src
|
|
|
1647
|
+genPrim (MO_Clz width) [dst] [src] = genClz width dst src
|
|
|
1648
|
+genPrim (MO_Ctz width) [dst] [src] = genCtz width dst src
|
|
|
1649
|
+genPrim (MO_BSwap width) [dst] [src] = genByteSwap width dst src
|
|
|
1650
|
+genPrim (MO_BRev width) [dst] [src] = genBitRev width dst src
|
|
|
1651
|
+genPrim MO_AcquireFence [] [] = return $ unitOL (DBAR HintAcquire)
|
|
|
1652
|
+genPrim MO_ReleaseFence [] [] = return $ unitOL (DBAR HintRelease)
|
|
|
1653
|
+genPrim MO_SeqCstFence [] [] = return $ unitOL (DBAR HintSeqcst)
|
|
|
1654
|
+genPrim MO_Touch [] [_] = return nilOL
|
|
|
1655
|
+genPrim (MO_Prefetch_Data _n) [] [_] = return nilOL
|
|
|
1656
|
+genPrim (MO_AtomicRead w mo) [dst] [addr] = genAtomicRead w mo dst addr
|
|
|
1657
|
+genPrim (MO_AtomicWrite w mo) [] [addr,val] = genAtomicWrite w mo addr val
|
|
|
1658
|
+
|
|
|
1659
|
+genPrim mop@(MO_S_Mul2 _w) _ _ = unsupported mop
|
|
|
1660
|
+genPrim mop@(MO_S_QuotRem _w) _ _ = unsupported mop
|
|
|
1661
|
+genPrim mop@(MO_U_QuotRem _w) _ _ = unsupported mop
|
|
|
1662
|
+genPrim mop@(MO_U_QuotRem2 _w) _ _ = unsupported mop
|
|
|
1663
|
+genPrim mop@(MO_Add2 _w) _ _ = unsupported mop
|
|
|
1664
|
+genPrim mop@(MO_AddWordC _w) _ _ = unsupported mop
|
|
|
1665
|
+genPrim mop@(MO_SubWordC _w) _ _ = unsupported mop
|
|
|
1666
|
+genPrim mop@(MO_AddIntC _w) _ _ = unsupported mop
|
|
|
1667
|
+genPrim mop@(MO_SubIntC _w) _ _ = unsupported mop
|
|
|
1668
|
+genPrim mop@(MO_U_Mul2 _w) _ _ = unsupported mop
|
|
|
1669
|
+genPrim mop@MO_I64X2_Min _ _ = unsupported mop
|
|
|
1670
|
+genPrim mop@MO_I64X2_Max _ _ = unsupported mop
|
|
|
1671
|
+genPrim mop@MO_W64X2_Min _ _ = unsupported mop
|
|
|
1672
|
+genPrim mop@MO_W64X2_Max _ _ = unsupported mop
|
|
|
1673
|
+genPrim mop@MO_VS_Quot {} _ _ = unsupported mop
|
|
|
1674
|
+genPrim mop@MO_VS_Rem {} _ _ = unsupported mop
|
|
|
1675
|
+genPrim mop@MO_VU_Quot {} _ _ = unsupported mop
|
|
|
1676
|
+genPrim mop@MO_VU_Rem {} _ _ = unsupported mop
|
|
|
1677
|
+
|
|
|
1678
|
+genPrim (MO_PopCnt width) [dst] [src] = genLibCCall (popCntLabel width) [dst] [src]
|
|
|
1679
|
+genPrim (MO_Pdep width) [dst] [src,mask] = genLibCCall (pdepLabel width) [dst] [src,mask]
|
|
|
1680
|
+genPrim (MO_Pext width) [dst] [src,mask] = genLibCCall (pextLabel width) [dst] [src,mask]
|
|
|
1681
|
+genPrim (MO_UF_Conv width) [dst] [src] = genLibCCall (word2FloatLabel width) [dst] [src]
|
|
|
1682
|
+genPrim (MO_AtomicRMW width amop) [dst] [addr,n] = genLibCCall (atomicRMWLabel width amop) [dst] [addr,n]
|
|
|
1683
|
+genPrim (MO_Cmpxchg width) [dst] [addr,old,new] = genLibCCall (cmpxchgLabel width) [dst] [addr,old,new]
|
|
|
1684
|
+genPrim (MO_Xchg width) [dst] [addr,val] = genLibCCall (xchgLabel width) [dst] [addr,val]
|
|
|
1685
|
+genPrim (MO_Memcpy _align) [] [dst,src,n] = genLibCCall (fsLit "memcpy") [] [dst,src,n]
|
|
|
1686
|
+genPrim (MO_Memmove _align) [] [dst,src,n] = genLibCCall (fsLit "memmove") [] [dst,src,n]
|
|
|
1687
|
+genPrim (MO_Memcmp _align) [rst] [dst,src,n] = genLibCCall (fsLit "memcmp") [rst] [dst,src,n]
|
|
|
1688
|
+genPrim (MO_Memset _align) [] [dst,cnt,n] = genLibCCall (fsLit "memset") [] [dst,cnt,n]
|
|
|
1689
|
+genPrim MO_F32_Sin [dst] [src] = genLibCCall (fsLit "sinf") [dst] [src]
|
|
|
1690
|
+genPrim MO_F32_Cos [dst] [src] = genLibCCall (fsLit "cosf") [dst] [src]
|
|
|
1691
|
+genPrim MO_F32_Tan [dst] [src] = genLibCCall (fsLit "tanf") [dst] [src]
|
|
|
1692
|
+genPrim MO_F32_Exp [dst] [src] = genLibCCall (fsLit "expf") [dst] [src]
|
|
|
1693
|
+genPrim MO_F32_ExpM1 [dst] [src] = genLibCCall (fsLit "expm1f") [dst] [src]
|
|
|
1694
|
+genPrim MO_F32_Log [dst] [src] = genLibCCall (fsLit "logf") [dst] [src]
|
|
|
1695
|
+genPrim MO_F32_Log1P [dst] [src] = genLibCCall (fsLit "log1pf") [dst] [src]
|
|
|
1696
|
+genPrim MO_F32_Asin [dst] [src] = genLibCCall (fsLit "asinf") [dst] [src]
|
|
|
1697
|
+genPrim MO_F32_Acos [dst] [src] = genLibCCall (fsLit "acosf") [dst] [src]
|
|
|
1698
|
+genPrim MO_F32_Atan [dst] [src] = genLibCCall (fsLit "atanf") [dst] [src]
|
|
|
1699
|
+genPrim MO_F32_Sinh [dst] [src] = genLibCCall (fsLit "sinhf") [dst] [src]
|
|
|
1700
|
+genPrim MO_F32_Cosh [dst] [src] = genLibCCall (fsLit "coshf") [dst] [src]
|
|
|
1701
|
+genPrim MO_F32_Tanh [dst] [src] = genLibCCall (fsLit "tanhf") [dst] [src]
|
|
|
1702
|
+genPrim MO_F32_Pwr [dst] [x,y] = genLibCCall (fsLit "powf") [dst] [x,y]
|
|
|
1703
|
+genPrim MO_F32_Asinh [dst] [src] = genLibCCall (fsLit "asinhf") [dst] [src]
|
|
|
1704
|
+genPrim MO_F32_Acosh [dst] [src] = genLibCCall (fsLit "acoshf") [dst] [src]
|
|
|
1705
|
+genPrim MO_F32_Atanh [dst] [src] = genLibCCall (fsLit "atanhf") [dst] [src]
|
|
|
1706
|
+genPrim MO_F64_Sin [dst] [src] = genLibCCall (fsLit "sin") [dst] [src]
|
|
|
1707
|
+genPrim MO_F64_Cos [dst] [src] = genLibCCall (fsLit "cos") [dst] [src]
|
|
|
1708
|
+genPrim MO_F64_Tan [dst] [src] = genLibCCall (fsLit "tan") [dst] [src]
|
|
|
1709
|
+genPrim MO_F64_Exp [dst] [src] = genLibCCall (fsLit "exp") [dst] [src]
|
|
|
1710
|
+genPrim MO_F64_ExpM1 [dst] [src] = genLibCCall (fsLit "expm1") [dst] [src]
|
|
|
1711
|
+genPrim MO_F64_Log [dst] [src] = genLibCCall (fsLit "log") [dst] [src]
|
|
|
1712
|
+genPrim MO_F64_Log1P [dst] [src] = genLibCCall (fsLit "log1p") [dst] [src]
|
|
|
1713
|
+genPrim MO_F64_Asin [dst] [src] = genLibCCall (fsLit "asin") [dst] [src]
|
|
|
1714
|
+genPrim MO_F64_Acos [dst] [src] = genLibCCall (fsLit "acos") [dst] [src]
|
|
|
1715
|
+genPrim MO_F64_Atan [dst] [src] = genLibCCall (fsLit "atan") [dst] [src]
|
|
|
1716
|
+genPrim MO_F64_Sinh [dst] [src] = genLibCCall (fsLit "sinh") [dst] [src]
|
|
|
1717
|
+genPrim MO_F64_Cosh [dst] [src] = genLibCCall (fsLit "cosh") [dst] [src]
|
|
|
1718
|
+genPrim MO_F64_Tanh [dst] [src] = genLibCCall (fsLit "tanh") [dst] [src]
|
|
|
1719
|
+genPrim MO_F64_Pwr [dst] [x,y] = genLibCCall (fsLit "pow") [dst] [x,y]
|
|
|
1720
|
+genPrim MO_F64_Asinh [dst] [src] = genLibCCall (fsLit "asinh") [dst] [src]
|
|
|
1721
|
+genPrim MO_F64_Acosh [dst] [src] = genLibCCall (fsLit "acosh") [dst] [src]
|
|
|
1722
|
+genPrim MO_F64_Atanh [dst] [src] = genLibCCall (fsLit "atanh") [dst] [src]
|
|
|
1723
|
+genPrim MO_SuspendThread [tok] [rs,i] = genLibCCall (fsLit "suspendThread") [tok] [rs,i]
|
|
|
1724
|
+genPrim MO_ResumeThread [rs] [tok] = genLibCCall (fsLit "resumeThread") [rs] [tok]
|
|
|
1725
|
+genPrim MO_I64_ToI [dst] [src] = genLibCCall (fsLit "hs_int64ToInt") [dst] [src]
|
|
|
1726
|
+genPrim MO_I64_FromI [dst] [src] = genLibCCall (fsLit "hs_intToInt64") [dst] [src]
|
|
|
1727
|
+genPrim MO_W64_ToW [dst] [src] = genLibCCall (fsLit "hs_word64ToWord") [dst] [src]
|
|
|
1728
|
+genPrim MO_W64_FromW [dst] [src] = genLibCCall (fsLit "hs_wordToWord64") [dst] [src]
|
|
|
1729
|
+genPrim MO_x64_Neg [dst] [src] = genLibCCall (fsLit "hs_neg64") [dst] [src]
|
|
|
1730
|
+genPrim MO_x64_Add [dst] [src] = genLibCCall (fsLit "hs_add64") [dst] [src]
|
|
|
1731
|
+genPrim MO_x64_Sub [dst] [src] = genLibCCall (fsLit "hs_sub64") [dst] [src]
|
|
|
1732
|
+genPrim MO_x64_Mul [dst] [src] = genLibCCall (fsLit "hs_mul64") [dst] [src]
|
|
|
1733
|
+genPrim MO_I64_Quot [dst] [src] = genLibCCall (fsLit "hs_quotInt64") [dst] [src]
|
|
|
1734
|
+genPrim MO_I64_Rem [dst] [src] = genLibCCall (fsLit "hs_remInt64") [dst] [src]
|
|
|
1735
|
+genPrim MO_W64_Quot [dst] [src] = genLibCCall (fsLit "hs_quotWord64") [dst] [src]
|
|
|
1736
|
+genPrim MO_W64_Rem [dst] [src] = genLibCCall (fsLit "hs_remWord64") [dst] [src]
|
|
|
1737
|
+genPrim MO_x64_And [dst] [src] = genLibCCall (fsLit "hs_and64") [dst] [src]
|
|
|
1738
|
+genPrim MO_x64_Or [dst] [src] = genLibCCall (fsLit "hs_or64") [dst] [src]
|
|
|
1739
|
+genPrim MO_x64_Xor [dst] [src] = genLibCCall (fsLit "hs_xor64") [dst] [src]
|
|
|
1740
|
+genPrim MO_x64_Not [dst] [src] = genLibCCall (fsLit "hs_not64") [dst] [src]
|
|
|
1741
|
+genPrim MO_x64_Shl [dst] [src] = genLibCCall (fsLit "hs_uncheckedShiftL64") [dst] [src]
|
|
|
1742
|
+genPrim MO_I64_Shr [dst] [src] = genLibCCall (fsLit "hs_uncheckedIShiftRA64") [dst] [src]
|
|
|
1743
|
+genPrim MO_W64_Shr [dst] [src] = genLibCCall (fsLit "hs_uncheckedShiftRL64") [dst] [src]
|
|
|
1744
|
+genPrim MO_x64_Eq [dst] [src] = genLibCCall (fsLit "hs_eq64") [dst] [src]
|
|
|
1745
|
+genPrim MO_x64_Ne [dst] [src] = genLibCCall (fsLit "hs_ne64") [dst] [src]
|
|
|
1746
|
+genPrim MO_I64_Ge [dst] [src] = genLibCCall (fsLit "hs_geInt64") [dst] [src]
|
|
|
1747
|
+genPrim MO_I64_Gt [dst] [src] = genLibCCall (fsLit "hs_gtInt64") [dst] [src]
|
|
|
1748
|
+genPrim MO_I64_Le [dst] [src] = genLibCCall (fsLit "hs_leInt64") [dst] [src]
|
|
|
1749
|
+genPrim MO_I64_Lt [dst] [src] = genLibCCall (fsLit "hs_ltInt64") [dst] [src]
|
|
|
1750
|
+genPrim MO_W64_Ge [dst] [src] = genLibCCall (fsLit "hs_geWord64") [dst] [src]
|
|
|
1751
|
+genPrim MO_W64_Gt [dst] [src] = genLibCCall (fsLit "hs_gtWord64") [dst] [src]
|
|
|
1752
|
+genPrim MO_W64_Le [dst] [src] = genLibCCall (fsLit "hs_leWord64") [dst] [src]
|
|
|
1753
|
+genPrim MO_W64_Lt [dst] [src] = genLibCCall (fsLit "hs_ltWord64") [dst] [src]
|
|
|
1754
|
+genPrim op dst args = do
|
|
|
1755
|
+ platform <- ncgPlatform <$> getConfig
|
|
|
1756
|
+ pprPanic "genPrim: unknown primOp" (ppr (pprCallishMachOp op, dst, fmap (pdoc platform) args))
|
|
|
1757
|
+
|
|
|
1758
|
+
|
|
|
1759
|
+genFloatAbs :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
|
|
|
1760
|
+genFloatAbs w dst src = do
|
|
|
1761
|
+ platform <- getPlatform
|
|
|
1762
|
+ (reg_fx, _, code_fx) <- getFloatReg src
|
|
|
1763
|
+ let dst_reg = getRegisterReg platform (CmmLocal dst)
|
|
|
1764
|
+ return (code_fx `appOL` toOL
|
|
|
1765
|
+ [
|
|
|
1766
|
+ FABS (OpReg w dst_reg) (OpReg w reg_fx)
|
|
|
1767
|
+ ]
|
|
|
1768
|
+ )
|
|
|
1769
|
+
|
|
|
1770
|
+genFloatSqrt :: Format -> LocalReg -> CmmExpr -> NatM InstrBlock
|
|
|
1771
|
+genFloatSqrt f dst src = do
|
|
|
1772
|
+ platform <- getPlatform
|
|
|
1773
|
+ (reg_fx, _, code_fx) <- getFloatReg src
|
|
|
1774
|
+ let dst_reg = getRegisterReg platform (CmmLocal dst)
|
|
|
1775
|
+ return (code_fx `appOL` toOL
|
|
|
1776
|
+ [
|
|
|
1777
|
+ FSQRT (OpReg w dst_reg) (OpReg w reg_fx)
|
|
|
1778
|
+ ]
|
|
|
1779
|
+ )
|
|
|
1780
|
+ where
|
|
|
1781
|
+ w = case f of
|
|
|
1782
|
+ FF32 -> W32
|
|
|
1783
|
+ _ -> W64
|
|
|
1784
|
+
|
|
|
1785
|
+genClz :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
|
|
|
1786
|
+genClz w dst src = do
|
|
|
1787
|
+ platform <- getPlatform
|
|
|
1788
|
+ (reg_x, _, code_x) <- getSomeReg src
|
|
|
1789
|
+ let dst_reg = getRegisterReg platform (CmmLocal dst)
|
|
|
1790
|
+ if w `elem` [W32, W64] then do
|
|
|
1791
|
+ return (code_x `snocOL` CLZ (OpReg w dst_reg) (OpReg w reg_x))
|
|
|
1792
|
+ else if w `elem` [W8, W16] then do
|
|
|
1793
|
+ return (code_x `appOL` toOL
|
|
|
1794
|
+ [
|
|
|
1795
|
+ MOV (OpReg W64 dst_reg) (OpImm (ImmInt 1)),
|
|
|
1796
|
+ SLL (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpImm (ImmInt (31-shift))),
|
|
|
1797
|
+ SLL (OpReg W64 reg_x) (OpReg W64 reg_x) (OpImm (ImmInt (32-shift))),
|
|
|
1798
|
+ OR (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpReg W64 reg_x),
|
|
|
1799
|
+ CLZ (OpReg W64 dst_reg) (OpReg W32 dst_reg)
|
|
|
1800
|
+ ]
|
|
|
1801
|
+ )
|
|
|
1802
|
+ else do
|
|
|
1803
|
+ pprPanic "genClz: invalid width: " (ppr w)
|
|
|
1804
|
+ where
|
|
|
1805
|
+ shift = widthToInt w
|
|
|
1806
|
+
|
|
|
1807
|
+genCtz :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
|
|
|
1808
|
+genCtz w dst src = do
|
|
|
1809
|
+ platform <- getPlatform
|
|
|
1810
|
+ (reg_x, _, code_x) <- getSomeReg src
|
|
|
1811
|
+ let dst_reg = getRegisterReg platform (CmmLocal dst)
|
|
|
1812
|
+ if w `elem` [W32, W64] then do
|
|
|
1813
|
+ return (code_x `snocOL` CTZ (OpReg w dst_reg) (OpReg w reg_x))
|
|
|
1814
|
+ else if w `elem` [W8, W16] then do
|
|
|
1815
|
+ return (code_x `appOL` toOL
|
|
|
1816
|
+ [
|
|
|
1817
|
+ MOV (OpReg W64 dst_reg) (OpImm (ImmInt 1)),
|
|
|
1818
|
+ SLL (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpImm (ImmInt shift)),
|
|
|
1819
|
+ BSTRPICK II64 (OpReg W64 reg_x) (OpReg W64 reg_x) (OpImm (ImmInt (shift-1))) (OpImm (ImmInt 0)),
|
|
|
1820
|
+ OR (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpReg W64 reg_x),
|
|
|
1821
|
+ CTZ (OpReg W64 dst_reg) (OpReg W64 dst_reg)
|
|
|
1822
|
+ ]
|
|
|
1823
|
+ )
|
|
|
1824
|
+ else do
|
|
|
1825
|
+ pprPanic "genCtz: invalid width: " (ppr w)
|
|
|
1826
|
+ where
|
|
|
1827
|
+ shift = (widthToInt w)
|
|
|
1828
|
+
|
|
|
1829
|
+genByteSwap :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
|
|
|
1830
|
+genByteSwap w dst src = do
|
|
|
1831
|
+ platform <- getPlatform
|
|
|
1832
|
+ (reg_x, _, code_x) <- getSomeReg src
|
|
|
1833
|
+ let dst_reg = getRegisterReg platform (CmmLocal dst)
|
|
|
1834
|
+ case w of
|
|
|
1835
|
+ W64 ->
|
|
|
1836
|
+ return (code_x `appOL` toOL
|
|
|
1837
|
+ [
|
|
|
1838
|
+ REVBD (OpReg w dst_reg) (OpReg w reg_x)
|
|
|
1839
|
+ ]
|
|
|
1840
|
+ )
|
|
|
1841
|
+ W32 ->
|
|
|
1842
|
+ return (code_x `appOL` toOL
|
|
|
1843
|
+ [
|
|
|
1844
|
+ REVB2W (OpReg w dst_reg) (OpReg w reg_x)
|
|
|
1845
|
+ ]
|
|
|
1846
|
+ )
|
|
|
1847
|
+ W16 ->
|
|
|
1848
|
+ return (code_x `appOL` toOL
|
|
|
1849
|
+ [
|
|
|
1850
|
+ REVB2H (OpReg w dst_reg) (OpReg w reg_x)
|
|
|
1851
|
+ ]
|
|
|
1852
|
+ )
|
|
|
1853
|
+ _ -> pprPanic "genBSwap: invalid width: " (ppr w)
|
|
|
1854
|
+
|
|
|
1855
|
+genBitRev :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
|
|
|
1856
|
+genBitRev w dst src = do
|
|
|
1857
|
+ platform <- getPlatform
|
|
|
1858
|
+ (reg_x, _, code_x) <- getSomeReg src
|
|
|
1859
|
+ let dst_reg = getRegisterReg platform (CmmLocal dst)
|
|
|
1860
|
+ case w of
|
|
|
1861
|
+ W8 ->
|
|
|
1862
|
+ return (code_x `appOL` toOL
|
|
|
1863
|
+ [
|
|
|
1864
|
+ BITREV4B (OpReg W32 reg_x) (OpReg W32 reg_x),
|
|
|
1865
|
+ AND (OpReg W64 dst_reg) (OpReg W64 reg_x) (OpImm (ImmInt 255))
|
|
|
1866
|
+ ]
|
|
|
1867
|
+ )
|
|
|
1868
|
+ W16 ->
|
|
|
1869
|
+ return (code_x `appOL` toOL
|
|
|
1870
|
+ [
|
|
|
1871
|
+ BITREV (OpReg W64 reg_x) (OpReg W64 reg_x),
|
|
|
1872
|
+ SRL (OpReg W64 dst_reg) (OpReg W64 reg_x) (OpImm (ImmInt 48))
|
|
|
1873
|
+ ]
|
|
|
1874
|
+ )
|
|
|
1875
|
+ _ -> return ( code_x `snocOL` BITREV (OpReg w dst_reg) (OpReg w reg_x))
|
|
|
1876
|
+
|
|
|
1877
|
+-- Generate C call to the given function in libc
|
|
|
1878
|
+genLibCCall :: FastString -> [CmmFormal] -> [CmmActual] -> NatM InstrBlock
|
|
|
1879
|
+genLibCCall name dsts args = do
|
|
|
1880
|
+ config <- getConfig
|
|
|
1881
|
+ target <-
|
|
|
1882
|
+ cmmMakeDynamicReference config CallReference
|
|
|
1883
|
+ $ mkForeignLabel name ForeignLabelInThisPackage IsFunction
|
|
|
1884
|
+ let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
|
|
|
1885
|
+ genCCall target cconv dsts args
|
|
|
1886
|
+
|
|
|
1887
|
+unsupported :: Show a => a -> b
|
|
|
1888
|
+unsupported mop = panic ("outOfLineCmmOp: " ++ show mop
|
|
|
1889
|
+ ++ " not supported here")
|
|
|
1890
|
+
|
|
|
1891
|
+-- AMSWAP_DB* insns implentment a fully functional synchronization barrier, like DBAR 0x0.
|
|
|
1892
|
+-- This is terrible. And AMSWAPDB only supports ISA version greater than LA64V1_0. So,
|
|
|
1893
|
+-- implement with DBAR
|
|
|
1894
|
+genAtomicRead :: Width -> MemoryOrdering -> LocalReg -> CmmExpr -> NatM InstrBlock
|
|
|
1895
|
+genAtomicRead w mo dst arg = do
|
|
|
1896
|
+ (addr_p, _, code_p) <- getSomeReg arg
|
|
|
1897
|
+ platform <- getPlatform
|
|
|
1898
|
+ let d = getRegisterReg platform (CmmLocal dst)
|
|
|
1899
|
+ case mo of
|
|
|
1900
|
+ MemOrderRelaxed ->
|
|
|
1901
|
+ return (code_p `appOL` toOL
|
|
|
1902
|
+ [
|
|
|
1903
|
+ LD (intFormat w) (OpReg w d) (OpAddr $ AddrReg addr_p)
|
|
|
1904
|
+ ]
|
|
|
1905
|
+ )
|
|
|
1906
|
+
|
|
|
1907
|
+ MemOrderAcquire ->
|
|
|
1908
|
+ return (code_p `appOL` toOL
|
|
|
1909
|
+ [
|
|
|
1910
|
+ LD (intFormat w) (OpReg w d) (OpAddr $ AddrReg addr_p),
|
|
|
1911
|
+ DBAR HintAcquire
|
|
|
1912
|
+ ]
|
|
|
1913
|
+ )
|
|
|
1914
|
+ MemOrderSeqCst ->
|
|
|
1915
|
+ return (code_p `appOL` toOL
|
|
|
1916
|
+ [
|
|
|
1917
|
+ LD (intFormat w) (OpReg w d) (OpAddr $ AddrReg addr_p),
|
|
|
1918
|
+ DBAR HintSeqcst
|
|
|
1919
|
+ ]
|
|
|
1920
|
+ )
|
|
|
1921
|
+ _ -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo
|
|
|
1922
|
+
|
|
|
1923
|
+genAtomicWrite :: Width -> MemoryOrdering -> CmmExpr -> CmmExpr -> NatM InstrBlock
|
|
|
1924
|
+genAtomicWrite w mo addr val = do
|
|
|
1925
|
+ (addr_p, _, code_p) <- getSomeReg addr
|
|
|
1926
|
+ (val_reg, fmt_val, code_val) <- getSomeReg val
|
|
|
1927
|
+ case mo of
|
|
|
1928
|
+ MemOrderRelaxed ->
|
|
|
1929
|
+ return (code_p `appOL`code_val `appOL` toOL
|
|
|
1930
|
+ [
|
|
|
1931
|
+ ST fmt_val (OpReg w val_reg) (OpAddr $ AddrReg addr_p)
|
|
|
1932
|
+ ]
|
|
|
1933
|
+ )
|
|
|
1934
|
+ MemOrderRelease ->
|
|
|
1935
|
+ return (code_p `appOL`code_val `appOL` toOL
|
|
|
1936
|
+ [
|
|
|
1937
|
+ DBAR HintRelease,
|
|
|
1938
|
+ ST fmt_val (OpReg w val_reg) (OpAddr $ AddrReg addr_p)
|
|
|
1939
|
+ ]
|
|
|
1940
|
+ )
|
|
|
1941
|
+ MemOrderSeqCst ->
|
|
|
1942
|
+ return (code_p `appOL`code_val `appOL` toOL
|
|
|
1943
|
+ [
|
|
|
1944
|
+ DBAR HintSeqcst,
|
|
|
1945
|
+ ST fmt_val (OpReg w val_reg) (OpAddr $ AddrReg addr_p)
|
|
|
1946
|
+ ]
|
|
|
1947
|
+ )
|
|
|
1948
|
+ _ -> panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
|
|
|
1949
|
+
|
|
1634
|
1950
|
-- -----------------------------------------------------------------------------
|
|
1635
|
1951
|
{-
|
|
1636
|
1952
|
Generating C calls
|
| ... |
... |
@@ -1664,393 +1980,68 @@ wider than FRLEN may be passed in a GAR. |
|
1664
|
1980
|
-}
|
|
1665
|
1981
|
|
|
1666
|
1982
|
genCCall
|
|
1667
|
|
- :: ForeignTarget -- function to call
|
|
1668
|
|
- -> [CmmFormal] -- where to put the result
|
|
1669
|
|
- -> [CmmActual] -- arguments (of mixed type)
|
|
1670
|
|
- -> NatM InstrBlock
|
|
1671
|
|
-
|
|
1672
|
|
--- TODO: Specialize where we can.
|
|
1673
|
|
--- Generic impl
|
|
1674
|
|
-genCCall target dest_regs arg_regs = do
|
|
1675
|
|
- case target of
|
|
1676
|
|
- -- The target :: ForeignTarget call can either
|
|
1677
|
|
- -- be a foreign procedure with an address expr
|
|
1678
|
|
- -- and a calling convention.
|
|
1679
|
|
- ForeignTarget expr _cconv -> do
|
|
1680
|
|
- (call_target, call_target_code) <- case expr of
|
|
1681
|
|
- -- if this is a label, let's just directly to it.
|
|
1682
|
|
- (CmmLit (CmmLabel lbl)) -> pure (TLabel lbl, nilOL)
|
|
1683
|
|
- -- if it's not a label, let's compute the expression into a
|
|
1684
|
|
- -- register and jump to that.
|
|
1685
|
|
- _ -> do
|
|
1686
|
|
- (reg, _format, reg_code) <- getSomeReg expr
|
|
1687
|
|
- pure (TReg reg, reg_code)
|
|
1688
|
|
- -- compute the code and register logic for all arg_regs.
|
|
1689
|
|
- -- this will give us the format information to match on.
|
|
1690
|
|
- arg_regs' <- mapM getSomeReg arg_regs
|
|
1691
|
|
-
|
|
1692
|
|
- -- Now this is stupid. Our Cmm expressions doesn't carry the proper sizes
|
|
1693
|
|
- -- so while in Cmm we might get W64 incorrectly for an int, that is W32 in
|
|
1694
|
|
- -- STG; this thenn breaks packing of stack arguments, if we need to pack
|
|
1695
|
|
- -- for the pcs, e.g. darwinpcs. Option one would be to fix the Int type
|
|
1696
|
|
- -- in Cmm proper. Option two, which we choose here is to use extended Hint
|
|
1697
|
|
- -- information to contain the size information and use that when packing
|
|
1698
|
|
- -- arguments, spilled onto the stack.
|
|
1699
|
|
- let (_res_hints, arg_hints) = foreignTargetHints target
|
|
1700
|
|
- arg_regs'' = zipWith (\(r, f, c) h -> (r,f,h,c)) arg_regs' arg_hints
|
|
1701
|
|
-
|
|
1702
|
|
- (stackSpaceWords, passRegs, passArgumentsCode) <- passArguments allGpArgRegs allFpArgRegs arg_regs'' 0 [] nilOL
|
|
1703
|
|
-
|
|
1704
|
|
- readResultsCode <- readResults allGpArgRegs allFpArgRegs dest_regs [] nilOL
|
|
1705
|
|
-
|
|
1706
|
|
- let moveStackDown 0 = toOL [ PUSH_STACK_FRAME
|
|
1707
|
|
- , DELTA (-16)
|
|
1708
|
|
- ]
|
|
1709
|
|
- moveStackDown i | odd i = moveStackDown (i + 1)
|
|
1710
|
|
- moveStackDown i = toOL [ PUSH_STACK_FRAME
|
|
1711
|
|
- , SUB (OpReg W64 (spMachReg)) (OpReg W64 (spMachReg)) (OpImm (ImmInt (8 * i)))
|
|
1712
|
|
- , DELTA (-8 * i - 16)
|
|
1713
|
|
- ]
|
|
1714
|
|
- moveStackUp 0 = toOL [ POP_STACK_FRAME
|
|
1715
|
|
- , DELTA 0
|
|
1716
|
|
- ]
|
|
1717
|
|
- moveStackUp i | odd i = moveStackUp (i + 1)
|
|
1718
|
|
- moveStackUp i = toOL [ ADD (OpReg W64 (spMachReg)) (OpReg W64 (spMachReg)) (OpImm (ImmInt (8 * i)))
|
|
1719
|
|
- , POP_STACK_FRAME
|
|
1720
|
|
- , DELTA 0
|
|
1721
|
|
- ]
|
|
1722
|
|
-
|
|
1723
|
|
- let code =
|
|
1724
|
|
- call_target_code -- compute the label (possibly into a register)
|
|
1725
|
|
- `appOL` moveStackDown (stackSpaceWords)
|
|
1726
|
|
- `appOL` passArgumentsCode -- put the arguments into x0, ...
|
|
1727
|
|
- `snocOL` CALL call_target passRegs -- branch and link (C calls aren't tail calls, but return)
|
|
1728
|
|
- `appOL` readResultsCode -- parse the results into registers
|
|
1729
|
|
- `appOL` moveStackUp (stackSpaceWords)
|
|
1730
|
|
- return code
|
|
1731
|
|
-
|
|
1732
|
|
- PrimTarget MO_F32_Fabs
|
|
1733
|
|
- | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
|
|
1734
|
|
- unaryFloatOp W32 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
|
|
1735
|
|
- | otherwise -> panic "mal-formed MO_F32_Fabs"
|
|
1736
|
|
- PrimTarget MO_F64_Fabs
|
|
1737
|
|
- | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
|
|
1738
|
|
- unaryFloatOp W64 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
|
|
1739
|
|
- | otherwise -> panic "mal-formed MO_F64_Fabs"
|
|
1740
|
|
-
|
|
1741
|
|
- PrimTarget MO_F32_Sqrt
|
|
1742
|
|
- | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
|
|
1743
|
|
- unaryFloatOp W32 (\d x -> unitOL $ FSQRT d x) arg_reg dest_reg
|
|
1744
|
|
- | otherwise -> panic "mal-formed MO_F32_Sqrt"
|
|
1745
|
|
- PrimTarget MO_F64_Sqrt
|
|
1746
|
|
- | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
|
|
1747
|
|
- unaryFloatOp W64 (\d x -> unitOL $ FSQRT d x) arg_reg dest_reg
|
|
1748
|
|
- | otherwise -> panic "mal-formed MO_F64_Sqrt"
|
|
1749
|
|
-
|
|
1750
|
|
- PrimTarget (MO_Clz w)
|
|
1751
|
|
- | w `elem` [W32, W64],
|
|
1752
|
|
- [arg_reg] <- arg_regs,
|
|
1753
|
|
- [dest_reg] <- dest_regs -> do
|
|
1754
|
|
- platform <- getPlatform
|
|
1755
|
|
- (reg_x, _format_x, code_x) <- getSomeReg arg_reg
|
|
1756
|
|
- let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
|
|
1757
|
|
- return ( code_x `snocOL`
|
|
1758
|
|
- CLZ (OpReg w dst_reg) (OpReg w reg_x)
|
|
1759
|
|
- )
|
|
1760
|
|
- | w `elem` [W8, W16],
|
|
1761
|
|
- [arg_reg] <- arg_regs,
|
|
1762
|
|
- [dest_reg] <- dest_regs -> do
|
|
1763
|
|
- platform <- getPlatform
|
|
1764
|
|
- (reg_x, _format_x, code_x) <- getSomeReg arg_reg
|
|
1765
|
|
- let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
|
|
1766
|
|
- return ( code_x `appOL` toOL
|
|
1767
|
|
- [
|
|
1768
|
|
- MOV (OpReg W64 dst_reg) (OpImm (ImmInt 1)),
|
|
1769
|
|
- SLL (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpImm (ImmInt (31-shift))),
|
|
1770
|
|
- SLL (OpReg W64 reg_x) (OpReg W64 reg_x) (OpImm (ImmInt (32-shift))),
|
|
1771
|
|
- OR (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpReg W64 reg_x),
|
|
1772
|
|
- CLZ (OpReg W64 dst_reg) (OpReg W32 dst_reg)
|
|
1773
|
|
- ]
|
|
1774
|
|
- )
|
|
1775
|
|
- | otherwise -> unsupported (MO_Clz w)
|
|
1776
|
|
- where
|
|
1777
|
|
- shift = widthToInt w
|
|
1778
|
|
-
|
|
1779
|
|
- PrimTarget (MO_Ctz w)
|
|
1780
|
|
- | w `elem` [W32, W64],
|
|
1781
|
|
- [arg_reg] <- arg_regs,
|
|
1782
|
|
- [dest_reg] <- dest_regs -> do
|
|
1783
|
|
- platform <- getPlatform
|
|
1784
|
|
- (reg_x, _format_x, code_x) <- getSomeReg arg_reg
|
|
1785
|
|
- let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
|
|
1786
|
|
- return ( code_x `snocOL`
|
|
1787
|
|
- CTZ (OpReg w dst_reg) (OpReg w reg_x)
|
|
1788
|
|
- )
|
|
1789
|
|
- | w `elem` [W8, W16],
|
|
1790
|
|
- [arg_reg] <- arg_regs,
|
|
1791
|
|
- [dest_reg] <- dest_regs -> do
|
|
1792
|
|
- platform <- getPlatform
|
|
1793
|
|
- (reg_x, _format_x, code_x) <- getSomeReg arg_reg
|
|
1794
|
|
- let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
|
|
1795
|
|
- return ( code_x `appOL` toOL
|
|
1796
|
|
- [
|
|
1797
|
|
- MOV (OpReg W64 dst_reg) (OpImm (ImmInt 1)),
|
|
1798
|
|
- SLL (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpImm (ImmInt shift)),
|
|
1799
|
|
- BSTRPICK II64 (OpReg W64 reg_x) (OpReg W64 reg_x) (OpImm (ImmInt (shift-1))) (OpImm (ImmInt 0)),
|
|
1800
|
|
- OR (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpReg W64 reg_x),
|
|
1801
|
|
- CTZ (OpReg W64 dst_reg) (OpReg W64 dst_reg)
|
|
1802
|
|
- ]
|
|
1803
|
|
- )
|
|
1804
|
|
- | otherwise -> unsupported (MO_Ctz w)
|
|
1805
|
|
- where
|
|
1806
|
|
- shift = (widthToInt w)
|
|
|
1983
|
+ :: CmmExpr -- address of func call
|
|
|
1984
|
+ -> ForeignConvention -- calling convention
|
|
|
1985
|
+ -> [CmmFormal] -- results
|
|
|
1986
|
+ -> [CmmActual] -- arguments
|
|
|
1987
|
+ -> NatM InstrBlock
|
|
|
1988
|
+
|
|
|
1989
|
+
|
|
|
1990
|
+genCCall expr _conv@(ForeignConvention _ argHints _resHints _) dest_regs arg_regs = do
|
|
|
1991
|
+ (call_target, call_target_code) <- case expr of
|
|
|
1992
|
+ -- if this is a label, let's just directly to it.
|
|
|
1993
|
+ (CmmLit (CmmLabel lbl)) -> pure (TLabel lbl, nilOL)
|
|
|
1994
|
+ -- if it's not a label, let's compute the expression into a
|
|
|
1995
|
+ -- register and jump to that.
|
|
|
1996
|
+ _ -> do
|
|
|
1997
|
+ (reg, _format, reg_code) <- getSomeReg expr
|
|
|
1998
|
+ pure (TReg reg, reg_code)
|
|
|
1999
|
+ -- compute the code and register logic for all arg_regs.
|
|
|
2000
|
+ -- this will give us the format information to match on.
|
|
|
2001
|
+ arg_regs' <- mapM getSomeReg arg_regs
|
|
|
2002
|
+
|
|
|
2003
|
+ -- Now this is stupid. Our Cmm expressions doesn't carry the proper sizes
|
|
|
2004
|
+ -- so while in Cmm we might get W64 incorrectly for an int, that is W32 in
|
|
|
2005
|
+ -- STG; this thenn breaks packing of stack arguments, if we need to pack
|
|
|
2006
|
+ -- for the pcs, e.g. darwinpcs. Option one would be to fix the Int type
|
|
|
2007
|
+ -- in Cmm proper. Option two, which we choose here is to use extended Hint
|
|
|
2008
|
+ -- information to contain the size information and use that when packing
|
|
|
2009
|
+ -- arguments, spilled onto the stack.
|
|
|
2010
|
+ let
|
|
|
2011
|
+ arg_hints = take (length arg_regs) (argHints ++ repeat NoHint)
|
|
|
2012
|
+ arg_regs'' = zipWith (\(r, f, c) h -> (r,f,h,c)) arg_regs' arg_hints
|
|
|
2013
|
+
|
|
|
2014
|
+ (stackSpaceWords, passRegs, passArgumentsCode) <- passArguments allGpArgRegs allFpArgRegs arg_regs'' 0 [] nilOL
|
|
|
2015
|
+
|
|
|
2016
|
+ readResultsCode <- readResults allGpArgRegs allFpArgRegs dest_regs [] nilOL
|
|
|
2017
|
+
|
|
|
2018
|
+ let moveStackDown 0 = toOL [ PUSH_STACK_FRAME
|
|
|
2019
|
+ , DELTA (-16)
|
|
|
2020
|
+ ]
|
|
|
2021
|
+ moveStackDown i | odd i = moveStackDown (i + 1)
|
|
|
2022
|
+ moveStackDown i = toOL [ PUSH_STACK_FRAME
|
|
|
2023
|
+ , SUB (OpReg W64 (spMachReg)) (OpReg W64 (spMachReg)) (OpImm (ImmInt (8 * i)))
|
|
|
2024
|
+ , DELTA (-8 * i - 16)
|
|
|
2025
|
+ ]
|
|
|
2026
|
+ moveStackUp 0 = toOL [ POP_STACK_FRAME
|
|
|
2027
|
+ , DELTA 0
|
|
|
2028
|
+ ]
|
|
|
2029
|
+ moveStackUp i | odd i = moveStackUp (i + 1)
|
|
|
2030
|
+ moveStackUp i = toOL [ ADD (OpReg W64 (spMachReg)) (OpReg W64 (spMachReg)) (OpImm (ImmInt (8 * i)))
|
|
|
2031
|
+ , POP_STACK_FRAME
|
|
|
2032
|
+ , DELTA 0
|
|
|
2033
|
+ ]
|
|
1807
|
2034
|
|
|
1808
|
|
- PrimTarget (MO_BSwap w)
|
|
1809
|
|
- | w `elem` [W16, W32, W64],
|
|
1810
|
|
- [arg_reg] <- arg_regs,
|
|
1811
|
|
- [dest_reg] <- dest_regs -> do
|
|
1812
|
|
- platform <- getPlatform
|
|
1813
|
|
- (reg_x, _, code_x) <- getSomeReg arg_reg
|
|
1814
|
|
- let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
|
|
1815
|
|
- case w of
|
|
1816
|
|
- W64 -> return ( code_x `appOL` toOL
|
|
1817
|
|
- [
|
|
1818
|
|
- REVBD (OpReg w dst_reg) (OpReg w reg_x)
|
|
1819
|
|
- ])
|
|
1820
|
|
- W32 -> return ( code_x `appOL` toOL
|
|
1821
|
|
- [
|
|
1822
|
|
- REVB2W (OpReg w dst_reg) (OpReg w reg_x)
|
|
1823
|
|
- ])
|
|
1824
|
|
- _ -> return ( code_x `appOL` toOL
|
|
1825
|
|
- [
|
|
1826
|
|
- REVB2H (OpReg w dst_reg) (OpReg w reg_x)
|
|
1827
|
|
- ])
|
|
1828
|
|
- | otherwise -> unsupported (MO_BSwap w)
|
|
1829
|
|
-
|
|
1830
|
|
- PrimTarget (MO_BRev w)
|
|
1831
|
|
- | w `elem` [W8, W16, W32, W64],
|
|
1832
|
|
- [arg_reg] <- arg_regs,
|
|
1833
|
|
- [dest_reg] <- dest_regs -> do
|
|
1834
|
|
- platform <- getPlatform
|
|
1835
|
|
- (reg_x, _, code_x) <- getSomeReg arg_reg
|
|
1836
|
|
- let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
|
|
1837
|
|
- case w of
|
|
1838
|
|
- W8 -> return ( code_x `appOL` toOL
|
|
1839
|
|
- [
|
|
1840
|
|
- BITREV4B (OpReg W32 reg_x) (OpReg W32 reg_x),
|
|
1841
|
|
- AND (OpReg W64 dst_reg) (OpReg W64 reg_x) (OpImm (ImmInt 255))
|
|
1842
|
|
- ])
|
|
1843
|
|
- W16 -> return ( code_x `appOL` toOL
|
|
1844
|
|
- [
|
|
1845
|
|
- BITREV (OpReg W64 reg_x) (OpReg W64 reg_x),
|
|
1846
|
|
- SRL (OpReg W64 dst_reg) (OpReg W64 reg_x) (OpImm (ImmInt 48))
|
|
1847
|
|
- ])
|
|
1848
|
|
- _ -> return ( code_x `snocOL` BITREV (OpReg w dst_reg) (OpReg w reg_x))
|
|
1849
|
|
- | otherwise -> unsupported (MO_BRev w)
|
|
1850
|
|
-
|
|
1851
|
|
- -- mop :: CallishMachOp (see GHC.Cmm.MachOp)
|
|
1852
|
|
- PrimTarget mop -> do
|
|
1853
|
|
- -- We'll need config to construct forien targets
|
|
1854
|
|
- case mop of
|
|
1855
|
|
- -- 64 bit float ops
|
|
1856
|
|
- MO_F64_Pwr -> mkCCall "pow"
|
|
1857
|
|
-
|
|
1858
|
|
- MO_F64_Sin -> mkCCall "sin"
|
|
1859
|
|
- MO_F64_Cos -> mkCCall "cos"
|
|
1860
|
|
- MO_F64_Tan -> mkCCall "tan"
|
|
1861
|
|
-
|
|
1862
|
|
- MO_F64_Sinh -> mkCCall "sinh"
|
|
1863
|
|
- MO_F64_Cosh -> mkCCall "cosh"
|
|
1864
|
|
- MO_F64_Tanh -> mkCCall "tanh"
|
|
1865
|
|
-
|
|
1866
|
|
- MO_F64_Asin -> mkCCall "asin"
|
|
1867
|
|
- MO_F64_Acos -> mkCCall "acos"
|
|
1868
|
|
- MO_F64_Atan -> mkCCall "atan"
|
|
1869
|
|
-
|
|
1870
|
|
- MO_F64_Asinh -> mkCCall "asinh"
|
|
1871
|
|
- MO_F64_Acosh -> mkCCall "acosh"
|
|
1872
|
|
- MO_F64_Atanh -> mkCCall "atanh"
|
|
1873
|
|
-
|
|
1874
|
|
- MO_F64_Log -> mkCCall "log"
|
|
1875
|
|
- MO_F64_Log1P -> mkCCall "log1p"
|
|
1876
|
|
- MO_F64_Exp -> mkCCall "exp"
|
|
1877
|
|
- MO_F64_ExpM1 -> mkCCall "expm1"
|
|
1878
|
|
-
|
|
1879
|
|
- -- 32 bit float ops
|
|
1880
|
|
- MO_F32_Pwr -> mkCCall "powf"
|
|
1881
|
|
-
|
|
1882
|
|
- MO_F32_Sin -> mkCCall "sinf"
|
|
1883
|
|
- MO_F32_Cos -> mkCCall "cosf"
|
|
1884
|
|
- MO_F32_Tan -> mkCCall "tanf"
|
|
1885
|
|
- MO_F32_Sinh -> mkCCall "sinhf"
|
|
1886
|
|
- MO_F32_Cosh -> mkCCall "coshf"
|
|
1887
|
|
- MO_F32_Tanh -> mkCCall "tanhf"
|
|
1888
|
|
- MO_F32_Asin -> mkCCall "asinf"
|
|
1889
|
|
- MO_F32_Acos -> mkCCall "acosf"
|
|
1890
|
|
- MO_F32_Atan -> mkCCall "atanf"
|
|
1891
|
|
- MO_F32_Asinh -> mkCCall "asinhf"
|
|
1892
|
|
- MO_F32_Acosh -> mkCCall "acoshf"
|
|
1893
|
|
- MO_F32_Atanh -> mkCCall "atanhf"
|
|
1894
|
|
- MO_F32_Log -> mkCCall "logf"
|
|
1895
|
|
- MO_F32_Log1P -> mkCCall "log1pf"
|
|
1896
|
|
- MO_F32_Exp -> mkCCall "expf"
|
|
1897
|
|
- MO_F32_ExpM1 -> mkCCall "expm1f"
|
|
1898
|
|
-
|
|
1899
|
|
- -- 64-bit primops
|
|
1900
|
|
- MO_I64_ToI -> mkCCall "hs_int64ToInt"
|
|
1901
|
|
- MO_I64_FromI -> mkCCall "hs_intToInt64"
|
|
1902
|
|
- MO_W64_ToW -> mkCCall "hs_word64ToWord"
|
|
1903
|
|
- MO_W64_FromW -> mkCCall "hs_wordToWord64"
|
|
1904
|
|
- MO_x64_Neg -> mkCCall "hs_neg64"
|
|
1905
|
|
- MO_x64_Add -> mkCCall "hs_add64"
|
|
1906
|
|
- MO_x64_Sub -> mkCCall "hs_sub64"
|
|
1907
|
|
- MO_x64_Mul -> mkCCall "hs_mul64"
|
|
1908
|
|
- MO_I64_Quot -> mkCCall "hs_quotInt64"
|
|
1909
|
|
- MO_I64_Rem -> mkCCall "hs_remInt64"
|
|
1910
|
|
- MO_W64_Quot -> mkCCall "hs_quotWord64"
|
|
1911
|
|
- MO_W64_Rem -> mkCCall "hs_remWord64"
|
|
1912
|
|
- MO_x64_And -> mkCCall "hs_and64"
|
|
1913
|
|
- MO_x64_Or -> mkCCall "hs_or64"
|
|
1914
|
|
- MO_x64_Xor -> mkCCall "hs_xor64"
|
|
1915
|
|
- MO_x64_Not -> mkCCall "hs_not64"
|
|
1916
|
|
- MO_x64_Shl -> mkCCall "hs_uncheckedShiftL64"
|
|
1917
|
|
- MO_I64_Shr -> mkCCall "hs_uncheckedIShiftRA64"
|
|
1918
|
|
- MO_W64_Shr -> mkCCall "hs_uncheckedShiftRL64"
|
|
1919
|
|
- MO_x64_Eq -> mkCCall "hs_eq64"
|
|
1920
|
|
- MO_x64_Ne -> mkCCall "hs_ne64"
|
|
1921
|
|
- MO_I64_Ge -> mkCCall "hs_geInt64"
|
|
1922
|
|
- MO_I64_Gt -> mkCCall "hs_gtInt64"
|
|
1923
|
|
- MO_I64_Le -> mkCCall "hs_leInt64"
|
|
1924
|
|
- MO_I64_Lt -> mkCCall "hs_ltInt64"
|
|
1925
|
|
- MO_W64_Ge -> mkCCall "hs_geWord64"
|
|
1926
|
|
- MO_W64_Gt -> mkCCall "hs_gtWord64"
|
|
1927
|
|
- MO_W64_Le -> mkCCall "hs_leWord64"
|
|
1928
|
|
- MO_W64_Lt -> mkCCall "hs_ltWord64"
|
|
1929
|
|
-
|
|
1930
|
|
- -- Conversion
|
|
1931
|
|
- MO_UF_Conv w -> mkCCall (word2FloatLabel w)
|
|
1932
|
|
-
|
|
1933
|
|
- -- Optional MachOps
|
|
1934
|
|
- -- These are enabled/disabled by backend flags: GHC.StgToCmm.Config
|
|
1935
|
|
- MO_S_Mul2 _w -> unsupported mop
|
|
1936
|
|
- MO_S_QuotRem _w -> unsupported mop
|
|
1937
|
|
- MO_U_QuotRem _w -> unsupported mop
|
|
1938
|
|
- MO_U_QuotRem2 _w -> unsupported mop
|
|
1939
|
|
- MO_Add2 _w -> unsupported mop
|
|
1940
|
|
- MO_AddWordC _w -> unsupported mop
|
|
1941
|
|
- MO_SubWordC _w -> unsupported mop
|
|
1942
|
|
- MO_AddIntC _w -> unsupported mop
|
|
1943
|
|
- MO_SubIntC _w -> unsupported mop
|
|
1944
|
|
- MO_U_Mul2 _w -> unsupported mop
|
|
1945
|
|
-
|
|
1946
|
|
- MO_VS_Quot {} -> unsupported mop
|
|
1947
|
|
- MO_VS_Rem {} -> unsupported mop
|
|
1948
|
|
- MO_VU_Quot {} -> unsupported mop
|
|
1949
|
|
- MO_VU_Rem {} -> unsupported mop
|
|
1950
|
|
- MO_I64X2_Min -> unsupported mop
|
|
1951
|
|
- MO_I64X2_Max -> unsupported mop
|
|
1952
|
|
- MO_W64X2_Min -> unsupported mop
|
|
1953
|
|
- MO_W64X2_Max -> unsupported mop
|
|
1954
|
|
-
|
|
1955
|
|
- -- Memory Ordering
|
|
1956
|
|
- -- Support finer-grained DBAR hints for LA664 and newer uarchs.
|
|
1957
|
|
- -- These are treated as DBAR 0 on older uarchs, so we can start
|
|
1958
|
|
- -- to unconditionally emit the new hints right away.
|
|
1959
|
|
- MO_AcquireFence -> pure (unitOL (DBAR HintAcquire))
|
|
1960
|
|
- MO_ReleaseFence -> pure (unitOL (DBAR HintRelease))
|
|
1961
|
|
- MO_SeqCstFence -> pure (unitOL (DBAR HintSeqcst))
|
|
1962
|
|
-
|
|
1963
|
|
- MO_Touch -> pure nilOL -- Keep variables live (when using interior pointers)
|
|
1964
|
|
- -- Prefetch
|
|
1965
|
|
- MO_Prefetch_Data _n -> pure nilOL -- Prefetch hint.
|
|
1966
|
|
-
|
|
1967
|
|
- -- Memory copy/set/move/cmp, with alignment for optimization
|
|
1968
|
|
-
|
|
1969
|
|
- -- TODO Optimize and use e.g. quad registers to move memory around instead
|
|
1970
|
|
- -- of offloading this to memcpy. For small memcpys we can utilize
|
|
1971
|
|
- -- the 128bit quad registers in NEON to move block of bytes around.
|
|
1972
|
|
- -- Might also make sense of small memsets? Use xzr? What's the function
|
|
1973
|
|
- -- call overhead?
|
|
1974
|
|
- MO_Memcpy _align -> mkCCall "memcpy"
|
|
1975
|
|
- MO_Memset _align -> mkCCall "memset"
|
|
1976
|
|
- MO_Memmove _align -> mkCCall "memmove"
|
|
1977
|
|
- MO_Memcmp _align -> mkCCall "memcmp"
|
|
1978
|
|
-
|
|
1979
|
|
- MO_SuspendThread -> mkCCall "suspendThread"
|
|
1980
|
|
- MO_ResumeThread -> mkCCall "resumeThread"
|
|
1981
|
|
-
|
|
1982
|
|
- MO_PopCnt w -> mkCCall (popCntLabel w)
|
|
1983
|
|
- MO_Pdep w -> mkCCall (pdepLabel w)
|
|
1984
|
|
- MO_Pext w -> mkCCall (pextLabel w)
|
|
1985
|
|
-
|
|
1986
|
|
- -- or a possibly side-effecting machine operation
|
|
1987
|
|
- mo@(MO_AtomicRead w ord)
|
|
1988
|
|
- | [p_reg] <- arg_regs
|
|
1989
|
|
- , [dst_reg] <- dest_regs -> do
|
|
1990
|
|
- (p, _fmt_p, code_p) <- getSomeReg p_reg
|
|
1991
|
|
- platform <- getPlatform
|
|
1992
|
|
- let instrs = case ord of
|
|
1993
|
|
- MemOrderRelaxed -> unitOL $ ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p))
|
|
1994
|
|
-
|
|
1995
|
|
- MemOrderAcquire -> toOL [
|
|
1996
|
|
- ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
|
|
1997
|
|
- DBAR HintAcquire
|
|
1998
|
|
- ]
|
|
1999
|
|
- MemOrderSeqCst -> toOL [
|
|
2000
|
|
- ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
|
|
2001
|
|
- DBAR HintSeqcst
|
|
2002
|
|
- ]
|
|
2003
|
|
- _ -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo
|
|
2004
|
|
- dst = getRegisterReg platform (CmmLocal dst_reg)
|
|
2005
|
|
- moDescr = (text . show) mo
|
|
2006
|
|
- code = code_p `appOL` instrs
|
|
2007
|
|
- pure code
|
|
2008
|
|
- | otherwise -> panic "mal-formed AtomicRead"
|
|
2009
|
|
-
|
|
2010
|
|
- mo@(MO_AtomicWrite w ord)
|
|
2011
|
|
- | [p_reg, val_reg] <- arg_regs -> do
|
|
2012
|
|
- (p, _fmt_p, code_p) <- getSomeReg p_reg
|
|
2013
|
|
- (val, fmt_val, code_val) <- getSomeReg val_reg
|
|
2014
|
|
- let instrs = case ord of
|
|
2015
|
|
- MemOrderRelaxed -> unitOL $ ann moDescr (ST fmt_val (OpReg w val) (OpAddr $ AddrReg p))
|
|
2016
|
|
- -- AMSWAP_DB* insns implentment a fully functional synchronization barrier, like DBAR 0x0.
|
|
2017
|
|
- -- This is terrible. And AMSWAPDB only supports ISA version greater than LA64V1_0. So,
|
|
2018
|
|
- -- implement with DBAR
|
|
2019
|
|
- MemOrderRelease -> toOL [
|
|
2020
|
|
- ann moDescr (DBAR HintRelease),
|
|
2021
|
|
- ST fmt_val (OpReg w val) (OpAddr $ AddrReg p)
|
|
2022
|
|
- ]
|
|
2023
|
|
- MemOrderSeqCst -> toOL [
|
|
2024
|
|
- ann moDescr (DBAR HintSeqcst),
|
|
2025
|
|
- ST fmt_val (OpReg w val) (OpAddr $ AddrReg p)
|
|
2026
|
|
- ]
|
|
2027
|
|
- _ -> panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
|
|
2028
|
|
- moDescr = (text . show) mo
|
|
2029
|
|
- code =
|
|
2030
|
|
- code_p `appOL`
|
|
2031
|
|
- code_val `appOL`
|
|
2032
|
|
- instrs
|
|
2033
|
|
- pure code
|
|
2034
|
|
- | otherwise -> panic "mal-formed AtomicWrite"
|
|
2035
|
|
-
|
|
2036
|
|
- MO_AtomicRMW w amop -> mkCCall (atomicRMWLabel w amop)
|
|
2037
|
|
- MO_Cmpxchg w -> mkCCall (cmpxchgLabel w)
|
|
2038
|
|
- MO_Xchg w -> mkCCall (xchgLabel w)
|
|
|
2035
|
+ let code =
|
|
|
2036
|
+ call_target_code -- compute the label (possibly into a register)
|
|
|
2037
|
+ `appOL` moveStackDown (stackSpaceWords)
|
|
|
2038
|
+ `appOL` passArgumentsCode -- put the arguments into x0, ...
|
|
|
2039
|
+ `snocOL` CALL call_target passRegs -- branch and link (C calls aren't tail calls, but return)
|
|
|
2040
|
+ `appOL` readResultsCode -- parse the results into registers
|
|
|
2041
|
+ `appOL` moveStackUp (stackSpaceWords)
|
|
|
2042
|
+ return code
|
|
2039
|
2043
|
|
|
2040
|
2044
|
where
|
|
2041
|
|
- unsupported :: Show a => a -> b
|
|
2042
|
|
- unsupported mop = panic ("outOfLineCmmOp: " ++ show mop
|
|
2043
|
|
- ++ " not supported here")
|
|
2044
|
|
-
|
|
2045
|
|
- mkCCall :: FastString -> NatM InstrBlock
|
|
2046
|
|
- mkCCall name = do
|
|
2047
|
|
- config <- getConfig
|
|
2048
|
|
- target <-
|
|
2049
|
|
- cmmMakeDynamicReference config CallReference
|
|
2050
|
|
- $ mkForeignLabel name ForeignLabelInThisPackage IsFunction
|
|
2051
|
|
- let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
|
|
2052
|
|
- genCCall (ForeignTarget target cconv) dest_regs arg_regs
|
|
2053
|
|
-
|
|
2054
|
2045
|
-- Implementiation of the LoongArch ABI calling convention.
|
|
2055
|
2046
|
-- https://github.com/loongson/la-abi-specs/blob/release/lapcs.adoc#passing-arguments
|
|
2056
|
2047
|
passArguments :: [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock)
|
| ... |
... |
@@ -2129,10 +2120,10 @@ genCCall target dest_regs arg_regs = do |
|
2129
|
2120
|
readResults _ _ [] _ accumCode = return accumCode
|
|
2130
|
2121
|
readResults [] _ _ _ _ = do
|
|
2131
|
2122
|
platform <- getPlatform
|
|
2132
|
|
- pprPanic "genCCall, out of gp registers when reading results" (pdoc platform target)
|
|
|
2123
|
+ pprPanic "genCCall, out of gp registers when reading results" (pdoc platform expr)
|
|
2133
|
2124
|
readResults _ [] _ _ _ = do
|
|
2134
|
2125
|
platform <- getPlatform
|
|
2135
|
|
- pprPanic "genCCall, out of fp registers when reading results" (pdoc platform target)
|
|
|
2126
|
+ pprPanic "genCCall, out of fp registers when reading results" (pdoc platform expr)
|
|
2136
|
2127
|
readResults (gpReg:gpRegs) (fpReg:fpRegs) (dst:dsts) accumRegs accumCode = do
|
|
2137
|
2128
|
-- gp/fp reg -> dst
|
|
2138
|
2129
|
platform <- getPlatform
|
| ... |
... |
@@ -2150,13 +2141,6 @@ genCCall target dest_regs arg_regs = do |
|
2150
|
2141
|
-- truncate, otherwise an unexpectedly big value might be used in upfollowing calculations
|
|
2151
|
2142
|
truncateReg W64 w r_dst
|
|
2152
|
2143
|
|
|
2153
|
|
- unaryFloatOp w op arg_reg dest_reg = do
|
|
2154
|
|
- platform <- getPlatform
|
|
2155
|
|
- (reg_fx, _format_x, code_fx) <- getFloatReg arg_reg
|
|
2156
|
|
- let dst = getRegisterReg platform (CmmLocal dest_reg)
|
|
2157
|
|
- let code = code_fx `appOL` op (OpReg w dst) (OpReg w reg_fx)
|
|
2158
|
|
- pure code
|
|
2159
|
|
-
|
|
2160
|
2144
|
data BlockInRange = InRange | NotInRange BlockId
|
|
2161
|
2145
|
|
|
2162
|
2146
|
genCondFarJump :: (MonadGetUnique m) => Cond -> Operand -> Operand -> BlockId -> m InstrBlock
|