Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC

Commits:

1 changed file:

Changes:

  • compiler/GHC/CmmToAsm/LA64/CodeGen.hs
    ... ... @@ -3,6 +3,7 @@
    3 3
     {-# LANGUAGE BangPatterns #-}
    
    4 4
     {-# LANGUAGE BinaryLiterals #-}
    
    5 5
     {-# LANGUAGE OverloadedStrings #-}
    
    6
    +{-# LANGUAGE MultiWayIf #-}
    
    6 7
     module GHC.CmmToAsm.LA64.CodeGen (
    
    7 8
           cmmTopCodeGen
    
    8 9
         , generateJumpTableForInstr
    
    ... ... @@ -268,8 +269,10 @@ stmtToInstrs stmt = do
    268 269
       config <- getConfig
    
    269 270
       platform <- getPlatform
    
    270 271
       case stmt of
    
    271
    -    CmmUnsafeForeignCall target result_regs args
    
    272
    -      -> genCCall target result_regs args
    
    272
    +    CmmUnsafeForeignCall target result_regs args ->
    
    273
    +      case target of
    
    274
    +        PrimTarget primOp       -> genPrim primOp result_regs args
    
    275
    +        ForeignTarget addr conv -> genCCall addr conv result_regs args
    
    273 276
     
    
    274 277
         CmmComment s   -> return (unitOL (COMMENT (ftext s)))
    
    275 278
         CmmTick {}     -> return nilOL
    
    ... ... @@ -1631,6 +1634,319 @@ genCondBranch true false expr = do
    1631 1634
       b2 <- genBranch false
    
    1632 1635
       return (b1 `appOL` b2)
    
    1633 1636
     
    
    1637
    +genPrim
    
    1638
    +    :: CallishMachOp      -- MachOp
    
    1639
    +    -> [CmmFormal]        -- where to put the result
    
    1640
    +    -> [CmmActual]        -- arguments (of mixed type)
    
    1641
    +    -> NatM InstrBlock
    
    1642
    +
    
    1643
    +genPrim MO_F32_Fabs          [dst]   [src]          = genFloatAbs W32 dst src
    
    1644
    +genPrim MO_F64_Fabs          [dst]   [src]          = genFloatAbs W64 dst src
    
    1645
    +genPrim MO_F32_Sqrt          [dst]   [src]          = genFloatSqrt FF32 dst src
    
    1646
    +genPrim MO_F64_Sqrt          [dst]   [src]          = genFloatSqrt FF64 dst src
    
    1647
    +genPrim (MO_Clz width)       [dst]   [src]          = genClz width dst src
    
    1648
    +genPrim (MO_Ctz width)       [dst]   [src]          = genCtz width dst src
    
    1649
    +genPrim (MO_BSwap width)     [dst]   [src]          = genByteSwap width dst src
    
    1650
    +genPrim (MO_BRev width)      [dst]   [src]          = genBitRev width dst src
    
    1651
    +genPrim MO_AcquireFence      []      []             = return $ unitOL (DBAR HintAcquire)
    
    1652
    +genPrim MO_ReleaseFence      []      []             = return $ unitOL (DBAR HintRelease)
    
    1653
    +genPrim MO_SeqCstFence       []      []             = return $ unitOL (DBAR HintSeqcst)
    
    1654
    +genPrim MO_Touch             []      [_]            = return nilOL
    
    1655
    +genPrim (MO_Prefetch_Data _n) []     [_]            = return nilOL
    
    1656
    +genPrim (MO_AtomicRead w mo)  [dst]  [addr]         = genAtomicRead w mo dst addr
    
    1657
    +genPrim (MO_AtomicWrite w mo) []     [addr,val]     = genAtomicWrite w mo addr val
    
    1658
    +
    
    1659
    +genPrim mop@(MO_S_Mul2     _w) _         _          = unsupported mop
    
    1660
    +genPrim mop@(MO_S_QuotRem  _w) _         _          = unsupported mop
    
    1661
    +genPrim mop@(MO_U_QuotRem  _w) _         _          = unsupported mop
    
    1662
    +genPrim mop@(MO_U_QuotRem2 _w) _         _          = unsupported mop
    
    1663
    +genPrim mop@(MO_Add2       _w) _         _          = unsupported mop
    
    1664
    +genPrim mop@(MO_AddWordC   _w) _         _          = unsupported mop
    
    1665
    +genPrim mop@(MO_SubWordC   _w) _         _          = unsupported mop
    
    1666
    +genPrim mop@(MO_AddIntC    _w) _         _          = unsupported mop
    
    1667
    +genPrim mop@(MO_SubIntC    _w) _         _          = unsupported mop
    
    1668
    +genPrim mop@(MO_U_Mul2     _w) _         _          = unsupported mop
    
    1669
    +genPrim mop@MO_I64X2_Min       _         _          = unsupported mop
    
    1670
    +genPrim mop@MO_I64X2_Max       _         _          = unsupported mop
    
    1671
    +genPrim mop@MO_W64X2_Min       _         _          = unsupported mop
    
    1672
    +genPrim mop@MO_W64X2_Max       _         _          = unsupported mop
    
    1673
    +genPrim mop@MO_VS_Quot {}      _         _          = unsupported mop
    
    1674
    +genPrim mop@MO_VS_Rem  {}      _         _          = unsupported mop
    
    1675
    +genPrim mop@MO_VU_Quot {}      _         _          = unsupported mop
    
    1676
    +genPrim mop@MO_VU_Rem  {}      _         _          = unsupported mop
    
    1677
    +
    
    1678
    +genPrim (MO_PopCnt width)    [dst]   [src]          = genLibCCall (popCntLabel width) [dst] [src]
    
    1679
    +genPrim (MO_Pdep width)      [dst]   [src,mask]     = genLibCCall (pdepLabel width) [dst] [src,mask]
    
    1680
    +genPrim (MO_Pext width)      [dst]   [src,mask]     = genLibCCall (pextLabel width) [dst] [src,mask]
    
    1681
    +genPrim (MO_UF_Conv width)   [dst]   [src]          = genLibCCall (word2FloatLabel width) [dst] [src]
    
    1682
    +genPrim (MO_AtomicRMW width amop)    [dst] [addr,n] = genLibCCall (atomicRMWLabel width amop) [dst] [addr,n]
    
    1683
    +genPrim (MO_Cmpxchg width)   [dst]   [addr,old,new] = genLibCCall (cmpxchgLabel width) [dst] [addr,old,new]
    
    1684
    +genPrim (MO_Xchg width)      [dst]   [addr,val]     = genLibCCall (xchgLabel width) [dst] [addr,val]
    
    1685
    +genPrim (MO_Memcpy _align)   []   [dst,src,n]       = genLibCCall (fsLit "memcpy")  [] [dst,src,n]
    
    1686
    +genPrim (MO_Memmove _align)  []   [dst,src,n]       = genLibCCall (fsLit "memmove") [] [dst,src,n]
    
    1687
    +genPrim (MO_Memcmp _align)   [rst]   [dst,src,n]    = genLibCCall (fsLit "memcmp")  [rst] [dst,src,n]
    
    1688
    +genPrim (MO_Memset _align)   []   [dst,cnt,n]       = genLibCCall (fsLit "memset")  [] [dst,cnt,n]
    
    1689
    +genPrim MO_F32_Sin           [dst]   [src]          = genLibCCall (fsLit "sinf") [dst] [src]
    
    1690
    +genPrim MO_F32_Cos           [dst]   [src]          = genLibCCall (fsLit "cosf") [dst] [src]
    
    1691
    +genPrim MO_F32_Tan           [dst]   [src]          = genLibCCall (fsLit "tanf") [dst] [src]
    
    1692
    +genPrim MO_F32_Exp           [dst]   [src]          = genLibCCall (fsLit "expf") [dst] [src]
    
    1693
    +genPrim MO_F32_ExpM1         [dst]   [src]          = genLibCCall (fsLit "expm1f") [dst] [src]
    
    1694
    +genPrim MO_F32_Log           [dst]   [src]          = genLibCCall (fsLit "logf") [dst] [src]
    
    1695
    +genPrim MO_F32_Log1P         [dst]   [src]          = genLibCCall (fsLit "log1pf") [dst] [src]
    
    1696
    +genPrim MO_F32_Asin          [dst]   [src]          = genLibCCall (fsLit "asinf") [dst] [src]
    
    1697
    +genPrim MO_F32_Acos          [dst]   [src]          = genLibCCall (fsLit "acosf") [dst] [src]
    
    1698
    +genPrim MO_F32_Atan          [dst]   [src]          = genLibCCall (fsLit "atanf") [dst] [src]
    
    1699
    +genPrim MO_F32_Sinh          [dst]   [src]          = genLibCCall (fsLit "sinhf") [dst] [src]
    
    1700
    +genPrim MO_F32_Cosh          [dst]   [src]          = genLibCCall (fsLit "coshf") [dst] [src]
    
    1701
    +genPrim MO_F32_Tanh          [dst]   [src]          = genLibCCall (fsLit "tanhf") [dst] [src]
    
    1702
    +genPrim MO_F32_Pwr           [dst]   [x,y]          = genLibCCall (fsLit "powf")  [dst] [x,y]
    
    1703
    +genPrim MO_F32_Asinh         [dst]   [src]          = genLibCCall (fsLit "asinhf") [dst] [src]
    
    1704
    +genPrim MO_F32_Acosh         [dst]   [src]          = genLibCCall (fsLit "acoshf") [dst] [src]
    
    1705
    +genPrim MO_F32_Atanh         [dst]   [src]          = genLibCCall (fsLit "atanhf") [dst] [src]
    
    1706
    +genPrim MO_F64_Sin           [dst]   [src]          = genLibCCall (fsLit "sin") [dst] [src]
    
    1707
    +genPrim MO_F64_Cos           [dst]   [src]          = genLibCCall (fsLit "cos") [dst] [src]
    
    1708
    +genPrim MO_F64_Tan           [dst]   [src]          = genLibCCall (fsLit "tan") [dst] [src]
    
    1709
    +genPrim MO_F64_Exp           [dst]   [src]          = genLibCCall (fsLit "exp") [dst] [src]
    
    1710
    +genPrim MO_F64_ExpM1         [dst]   [src]          = genLibCCall (fsLit "expm1") [dst] [src]
    
    1711
    +genPrim MO_F64_Log           [dst]   [src]          = genLibCCall (fsLit "log") [dst] [src]
    
    1712
    +genPrim MO_F64_Log1P         [dst]   [src]          = genLibCCall (fsLit "log1p") [dst] [src]
    
    1713
    +genPrim MO_F64_Asin          [dst]   [src]          = genLibCCall (fsLit "asin") [dst] [src]
    
    1714
    +genPrim MO_F64_Acos          [dst]   [src]          = genLibCCall (fsLit "acos") [dst] [src]
    
    1715
    +genPrim MO_F64_Atan          [dst]   [src]          = genLibCCall (fsLit "atan") [dst] [src]
    
    1716
    +genPrim MO_F64_Sinh          [dst]   [src]          = genLibCCall (fsLit "sinh") [dst] [src]
    
    1717
    +genPrim MO_F64_Cosh          [dst]   [src]          = genLibCCall (fsLit "cosh") [dst] [src]
    
    1718
    +genPrim MO_F64_Tanh          [dst]   [src]          = genLibCCall (fsLit "tanh") [dst] [src]
    
    1719
    +genPrim MO_F64_Pwr           [dst]   [x,y]          = genLibCCall (fsLit "pow")  [dst] [x,y]
    
    1720
    +genPrim MO_F64_Asinh         [dst]   [src]          = genLibCCall (fsLit "asinh") [dst] [src]
    
    1721
    +genPrim MO_F64_Acosh         [dst]   [src]          = genLibCCall (fsLit "acosh") [dst] [src]
    
    1722
    +genPrim MO_F64_Atanh         [dst]   [src]          = genLibCCall (fsLit "atanh") [dst] [src]
    
    1723
    +genPrim MO_SuspendThread     [tok]   [rs,i]         = genLibCCall (fsLit "suspendThread") [tok] [rs,i]
    
    1724
    +genPrim MO_ResumeThread      [rs]    [tok]          = genLibCCall (fsLit "resumeThread") [rs] [tok]
    
    1725
    +genPrim MO_I64_ToI           [dst]   [src]          = genLibCCall (fsLit "hs_int64ToInt") [dst] [src]
    
    1726
    +genPrim MO_I64_FromI         [dst]   [src]          = genLibCCall (fsLit "hs_intToInt64") [dst] [src]
    
    1727
    +genPrim MO_W64_ToW           [dst]   [src]          = genLibCCall (fsLit "hs_word64ToWord") [dst] [src]
    
    1728
    +genPrim MO_W64_FromW         [dst]   [src]          = genLibCCall (fsLit "hs_wordToWord64") [dst] [src]
    
    1729
    +genPrim MO_x64_Neg           [dst]   [src]          = genLibCCall (fsLit "hs_neg64") [dst] [src]
    
    1730
    +genPrim MO_x64_Add           [dst]   [src]          = genLibCCall (fsLit "hs_add64") [dst] [src]
    
    1731
    +genPrim MO_x64_Sub           [dst]   [src]          = genLibCCall (fsLit "hs_sub64") [dst] [src]
    
    1732
    +genPrim MO_x64_Mul           [dst]   [src]          = genLibCCall (fsLit "hs_mul64") [dst] [src]
    
    1733
    +genPrim MO_I64_Quot          [dst]   [src]          = genLibCCall (fsLit "hs_quotInt64") [dst] [src]
    
    1734
    +genPrim MO_I64_Rem           [dst]   [src]          = genLibCCall (fsLit "hs_remInt64") [dst] [src]
    
    1735
    +genPrim MO_W64_Quot          [dst]   [src]          = genLibCCall (fsLit "hs_quotWord64") [dst] [src]
    
    1736
    +genPrim MO_W64_Rem           [dst]   [src]          = genLibCCall (fsLit "hs_remWord64") [dst] [src]
    
    1737
    +genPrim MO_x64_And           [dst]   [src]          = genLibCCall (fsLit "hs_and64") [dst] [src]
    
    1738
    +genPrim MO_x64_Or            [dst]   [src]          = genLibCCall (fsLit "hs_or64") [dst] [src]
    
    1739
    +genPrim MO_x64_Xor           [dst]   [src]          = genLibCCall (fsLit "hs_xor64") [dst] [src]
    
    1740
    +genPrim MO_x64_Not           [dst]   [src]          = genLibCCall (fsLit "hs_not64") [dst] [src]
    
    1741
    +genPrim MO_x64_Shl           [dst]   [src]          = genLibCCall (fsLit "hs_uncheckedShiftL64") [dst] [src]
    
    1742
    +genPrim MO_I64_Shr           [dst]   [src]          = genLibCCall (fsLit "hs_uncheckedIShiftRA64") [dst] [src]
    
    1743
    +genPrim MO_W64_Shr           [dst]   [src]          = genLibCCall (fsLit "hs_uncheckedShiftRL64") [dst] [src]
    
    1744
    +genPrim MO_x64_Eq            [dst]   [src]          = genLibCCall (fsLit "hs_eq64") [dst] [src]
    
    1745
    +genPrim MO_x64_Ne            [dst]   [src]          = genLibCCall (fsLit "hs_ne64") [dst] [src]
    
    1746
    +genPrim MO_I64_Ge            [dst]   [src]          = genLibCCall (fsLit "hs_geInt64") [dst] [src]
    
    1747
    +genPrim MO_I64_Gt            [dst]   [src]          = genLibCCall (fsLit "hs_gtInt64") [dst] [src]
    
    1748
    +genPrim MO_I64_Le            [dst]   [src]          = genLibCCall (fsLit "hs_leInt64") [dst] [src]
    
    1749
    +genPrim MO_I64_Lt            [dst]   [src]          = genLibCCall (fsLit "hs_ltInt64") [dst] [src]
    
    1750
    +genPrim MO_W64_Ge            [dst]   [src]          = genLibCCall (fsLit "hs_geWord64") [dst] [src]
    
    1751
    +genPrim MO_W64_Gt            [dst]   [src]          = genLibCCall (fsLit "hs_gtWord64") [dst] [src]
    
    1752
    +genPrim MO_W64_Le            [dst]   [src]          = genLibCCall (fsLit "hs_leWord64") [dst] [src]
    
    1753
    +genPrim MO_W64_Lt            [dst]   [src]          = genLibCCall (fsLit "hs_ltWord64") [dst] [src]
    
    1754
    +genPrim op                   dst     args           = do
    
    1755
    +  platform <- ncgPlatform <$> getConfig
    
    1756
    +  pprPanic "genPrim: unknown primOp" (ppr (pprCallishMachOp op, dst, fmap (pdoc platform) args))
    
    1757
    +
    
    1758
    +
    
    1759
    +genFloatAbs :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
    
    1760
    +genFloatAbs w dst src = do
    
    1761
    +  platform <- getPlatform
    
    1762
    +  (reg_fx, _, code_fx) <- getFloatReg src
    
    1763
    +  let dst_reg = getRegisterReg platform (CmmLocal dst)
    
    1764
    +  return (code_fx `appOL` toOL
    
    1765
    +          [
    
    1766
    +            FABS (OpReg w dst_reg) (OpReg w reg_fx)
    
    1767
    +          ]
    
    1768
    +         )
    
    1769
    +
    
    1770
    +genFloatSqrt :: Format -> LocalReg -> CmmExpr -> NatM InstrBlock
    
    1771
    +genFloatSqrt f dst src = do
    
    1772
    +  platform <- getPlatform
    
    1773
    +  (reg_fx, _, code_fx) <- getFloatReg src
    
    1774
    +  let dst_reg = getRegisterReg platform (CmmLocal dst)
    
    1775
    +  return (code_fx `appOL` toOL
    
    1776
    +          [
    
    1777
    +            FSQRT (OpReg w dst_reg) (OpReg w reg_fx)
    
    1778
    +          ]
    
    1779
    +         )
    
    1780
    +  where
    
    1781
    +    w = case f of
    
    1782
    +      FF32 -> W32
    
    1783
    +      _ -> W64
    
    1784
    +
    
    1785
    +genClz :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
    
    1786
    +genClz w dst src = do
    
    1787
    +  platform <- getPlatform
    
    1788
    +  (reg_x, _, code_x) <- getSomeReg src
    
    1789
    +  let dst_reg = getRegisterReg platform (CmmLocal dst)
    
    1790
    +  if w `elem` [W32, W64] then do
    
    1791
    +    return (code_x `snocOL` CLZ (OpReg w dst_reg) (OpReg w reg_x))
    
    1792
    +  else if w `elem` [W8, W16] then do
    
    1793
    +    return (code_x `appOL` toOL
    
    1794
    +                 [
    
    1795
    +                  MOV (OpReg W64 dst_reg) (OpImm (ImmInt 1)),
    
    1796
    +                  SLL (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpImm (ImmInt (31-shift))),
    
    1797
    +                  SLL (OpReg W64 reg_x) (OpReg W64 reg_x) (OpImm (ImmInt (32-shift))),
    
    1798
    +                  OR (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpReg W64 reg_x),
    
    1799
    +                  CLZ (OpReg W64 dst_reg) (OpReg W32 dst_reg)
    
    1800
    +                 ]
    
    1801
    +           )
    
    1802
    +  else do
    
    1803
    +    pprPanic "genClz: invalid width: " (ppr w)
    
    1804
    +  where
    
    1805
    +    shift = widthToInt w
    
    1806
    +
    
    1807
    +genCtz :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
    
    1808
    +genCtz w dst src = do
    
    1809
    +  platform <- getPlatform
    
    1810
    +  (reg_x, _, code_x) <- getSomeReg src
    
    1811
    +  let dst_reg = getRegisterReg platform (CmmLocal dst)
    
    1812
    +  if w `elem` [W32, W64] then do
    
    1813
    +    return (code_x `snocOL` CTZ (OpReg w dst_reg) (OpReg w reg_x))
    
    1814
    +  else if w `elem` [W8, W16] then do
    
    1815
    +    return (code_x `appOL` toOL
    
    1816
    +                 [
    
    1817
    +                  MOV (OpReg W64 dst_reg) (OpImm (ImmInt 1)),
    
    1818
    +                  SLL (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpImm (ImmInt shift)),
    
    1819
    +                  BSTRPICK II64 (OpReg W64 reg_x) (OpReg W64 reg_x) (OpImm (ImmInt (shift-1))) (OpImm (ImmInt 0)),
    
    1820
    +                  OR  (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpReg W64 reg_x),
    
    1821
    +                  CTZ (OpReg W64 dst_reg) (OpReg W64 dst_reg)
    
    1822
    +                 ]
    
    1823
    +           )
    
    1824
    +  else do
    
    1825
    +    pprPanic "genCtz: invalid width: " (ppr w)
    
    1826
    +  where
    
    1827
    +    shift = (widthToInt w)
    
    1828
    +
    
    1829
    +genByteSwap :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
    
    1830
    +genByteSwap w dst src = do
    
    1831
    +  platform <- getPlatform
    
    1832
    +  (reg_x, _, code_x) <- getSomeReg src
    
    1833
    +  let dst_reg = getRegisterReg platform (CmmLocal dst)
    
    1834
    +  case w of
    
    1835
    +    W64 ->
    
    1836
    +      return (code_x `appOL` toOL
    
    1837
    +              [
    
    1838
    +                REVBD (OpReg w dst_reg) (OpReg w reg_x)
    
    1839
    +              ]
    
    1840
    +             )
    
    1841
    +    W32 ->
    
    1842
    +      return (code_x `appOL` toOL
    
    1843
    +              [
    
    1844
    +                REVB2W (OpReg w dst_reg) (OpReg w reg_x)
    
    1845
    +              ]
    
    1846
    +             )
    
    1847
    +    W16 ->
    
    1848
    +      return (code_x `appOL` toOL
    
    1849
    +              [
    
    1850
    +                REVB2H (OpReg w dst_reg) (OpReg w reg_x)
    
    1851
    +              ]
    
    1852
    +             )
    
    1853
    +    _ -> pprPanic "genBSwap: invalid width: " (ppr w)
    
    1854
    +
    
    1855
    +genBitRev :: Width -> LocalReg -> CmmExpr -> NatM InstrBlock
    
    1856
    +genBitRev w dst src = do
    
    1857
    +  platform <- getPlatform
    
    1858
    +  (reg_x, _, code_x) <- getSomeReg src
    
    1859
    +  let dst_reg = getRegisterReg platform (CmmLocal dst)
    
    1860
    +  case w of
    
    1861
    +    W8 ->
    
    1862
    +      return (code_x `appOL` toOL
    
    1863
    +              [
    
    1864
    +                BITREV4B (OpReg W32 reg_x) (OpReg W32 reg_x),
    
    1865
    +                AND (OpReg W64 dst_reg) (OpReg W64 reg_x) (OpImm (ImmInt 255))
    
    1866
    +              ]
    
    1867
    +             )
    
    1868
    +    W16 ->
    
    1869
    +      return (code_x `appOL` toOL
    
    1870
    +              [
    
    1871
    +                BITREV (OpReg W64 reg_x) (OpReg W64 reg_x),
    
    1872
    +                SRL (OpReg W64 dst_reg) (OpReg W64 reg_x) (OpImm (ImmInt 48))
    
    1873
    +              ]
    
    1874
    +             )
    
    1875
    +    _ -> return ( code_x `snocOL` BITREV (OpReg w dst_reg) (OpReg w reg_x))
    
    1876
    +
    
    1877
    +-- Generate C call to the given function in libc
    
    1878
    +genLibCCall :: FastString -> [CmmFormal] -> [CmmActual] -> NatM InstrBlock
    
    1879
    +genLibCCall name dsts args = do
    
    1880
    +  config <- getConfig
    
    1881
    +  target <-
    
    1882
    +    cmmMakeDynamicReference config CallReference
    
    1883
    +      $ mkForeignLabel name ForeignLabelInThisPackage IsFunction
    
    1884
    +  let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
    
    1885
    +  genCCall target cconv dsts args
    
    1886
    +
    
    1887
    +unsupported :: Show a => a -> b
    
    1888
    +unsupported mop = panic ("outOfLineCmmOp: " ++ show mop
    
    1889
    +                      ++ " not supported here")
    
    1890
    +
    
    1891
    +-- AMSWAP_DB* insns implentment a fully functional synchronization barrier, like DBAR 0x0.
    
    1892
    +-- This is terrible. And AMSWAPDB only supports ISA version greater than LA64V1_0. So,
    
    1893
    +-- implement with DBAR
    
    1894
    +genAtomicRead :: Width -> MemoryOrdering -> LocalReg -> CmmExpr -> NatM InstrBlock
    
    1895
    +genAtomicRead w mo dst arg = do
    
    1896
    +  (addr_p, _, code_p) <- getSomeReg arg
    
    1897
    +  platform <- getPlatform
    
    1898
    +  let d = getRegisterReg platform (CmmLocal dst)
    
    1899
    +  case mo of
    
    1900
    +    MemOrderRelaxed ->
    
    1901
    +      return (code_p `appOL` toOL
    
    1902
    +              [
    
    1903
    +                LD (intFormat w) (OpReg w d) (OpAddr $ AddrReg addr_p)
    
    1904
    +              ]
    
    1905
    +             )
    
    1906
    +
    
    1907
    +    MemOrderAcquire ->
    
    1908
    +      return (code_p `appOL` toOL
    
    1909
    +              [
    
    1910
    +                LD (intFormat w) (OpReg w d) (OpAddr $ AddrReg addr_p),
    
    1911
    +                DBAR HintAcquire
    
    1912
    +              ]
    
    1913
    +             )
    
    1914
    +    MemOrderSeqCst ->
    
    1915
    +      return (code_p `appOL` toOL
    
    1916
    +              [
    
    1917
    +                LD (intFormat w) (OpReg w d) (OpAddr $ AddrReg addr_p),
    
    1918
    +                DBAR HintSeqcst
    
    1919
    +              ]
    
    1920
    +             )
    
    1921
    +    _ -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo
    
    1922
    +
    
    1923
    +genAtomicWrite :: Width -> MemoryOrdering -> CmmExpr -> CmmExpr -> NatM InstrBlock
    
    1924
    +genAtomicWrite w mo addr val = do
    
    1925
    +  (addr_p, _, code_p) <- getSomeReg addr
    
    1926
    +  (val_reg, fmt_val, code_val) <- getSomeReg val
    
    1927
    +  case mo of
    
    1928
    +    MemOrderRelaxed ->
    
    1929
    +      return (code_p `appOL`code_val `appOL` toOL
    
    1930
    +              [
    
    1931
    +                ST fmt_val (OpReg w val_reg) (OpAddr $ AddrReg addr_p)
    
    1932
    +              ]
    
    1933
    +             )
    
    1934
    +    MemOrderRelease ->
    
    1935
    +      return (code_p `appOL`code_val `appOL` toOL
    
    1936
    +              [
    
    1937
    +                DBAR HintRelease,
    
    1938
    +                ST fmt_val (OpReg w val_reg) (OpAddr $ AddrReg addr_p)
    
    1939
    +              ]
    
    1940
    +             )
    
    1941
    +    MemOrderSeqCst ->
    
    1942
    +      return (code_p `appOL`code_val `appOL` toOL
    
    1943
    +              [
    
    1944
    +                DBAR HintSeqcst,
    
    1945
    +                ST fmt_val (OpReg w val_reg) (OpAddr $ AddrReg addr_p)
    
    1946
    +              ]
    
    1947
    +             )
    
    1948
    +    _ ->  panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
    
    1949
    +
    
    1634 1950
     -- -----------------------------------------------------------------------------
    
    1635 1951
     {-
    
    1636 1952
     Generating C calls
    
    ... ... @@ -1664,393 +1980,68 @@ wider than FRLEN may be passed in a GAR.
    1664 1980
     -}
    
    1665 1981
     
    
    1666 1982
     genCCall
    
    1667
    -    :: ForeignTarget      -- function to call
    
    1668
    -    -> [CmmFormal]        -- where to put the result
    
    1669
    -    -> [CmmActual]        -- arguments (of mixed type)
    
    1670
    -    -> NatM InstrBlock
    
    1671
    -
    
    1672
    --- TODO: Specialize where we can.
    
    1673
    --- Generic impl
    
    1674
    -genCCall target dest_regs arg_regs = do
    
    1675
    -  case target of
    
    1676
    -    -- The target :: ForeignTarget call can either
    
    1677
    -    -- be a foreign procedure with an address expr
    
    1678
    -    -- and a calling convention.
    
    1679
    -    ForeignTarget expr _cconv -> do
    
    1680
    -      (call_target, call_target_code) <- case expr of
    
    1681
    -        -- if this is a label, let's just directly to it.
    
    1682
    -        (CmmLit (CmmLabel lbl)) -> pure (TLabel lbl, nilOL)
    
    1683
    -        -- if it's not a label, let's compute the expression into a
    
    1684
    -        -- register and jump to that.
    
    1685
    -        _ -> do
    
    1686
    -          (reg, _format, reg_code) <- getSomeReg expr
    
    1687
    -          pure (TReg reg, reg_code)
    
    1688
    -      -- compute the code and register logic for all arg_regs.
    
    1689
    -      -- this will give us the format information to match on.
    
    1690
    -      arg_regs' <- mapM getSomeReg arg_regs
    
    1691
    -
    
    1692
    -      -- Now this is stupid.  Our Cmm expressions doesn't carry the proper sizes
    
    1693
    -      -- so while in Cmm we might get W64 incorrectly for an int, that is W32 in
    
    1694
    -      -- STG; this thenn breaks packing of stack arguments, if we need to pack
    
    1695
    -      -- for the pcs, e.g. darwinpcs.  Option one would be to fix the Int type
    
    1696
    -      -- in Cmm proper. Option two, which we choose here is to use extended Hint
    
    1697
    -      -- information to contain the size information and use that when packing
    
    1698
    -      -- arguments, spilled onto the stack.
    
    1699
    -      let (_res_hints, arg_hints) = foreignTargetHints target
    
    1700
    -          arg_regs'' = zipWith (\(r, f, c) h -> (r,f,h,c)) arg_regs' arg_hints
    
    1701
    -
    
    1702
    -      (stackSpaceWords, passRegs, passArgumentsCode) <- passArguments allGpArgRegs allFpArgRegs arg_regs'' 0 [] nilOL
    
    1703
    -
    
    1704
    -      readResultsCode <- readResults allGpArgRegs allFpArgRegs dest_regs [] nilOL
    
    1705
    -
    
    1706
    -      let moveStackDown 0 = toOL [ PUSH_STACK_FRAME
    
    1707
    -                                 , DELTA (-16)
    
    1708
    -                                 ]
    
    1709
    -          moveStackDown i | odd i = moveStackDown (i + 1)
    
    1710
    -          moveStackDown i = toOL [ PUSH_STACK_FRAME
    
    1711
    -                                 , SUB (OpReg W64 (spMachReg)) (OpReg W64 (spMachReg)) (OpImm (ImmInt (8 * i)))
    
    1712
    -                                 , DELTA (-8 * i - 16)
    
    1713
    -                                 ]
    
    1714
    -          moveStackUp 0 = toOL [ POP_STACK_FRAME
    
    1715
    -                               , DELTA 0
    
    1716
    -                               ]
    
    1717
    -          moveStackUp i | odd i = moveStackUp (i + 1)
    
    1718
    -          moveStackUp i = toOL [ ADD (OpReg W64 (spMachReg)) (OpReg W64 (spMachReg)) (OpImm (ImmInt (8 * i)))
    
    1719
    -                               , POP_STACK_FRAME
    
    1720
    -                               , DELTA 0
    
    1721
    -                               ]
    
    1722
    -
    
    1723
    -      let code =
    
    1724
    -            call_target_code -- compute the label (possibly into a register)
    
    1725
    -              `appOL` moveStackDown (stackSpaceWords)
    
    1726
    -              `appOL` passArgumentsCode -- put the arguments into x0, ...
    
    1727
    -              `snocOL` CALL call_target passRegs -- branch and link (C calls aren't tail calls, but return)
    
    1728
    -              `appOL` readResultsCode -- parse the results into registers
    
    1729
    -              `appOL` moveStackUp (stackSpaceWords)
    
    1730
    -      return code
    
    1731
    -
    
    1732
    -    PrimTarget MO_F32_Fabs
    
    1733
    -      | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
    
    1734
    -        unaryFloatOp W32 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
    
    1735
    -      | otherwise -> panic "mal-formed MO_F32_Fabs"
    
    1736
    -    PrimTarget MO_F64_Fabs
    
    1737
    -      | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
    
    1738
    -        unaryFloatOp W64 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
    
    1739
    -      | otherwise -> panic "mal-formed MO_F64_Fabs"
    
    1740
    -
    
    1741
    -    PrimTarget MO_F32_Sqrt
    
    1742
    -      | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
    
    1743
    -        unaryFloatOp W32 (\d x -> unitOL $ FSQRT d x) arg_reg dest_reg
    
    1744
    -      | otherwise -> panic "mal-formed MO_F32_Sqrt"
    
    1745
    -    PrimTarget MO_F64_Sqrt
    
    1746
    -      | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
    
    1747
    -        unaryFloatOp W64 (\d x -> unitOL $ FSQRT d x) arg_reg dest_reg
    
    1748
    -      | otherwise -> panic "mal-formed MO_F64_Sqrt"
    
    1749
    -
    
    1750
    -    PrimTarget (MO_Clz w)
    
    1751
    -      | w `elem` [W32, W64],
    
    1752
    -      [arg_reg] <- arg_regs,
    
    1753
    -      [dest_reg] <- dest_regs -> do
    
    1754
    -      platform <- getPlatform
    
    1755
    -      (reg_x, _format_x, code_x) <- getSomeReg arg_reg
    
    1756
    -      let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
    
    1757
    -      return ( code_x `snocOL`
    
    1758
    -               CLZ (OpReg w dst_reg) (OpReg w reg_x)
    
    1759
    -             )
    
    1760
    -      | w `elem` [W8, W16],
    
    1761
    -      [arg_reg] <- arg_regs,
    
    1762
    -      [dest_reg] <- dest_regs -> do
    
    1763
    -        platform <- getPlatform
    
    1764
    -        (reg_x, _format_x, code_x) <- getSomeReg arg_reg
    
    1765
    -        let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
    
    1766
    -        return ( code_x `appOL` toOL
    
    1767
    -                 [
    
    1768
    -                  MOV (OpReg W64 dst_reg) (OpImm (ImmInt 1)),
    
    1769
    -                  SLL (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpImm (ImmInt (31-shift))),
    
    1770
    -                  SLL (OpReg W64 reg_x) (OpReg W64 reg_x) (OpImm (ImmInt (32-shift))),
    
    1771
    -                  OR (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpReg W64 reg_x),
    
    1772
    -                  CLZ (OpReg W64 dst_reg) (OpReg W32 dst_reg)
    
    1773
    -                 ]
    
    1774
    -               )
    
    1775
    -      | otherwise -> unsupported (MO_Clz w)
    
    1776
    -      where
    
    1777
    -        shift = widthToInt w
    
    1778
    -
    
    1779
    -    PrimTarget (MO_Ctz w)
    
    1780
    -      | w `elem` [W32, W64],
    
    1781
    -      [arg_reg] <- arg_regs,
    
    1782
    -      [dest_reg] <- dest_regs -> do
    
    1783
    -      platform <- getPlatform
    
    1784
    -      (reg_x, _format_x, code_x) <- getSomeReg arg_reg
    
    1785
    -      let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
    
    1786
    -      return ( code_x `snocOL`
    
    1787
    -               CTZ (OpReg w dst_reg) (OpReg w reg_x)
    
    1788
    -             )
    
    1789
    -      | w `elem` [W8, W16],
    
    1790
    -      [arg_reg] <- arg_regs,
    
    1791
    -      [dest_reg] <- dest_regs -> do
    
    1792
    -      platform <- getPlatform
    
    1793
    -      (reg_x, _format_x, code_x) <- getSomeReg arg_reg
    
    1794
    -      let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
    
    1795
    -      return ( code_x `appOL` toOL
    
    1796
    -               [
    
    1797
    -                MOV (OpReg W64 dst_reg) (OpImm (ImmInt 1)),
    
    1798
    -                SLL (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpImm (ImmInt shift)),
    
    1799
    -                BSTRPICK II64 (OpReg W64 reg_x) (OpReg W64 reg_x) (OpImm (ImmInt (shift-1))) (OpImm (ImmInt 0)),
    
    1800
    -                OR  (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpReg W64 reg_x),
    
    1801
    -                CTZ (OpReg W64 dst_reg) (OpReg W64 dst_reg)
    
    1802
    -               ]
    
    1803
    -             )
    
    1804
    -      | otherwise -> unsupported (MO_Ctz w)
    
    1805
    -      where
    
    1806
    -        shift = (widthToInt w)
    
    1983
    +  :: CmmExpr            -- address of func call
    
    1984
    +  -> ForeignConvention  -- calling convention
    
    1985
    +  -> [CmmFormal]        -- results
    
    1986
    +  -> [CmmActual]        -- arguments
    
    1987
    +  -> NatM InstrBlock
    
    1988
    +
    
    1989
    +
    
    1990
    +genCCall expr _conv@(ForeignConvention _ argHints _resHints _) dest_regs arg_regs = do
    
    1991
    +  (call_target, call_target_code) <- case expr of
    
    1992
    +    -- if this is a label, let's just directly to it.
    
    1993
    +    (CmmLit (CmmLabel lbl)) -> pure (TLabel lbl, nilOL)
    
    1994
    +    -- if it's not a label, let's compute the expression into a
    
    1995
    +    -- register and jump to that.
    
    1996
    +    _ -> do
    
    1997
    +      (reg, _format, reg_code) <- getSomeReg expr
    
    1998
    +      pure (TReg reg, reg_code)
    
    1999
    +  -- compute the code and register logic for all arg_regs.
    
    2000
    +  -- this will give us the format information to match on.
    
    2001
    +  arg_regs' <- mapM getSomeReg arg_regs
    
    2002
    +
    
    2003
    +  -- Now this is stupid.  Our Cmm expressions doesn't carry the proper sizes
    
    2004
    +  -- so while in Cmm we might get W64 incorrectly for an int, that is W32 in
    
    2005
    +  -- STG; this thenn breaks packing of stack arguments, if we need to pack
    
    2006
    +  -- for the pcs, e.g. darwinpcs.  Option one would be to fix the Int type
    
    2007
    +  -- in Cmm proper. Option two, which we choose here is to use extended Hint
    
    2008
    +  -- information to contain the size information and use that when packing
    
    2009
    +  -- arguments, spilled onto the stack.
    
    2010
    +  let
    
    2011
    +      arg_hints = take (length arg_regs) (argHints ++ repeat NoHint)
    
    2012
    +      arg_regs'' = zipWith (\(r, f, c) h -> (r,f,h,c)) arg_regs' arg_hints
    
    2013
    +
    
    2014
    +  (stackSpaceWords, passRegs, passArgumentsCode) <- passArguments allGpArgRegs allFpArgRegs arg_regs'' 0 [] nilOL
    
    2015
    +
    
    2016
    +  readResultsCode <- readResults allGpArgRegs allFpArgRegs dest_regs [] nilOL
    
    2017
    +
    
    2018
    +  let moveStackDown 0 = toOL [ PUSH_STACK_FRAME
    
    2019
    +                             , DELTA (-16)
    
    2020
    +                             ]
    
    2021
    +      moveStackDown i | odd i = moveStackDown (i + 1)
    
    2022
    +      moveStackDown i = toOL [ PUSH_STACK_FRAME
    
    2023
    +                             , SUB (OpReg W64 (spMachReg)) (OpReg W64 (spMachReg)) (OpImm (ImmInt (8 * i)))
    
    2024
    +                             , DELTA (-8 * i - 16)
    
    2025
    +                             ]
    
    2026
    +      moveStackUp 0 = toOL [ POP_STACK_FRAME
    
    2027
    +                           , DELTA 0
    
    2028
    +                           ]
    
    2029
    +      moveStackUp i | odd i = moveStackUp (i + 1)
    
    2030
    +      moveStackUp i = toOL [ ADD (OpReg W64 (spMachReg)) (OpReg W64 (spMachReg)) (OpImm (ImmInt (8 * i)))
    
    2031
    +                           , POP_STACK_FRAME
    
    2032
    +                           , DELTA 0
    
    2033
    +                           ]
    
    1807 2034
     
    
    1808
    -    PrimTarget (MO_BSwap w)
    
    1809
    -      | w `elem` [W16, W32, W64],
    
    1810
    -      [arg_reg] <- arg_regs,
    
    1811
    -      [dest_reg] <- dest_regs -> do
    
    1812
    -      platform <- getPlatform
    
    1813
    -      (reg_x, _, code_x) <- getSomeReg arg_reg
    
    1814
    -      let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
    
    1815
    -      case w of
    
    1816
    -        W64 -> return ( code_x `appOL` toOL
    
    1817
    -                      [
    
    1818
    -                        REVBD (OpReg w dst_reg) (OpReg w reg_x)
    
    1819
    -                      ])
    
    1820
    -        W32 -> return ( code_x `appOL` toOL
    
    1821
    -                      [
    
    1822
    -                        REVB2W (OpReg w dst_reg) (OpReg w reg_x)
    
    1823
    -                      ])
    
    1824
    -        _ -> return ( code_x `appOL` toOL
    
    1825
    -                      [
    
    1826
    -                        REVB2H (OpReg w dst_reg) (OpReg w reg_x)
    
    1827
    -                      ])
    
    1828
    -      | otherwise -> unsupported (MO_BSwap w)
    
    1829
    -
    
    1830
    -    PrimTarget (MO_BRev w)
    
    1831
    -      | w `elem` [W8, W16, W32, W64],
    
    1832
    -      [arg_reg] <- arg_regs,
    
    1833
    -      [dest_reg] <- dest_regs -> do
    
    1834
    -      platform <- getPlatform
    
    1835
    -      (reg_x, _, code_x) <- getSomeReg arg_reg
    
    1836
    -      let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
    
    1837
    -      case w of
    
    1838
    -        W8 -> return ( code_x `appOL` toOL
    
    1839
    -                     [
    
    1840
    -                       BITREV4B (OpReg W32 reg_x) (OpReg W32 reg_x),
    
    1841
    -                       AND (OpReg W64 dst_reg) (OpReg W64 reg_x) (OpImm (ImmInt 255))
    
    1842
    -                     ])
    
    1843
    -        W16 -> return ( code_x `appOL` toOL
    
    1844
    -                      [
    
    1845
    -                        BITREV (OpReg W64 reg_x) (OpReg W64 reg_x),
    
    1846
    -                        SRL (OpReg W64 dst_reg) (OpReg W64 reg_x) (OpImm (ImmInt 48))
    
    1847
    -                      ])
    
    1848
    -        _ -> return ( code_x `snocOL` BITREV (OpReg w dst_reg) (OpReg w reg_x))
    
    1849
    -      | otherwise -> unsupported (MO_BRev w)
    
    1850
    -
    
    1851
    -    -- mop :: CallishMachOp (see GHC.Cmm.MachOp)
    
    1852
    -    PrimTarget mop -> do
    
    1853
    -      -- We'll need config to construct forien targets
    
    1854
    -      case mop of
    
    1855
    -        -- 64 bit float ops
    
    1856
    -        MO_F64_Pwr   -> mkCCall "pow"
    
    1857
    -
    
    1858
    -        MO_F64_Sin   -> mkCCall "sin"
    
    1859
    -        MO_F64_Cos   -> mkCCall "cos"
    
    1860
    -        MO_F64_Tan   -> mkCCall "tan"
    
    1861
    -
    
    1862
    -        MO_F64_Sinh  -> mkCCall "sinh"
    
    1863
    -        MO_F64_Cosh  -> mkCCall "cosh"
    
    1864
    -        MO_F64_Tanh  -> mkCCall "tanh"
    
    1865
    -
    
    1866
    -        MO_F64_Asin  -> mkCCall "asin"
    
    1867
    -        MO_F64_Acos  -> mkCCall "acos"
    
    1868
    -        MO_F64_Atan  -> mkCCall "atan"
    
    1869
    -
    
    1870
    -        MO_F64_Asinh -> mkCCall "asinh"
    
    1871
    -        MO_F64_Acosh -> mkCCall "acosh"
    
    1872
    -        MO_F64_Atanh -> mkCCall "atanh"
    
    1873
    -
    
    1874
    -        MO_F64_Log   -> mkCCall "log"
    
    1875
    -        MO_F64_Log1P -> mkCCall "log1p"
    
    1876
    -        MO_F64_Exp   -> mkCCall "exp"
    
    1877
    -        MO_F64_ExpM1 -> mkCCall "expm1"
    
    1878
    -
    
    1879
    -        -- 32 bit float ops
    
    1880
    -        MO_F32_Pwr   -> mkCCall "powf"
    
    1881
    -
    
    1882
    -        MO_F32_Sin   -> mkCCall "sinf"
    
    1883
    -        MO_F32_Cos   -> mkCCall "cosf"
    
    1884
    -        MO_F32_Tan   -> mkCCall "tanf"
    
    1885
    -        MO_F32_Sinh  -> mkCCall "sinhf"
    
    1886
    -        MO_F32_Cosh  -> mkCCall "coshf"
    
    1887
    -        MO_F32_Tanh  -> mkCCall "tanhf"
    
    1888
    -        MO_F32_Asin  -> mkCCall "asinf"
    
    1889
    -        MO_F32_Acos  -> mkCCall "acosf"
    
    1890
    -        MO_F32_Atan  -> mkCCall "atanf"
    
    1891
    -        MO_F32_Asinh -> mkCCall "asinhf"
    
    1892
    -        MO_F32_Acosh -> mkCCall "acoshf"
    
    1893
    -        MO_F32_Atanh -> mkCCall "atanhf"
    
    1894
    -        MO_F32_Log   -> mkCCall "logf"
    
    1895
    -        MO_F32_Log1P -> mkCCall "log1pf"
    
    1896
    -        MO_F32_Exp   -> mkCCall "expf"
    
    1897
    -        MO_F32_ExpM1 -> mkCCall "expm1f"
    
    1898
    -
    
    1899
    -        -- 64-bit primops
    
    1900
    -        MO_I64_ToI   -> mkCCall "hs_int64ToInt"
    
    1901
    -        MO_I64_FromI -> mkCCall "hs_intToInt64"
    
    1902
    -        MO_W64_ToW   -> mkCCall "hs_word64ToWord"
    
    1903
    -        MO_W64_FromW -> mkCCall "hs_wordToWord64"
    
    1904
    -        MO_x64_Neg   -> mkCCall "hs_neg64"
    
    1905
    -        MO_x64_Add   -> mkCCall "hs_add64"
    
    1906
    -        MO_x64_Sub   -> mkCCall "hs_sub64"
    
    1907
    -        MO_x64_Mul   -> mkCCall "hs_mul64"
    
    1908
    -        MO_I64_Quot  -> mkCCall "hs_quotInt64"
    
    1909
    -        MO_I64_Rem   -> mkCCall "hs_remInt64"
    
    1910
    -        MO_W64_Quot  -> mkCCall "hs_quotWord64"
    
    1911
    -        MO_W64_Rem   -> mkCCall "hs_remWord64"
    
    1912
    -        MO_x64_And   -> mkCCall "hs_and64"
    
    1913
    -        MO_x64_Or    -> mkCCall "hs_or64"
    
    1914
    -        MO_x64_Xor   -> mkCCall "hs_xor64"
    
    1915
    -        MO_x64_Not   -> mkCCall "hs_not64"
    
    1916
    -        MO_x64_Shl   -> mkCCall "hs_uncheckedShiftL64"
    
    1917
    -        MO_I64_Shr   -> mkCCall "hs_uncheckedIShiftRA64"
    
    1918
    -        MO_W64_Shr   -> mkCCall "hs_uncheckedShiftRL64"
    
    1919
    -        MO_x64_Eq    -> mkCCall "hs_eq64"
    
    1920
    -        MO_x64_Ne    -> mkCCall "hs_ne64"
    
    1921
    -        MO_I64_Ge    -> mkCCall "hs_geInt64"
    
    1922
    -        MO_I64_Gt    -> mkCCall "hs_gtInt64"
    
    1923
    -        MO_I64_Le    -> mkCCall "hs_leInt64"
    
    1924
    -        MO_I64_Lt    -> mkCCall "hs_ltInt64"
    
    1925
    -        MO_W64_Ge    -> mkCCall "hs_geWord64"
    
    1926
    -        MO_W64_Gt    -> mkCCall "hs_gtWord64"
    
    1927
    -        MO_W64_Le    -> mkCCall "hs_leWord64"
    
    1928
    -        MO_W64_Lt    -> mkCCall "hs_ltWord64"
    
    1929
    -
    
    1930
    -        -- Conversion
    
    1931
    -        MO_UF_Conv w        -> mkCCall (word2FloatLabel w)
    
    1932
    -
    
    1933
    -        -- Optional MachOps
    
    1934
    -        -- These are enabled/disabled by backend flags: GHC.StgToCmm.Config
    
    1935
    -        MO_S_Mul2     _w -> unsupported mop
    
    1936
    -        MO_S_QuotRem  _w -> unsupported mop
    
    1937
    -        MO_U_QuotRem  _w -> unsupported mop
    
    1938
    -        MO_U_QuotRem2 _w -> unsupported mop
    
    1939
    -        MO_Add2       _w -> unsupported mop
    
    1940
    -        MO_AddWordC   _w -> unsupported mop
    
    1941
    -        MO_SubWordC   _w -> unsupported mop
    
    1942
    -        MO_AddIntC    _w -> unsupported mop
    
    1943
    -        MO_SubIntC    _w -> unsupported mop
    
    1944
    -        MO_U_Mul2     _w -> unsupported mop
    
    1945
    -
    
    1946
    -        MO_VS_Quot {} -> unsupported mop
    
    1947
    -        MO_VS_Rem {}  -> unsupported mop
    
    1948
    -        MO_VU_Quot {} -> unsupported mop
    
    1949
    -        MO_VU_Rem {}  -> unsupported mop
    
    1950
    -        MO_I64X2_Min -> unsupported mop
    
    1951
    -        MO_I64X2_Max -> unsupported mop
    
    1952
    -        MO_W64X2_Min -> unsupported mop
    
    1953
    -        MO_W64X2_Max -> unsupported mop
    
    1954
    -
    
    1955
    -        -- Memory Ordering
    
    1956
    -        -- Support finer-grained DBAR hints for LA664 and newer uarchs.
    
    1957
    -        -- These are treated as DBAR 0 on older uarchs, so we can start
    
    1958
    -        -- to unconditionally emit the new hints right away.
    
    1959
    -        MO_AcquireFence -> pure (unitOL (DBAR HintAcquire))
    
    1960
    -        MO_ReleaseFence -> pure (unitOL (DBAR HintRelease))
    
    1961
    -        MO_SeqCstFence  -> pure (unitOL (DBAR HintSeqcst))
    
    1962
    -
    
    1963
    -        MO_Touch        -> pure nilOL -- Keep variables live (when using interior pointers)
    
    1964
    -        -- Prefetch
    
    1965
    -        MO_Prefetch_Data _n -> pure nilOL -- Prefetch hint.
    
    1966
    -
    
    1967
    -        -- Memory copy/set/move/cmp, with alignment for optimization
    
    1968
    -
    
    1969
    -        -- TODO Optimize and use e.g. quad registers to move memory around instead
    
    1970
    -        -- of offloading this to memcpy. For small memcpys we can utilize
    
    1971
    -        -- the 128bit quad registers in NEON to move block of bytes around.
    
    1972
    -        -- Might also make sense of small memsets? Use xzr? What's the function
    
    1973
    -        -- call overhead?
    
    1974
    -        MO_Memcpy  _align   -> mkCCall "memcpy"
    
    1975
    -        MO_Memset  _align   -> mkCCall "memset"
    
    1976
    -        MO_Memmove _align   -> mkCCall "memmove"
    
    1977
    -        MO_Memcmp  _align   -> mkCCall "memcmp"
    
    1978
    -
    
    1979
    -        MO_SuspendThread    -> mkCCall "suspendThread"
    
    1980
    -        MO_ResumeThread     -> mkCCall "resumeThread"
    
    1981
    -
    
    1982
    -        MO_PopCnt w         -> mkCCall (popCntLabel w)
    
    1983
    -        MO_Pdep w           -> mkCCall (pdepLabel w)
    
    1984
    -        MO_Pext w           -> mkCCall (pextLabel w)
    
    1985
    -
    
    1986
    -    -- or a possibly side-effecting machine operation
    
    1987
    -        mo@(MO_AtomicRead w ord)
    
    1988
    -          | [p_reg] <- arg_regs
    
    1989
    -          , [dst_reg] <- dest_regs -> do
    
    1990
    -              (p, _fmt_p, code_p) <- getSomeReg p_reg
    
    1991
    -              platform <- getPlatform
    
    1992
    -              let instrs = case ord of
    
    1993
    -                      MemOrderRelaxed -> unitOL $ ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p))
    
    1994
    -
    
    1995
    -                      MemOrderAcquire -> toOL [
    
    1996
    -                                                ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
    
    1997
    -                                                DBAR HintAcquire
    
    1998
    -                                              ]
    
    1999
    -                      MemOrderSeqCst -> toOL  [
    
    2000
    -                                                ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
    
    2001
    -                                                DBAR HintSeqcst
    
    2002
    -                                              ]
    
    2003
    -                      _ -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo
    
    2004
    -                  dst = getRegisterReg platform (CmmLocal dst_reg)
    
    2005
    -                  moDescr = (text . show) mo
    
    2006
    -                  code = code_p `appOL` instrs
    
    2007
    -              pure code
    
    2008
    -          | otherwise -> panic "mal-formed AtomicRead"
    
    2009
    -
    
    2010
    -        mo@(MO_AtomicWrite w ord)
    
    2011
    -          | [p_reg, val_reg] <- arg_regs -> do
    
    2012
    -              (p, _fmt_p, code_p) <- getSomeReg p_reg
    
    2013
    -              (val, fmt_val, code_val) <- getSomeReg val_reg
    
    2014
    -              let instrs = case ord of
    
    2015
    -                      MemOrderRelaxed -> unitOL $ ann moDescr (ST fmt_val (OpReg w val) (OpAddr $ AddrReg p))
    
    2016
    -                      -- AMSWAP_DB* insns implentment a fully functional synchronization barrier, like DBAR 0x0.
    
    2017
    -                      -- This is terrible. And AMSWAPDB only supports ISA version greater than LA64V1_0. So,
    
    2018
    -                      -- implement with DBAR
    
    2019
    -                      MemOrderRelease -> toOL [
    
    2020
    -                                                ann moDescr (DBAR HintRelease),
    
    2021
    -                                                ST fmt_val (OpReg w val) (OpAddr $ AddrReg p)
    
    2022
    -                                              ]
    
    2023
    -                      MemOrderSeqCst  -> toOL [
    
    2024
    -                                                ann moDescr (DBAR HintSeqcst),
    
    2025
    -                                                ST fmt_val (OpReg w val) (OpAddr $ AddrReg p)
    
    2026
    -                                              ]
    
    2027
    -                      _ ->  panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
    
    2028
    -                  moDescr = (text . show) mo
    
    2029
    -                  code =
    
    2030
    -                    code_p `appOL`
    
    2031
    -                    code_val `appOL`
    
    2032
    -                    instrs
    
    2033
    -              pure code
    
    2034
    -          | otherwise -> panic "mal-formed AtomicWrite"
    
    2035
    -
    
    2036
    -        MO_AtomicRMW w amop -> mkCCall (atomicRMWLabel w amop)
    
    2037
    -        MO_Cmpxchg w        -> mkCCall (cmpxchgLabel w)
    
    2038
    -        MO_Xchg w           -> mkCCall (xchgLabel w)
    
    2035
    +  let code =
    
    2036
    +        call_target_code -- compute the label (possibly into a register)
    
    2037
    +          `appOL` moveStackDown (stackSpaceWords)
    
    2038
    +          `appOL` passArgumentsCode -- put the arguments into x0, ...
    
    2039
    +          `snocOL` CALL call_target passRegs -- branch and link (C calls aren't tail calls, but return)
    
    2040
    +          `appOL` readResultsCode -- parse the results into registers
    
    2041
    +          `appOL` moveStackUp (stackSpaceWords)
    
    2042
    +  return code
    
    2039 2043
     
    
    2040 2044
       where
    
    2041
    -    unsupported :: Show a => a -> b
    
    2042
    -    unsupported mop = panic ("outOfLineCmmOp: " ++ show mop
    
    2043
    -                          ++ " not supported here")
    
    2044
    -
    
    2045
    -    mkCCall :: FastString -> NatM InstrBlock
    
    2046
    -    mkCCall name = do
    
    2047
    -      config <- getConfig
    
    2048
    -      target <-
    
    2049
    -        cmmMakeDynamicReference config CallReference
    
    2050
    -          $ mkForeignLabel name ForeignLabelInThisPackage IsFunction
    
    2051
    -      let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
    
    2052
    -      genCCall (ForeignTarget target cconv) dest_regs arg_regs
    
    2053
    -
    
    2054 2045
         -- Implementiation of the LoongArch ABI calling convention.
    
    2055 2046
         -- https://github.com/loongson/la-abi-specs/blob/release/lapcs.adoc#passing-arguments
    
    2056 2047
         passArguments :: [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock)
    
    ... ... @@ -2129,10 +2120,10 @@ genCCall target dest_regs arg_regs = do
    2129 2120
         readResults _ _ [] _ accumCode = return accumCode
    
    2130 2121
         readResults [] _ _ _ _ = do
    
    2131 2122
           platform <- getPlatform
    
    2132
    -      pprPanic "genCCall, out of gp registers when reading results" (pdoc platform target)
    
    2123
    +      pprPanic "genCCall, out of gp registers when reading results" (pdoc platform expr)
    
    2133 2124
         readResults _ [] _ _ _ = do
    
    2134 2125
           platform <- getPlatform
    
    2135
    -      pprPanic "genCCall, out of fp registers when reading results" (pdoc platform target)
    
    2126
    +      pprPanic "genCCall, out of fp registers when reading results" (pdoc platform expr)
    
    2136 2127
         readResults (gpReg:gpRegs) (fpReg:fpRegs) (dst:dsts) accumRegs accumCode = do
    
    2137 2128
           -- gp/fp reg -> dst
    
    2138 2129
           platform <- getPlatform
    
    ... ... @@ -2150,13 +2141,6 @@ genCCall target dest_regs arg_regs = do
    2150 2141
                 -- truncate, otherwise an unexpectedly big value might be used in upfollowing calculations
    
    2151 2142
                 truncateReg W64 w r_dst
    
    2152 2143
     
    
    2153
    -    unaryFloatOp w op arg_reg dest_reg = do
    
    2154
    -      platform <- getPlatform
    
    2155
    -      (reg_fx, _format_x, code_fx) <- getFloatReg arg_reg
    
    2156
    -      let dst = getRegisterReg platform (CmmLocal dest_reg)
    
    2157
    -      let code = code_fx `appOL` op (OpReg w dst) (OpReg w reg_fx)
    
    2158
    -      pure code
    
    2159
    -
    
    2160 2144
     data BlockInRange = InRange | NotInRange BlockId
    
    2161 2145
     
    
    2162 2146
     genCondFarJump :: (MonadGetUnique m) => Cond -> Operand -> Operand -> BlockId -> m InstrBlock