Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
-
bbaa44a7
by Peng Fan at 2025-07-16T16:50:42-04:00
3 changed files:
- compiler/GHC/CmmToAsm/LA64/CodeGen.hs
- compiler/GHC/CmmToAsm/LA64/Instr.hs
- compiler/GHC/CmmToAsm/LA64/Ppr.hs
Changes:
| ... | ... | @@ -1910,13 +1910,12 @@ genCCall target dest_regs arg_regs = do |
| 1910 | 1910 | MO_W64X2_Max -> unsupported mop
|
| 1911 | 1911 | |
| 1912 | 1912 | -- Memory Ordering
|
| 1913 | - -- A hint value of 0 is mandatory by default, and it indicates a fully functional synchronization barrier.
|
|
| 1914 | - -- Only after all previous load/store access operations are completely executed, the DBAR 0 instruction can be executed;
|
|
| 1915 | - -- and only after the execution of DBAR 0 is completed, all subsequent load/store access operations can be executed.
|
|
| 1916 | - |
|
| 1917 | - MO_AcquireFence -> pure (unitOL (DBAR Hint0))
|
|
| 1918 | - MO_ReleaseFence -> pure (unitOL (DBAR Hint0))
|
|
| 1919 | - MO_SeqCstFence -> pure (unitOL (DBAR Hint0))
|
|
| 1913 | + -- Support finer-grained DBAR hints for LA664 and newer uarchs.
|
|
| 1914 | + -- These are treated as DBAR 0 on older uarchs, so we can start
|
|
| 1915 | + -- to unconditionally emit the new hints right away.
|
|
| 1916 | + MO_AcquireFence -> pure (unitOL (DBAR HintAcquire))
|
|
| 1917 | + MO_ReleaseFence -> pure (unitOL (DBAR HintRelease))
|
|
| 1918 | + MO_SeqCstFence -> pure (unitOL (DBAR HintSeqcst))
|
|
| 1920 | 1919 | |
| 1921 | 1920 | MO_Touch -> pure nilOL -- Keep variables live (when using interior pointers)
|
| 1922 | 1921 | -- Prefetch
|
| ... | ... | @@ -1954,12 +1953,11 @@ genCCall target dest_regs arg_regs = do |
| 1954 | 1953 | |
| 1955 | 1954 | MemOrderAcquire -> toOL [
|
| 1956 | 1955 | ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
|
| 1957 | - DBAR Hint0
|
|
| 1956 | + DBAR HintAcquire
|
|
| 1958 | 1957 | ]
|
| 1959 | - MemOrderSeqCst -> toOL [
|
|
| 1960 | - ann moDescr (DBAR Hint0),
|
|
| 1961 | - LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p),
|
|
| 1962 | - DBAR Hint0
|
|
| 1958 | + MemOrderSeqCst -> toOL [
|
|
| 1959 | + ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
|
|
| 1960 | + DBAR HintSeqcst
|
|
| 1963 | 1961 | ]
|
| 1964 | 1962 | _ -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo
|
| 1965 | 1963 | dst = getRegisterReg platform (CmmLocal dst_reg)
|
| ... | ... | @@ -1974,15 +1972,9 @@ genCCall target dest_regs arg_regs = do |
| 1974 | 1972 | (val, fmt_val, code_val) <- getSomeReg val_reg
|
| 1975 | 1973 | let instrs = case ord of
|
| 1976 | 1974 | MemOrderRelaxed -> unitOL $ ann moDescr (ST fmt_val (OpReg w val) (OpAddr $ AddrReg p))
|
| 1977 | - MemOrderRelease -> toOL [
|
|
| 1978 | - ann moDescr (DBAR Hint0),
|
|
| 1979 | - ST fmt_val (OpReg w val) (OpAddr $ AddrReg p)
|
|
| 1980 | - ]
|
|
| 1981 | - MemOrderSeqCst -> toOL [
|
|
| 1982 | - ann moDescr (DBAR Hint0),
|
|
| 1983 | - ST fmt_val (OpReg w val) (OpAddr $ AddrReg p),
|
|
| 1984 | - DBAR Hint0
|
|
| 1985 | - ]
|
|
| 1975 | + -- implement with AMSWAPDB
|
|
| 1976 | + MemOrderRelease -> unitOL $ ann moDescr (AMSWAPDB fmt_val (OpReg w zeroReg) (OpReg w val) (OpReg w p))
|
|
| 1977 | + MemOrderSeqCst -> unitOL $ ann moDescr (AMSWAPDB fmt_val (OpReg w zeroReg) (OpReg w val) (OpReg w p))
|
|
| 1986 | 1978 | _ -> panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
|
| 1987 | 1979 | moDescr = (text . show) mo
|
| 1988 | 1980 | code =
|
| ... | ... | @@ -169,6 +169,7 @@ regUsageOfInstr platform instr = case instr of |
| 169 | 169 | -- LDCOND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 170 | 170 | -- STCOND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 171 | 171 | -- 7. Atomic Memory Access Instructions --------------------------------------
|
| 172 | + AMSWAPDB _ dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
|
| 172 | 173 | -- 8. Barrier Instructions ---------------------------------------------------
|
| 173 | 174 | DBAR _hint -> usage ([], [])
|
| 174 | 175 | IBAR _hint -> usage ([], [])
|
| ... | ... | @@ -343,13 +344,13 @@ patchRegsOfInstr instr env = case instr of |
| 343 | 344 | STX f o1 o2 -> STX f (patchOp o1) (patchOp o2)
|
| 344 | 345 | LDPTR f o1 o2 -> LDPTR f (patchOp o1) (patchOp o2)
|
| 345 | 346 | STPTR f o1 o2 -> STPTR f (patchOp o1) (patchOp o2)
|
| 346 | - PRELD o1 o2 -> PRELD (patchOp o1) (patchOp o2)
|
|
| 347 | + PRELD o1 o2 -> PRELD (patchOp o1) (patchOp o2)
|
|
| 347 | 348 | -- 6. Bound Check Memory Access Instructions ---------------------------------
|
| 348 | 349 | -- LDCOND o1 o2 o3 -> LDCOND (patchOp o1) (patchOp o2) (patchOp o3)
|
| 349 | 350 | -- STCOND o1 o2 o3 -> STCOND (patchOp o1) (patchOp o2) (patchOp o3)
|
| 350 | 351 | -- 7. Atomic Memory Access Instructions --------------------------------------
|
| 352 | + AMSWAPDB f o1 o2 o3 -> AMSWAPDB f (patchOp o1) (patchOp o2) (patchOp o3)
|
|
| 351 | 353 | -- 8. Barrier Instructions ---------------------------------------------------
|
| 352 | - -- TODO: need fix
|
|
| 353 | 354 | DBAR o1 -> DBAR o1
|
| 354 | 355 | IBAR o1 -> IBAR o1
|
| 355 | 356 | -- 11. Floating Point Instructions -------------------------------------------
|
| ... | ... | @@ -734,6 +735,7 @@ data Instr |
| 734 | 735 | | PRELD Operand Operand
|
| 735 | 736 | -- 6. Bound Check Memory Access Instructions ---------------------------------
|
| 736 | 737 | -- 7. Atomic Memory Access Instructions --------------------------------------
|
| 738 | + | AMSWAPDB Format Operand Operand Operand
|
|
| 737 | 739 | -- 8. Barrier Instructions ---------------------------------------------------
|
| 738 | 740 | | DBAR BarrierType
|
| 739 | 741 | | IBAR BarrierType
|
| ... | ... | @@ -755,8 +757,13 @@ data Instr |
| 755 | 757 | -- fnmadd: d = - r1 * r2 - r3
|
| 756 | 758 | | FMA FMASign Operand Operand Operand Operand
|
| 757 | 759 | |
| 758 | --- TODO: Not complete.
|
|
| 759 | -data BarrierType = Hint0
|
|
| 760 | +data BarrierType
|
|
| 761 | + = Hint0
|
|
| 762 | + | Hint700
|
|
| 763 | + | HintAcquire
|
|
| 764 | + | HintRelease
|
|
| 765 | + | HintSeqcst
|
|
| 766 | + deriving (Eq, Show)
|
|
| 760 | 767 | |
| 761 | 768 | instrCon :: Instr -> String
|
| 762 | 769 | instrCon i =
|
| ... | ... | @@ -847,6 +854,7 @@ instrCon i = |
| 847 | 854 | LDPTR{} -> "LDPTR"
|
| 848 | 855 | STPTR{} -> "STPTR"
|
| 849 | 856 | PRELD{} -> "PRELD"
|
| 857 | + AMSWAPDB{} -> "AMSWAPDB"
|
|
| 850 | 858 | DBAR{} -> "DBAR"
|
| 851 | 859 | IBAR{} -> "IBAR"
|
| 852 | 860 | FCVT{} -> "FCVT"
|
| ... | ... | @@ -1015,6 +1015,10 @@ pprInstr platform instr = case instr of |
| 1015 | 1015 | -- LD{GT/LE}.{B/H/W/D}, ST{GT/LE}.{B/H/W/D}
|
| 1016 | 1016 | -- 7. Atomic Memory Access Instructions --------------------------------------
|
| 1017 | 1017 | -- AM{SWAP/ADD/AND/OR/XOR/MAX/MIN}[DB].{W/D}, AM{MAX/MIN}[_DB].{WU/DU}
|
| 1018 | + AMSWAPDB II8 o1 o2 o3 -> op3 (text "\tamswap_db.b") o1 o2 o3
|
|
| 1019 | + AMSWAPDB II16 o1 o2 o3 -> op3 (text "\tamswap_db.h") o1 o2 o3
|
|
| 1020 | + AMSWAPDB II32 o1 o2 o3 -> op3 (text "\tamswap_db.w") o1 o2 o3
|
|
| 1021 | + AMSWAPDB II64 o1 o2 o3 -> op3 (text "\tamswap_db.d") o1 o2 o3
|
|
| 1018 | 1022 | -- AM.{SWAP/ADD}[_DB].{B/H}
|
| 1019 | 1023 | -- AMCAS[_DB].{B/H/W/D}
|
| 1020 | 1024 | -- LL.{W/D}, SC.{W/D}
|
| ... | ... | @@ -1112,19 +1116,28 @@ pprInstr platform instr = case instr of |
| 1112 | 1116 | op3 op o1 o2 o3 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3
|
| 1113 | 1117 | op4 op o1 o2 o3 o4 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4
|
| 1114 | 1118 | {-
|
| 1115 | - -- TODO: Support dbar with different hints.
|
|
| 1119 | + Support dbar with different hints.
|
|
| 1116 | 1120 | On LoongArch uses "dbar 0" (full completion barrier) for everything.
|
| 1117 | 1121 | But the full completion barrier has no performance to tell, so
|
| 1118 | 1122 | Loongson-3A6000 and newer processors have made finer granularity hints
|
| 1119 | 1123 | available:
|
| 1120 | 1124 | |
| 1125 | + Hint 0x700: barrier for "read after read" from the same address.
|
|
| 1121 | 1126 | Bit4: ordering or completion (0: completion, 1: ordering)
|
| 1122 | 1127 | Bit3: barrier for previous read (0: true, 1: false)
|
| 1123 | 1128 | Bit2: barrier for previous write (0: true, 1: false)
|
| 1124 | 1129 | Bit1: barrier for succeeding read (0: true, 1: false)
|
| 1125 | 1130 | Bit0: barrier for succeeding write (0: true, 1: false)
|
| 1131 | + |
|
| 1132 | + DBAR 0b10100: acquire
|
|
| 1133 | + DBAR 0b10010: release
|
|
| 1134 | + DBAR 0b10000: seqcst
|
|
| 1126 | 1135 | -}
|
| 1127 | 1136 | pprBarrierType Hint0 = text "0x0"
|
| 1137 | + pprBarrierType HintSeqcst = text "0x10"
|
|
| 1138 | + pprBarrierType HintRelease = text "0x12"
|
|
| 1139 | + pprBarrierType HintAcquire = text "0x14"
|
|
| 1140 | + pprBarrierType Hint700 = text "0x700"
|
|
| 1128 | 1141 | floatPrecission o | isSingleOp o = text "s"
|
| 1129 | 1142 | | isDoubleOp o = text "d"
|
| 1130 | 1143 | | otherwise = pprPanic "Impossible floating point precission: " (pprOp platform o)
|