Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC Commits: bbaa44a7 by Peng Fan at 2025-07-16T16:50:42-04:00 NCG/LA64: Support finer-grained DBAR hints For LA664 and newer uarchs, they have made finer granularity hints available: Bit4: ordering or completion (0: completion, 1: ordering) Bit3: barrier for previous read (0: true, 1: false) Bit2: barrier for previous write (0: true, 1: false) Bit1: barrier for succeeding read (0: true, 1: false) Bit0: barrier for succeeding write (0: true, 1: false) And not affect the existing models because other hints are treated as 'dbar 0' there. - - - - - 3 changed files: - compiler/GHC/CmmToAsm/LA64/CodeGen.hs - compiler/GHC/CmmToAsm/LA64/Instr.hs - compiler/GHC/CmmToAsm/LA64/Ppr.hs Changes: ===================================== compiler/GHC/CmmToAsm/LA64/CodeGen.hs ===================================== @@ -1910,13 +1910,12 @@ genCCall target dest_regs arg_regs = do MO_W64X2_Max -> unsupported mop -- Memory Ordering - -- A hint value of 0 is mandatory by default, and it indicates a fully functional synchronization barrier. - -- Only after all previous load/store access operations are completely executed, the DBAR 0 instruction can be executed; - -- and only after the execution of DBAR 0 is completed, all subsequent load/store access operations can be executed. - - MO_AcquireFence -> pure (unitOL (DBAR Hint0)) - MO_ReleaseFence -> pure (unitOL (DBAR Hint0)) - MO_SeqCstFence -> pure (unitOL (DBAR Hint0)) + -- Support finer-grained DBAR hints for LA664 and newer uarchs. + -- These are treated as DBAR 0 on older uarchs, so we can start + -- to unconditionally emit the new hints right away. + MO_AcquireFence -> pure (unitOL (DBAR HintAcquire)) + MO_ReleaseFence -> pure (unitOL (DBAR HintRelease)) + MO_SeqCstFence -> pure (unitOL (DBAR HintSeqcst)) MO_Touch -> pure nilOL -- Keep variables live (when using interior pointers) -- Prefetch @@ -1954,12 +1953,11 @@ genCCall target dest_regs arg_regs = do MemOrderAcquire -> toOL [ ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)), - DBAR Hint0 + DBAR HintAcquire ] - MemOrderSeqCst -> toOL [ - ann moDescr (DBAR Hint0), - LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p), - DBAR Hint0 + MemOrderSeqCst -> toOL [ + ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)), + DBAR HintSeqcst ] _ -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo dst = getRegisterReg platform (CmmLocal dst_reg) @@ -1974,15 +1972,9 @@ genCCall target dest_regs arg_regs = do (val, fmt_val, code_val) <- getSomeReg val_reg let instrs = case ord of MemOrderRelaxed -> unitOL $ ann moDescr (ST fmt_val (OpReg w val) (OpAddr $ AddrReg p)) - MemOrderRelease -> toOL [ - ann moDescr (DBAR Hint0), - ST fmt_val (OpReg w val) (OpAddr $ AddrReg p) - ] - MemOrderSeqCst -> toOL [ - ann moDescr (DBAR Hint0), - ST fmt_val (OpReg w val) (OpAddr $ AddrReg p), - DBAR Hint0 - ] + -- implement with AMSWAPDB + MemOrderRelease -> unitOL $ ann moDescr (AMSWAPDB fmt_val (OpReg w zeroReg) (OpReg w val) (OpReg w p)) + MemOrderSeqCst -> unitOL $ ann moDescr (AMSWAPDB fmt_val (OpReg w zeroReg) (OpReg w val) (OpReg w p)) _ -> panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo moDescr = (text . show) mo code = ===================================== compiler/GHC/CmmToAsm/LA64/Instr.hs ===================================== @@ -169,6 +169,7 @@ regUsageOfInstr platform instr = case instr of -- LDCOND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) -- STCOND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) -- 7. Atomic Memory Access Instructions -------------------------------------- + AMSWAPDB _ dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) -- 8. Barrier Instructions --------------------------------------------------- DBAR _hint -> usage ([], []) IBAR _hint -> usage ([], []) @@ -343,13 +344,13 @@ patchRegsOfInstr instr env = case instr of STX f o1 o2 -> STX f (patchOp o1) (patchOp o2) LDPTR f o1 o2 -> LDPTR f (patchOp o1) (patchOp o2) STPTR f o1 o2 -> STPTR f (patchOp o1) (patchOp o2) - PRELD o1 o2 -> PRELD (patchOp o1) (patchOp o2) + PRELD o1 o2 -> PRELD (patchOp o1) (patchOp o2) -- 6. Bound Check Memory Access Instructions --------------------------------- -- LDCOND o1 o2 o3 -> LDCOND (patchOp o1) (patchOp o2) (patchOp o3) -- STCOND o1 o2 o3 -> STCOND (patchOp o1) (patchOp o2) (patchOp o3) -- 7. Atomic Memory Access Instructions -------------------------------------- + AMSWAPDB f o1 o2 o3 -> AMSWAPDB f (patchOp o1) (patchOp o2) (patchOp o3) -- 8. Barrier Instructions --------------------------------------------------- - -- TODO: need fix DBAR o1 -> DBAR o1 IBAR o1 -> IBAR o1 -- 11. Floating Point Instructions ------------------------------------------- @@ -734,6 +735,7 @@ data Instr | PRELD Operand Operand -- 6. Bound Check Memory Access Instructions --------------------------------- -- 7. Atomic Memory Access Instructions -------------------------------------- + | AMSWAPDB Format Operand Operand Operand -- 8. Barrier Instructions --------------------------------------------------- | DBAR BarrierType | IBAR BarrierType @@ -755,8 +757,13 @@ data Instr -- fnmadd: d = - r1 * r2 - r3 | FMA FMASign Operand Operand Operand Operand --- TODO: Not complete. -data BarrierType = Hint0 +data BarrierType + = Hint0 + | Hint700 + | HintAcquire + | HintRelease + | HintSeqcst + deriving (Eq, Show) instrCon :: Instr -> String instrCon i = @@ -847,6 +854,7 @@ instrCon i = LDPTR{} -> "LDPTR" STPTR{} -> "STPTR" PRELD{} -> "PRELD" + AMSWAPDB{} -> "AMSWAPDB" DBAR{} -> "DBAR" IBAR{} -> "IBAR" FCVT{} -> "FCVT" ===================================== compiler/GHC/CmmToAsm/LA64/Ppr.hs ===================================== @@ -1015,6 +1015,10 @@ pprInstr platform instr = case instr of -- LD{GT/LE}.{B/H/W/D}, ST{GT/LE}.{B/H/W/D} -- 7. Atomic Memory Access Instructions -------------------------------------- -- AM{SWAP/ADD/AND/OR/XOR/MAX/MIN}[DB].{W/D}, AM{MAX/MIN}[_DB].{WU/DU} + AMSWAPDB II8 o1 o2 o3 -> op3 (text "\tamswap_db.b") o1 o2 o3 + AMSWAPDB II16 o1 o2 o3 -> op3 (text "\tamswap_db.h") o1 o2 o3 + AMSWAPDB II32 o1 o2 o3 -> op3 (text "\tamswap_db.w") o1 o2 o3 + AMSWAPDB II64 o1 o2 o3 -> op3 (text "\tamswap_db.d") o1 o2 o3 -- AM.{SWAP/ADD}[_DB].{B/H} -- AMCAS[_DB].{B/H/W/D} -- LL.{W/D}, SC.{W/D} @@ -1112,19 +1116,28 @@ pprInstr platform instr = case instr of op3 op o1 o2 o3 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 op4 op o1 o2 o3 o4 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4 {- - -- TODO: Support dbar with different hints. + Support dbar with different hints. On LoongArch uses "dbar 0" (full completion barrier) for everything. But the full completion barrier has no performance to tell, so Loongson-3A6000 and newer processors have made finer granularity hints available: + Hint 0x700: barrier for "read after read" from the same address. Bit4: ordering or completion (0: completion, 1: ordering) Bit3: barrier for previous read (0: true, 1: false) Bit2: barrier for previous write (0: true, 1: false) Bit1: barrier for succeeding read (0: true, 1: false) Bit0: barrier for succeeding write (0: true, 1: false) + + DBAR 0b10100: acquire + DBAR 0b10010: release + DBAR 0b10000: seqcst -} pprBarrierType Hint0 = text "0x0" + pprBarrierType HintSeqcst = text "0x10" + pprBarrierType HintRelease = text "0x12" + pprBarrierType HintAcquire = text "0x14" + pprBarrierType Hint700 = text "0x700" floatPrecission o | isSingleOp o = text "s" | isDoubleOp o = text "d" | otherwise = pprPanic "Impossible floating point precission: " (pprOp platform o) View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/bbaa44a784fb041ee91858743eaa7e00... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/bbaa44a784fb041ee91858743eaa7e00... You're receiving this email because of your account on gitlab.haskell.org.