[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 7 commits: NCG/LA64: add cmpxchg and xchg primops
Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC
Commits:
05586983 by Peng Fan at 2026-04-01T05:14:50-04:00
NCG/LA64: add cmpxchg and xchg primops
And append some new instructions for LA664 uarch.
Apply fix to cmpxchg-prim by Andreas Klebinger.
Suggestions in https://gitlab.haskell.org/ghc/ghc/-/merge_requests/15515
- - - - -
501f6520 by Duncan Coutts at 2026-04-01T05:14:51-04:00
Remove signal-based ticker implementations
Fixes issue #27073
All supported platforms should work with the pthreads + nanosleep based
ticker implementation. This avoids all the problems with using signals.
In practice, all supported platforms were probably using the non-signal
tickers already, which is probably why we do not get lots of reports
about deadlocks and other weirdness: we were definitely using functions
that are not async signal safe in the tick handler (such as fflush to
flush the eventlog).
Only Solaris was explicitly using the timer_create ticker impl, and even
Solaris could probably use the pthreads one (if anyone cared: Solaris is
no longer a Tier 3 supported platform).
Plausibly the only supported platform that this will change will be AIX,
which should now use the pthreads impl.
- - - - -
efee2a42 by Duncan Coutts at 2026-04-01T05:14:51-04:00
Tidy up some timer/ticker comments elsewhere
- - - - -
7a5dbd06 by Duncan Coutts at 2026-04-01T05:14:51-04:00
Remove now-unused install_vtalrm_handler
Support function used by both of the signal-based ticker
implementations.
- - - - -
8557b4e4 by Duncan Coutts at 2026-04-01T05:14:51-04:00
No longer probe for timer_create in rts/configure
It was only used by the TimerCreate.c ticker impl.
- - - - -
4cbcb41d by Duncan Coutts at 2026-04-01T05:14:51-04:00
Note that rtsTimerSignal is deprecated.
- - - - -
e8547953 by Simon Jakobi at 2026-04-01T05:14:52-04:00
Add perf test for #13960
Closes #13960.
- - - - -
15 changed files:
- compiler/GHC/CmmToAsm/LA64/CodeGen.hs
- compiler/GHC/CmmToAsm/LA64/Instr.hs
- compiler/GHC/CmmToAsm/LA64/Ppr.hs
- − m4/fp_check_timer_create.m4
- rts/Timer.c
- rts/configure.ac
- rts/include/rts/Timer.h
- rts/include/stg/SMP.h
- rts/posix/Signals.c
- rts/posix/Signals.h
- rts/posix/Ticker.c
- − rts/posix/ticker/Setitimer.c
- − rts/posix/ticker/TimerCreate.c
- + testsuite/tests/perf/compiler/T13960.hs
- testsuite/tests/perf/compiler/all.T
Changes:
=====================================
compiler/GHC/CmmToAsm/LA64/CodeGen.hs
=====================================
@@ -57,6 +57,12 @@ import Control.Monad
import GHC.Cmm.Dataflow.Label
import GHC.Types.Unique.DSM
import GHC.Types.Literal.Floating
+import GHC.Unit.Types ( ghcInternalUnitId )
+
+la664Enabled :: NatM Bool
+la664Enabled = do
+ config <- getConfig
+ return (ncgLa664Enabled config)
-- [General layout of an NCG]
cmmTopCodeGen ::
@@ -1651,6 +1657,10 @@ genPrim (MO_Prefetch_Data _n) [] [_] = return nilOL
genPrim (MO_AtomicRead w mo) [dst] [addr] = genAtomicRead w mo dst addr
genPrim (MO_AtomicWrite w mo) [] [addr,val] = genAtomicWrite w mo addr val
+genPrim (MO_AtomicRMW width amop) [dst] [addr,n] = genLibCCall (atomicRMWLabel width amop) [dst] [addr,n]
+genPrim (MO_Cmpxchg width) [dst] [addr,expe,new] = genCmpxchg width dst addr expe new
+genPrim (MO_Xchg width) [dst] [addr,value] = genXchg width dst addr value
+
genPrim mop@(MO_S_Mul2 _w) _ _ = unsupported mop
genPrim mop@(MO_S_QuotRem _w) _ _ = unsupported mop
genPrim mop@(MO_U_QuotRem _w) _ _ = unsupported mop
@@ -1674,9 +1684,6 @@ genPrim (MO_PopCnt width) [dst] [src] = genLibCCall (popCntLabel w
genPrim (MO_Pdep width) [dst] [src,mask] = genLibCCall (pdepLabel width) [dst] [src,mask]
genPrim (MO_Pext width) [dst] [src,mask] = genLibCCall (pextLabel width) [dst] [src,mask]
genPrim (MO_UF_Conv width) [dst] [src] = genLibCCall (word2FloatLabel width) [dst] [src]
-genPrim (MO_AtomicRMW width amop) [dst] [addr,n] = genLibCCall (atomicRMWLabel width amop) [dst] [addr,n]
-genPrim (MO_Cmpxchg width) [dst] [addr,old,new] = genLibCCall (cmpxchgLabel width) [dst] [addr,old,new]
-genPrim (MO_Xchg width) [dst] [addr,val] = genLibCCall (xchgLabel width) [dst] [addr,val]
genPrim (MO_Memcpy _align) [] [dst,src,n] = genLibCCall (fsLit "memcpy") [] [dst,src,n]
genPrim (MO_Memmove _align) [] [dst,src,n] = genLibCCall (fsLit "memmove") [] [dst,src,n]
genPrim (MO_Memcmp _align) [rst] [dst,src,n] = genLibCCall (fsLit "memcmp") [rst] [dst,src,n]
@@ -1872,6 +1879,20 @@ genBitRev w dst src = do
)
_ -> return ( code_x `snocOL` BITREV (OpReg w dst_reg) (OpReg w reg_x))
+genPrimCCall
+ :: FastString
+ -> [CmmFormal]
+ -> [CmmActual]
+ -> NatM InstrBlock
+
+genPrimCCall name dsts args = do
+ config <- getConfig
+ target <-
+ cmmMakeDynamicReference config CallReference
+ $ mkCmmCodeLabel ghcInternalUnitId name
+ let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
+ genCCall target cconv dsts args
+
-- Generate C call to the given function in libc
genLibCCall :: FastString -> [CmmFormal] -> [CmmActual] -> NatM InstrBlock
genLibCCall name dsts args = do
@@ -1945,6 +1966,52 @@ genAtomicWrite w mo addr val = do
)
_ -> panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
+genCmpxchg :: Width -> LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genCmpxchg w dst addr expe new = do
+ config <- getConfig
+ let
+ platform = ncgPlatform config
+ format = intFormat w
+
+ la664Enabled >>= \case
+
+ True -> do
+ (addr_reg, _, code_addr) <- getSomeReg addr
+ (expe_reg, _, code_expe) <- getSomeReg expe
+ (new_reg, _, code_new) <- getSomeReg new
+ let dst_reg = getRegisterReg platform (CmmLocal dst)
+ return $ code_addr `appOL` code_expe `appOL` code_new `appOL` toOL
+ [
+ -- Behave like the GCC builtin CAS operation
+ AMCASDB format (OpReg w expe_reg) (OpReg w new_reg) (OpReg w addr_reg),
+ MOV (OpReg w dst_reg) (OpReg w expe_reg)
+ ]
+
+ False ->
+ genPrimCCall (cmpxchgLabel w) [dst] [addr,expe,new]
+
+genXchg :: Width -> LocalReg -> CmmExpr -> CmmExpr -> NatM InstrBlock
+genXchg w dst addr val = do
+ config <- getConfig
+ tmp <- getNewRegNat II64
+ let
+ platform = ncgPlatform config
+ format = intFormat w
+
+ la664Enabled >>= \case
+
+ True -> do
+ (addr_reg, _, code_addr) <- getSomeReg addr
+ (val_reg, _, code_val) <- getSomeReg val
+ let dst_reg = getRegisterReg platform (CmmLocal dst)
+ return $ code_addr `appOL` code_val `appOL` toOL
+ [
+ AMSWAPDB format (OpReg w tmp) (OpReg w val_reg) (OpReg w addr_reg),
+ MOV (OpReg W64 dst_reg) (OpReg W64 tmp)
+ ]
+ False ->
+ genPrimCCall (xchgLabel w) [dst] [addr,val]
+
-- -----------------------------------------------------------------------------
{-
Generating C calls
@@ -1977,6 +2044,7 @@ member of a structure or union argument, or a vector/floating-point argument
wider than FRLEN may be passed in a GAR.
-}
+-- Generate C call to the given function in ghc-prim
genCCall
:: CmmExpr -- address of func call
-> ForeignConvention -- calling convention
=====================================
compiler/GHC/CmmToAsm/LA64/Instr.hs
=====================================
@@ -150,10 +150,11 @@ regUsageOfInstr platform instr = case instr of
-- ranges, corresponding to 2 and 1 instruction implementations respectively.
--
-- BCOND1 is selected by default.
- BCOND1 _ j d t -> usage (regTarget t ++ regOp j ++ regOp d, [])
- BCOND _ j d t -> usage (regTarget t ++ regOp j ++ regOp d, [])
- BEQZ j t -> usage (regTarget t ++ regOp j, [])
- BNEZ j t -> usage (regTarget t ++ regOp j, [])
+ BCOND1 _ j d t -> usage (regTarget t ++ regOp j ++ regOp d, [])
+ BCOND _ j d t -> usage (regTarget t ++ regOp j ++ regOp d, [])
+ BEQZ1 o1 o2 -> usage (regOp o1 ++ regOp o2, [])
+ BEQZ j t -> usage (regTarget t ++ regOp j, [])
+ BNEZ j t -> usage (regTarget t ++ regOp j, [])
-- 5. Common Memory Access Instructions --------------------------------------
LD _ dst src -> usage (regOp src, regOp dst)
LDU _ dst src -> usage (regOp src, regOp dst)
@@ -168,7 +169,17 @@ regUsageOfInstr platform instr = case instr of
-- LDCOND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-- STCOND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
-- 7. Atomic Memory Access Instructions --------------------------------------
+ -- In LoongArch, if the AM* atomic memory access instruction has the same register number as rd and rj,
+ -- the execution will trigger an Instruction Non-defined Exception. Here should be avoided.
AMSWAPDB _ dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ AMADDDB _ dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ AMANDDB _ dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ AMORDB _ dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ AMXORDB _ dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
+ --AMCASDB _ dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp src1 ++ regOp src2 ++ regOp dst)
+ AMCASDB _ dst src1 src2 -> usage (regOp dst ++ regOp src1 ++ regOp src2, regOp dst)
+ LL _ dst src1 _ -> usage (regOp src1, regOp dst)
+ SC _ dst src1 _ -> usage (regOp src1, regOp dst)
-- 8. Barrier Instructions ---------------------------------------------------
DBAR _hint -> usage ([], [])
IBAR _hint -> usage ([], [])
@@ -330,6 +341,7 @@ patchRegsOfInstr instr env = case instr of
TAIL36 r t -> TAIL36 (patchOp r) (patchTarget t)
BCOND1 c j d t -> BCOND1 c (patchOp j) (patchOp d) (patchTarget t)
BCOND c j d t -> BCOND c (patchOp j) (patchOp d) (patchTarget t)
+ BEQZ1 o1 o2 -> BEQZ1 (patchOp o1) (patchOp o2)
BEQZ j t -> BEQZ (patchOp j) (patchTarget t)
BNEZ j t -> BNEZ (patchOp j) (patchTarget t)
-- 5. Common Memory Access Instructions --------------------------------------
@@ -348,6 +360,13 @@ patchRegsOfInstr instr env = case instr of
-- STCOND o1 o2 o3 -> STCOND (patchOp o1) (patchOp o2) (patchOp o3)
-- 7. Atomic Memory Access Instructions --------------------------------------
AMSWAPDB f o1 o2 o3 -> AMSWAPDB f (patchOp o1) (patchOp o2) (patchOp o3)
+ AMADDDB f o1 o2 o3 -> AMADDDB f (patchOp o1) (patchOp o2) (patchOp o3)
+ AMANDDB f o1 o2 o3 -> AMANDDB f (patchOp o1) (patchOp o2) (patchOp o3)
+ AMORDB f o1 o2 o3 -> AMORDB f (patchOp o1) (patchOp o2) (patchOp o3)
+ AMXORDB f o1 o2 o3 -> AMXORDB f (patchOp o1) (patchOp o2) (patchOp o3)
+ AMCASDB f o1 o2 o3 -> AMCASDB f (patchOp o1) (patchOp o2) (patchOp o3)
+ LL f o1 o2 o3 -> LL f (patchOp o1) (patchOp o2) (patchOp o3)
+ SC f o1 o2 o3 -> SC f (patchOp o1) (patchOp o2) (patchOp o3)
-- 8. Barrier Instructions ---------------------------------------------------
DBAR o1 -> DBAR o1
IBAR o1 -> IBAR o1
@@ -398,6 +417,7 @@ isJumpishInstr instr = case instr of
TAIL36 {} -> True
BCOND1 {} -> True
BCOND {} -> True
+ BEQZ1 {} -> True
BEQZ {} -> True
BNEZ {} -> True
_ -> False
@@ -718,6 +738,7 @@ data Instr
| TAIL36 Operand Target
| BCOND1 Cond Operand Operand Target
| BCOND Cond Operand Operand Target
+ | BEQZ1 Operand Operand
| BEQZ Operand Target
| BNEZ Operand Target
-- 5. Common Memory Access Instructions --------------------------------------
@@ -733,6 +754,13 @@ data Instr
-- 6. Bound Check Memory Access Instructions ---------------------------------
-- 7. Atomic Memory Access Instructions --------------------------------------
| AMSWAPDB Format Operand Operand Operand
+ | AMADDDB Format Operand Operand Operand
+ | AMANDDB Format Operand Operand Operand
+ | AMORDB Format Operand Operand Operand
+ | AMXORDB Format Operand Operand Operand
+ | AMCASDB Format Operand Operand Operand
+ | LL Format Operand Operand Operand
+ | SC Format Operand Operand Operand
-- 8. Barrier Instructions ---------------------------------------------------
| DBAR BarrierType
| IBAR BarrierType
@@ -839,6 +867,7 @@ instrCon i =
TAIL36{} -> "TAIL36"
BCOND1{} -> "BCOND1"
BCOND{} -> "BCOND"
+ BEQZ1{} -> "BEQZ1"
BEQZ{} -> "BEQZ"
BNEZ{} -> "BNEZ"
LD{} -> "LD"
@@ -851,6 +880,13 @@ instrCon i =
STPTR{} -> "STPTR"
PRELD{} -> "PRELD"
AMSWAPDB{} -> "AMSWAPDB"
+ AMADDDB{} -> "AMADDDB"
+ AMANDDB{} -> "AMANDDB"
+ AMORDB{} -> "AMORDB"
+ AMXORDB{} -> "AMXORDB"
+ AMCASDB{} -> "AMCASDB"
+ LL{} -> "LL"
+ SC{} -> "SC"
DBAR{} -> "DBAR"
IBAR{} -> "IBAR"
FCVT{} -> "FCVT"
=====================================
compiler/GHC/CmmToAsm/LA64/Ppr.hs
=====================================
@@ -852,6 +852,7 @@ pprInstr platform instr = case instr of
line $ text "\tbgeu" <+> pprOp platform d <> comma <+> pprOp platform j <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
UGT ->
line $ text "\tbltu" <+> pprOp platform d <> comma <+> pprOp platform j <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
+
_ -> line $ text "\t" <> pprBcond c <+> pprOp platform j <> comma <+> pprOp platform d <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
BCOND1 _ _ _ (TLabel _) -> panic "LA64.ppr: BCOND1: No conditional branching to TLabel!"
@@ -916,17 +917,18 @@ pprInstr platform instr = case instr of
BCOND _ _ _ (TReg _) -> panic "LA64.ppr: BCOND: No conditional branching to registers!"
+ BEQZ1 o1 o2 | isImmOp o2 -> op2 (text "\tbeqz") o1 o2
BEQZ j (TBlock bid) ->
line $ text "\tbeqz" <+> pprOp platform j <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
BEQZ j (TLabel lbl) ->
line $ text "\tbeqz" <+> pprOp platform j <> comma <+> pprAsmLabel platform lbl
- BEQZ _ (TReg _) -> panic "LA64.ppr: BEQZ: No conditional branching to registers!"
+ BEQZ _ (TReg _) -> panic "LA64.ppr: BEQZ: No conditional branching to registers!"
BNEZ j (TBlock bid) ->
line $ text "\tbnez" <+> pprOp platform j <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
BNEZ j (TLabel lbl) ->
line $ text "\tbnez" <+> pprOp platform j <> comma <+> pprAsmLabel platform lbl
- BNEZ _ (TReg _) -> panic "LA64.ppr: BNEZ: No conditional branching to registers!"
+ BNEZ _ (TReg _) -> panic "LA64.ppr: BNEZ: No conditional branching to registers!"
-- 5. Common Memory Access Instructions --------------------------------------
-- LD.{B[U]/H[U]/W[U]/D}, ST.{B/H/W/D}: AddrRegImm
@@ -1020,8 +1022,29 @@ pprInstr platform instr = case instr of
AMSWAPDB II32 o1 o2 o3 -> op3 (text "\tamswap_db.w") o1 o2 o3
AMSWAPDB II64 o1 o2 o3 -> op3 (text "\tamswap_db.d") o1 o2 o3
-- AM.{SWAP/ADD}[_DB].{B/H}
+ AMADDDB II8 o1 o2 o3 -> op3 (text "\tamadd_db.b") o1 o2 o3
+ AMADDDB II16 o1 o2 o3 -> op3 (text "\tamadd_db.h") o1 o2 o3
+ AMADDDB II32 o1 o2 o3 -> op3 (text "\tamadd_db.w") o1 o2 o3
+ AMADDDB II64 o1 o2 o3 -> op3 (text "\tamadd_db.d") o1 o2 o3
+
+ AMANDDB II32 o1 o2 o3 -> op3 (text "\tamand_db.w") o1 o2 o3
+ AMANDDB II64 o1 o2 o3 -> op3 (text "\tamand_db.d") o1 o2 o3
+
+ AMORDB II32 o1 o2 o3 -> op3 (text "\tamor_db.w") o1 o2 o3
+ AMORDB II64 o1 o2 o3 -> op3 (text "\tamor_db.d") o1 o2 o3
+
+ AMXORDB II32 o1 o2 o3 -> op3 (text "\tamxor_db.w") o1 o2 o3
+ AMXORDB II64 o1 o2 o3 -> op3 (text "\tamxor_db.d") o1 o2 o3
-- AMCAS[_DB].{B/H/W/D}
+ AMCASDB II8 o1 o2 o3 -> op3 (text "\tamcas_db.b") o1 o2 o3
+ AMCASDB II16 o1 o2 o3 -> op3 (text "\tamcas_db.h") o1 o2 o3
+ AMCASDB II32 o1 o2 o3 -> op3 (text "\tamcas_db.w") o1 o2 o3
+ AMCASDB II64 o1 o2 o3 -> op3 (text "\tamcas_db.d") o1 o2 o3
-- LL.{W/D}, SC.{W/D}
+ LL II32 o1 o2 o3 -> op3 (text "\tll.w") o1 o2 o3
+ SC II32 o1 o2 o3 -> op3 (text "\tsc.w") o1 o2 o3
+ LL II64 o1 o2 o3 -> op3 (text "\tll.d") o1 o2 o3
+ SC II64 o1 o2 o3 -> op3 (text "\tsc.d") o1 o2 o3
-- SC.Q
-- LL.ACQ.{W/D}, SC.REL.{W/D}
-- 8. Barrier Instructions ---------------------------------------------------
=====================================
m4/fp_check_timer_create.m4 deleted
=====================================
@@ -1,110 +0,0 @@
-# Check for a working timer_create(). We need a pretty detailed check
-# here, because there exist partially-working implementations of
-# timer_create() in certain versions of Linux (see bug #1933).
-#
-AC_DEFUN([FP_CHECK_TIMER_CREATE],[
-AC_CHECK_FUNC([timer_create],[HAVE_timer_create=yes],[HAVE_timer_create=no])
-
-if test "$HAVE_timer_create" = "yes"
-then
- if test "$cross_compiling" = "yes"
- then
- # We can't test timer_create when we're cross-compiling, so we
- # optimistiaclly assume that it actually works properly.
- AC_DEFINE([USE_TIMER_CREATE], 1, [Define to 1 if we can use timer_create(CLOCK_REALTIME,...)])
- else
- AC_CACHE_CHECK([for a working timer_create(CLOCK_REALTIME)],
- [fptools_cv_timer_create_works],
- [AC_TRY_RUN([
-#include
participants (1)
-
Marge Bot (@marge-bot)