Peter Trommler pushed to branch wip/T27135 at Glasgow Haskell Compiler / GHC
Commits:
-
b824ccf9
by Peter Trommler at 2026-04-04T19:08:37+02:00
9 changed files:
- rts/Capability.h
- rts/Interpreter.c
- rts/Proftimer.c
- rts/Schedule.c
- rts/Schedule.h
- rts/Timer.c
- rts/include/stg/SMP.h
- rts/posix/ticker/Pthread.c
- rts/posix/ticker/TimerFd.c
Changes:
| ... | ... | @@ -489,14 +489,14 @@ stopCapability (Capability *cap) |
| 489 | 489 | // It may not work - the thread might be updating HpLim itself
|
| 490 | 490 | // at the same time - so we also have the context_switch/interrupted
|
| 491 | 491 | // flags as a sticky way to tell the thread to stop.
|
| 492 | - RELAXED_STORE_ALWAYS(&cap->r.rHpLim, NULL);
|
|
| 492 | + atomic_store_explicit(&cap->r.rHpLim, NULL, memory_order_relaxed);
|
|
| 493 | 493 | }
|
| 494 | 494 | |
| 495 | 495 | INLINE_HEADER void
|
| 496 | 496 | interruptCapability (Capability *cap)
|
| 497 | 497 | {
|
| 498 | 498 | stopCapability(cap);
|
| 499 | - RELAXED_STORE_ALWAYS(&cap->interrupt, true);
|
|
| 499 | + atomic_store_explicit(&cap->interrupt, true, memory_order_relaxed);
|
|
| 500 | 500 | }
|
| 501 | 501 | |
| 502 | 502 | INLINE_HEADER void
|
| ... | ... | @@ -505,7 +505,7 @@ contextSwitchCapability (Capability *cap, bool immediately) |
| 505 | 505 | if(immediately) {
|
| 506 | 506 | stopCapability(cap);
|
| 507 | 507 | }
|
| 508 | - RELAXED_STORE_ALWAYS(&cap->context_switch, true);
|
|
| 508 | + atomic_store_explicit(&cap->context_switch, true, memory_order_relaxed);
|
|
| 509 | 509 | }
|
| 510 | 510 | |
| 511 | 511 | #if defined(THREADED_RTS)
|
| ... | ... | @@ -695,7 +695,7 @@ interpretBCO (Capability* cap) |
| 695 | 695 | |
| 696 | 696 | // N.B. HpLim is the context-switch flag; when it
|
| 697 | 697 | // goes to zero we must return to the scheduler.
|
| 698 | - RELAXED_STORE_ALWAYS(&cap->r.rHpLim, (P_)1);
|
|
| 698 | + atomic_store_explicit(&cap->r.rHpLim, (P_)1, memory_order_relaxed);
|
|
| 699 | 699 | |
| 700 | 700 | IF_DEBUG(interpreter,
|
| 701 | 701 | debugBelch(
|
| ... | ... | @@ -46,7 +46,7 @@ void |
| 46 | 46 | stopProfTimer( void )
|
| 47 | 47 | {
|
| 48 | 48 | #if defined(PROFILING)
|
| 49 | - RELAXED_STORE_ALWAYS(&do_prof_ticks, false);
|
|
| 49 | + atomic_store_explicit(&do_prof_ticks, false, memory_order_relaxed);
|
|
| 50 | 50 | #endif
|
| 51 | 51 | }
|
| 52 | 52 | |
| ... | ... | @@ -54,7 +54,7 @@ void |
| 54 | 54 | startProfTimer( void )
|
| 55 | 55 | {
|
| 56 | 56 | #if defined(PROFILING)
|
| 57 | - RELAXED_STORE_ALWAYS(&do_prof_ticks, true);
|
|
| 57 | + atomic_store_explicit(&do_prof_ticks, true, memory_order_relaxed);
|
|
| 58 | 58 | #endif
|
| 59 | 59 | }
|
| 60 | 60 | |
| ... | ... | @@ -62,7 +62,7 @@ void |
| 62 | 62 | stopHeapProfTimer( void )
|
| 63 | 63 | {
|
| 64 | 64 | if (RtsFlags.ProfFlags.doHeapProfile){
|
| 65 | - RELAXED_STORE_ALWAYS(&heap_prof_timer_active, false);
|
|
| 65 | + atomic_store_explicit(&heap_prof_timer_active, false, memory_order_relaxed);
|
|
| 66 | 66 | pauseHeapProfTimer();
|
| 67 | 67 | }
|
| 68 | 68 | }
|
| ... | ... | @@ -71,14 +71,14 @@ void |
| 71 | 71 | startHeapProfTimer( void )
|
| 72 | 72 | {
|
| 73 | 73 | if (RtsFlags.ProfFlags.doHeapProfile){
|
| 74 | - RELAXED_STORE_ALWAYS(&heap_prof_timer_active, true);
|
|
| 74 | + atomic_store_explicit(&heap_prof_timer_active, true, memory_order_relaxed);
|
|
| 75 | 75 | resumeHeapProfTimer();
|
| 76 | 76 | }
|
| 77 | 77 | }
|
| 78 | 78 | |
| 79 | 79 | void
|
| 80 | 80 | pauseHeapProfTimer ( void ) {
|
| 81 | - RELAXED_STORE_ALWAYS(&do_heap_prof_ticks, false);
|
|
| 81 | + atomic_store_explicit(&do_heap_prof_ticks, false, memory_order_relaxed);
|
|
| 82 | 82 | }
|
| 83 | 83 | |
| 84 | 84 | |
| ... | ... | @@ -86,7 +86,7 @@ void |
| 86 | 86 | resumeHeapProfTimer ( void ) {
|
| 87 | 87 | if (RtsFlags.ProfFlags.doHeapProfile &&
|
| 88 | 88 | RtsFlags.ProfFlags.heapProfileIntervalTicks > 0) {
|
| 89 | - RELAXED_STORE_ALWAYS(&do_heap_prof_ticks, true);
|
|
| 89 | + atomic_store_explicit(&do_heap_prof_ticks, true, memory_order_relaxed);
|
|
| 90 | 90 | }
|
| 91 | 91 | }
|
| 92 | 92 | |
| ... | ... | @@ -94,14 +94,14 @@ void |
| 94 | 94 | requestHeapCensus( void ){
|
| 95 | 95 | // If no profiling mode is passed then just ignore the call.
|
| 96 | 96 | if (RtsFlags.ProfFlags.doHeapProfile){
|
| 97 | - RELAXED_STORE_ALWAYS(&performHeapProfile, true);
|
|
| 97 | + atomic_store_explicit(&performHeapProfile, true, memory_order_relaxed);
|
|
| 98 | 98 | }
|
| 99 | 99 | }
|
| 100 | 100 | |
| 101 | 101 | void
|
| 102 | 102 | initProfTimer( void )
|
| 103 | 103 | {
|
| 104 | - RELAXED_STORE_ALWAYS(&performHeapProfile, false);
|
|
| 104 | + atomic_store_explicit(&performHeapProfile, false, memory_order_relaxed);
|
|
| 105 | 105 | |
| 106 | 106 | ticks_to_heap_profile = RtsFlags.ProfFlags.heapProfileIntervalTicks;
|
| 107 | 107 | |
| ... | ... | @@ -120,7 +120,7 @@ handleProfTick(void) |
| 120 | 120 | {
|
| 121 | 121 | #if defined(PROFILING)
|
| 122 | 122 | total_ticks++;
|
| 123 | - if (RELAXED_LOAD_ALWAYS(&do_prof_ticks)) {
|
|
| 123 | + if (atomic_load_explicit(&do_prof_ticks, memory_order_relaxed)) {
|
|
| 124 | 124 | uint32_t n;
|
| 125 | 125 | for (n=0; n < getNumCapabilities(); n++) {
|
| 126 | 126 | Capability *cap = getCapability(n);
|
| ... | ... | @@ -136,16 +136,17 @@ handleProfTick(void) |
| 136 | 136 | ticks_to_ticky_sample--;
|
| 137 | 137 | if (ticks_to_ticky_sample <= 0) {
|
| 138 | 138 | ticks_to_ticky_sample = RtsFlags.ProfFlags.heapProfileIntervalTicks;
|
| 139 | - RELAXED_STORE_ALWAYS(&performTickySample, true);
|
|
| 139 | + atomic_store_explicit(&performTickySample, true, memory_order_relaxed);
|
|
| 140 | 140 | }
|
| 141 | 141 | }
|
| 142 | 142 | #endif
|
| 143 | 143 | |
| 144 | - if (RELAXED_LOAD_ALWAYS(&do_heap_prof_ticks) && RELAXED_LOAD_ALWAYS(&heap_prof_timer_active)) {
|
|
| 144 | + if (atomic_load_explicit(&do_heap_prof_ticks, memory_order_relaxed) &&
|
|
| 145 | + atomic_load_explicit(&heap_prof_timer_active, memory_order_relaxed)) {
|
|
| 145 | 146 | ticks_to_heap_profile--;
|
| 146 | 147 | if (ticks_to_heap_profile <= 0) {
|
| 147 | 148 | ticks_to_heap_profile = RtsFlags.ProfFlags.heapProfileIntervalTicks;
|
| 148 | - RELAXED_STORE_ALWAYS(&performHeapProfile, true);
|
|
| 149 | + atomic_store_explicit(&performHeapProfile, true, memory_order_relaxed);
|
|
| 149 | 150 | }
|
| 150 | 151 | }
|
| 151 | 152 | } |
| ... | ... | @@ -410,7 +410,7 @@ schedule (Capability *initialCapability, Task *task) |
| 410 | 410 | if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0 &&
|
| 411 | 411 | (!emptyRunQueue(cap) ||
|
| 412 | 412 | anyPendingTimeoutsOrIO(cap))) {
|
| 413 | - RELAXED_STORE(&cap->context_switch, 1);
|
|
| 413 | + atomic_store_explicit(&cap->context_switch, 1, memory_order_relaxed);
|
|
| 414 | 414 | }
|
| 415 | 415 | |
| 416 | 416 | run_thread:
|
| ... | ... | @@ -1153,13 +1153,13 @@ schedulePostRunThread (Capability *cap, StgTSO *t) |
| 1153 | 1153 | static bool
|
| 1154 | 1154 | scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
|
| 1155 | 1155 | {
|
| 1156 | - if (RELAXED_LOAD_ALWAYS(&cap->r.rHpLim) == NULL ||
|
|
| 1157 | - RELAXED_LOAD_ALWAYS(&cap->context_switch)) {
|
|
| 1156 | + if (atomic_load_explicit(&cap->r.rHpLim, memory_order_relaxed) == NULL ||
|
|
| 1157 | + atomic_load_explicit(&cap->context_switch, memory_order_relaxed)) {
|
|
| 1158 | 1158 | // Sometimes we miss a context switch, e.g. when calling
|
| 1159 | 1159 | // primitives in a tight loop, MAYBE_GC() doesn't check the
|
| 1160 | 1160 | // context switch flag, and we end up waiting for a GC.
|
| 1161 | 1161 | // See #1984, and concurrent/should_run/1984
|
| 1162 | - RELAXED_STORE_ALWAYS(&cap->context_switch, 0);
|
|
| 1162 | + atomic_store_explicit(&cap->context_switch, 0, memory_order_relaxed);
|
|
| 1163 | 1163 | appendToRunQueue(cap,t);
|
| 1164 | 1164 | } else {
|
| 1165 | 1165 | pushOnRunQueue(cap,t);
|
| ... | ... | @@ -1264,8 +1264,8 @@ scheduleHandleYield( Capability *cap, StgTSO *t, uint32_t prev_what_next ) |
| 1264 | 1264 | // the CPU because the tick always arrives during GC). This way
|
| 1265 | 1265 | // penalises threads that do a lot of allocation, but that seems
|
| 1266 | 1266 | // better than the alternative.
|
| 1267 | - if (RELAXED_LOAD_ALWAYS(&cap->context_switch) != 0) {
|
|
| 1268 | - RELAXED_STORE_ALWAYS(&cap->context_switch, 0);
|
|
| 1267 | + if (atomic_load_explicit(&cap->context_switch, memory_order_relaxed) != 0) {
|
|
| 1268 | + atomic_store_explicit(&cap->context_switch, 0, memory_order_relaxed);
|
|
| 1269 | 1269 | appendToRunQueue(cap,t);
|
| 1270 | 1270 | } else {
|
| 1271 | 1271 | pushOnRunQueue(cap,t);
|
| ... | ... | @@ -74,12 +74,12 @@ extern StgWord sched_state; |
| 74 | 74 | |
| 75 | 75 | INLINE_HEADER void setSchedState(enum SchedState ss)
|
| 76 | 76 | {
|
| 77 | - SEQ_CST_STORE_ALWAYS(&sched_state, (StgWord) ss);
|
|
| 77 | + atomic_store(&sched_state, (StgWord) ss);
|
|
| 78 | 78 | }
|
| 79 | 79 | |
| 80 | 80 | INLINE_HEADER enum SchedState getSchedState(void)
|
| 81 | 81 | {
|
| 82 | - return (enum SchedState) SEQ_CST_LOAD_ALWAYS(&sched_state);
|
|
| 82 | + return (enum SchedState) atomic_load(&sched_state);
|
|
| 83 | 83 | }
|
| 84 | 84 | |
| 85 | 85 | /*
|
| ... | ... | @@ -124,14 +124,14 @@ extern StgWord recent_activity; |
| 124 | 124 | INLINE_HEADER enum RecentActivity
|
| 125 | 125 | setRecentActivity(enum RecentActivity new_value)
|
| 126 | 126 | {
|
| 127 | - StgWord old = SEQ_CST_XCHG_ALWAYS((StgPtr) &recent_activity, (StgWord) new_value);
|
|
| 127 | + StgWord old = atomic_exchange((StgPtr) &recent_activity, (StgWord) new_value);
|
|
| 128 | 128 | return (enum RecentActivity) old;
|
| 129 | 129 | }
|
| 130 | 130 | |
| 131 | 131 | INLINE_HEADER enum RecentActivity
|
| 132 | 132 | getRecentActivity(void)
|
| 133 | 133 | {
|
| 134 | - return (enum RecentActivity) RELAXED_LOAD_ALWAYS(&recent_activity);
|
|
| 134 | + return (enum RecentActivity) atomic_load_explicit(&recent_activity, memory_order_relaxed);
|
|
| 135 | 135 | }
|
| 136 | 136 | |
| 137 | 137 | extern bool heap_overflow;
|
| ... | ... | @@ -114,7 +114,7 @@ handle_tick(int unused STG_UNUSED) |
| 114 | 114 | {
|
| 115 | 115 | handleProfTick();
|
| 116 | 116 | if (RtsFlags.ConcFlags.ctxtSwitchTicks > 0
|
| 117 | - && SEQ_CST_LOAD_ALWAYS(&timer_disabled) == 0)
|
|
| 117 | + && atomic_load(&timer_disabled) == 0)
|
|
| 118 | 118 | {
|
| 119 | 119 | ticks_to_ctxt_switch--;
|
| 120 | 120 | if (ticks_to_ctxt_switch <= 0) {
|
| ... | ... | @@ -189,7 +189,7 @@ initTimer(void) |
| 189 | 189 | if (RtsFlags.MiscFlags.tickInterval != 0) {
|
| 190 | 190 | initTicker(RtsFlags.MiscFlags.tickInterval, handle_tick);
|
| 191 | 191 | }
|
| 192 | - SEQ_CST_STORE_ALWAYS(&timer_disabled, 1);
|
|
| 192 | + atomic_store(&timer_disabled, 1);
|
|
| 193 | 193 | #endif
|
| 194 | 194 | }
|
| 195 | 195 | |
| ... | ... | @@ -197,7 +197,7 @@ void |
| 197 | 197 | startTimer(void)
|
| 198 | 198 | {
|
| 199 | 199 | #if defined(HAVE_PREEMPTION)
|
| 200 | - if (SEQ_CST_SUB_ALWAYS(&timer_disabled, 1) == 0) {
|
|
| 200 | + if (atomic_fetch_sub(&timer_disabled, 1) == 1) {
|
|
| 201 | 201 | if (RtsFlags.MiscFlags.tickInterval != 0) {
|
| 202 | 202 | startTicker();
|
| 203 | 203 | }
|
| ... | ... | @@ -209,7 +209,7 @@ void |
| 209 | 209 | stopTimer(void)
|
| 210 | 210 | {
|
| 211 | 211 | #if defined(HAVE_PREEMPTION)
|
| 212 | - if (SEQ_CST_ADD_ALWAYS(&timer_disabled, 1) == 1) {
|
|
| 212 | + if (atomic_fetch_add(&timer_disabled, 1) == 0) {
|
|
| 213 | 213 | if (RtsFlags.MiscFlags.tickInterval != 0) {
|
| 214 | 214 | stopTicker();
|
| 215 | 215 | }
|
| ... | ... | @@ -20,24 +20,6 @@ void arm_atomic_spin_lock(void); |
| 20 | 20 | void arm_atomic_spin_unlock(void);
|
| 21 | 21 | #endif
|
| 22 | 22 | |
| 23 | -// Unconditionally atomic operations
|
|
| 24 | -// These are atomic even in the non-threaded RTS. These are necessary in the
|
|
| 25 | -// Proftimer implementation, which may be called from the pthreads-based
|
|
| 26 | -// Ticker implementation.
|
|
| 27 | -#define RELAXED_LOAD_ALWAYS(ptr) atomic_load_explicit(ptr, memory_order_relaxed)
|
|
| 28 | -#define RELAXED_STORE_ALWAYS(ptr,val) atomic_store_explicit(ptr, val, memory_order_relaxed)
|
|
| 29 | - |
|
| 30 | -// Acquire/release atomic operations
|
|
| 31 | -#define ACQUIRE_LOAD_ALWAYS(ptr) atomic_load_explicit(ptr, memory_order_acquire)
|
|
| 32 | -#define RELEASE_STORE_ALWAYS(ptr,val) atomic_store_explicit(ptr, val, memory_order_release)
|
|
| 33 | - |
|
| 34 | -// Sequentially consistent atomic operations
|
|
| 35 | -#define SEQ_CST_LOAD_ALWAYS(ptr) atomic_load(ptr)
|
|
| 36 | -#define SEQ_CST_STORE_ALWAYS(ptr,val) atomic_store(ptr, val)
|
|
| 37 | -#define SEQ_CST_ADD_ALWAYS(ptr,val) (atomic_fetch_add(ptr, val) + val)
|
|
| 38 | -#define SEQ_CST_SUB_ALWAYS(ptr,val) (atomic_fetch_sub(ptr, val) - val)
|
|
| 39 | -#define SEQ_CST_XCHG_ALWAYS(ptr,val) atomic_exchange(ptr, val);
|
|
| 40 | - |
|
| 41 | 23 | #if defined(THREADED_RTS)
|
| 42 | 24 | |
| 43 | 25 | /* ----------------------------------------------------------------------------
|
| ... | ... | @@ -92,13 +92,13 @@ static void *itimer_thread_func(void *_handle_tick) |
| 92 | 92 | |
| 93 | 93 | // Relaxed is sufficient: If we don't see that exited was set in one iteration we will
|
| 94 | 94 | // see it next time.
|
| 95 | - while (!RELAXED_LOAD_ALWAYS(&exited)) {
|
|
| 95 | + while (!atomic_load_explicit(&exited, memory_order_relaxed)) {
|
|
| 96 | 96 | if (rtsSleep(itimer_interval) != 0) {
|
| 97 | 97 | sysErrorBelch("Ticker: sleep failed: %s", strerror(errno));
|
| 98 | 98 | }
|
| 99 | 99 | |
| 100 | 100 | // first try a cheap test
|
| 101 | - if (RELAXED_LOAD_ALWAYS(&stopped)) {
|
|
| 101 | + if (atomic_load_explicit(&stopped, memory_order_relaxed)) {
|
|
| 102 | 102 | OS_ACQUIRE_LOCK(&mutex);
|
| 103 | 103 | // should we really stop?
|
| 104 | 104 | if (stopped) {
|
| ... | ... | @@ -109,7 +109,7 @@ static void *itimer_thread_func(void *_handle_tick) |
| 109 | 109 | |
| 110 | 110 | // Relaxed is sufficient: If we don't see that exited was set in one iteration we will
|
| 111 | 111 | // see it next time.
|
| 112 | - while (!RELAXED_LOAD_ALWAYS(&exited)) {
|
|
| 112 | + while (!atomic_load_explicit(&exited, memory_order_relaxed)) {
|
|
| 113 | 113 | if (poll(pollfds, 2, -1) == -1) {
|
| 114 | 114 | // While the RTS attempts to mask signals, some foreign libraries
|
| 115 | 115 | // relying on signal delivery may unmask them. Consequently we may
|
| ... | ... | @@ -143,7 +143,7 @@ static void *itimer_thread_func(void *_handle_tick) |
| 143 | 143 | }
|
| 144 | 144 | |
| 145 | 145 | // first try a cheap test
|
| 146 | - if (RELAXED_LOAD_ALWAYS(&stopped)) {
|
|
| 146 | + if (atomic_load_explicit(&stopped, memory_order_relaxed)) {
|
|
| 147 | 147 | OS_ACQUIRE_LOCK(&mutex);
|
| 148 | 148 | // should we really stop?
|
| 149 | 149 | if (stopped) {
|