Duncan Coutts pushed to branch wip/io-manager-deadlock-detection at Glasgow Haskell Compiler / GHC

Commits:

4 changed files:

  • libraries/ghc-internal/src/GHC/Internal/Event/Thread.hs
  • rts/RtsStartup.c
  • rts/Schedule.c
  • rts/posix/FdWakeup.c

Changes:

  • libraries/ghc-internal/src/GHC/Internal/Event/Thread.hs
    ... ... @@ -223,38 +223,48 @@ threadWaitWriteSTM = threadWaitSTM evtWrite
     {-# INLINE threadWaitWriteSTM #-}
     
     
    --- | Retrieve the system event manager for the capability on which the
    --- calling thread is running.
    ---
    --- This function always returns 'Just' the current thread's event manager
    --- when using the threaded RTS and 'Nothing' otherwise.
    -getSystemEventManager :: IO (Maybe EventManager)
    -getSystemEventManager = do
    +getSystemEventManager_ :: IO EventManager
    +getSystemEventManager_ = do
       t <- myThreadId
       eventManagerArray <- readIORef eventManager
       let r = boundsIOArray eventManagerArray
       (cap, _) <- threadCapability t
    -  -- It is possible that we've just increased the number of capabilities and the
    -  -- new EventManager has not yet been constructed by
    -  -- 'ioManagerCapabilitiesChanged'. We expect this to happen very rarely.
    -  -- T21561 exercises this.
    -  -- Two options to proceed:
    -  --  1) return the EventManager for capability 0. This is guaranteed to exist,
    -  --     and "shouldn't" cause any correctness issues.
    -  --  2) Busy wait, with or without a call to 'yield'. This can't deadlock,
    -  --     because we must be on a brand capability and there must be a call to
    -  --     'ioManagerCapabilitiesChanged' pending.
    +  -- It is possible that either:
    +  --  1) we've just started the RTS or done forkProcess and the EventManager
    +  --     is still being started concurrently by 'ensureIOManagerIsRunning'.
    +  --     This usually happens once at startup.
    +  --  2) we've just increased the number of capabilities and the new
    +  --     EventManager is being started concurrently by
    +  --     'ioManagerCapabilitiesChanged'. We expect this to happen very rarely.
    +  --     T21561 exercises this.
    +  --
    +  -- For both situations we follow the strategy to busy wait, with a call to
    +  -- 'yield'. This can't deadlock, because there must be a call to either
    +  -- 'ensureIOManagerIsRunning' or 'ioManagerCapabilitiesChanged' running.
       --
    -  -- We take the second option, with the yield, judging it the most robust.
       if not (inRange r cap)
    -    then yield >> getSystemEventManager
    -    else fmap snd `fmap` readIOArray eventManagerArray cap
    +    then yield >> getSystemEventManager_ -- for num caps changed
    +    else do
    +      mem <- readIOArray eventManagerArray cap
    +      case mem of
    +        Just (_, em) -> return em
    +        Nothing
    +          | threaded  -> yield >> getSystemEventManager_ -- for startup
    +          | otherwise -> err
    +  where
    +    err = error "GHC.Internal.Event.Thread.getSystemEventManager: the EventManager requires linking against the threaded runtime"
     
    -getSystemEventManager_ :: IO EventManager
    -getSystemEventManager_ = do
    -  Just mgr <- getSystemEventManager
    -  return mgr
    -{-# INLINE getSystemEventManager_ #-}
    +
    +-- | Retrieve the system event manager for the capability on which the
    +-- calling thread is running.
    +--
    +-- This function always returns 'Just' the current thread's event
    +-- manager when using the threaded RTS and 'Nothing' otherwise.
    +getSystemEventManager :: IO (Maybe EventManager)
    +getSystemEventManager
    +  | threaded  = Just `fmap` getSystemEventManager_
    +  | otherwise = return Nothing
    +{-# INLINE getSystemEventManager #-}
     
     foreign import ccall unsafe "getOrSetSystemEventThreadEventManagerStore"
         getOrSetSystemEventThreadEventManagerStore :: Ptr a -> IO (Ptr a)
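
    Aside: the deadlock argument in this hunk relies on a simple pattern: busy
    wait with 'yield' until a concurrently running initializer publishes the
    value. A minimal standalone sketch of that pattern (illustrative only, not
    the GHC code; 'waitFor' is a hypothetical name):

        import Control.Concurrent (forkIO, threadDelay, yield)
        import Data.IORef

        -- 'Nothing' until the initializer has run, like the per-capability
        -- EventManager slots above.
        waitFor :: IORef (Maybe a) -> IO a
        waitFor ref = do
          m <- readIORef ref
          case m of
            Just x  -> return x
            -- Yield so the initializing thread can run. This cannot deadlock
            -- provided an initializer is guaranteed to be pending.
            Nothing -> yield >> waitFor ref

        main :: IO ()
        main = do
          ref <- newIORef Nothing
          -- Runs concurrently, in the role of 'ensureIOManagerIsRunning' or
          -- 'ioManagerCapabilitiesChanged'.
          _ <- forkIO (threadDelay 1000 >> writeIORef ref (Just "ready"))
          waitFor ref >>= putStrLn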
    
    ... ... @@ -299,8 +309,18 @@ ioManagerLock = unsafePerformIO $ do
        sharedCAF m getOrSetSystemEventThreadIOManagerThreadStore
     
     getSystemTimerManager :: IO TM.TimerManager
    -getSystemTimerManager =
    -  fromMaybe err `fmap` readIORef timerManager
    +getSystemTimerManager = do
    +  mtm <- readIORef timerManager
    +  case mtm of
    +    Just tm -> return tm
    +    Nothing
    +      -- Same logic as in getSystemEventManager: yield and try again.
    +      -- This can't deadlock, because we must be on a brand new
    +      -- capability (either the main cap during startup or a new cap
    +      -- after forkProcess) and there must be a call to
    +      -- 'ensureIOManagerIsRunning' pending.
    +      | threaded  -> yield >> getSystemTimerManager
    +      | otherwise -> err
         where
           err = error "GHC.Internal.Event.Thread.getSystemTimerManager: the TimerManager requires linking against the threaded runtime"
     
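
    For context: the TimerManager reached through this accessor backs the
    public GHC.Event API. A minimal usage sketch, assuming a program linked
    with the threaded RTS (-threaded):

        import Control.Concurrent (threadDelay)
        import GHC.Event (getSystemTimerManager, registerTimeout)

        main :: IO ()
        main = do
          tm <- getSystemTimerManager  -- errors without -threaded, as above
          -- Fire a callback after 100,000 microseconds (0.1 s).
          _key <- registerTimeout tm 100000 (putStrLn "timeout fired")
          threadDelay 200000           -- keep the process alive long enough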
     
    

  • rts/RtsStartup.c
    ... ... @@ -625,7 +625,9 @@ hs_exit_(bool wait_foreign)
           hs_restoreConsoleCP();
     #endif
     
    -   finiUserSignals();
    +#if defined(RTS_USER_SIGNALS)
    +    finiUserSignals();
    +#endif
     
        /* tear down statistics subsystem */
        stat_exit();

  • rts/Schedule.c
    ... ... @@ -173,8 +173,9 @@ static void deleteAllThreads (void);
     static void deleteThread_(StgTSO *tso);
     #endif
     
    -static void removeFromRunQueue (Capability *cap, StgTSO *tso);
    +#if defined(FORKPROCESS_PRIMOP_SUPPORTED)
     static void truncateRunQueue(Capability *cap);
    +#endif
     static StgTSO *popRunQueue (Capability *cap);
     
     /* ---------------------------------------------------------------------------

    ... ... @@ -3025,6 +3026,7 @@ static StgTSO *popRunQueue (Capability *cap)
         return t;
     }
     
    +#if defined(FORKPROCESS_PRIMOP_SUPPORTED)
     static void truncateRunQueue(Capability *cap)
     {
         // Can only be called by the task owning the capability.

    ... ... @@ -3035,6 +3037,7 @@ static void truncateRunQueue(Capability *cap)
         cap->run_queue_tl = END_TSO_QUEUE;
         cap->n_run_queue = 0;
     }
    +#endif
     
     static void removeFromRunQueue (Capability *cap, StgTSO *tso)
     {
    

  • rts/posix/FdWakeup.c
    ... ... @@ -47,7 +47,7 @@ static void fcntl_CLOEXEC_NONBLOCK(int fd)
     
     void newFdWakeup(int *wakeup_fd_r, int *wakeup_fd_w)
     {
    -#if HAVE_EVENTFD
    +#if defined(HAVE_EVENTFD)
         int wakeup_fd;
     #if defined(EFD_CLOEXEC) && defined(EFD_NONBLOCK)
         wakeup_fd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);

    ... ... @@ -79,7 +79,7 @@ void newFdWakeup(int *wakeup_fd_r, int *wakeup_fd_w)
     
     void closeFdWakeup(int wakeup_fd_r, int wakeup_fd_w)
     {
    -#if HAVE_EVENTFD
    +#if defined(HAVE_EVENTFD)
         ASSERT(wakeup_fd_r == wakeup_fd_w);
         close(wakeup_fd_r);
     #else

    ... ... @@ -92,7 +92,7 @@ void closeFdWakeup(int wakeup_fd_r, int wakeup_fd_w)
     void sendFdWakeup(int wakeup_fd_w)
     {
         int res;
    -#if HAVE_EVENTFD
    +#if defined(HAVE_EVENTFD)
         uint64_t val = 1;
         res = write(wakeup_fd_w, &val, 8);
     #else

    ... ... @@ -110,7 +110,7 @@ void sendFdWakeup(int wakeup_fd_w)
     void collectFdWakeup(int wakeup_fd_r)
     {
         int res;
    -#if HAVE_EVENTFD
    +#if defined(HAVE_EVENTFD)
         uint64_t buf;
         /* eventfd combines events into one counter, so a single read is enough */
         res = read(wakeup_fd_r, &buf, 8);