Ben Gamari pushed to branch wip/T26053 at Glasgow Haskell Compiler / GHC

Commits:

3 changed files:

  • rts/include/stg/SMP.h
  • rts/sm/NonMoving.c
  • rts/sm/NonMovingMark.c

Changes:

  • rts/include/stg/SMP.h

    @@ -516,6 +516,14 @@ atomic_dec(StgVolatilePtr p)
     #endif
     }
     
    +EXTERN_INLINE StgWord
    +atomic_or(StgVolatilePtr p, StgWord v)
    +{
    +    // TODO: This ordering is stronger than necessary for many users (e.g.
    +    // setting flags).
    +    return __atomic_or_fetch(p, v, __ATOMIC_SEQ_CST);
    +}
    +
     /*
      * Some architectures have a way to tell the CPU that we're in a
      * busy-wait loop, and the processor should look for something else to

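    Note: the TODO in atomic_or above observes that __ATOMIC_SEQ_CST is stronger
    than plain flag-setting needs. A relaxed variant (an illustrative sketch only,
    not part of this patch; atomic_or_relaxed is a hypothetical name) might look
    like:

        EXTERN_INLINE StgWord
        atomic_or_relaxed(StgVolatilePtr p, StgWord v)
        {
            // Same GCC/Clang builtin as atomic_or above, but with relaxed
            // ordering, which suffices when the flag write carries no
            // synchronisation with other memory operations.
            return __atomic_or_fetch(p, v, __ATOMIC_RELAXED);
        }
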
  • rts/sm/NonMoving.c

    @@ -560,7 +560,7 @@ Mutex concurrent_coll_finished_lock;
      *   arbitrary allocator sizes, we need to do some precomputation and make
      *   use of the integer division by constants optimisation.
      *
    - * We currenlty try to balance these considerations by adopting the following scheme.
    + * We currently try to balance these considerations by adopting the following scheme.
      * We have nonmoving_alloca_dense_cnt "dense" allocators starting with size
      * NONMOVING_ALLOCA0, and incrementing by NONMOVING_ALLOCA_DENSE_INCREMENT.
      * These service the vast majority of allocations.

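    For illustration, the dense allocator lookup described in the comment above
    amounts to an offset/stride calculation (a sketch under the assumption that
    the increment is the stride between consecutive dense allocator sizes; the
    helper name is hypothetical and not part of this patch):

        // Map a request size to its dense allocator index, assuming the dense
        // allocators cover sizes from NONMOVING_ALLOCA0 upwards in steps of
        // NONMOVING_ALLOCA_DENSE_INCREMENT (names taken from the comment above).
        static inline uint32_t dense_alloca_idx(uint32_t size)
        {
            return (size - NONMOVING_ALLOCA0) / NONMOVING_ALLOCA_DENSE_INCREMENT;
        }
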
  • rts/sm/NonMovingMark.c

    @@ -722,13 +722,14 @@ STATIC_INLINE bool needs_upd_rem_set_mark(StgClosure *p)
     {
         // TODO: Deduplicate with mark_closure
         bdescr *bd = Bdescr((StgPtr) p);
    +    uint16_t flags = RELAXED_LOAD(&bd->flags);
         if (bd->gen != oldest_gen) {
             return false;
    -    } else if (bd->flags & BF_LARGE) {
    -        if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
    +    } else if (flags & BF_LARGE) {
    +        if (! (flags & BF_NONMOVING_SWEEPING)) {
                 return false;
             } else {
    -            return ! (bd->flags & BF_MARKED);
    +            return ! (flags & BF_MARKED);
             }
         } else {
             struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);

    @@ -740,8 +741,8 @@ STATIC_INLINE bool needs_upd_rem_set_mark(StgClosure *p)
     static void finish_upd_rem_set_mark_large(bdescr* bd) {
         // Someone else may have already marked it.
         ACQUIRE_LOCK(&nonmoving_large_objects_mutex);
    -    if (! (bd->flags & BF_MARKED)) {
    -        bd->flags |= BF_MARKED;
    +    if (! (RELAXED_LOAD(&bd->flags) & BF_MARKED)) {
    +        atomic_or(&bd->flags, BF_MARKED);
             dbl_link_remove(bd, &nonmoving_large_objects);
             dbl_link_onto(bd, &nonmoving_marked_large_objects);
             n_nonmoving_large_blocks -= bd->blocks;

    @@ -785,18 +786,28 @@ void updateRemembSetPushStack(Capability *cap, StgStack *stack)
                 debugTrace(DEBUG_nonmoving_gc, "upd_rem_set: STACK %p", stack->sp);
                 trace_stack(&cap->upd_rem_set.queue, stack);
                 finish_upd_rem_set_mark((StgClosure *) stack);
    -            return;
             } else {
                 // The concurrent GC has claimed the right to mark the stack.
                 // Wait until it finishes marking before proceeding with
                 // mutation.
    -            while (needs_upd_rem_set_mark((StgClosure *) stack))
    +            uint64_t iters = 0;
    +            while (needs_upd_rem_set_mark((StgClosure *) stack)) {
    +                iters++;
    +                if (iters > 100000000) {
    +                    bdescr *bd = Bdescr(stack);
    +                    debugBelch("updateRemSetPushStack: stuck: %p\n", stack);
    +                    debugBelch("                       bd->flags: %x\n", bd->flags);
    +                    debugBelch("                       epoch    : %x\n", nonmovingMarkEpoch);
    +                    debugBelch("                       marking:   %x\n", stack->marking);
    +                    abort();
    +                }
    +
     #if defined(PARALLEL_GC)
                     busy_wait_nop(); // TODO: Spinning here is unfortunate
     #else
                     ;
     #endif
    -            return;
    +            }
             }
         }
     }

    @@ -1371,35 +1382,36 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
     
         // N.B. only the first block of a compact region is guaranteed to carry
         // BF_NONMOVING; consequently we must separately check for BF_COMPACT.
    -    if (bd->flags & (BF_COMPACT | BF_NONMOVING)) {
    +    const uint16_t flags = RELAXED_LOAD(&bd->flags);
    +    if (flags & (BF_COMPACT | BF_NONMOVING)) {
     
    -        if (bd->flags & BF_COMPACT) {
    +        if (flags & BF_COMPACT) {
                 StgCompactNFData *str = objectGetCompact((StgClosure*)p);
                 bd = Bdescr((P_)str);
     
    -            if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
    +            if (! (flags & BF_NONMOVING_SWEEPING)) {
                     // Not in the snapshot
                     return;
                 }
     
    -            if (! (bd->flags & BF_MARKED)) {
    +            if (! (flags & BF_MARKED)) {
                     dbl_link_remove(bd, &nonmoving_compact_objects);
                     dbl_link_onto(bd, &nonmoving_marked_compact_objects);
                     StgWord blocks = str->totalW / BLOCK_SIZE_W;
                     n_nonmoving_compact_blocks -= blocks;
                     n_nonmoving_marked_compact_blocks += blocks;
    -                bd->flags |= BF_MARKED;
    +                atomic_or(&bd->flags, BF_MARKED);
                 }
     
                 // N.B. the object being marked is in a compact region so by
                 // definition there is no need to do any tracing here.
                 goto done;
    -        } else if (bd->flags & BF_LARGE) {
    -            if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
    +        } else if (flags & BF_LARGE) {
    +            if (! (flags & BF_NONMOVING_SWEEPING)) {
                     // Not in the snapshot
                     goto done;
                 }
    -            if (bd->flags & BF_MARKED) {
    +            if (flags & BF_MARKED) {
                     goto done;
                 }
             } else {

    @@ -1731,24 +1743,25 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
          * the object's pointers since in the case of marking stacks there may be a
          * mutator waiting for us to finish so it can start execution.
          */
    -    if (bd->flags & BF_LARGE) {
    +    uint16_t bd_flags = RELAXED_LOAD(&bd->flags);
    +    if (bd_flags & BF_LARGE) {
             /* Marking a large object isn't idempotent since we move it to
              * nonmoving_marked_large_objects; to ensure that we don't repeatedly
              * mark a large object, we only set BF_MARKED on large objects in the
              * nonmoving heap while holding nonmoving_large_objects_mutex
              */
             ACQUIRE_LOCK(&nonmoving_large_objects_mutex);
    -        if (! (bd->flags & BF_MARKED)) {
    +        if (! (bd_flags & BF_MARKED)) {
                 // Remove the object from nonmoving_large_objects and link it to
                 // nonmoving_marked_large_objects
                 dbl_link_remove(bd, &nonmoving_large_objects);
                 dbl_link_onto(bd, &nonmoving_marked_large_objects);
                 n_nonmoving_large_blocks -= bd->blocks;
                 n_nonmoving_marked_large_blocks += bd->blocks;
    -            bd->flags |= BF_MARKED;
    +            RELAXED_STORE(&bd->flags, flags | BF_MARKED);
             }
             RELEASE_LOCK(&nonmoving_large_objects_mutex);
    -    } else if (bd->flags & BF_NONMOVING) {
    +    } else if (bd_flags & BF_NONMOVING) {
             // TODO: Kill repetition
             struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
             nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);

    @@ -1927,7 +1940,7 @@ static bool nonmovingIsNowAlive (StgClosure *p)
     
         bdescr *bd = Bdescr((P_)p);
     
    -    const uint16_t flags = bd->flags;
    +    const uint16_t flags = RELAXED_LOAD(&bd->flags);
         if (flags & BF_LARGE) {
             if (flags & BF_PINNED && !(flags & BF_NONMOVING)) {
                 // In this case we have a pinned object living in a non-full

    @@ -1937,15 +1950,15 @@ static bool nonmovingIsNowAlive (StgClosure *p)
                 return true;
             }
     
    -        ASSERT(bd->flags & BF_NONMOVING);
    -        return (bd->flags & BF_NONMOVING_SWEEPING) == 0
    +        ASSERT(flags & BF_NONMOVING);
    +        return (flags & BF_NONMOVING_SWEEPING) == 0
                        // the large object wasn't in the snapshot and therefore wasn't marked
    -            || (bd->flags & BF_MARKED) != 0;
    +            || (flags & BF_MARKED) != 0;
                        // The object was marked
         } else {
             // All non-static objects in the non-moving heap should be marked as
             // BF_NONMOVING.
    -        ASSERT(bd->flags & BF_NONMOVING);
    +        ASSERT(flags & BF_NONMOVING);
     
             struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
             StgClosure *snapshot_loc =
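
    The recurring pattern in the NonMovingMark.c changes above is to read
    bd->flags once with RELAXED_LOAD, test that snapshot, and set bits with the
    new atomic_or, apparently so that concurrent updates to the descriptor flags
    are neither racy nor lost. A minimal sketch of the idiom (illustrative only;
    mark_large_if_unmarked is a hypothetical helper, not part of the patch):

        static void mark_large_if_unmarked(bdescr *bd)
        {
            // Snapshot the flags once so every test below sees one consistent value.
            const uint16_t flags = RELAXED_LOAD(&bd->flags);
            if (! (flags & BF_MARKED)) {
                // The read-modify-write must be atomic: other threads may be
                // setting unrelated bits of bd->flags at the same time.
                atomic_or(&bd->flags, BF_MARKED);
            }
        }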