Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC

Commits:

11 changed files:

Changes:

  • compiler/GHC/CmmToAsm/LA64/CodeGen.hs
    ... ... @@ -1910,13 +1910,12 @@ genCCall target dest_regs arg_regs = do
    1910 1910
             MO_W64X2_Max -> unsupported mop
    
    1911 1911
     
    
    1912 1912
             -- Memory Ordering
    
    1913
    -        -- A hint value of 0 is mandatory by default, and it indicates a fully functional synchronization barrier.
    
    1914
    -        -- Only after all previous load/store access operations are completely executed, the DBAR 0 instruction can be executed;
    
    1915
    -        -- and only after the execution of DBAR 0 is completed, all subsequent load/store access operations can be executed.
    
    1916
    -
    
    1917
    -        MO_AcquireFence -> pure (unitOL (DBAR Hint0))
    
    1918
    -        MO_ReleaseFence -> pure (unitOL (DBAR Hint0))
    
    1919
    -        MO_SeqCstFence  -> pure (unitOL (DBAR Hint0))
    
    1913
    +        -- Support finer-grained DBAR hints for LA664 and newer uarchs.
    
    1914
    +        -- These are treated as DBAR 0 on older uarchs, so we can start
    
    1915
    +        -- to unconditionally emit the new hints right away.
    
    1916
    +        MO_AcquireFence -> pure (unitOL (DBAR HintAcquire))
    
    1917
    +        MO_ReleaseFence -> pure (unitOL (DBAR HintRelease))
    
    1918
    +        MO_SeqCstFence  -> pure (unitOL (DBAR HintSeqcst))
    
    1920 1919
     
    
    1921 1920
             MO_Touch        -> pure nilOL -- Keep variables live (when using interior pointers)
    
    1922 1921
             -- Prefetch
    
    ... ... @@ -1954,12 +1953,11 @@ genCCall target dest_regs arg_regs = do
    1954 1953
     
    
    1955 1954
                           MemOrderAcquire -> toOL [
    
    1956 1955
                                                     ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
    
    1957
    -                                                DBAR Hint0
    
    1956
    +                                                DBAR HintAcquire
    
    1958 1957
                                                   ]
    
    1959
    -                      MemOrderSeqCst -> toOL [
    
    1960
    -                                                ann moDescr (DBAR Hint0),
    
    1961
    -                                                LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p),
    
    1962
    -                                                DBAR Hint0
    
    1958
    +                      MemOrderSeqCst -> toOL  [
    
    1959
    +                                                ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
    
    1960
    +                                                DBAR HintSeqcst
    
    1963 1961
                                                   ]
    
    1964 1962
                           _ -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo
    
    1965 1963
                       dst = getRegisterReg platform (CmmLocal dst_reg)
    
    ... ... @@ -1974,15 +1972,9 @@ genCCall target dest_regs arg_regs = do
    1974 1972
                   (val, fmt_val, code_val) <- getSomeReg val_reg
    
    1975 1973
                   let instrs = case ord of
    
    1976 1974
                           MemOrderRelaxed -> unitOL $ ann moDescr (ST fmt_val (OpReg w val) (OpAddr $ AddrReg p))
    
    1977
    -                      MemOrderRelease -> toOL [
    
    1978
    -                                                ann moDescr (DBAR Hint0),
    
    1979
    -                                                ST fmt_val (OpReg w val) (OpAddr $ AddrReg p)
    
    1980
    -                                              ]
    
    1981
    -                      MemOrderSeqCst  -> toOL [
    
    1982
    -                                                ann moDescr (DBAR Hint0),
    
    1983
    -                                                ST fmt_val (OpReg w val) (OpAddr $ AddrReg p),
    
    1984
    -                                                DBAR Hint0
    
    1985
    -                                              ]
    
    1975
    +                      -- implement with AMSWAPDB
    
    1976
    +                      MemOrderRelease -> unitOL $ ann moDescr (AMSWAPDB fmt_val (OpReg w zeroReg) (OpReg w val) (OpReg w p))
    
    1977
    +                      MemOrderSeqCst  -> unitOL $ ann moDescr (AMSWAPDB fmt_val (OpReg w zeroReg) (OpReg w val) (OpReg w p))
    
    1986 1978
                           _ ->  panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
    
    1987 1979
                       moDescr = (text . show) mo
    
    1988 1980
                       code =
    

  • compiler/GHC/CmmToAsm/LA64/Instr.hs
    ... ... @@ -169,6 +169,7 @@ regUsageOfInstr platform instr = case instr of
    169 169
       -- LDCOND dst src1 src2     -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    170 170
       -- STCOND dst src1 src2     -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    171 171
       -- 7. Atomic Memory Access Instructions --------------------------------------
    
    172
    +  AMSWAPDB _ dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    172 173
       -- 8. Barrier Instructions ---------------------------------------------------
    
    173 174
       DBAR _hint               -> usage ([], [])
    
    174 175
       IBAR _hint               -> usage ([], [])
    
    ... ... @@ -343,13 +344,13 @@ patchRegsOfInstr instr env = case instr of
    343 344
         STX f o1 o2        -> STX f (patchOp o1)  (patchOp o2)
    
    344 345
         LDPTR f o1 o2      -> LDPTR f (patchOp o1)  (patchOp o2)
    
    345 346
         STPTR f o1 o2      -> STPTR f (patchOp o1)  (patchOp o2)
    
    346
    -    PRELD o1 o2         -> PRELD (patchOp o1) (patchOp o2)
    
    347
    +    PRELD o1 o2        -> PRELD (patchOp o1) (patchOp o2)
    
    347 348
         -- 6. Bound Check Memory Access Instructions ---------------------------------
    
    348 349
         -- LDCOND o1 o2 o3       -> LDCOND  (patchOp o1)  (patchOp o2)  (patchOp o3)
    
    349 350
         -- STCOND o1 o2 o3       -> STCOND  (patchOp o1)  (patchOp o2)  (patchOp o3)
    
    350 351
         -- 7. Atomic Memory Access Instructions --------------------------------------
    
    352
    +    AMSWAPDB f o1 o2 o3 -> AMSWAPDB f (patchOp o1) (patchOp o2) (patchOp o3)
    
    351 353
         -- 8. Barrier Instructions ---------------------------------------------------
    
    352
    -    -- TODO: need fix
    
    353 354
         DBAR o1             -> DBAR o1
    
    354 355
         IBAR o1             -> IBAR o1
    
    355 356
         -- 11. Floating Point Instructions -------------------------------------------
    
    ... ... @@ -734,6 +735,7 @@ data Instr
    734 735
         | PRELD Operand Operand
    
    735 736
         -- 6. Bound Check Memory Access Instructions ---------------------------------
    
    736 737
         -- 7. Atomic Memory Access Instructions --------------------------------------
    
    738
    +    | AMSWAPDB Format Operand Operand Operand
    
    737 739
         -- 8. Barrier Instructions ---------------------------------------------------
    
    738 740
         | DBAR BarrierType
    
    739 741
         | IBAR BarrierType
    
    ... ... @@ -755,8 +757,13 @@ data Instr
    755 757
         --  fnmadd: d = - r1 * r2 - r3
    
    756 758
         | FMA FMASign Operand Operand Operand Operand
    
    757 759
     
    
    758
    --- TODO: Not complete.
    
    759
    -data BarrierType = Hint0
    
    760
    +data BarrierType
    
    761
    +  = Hint0
    
    762
    +  | Hint700
    
    763
    +  | HintAcquire
    
    764
    +  | HintRelease
    
    765
    +  | HintSeqcst
    
    766
    +  deriving (Eq, Show)
    
    760 767
     
    
    761 768
     instrCon :: Instr -> String
    
    762 769
     instrCon i =
    
    ... ... @@ -847,6 +854,7 @@ instrCon i =
    847 854
           LDPTR{} -> "LDPTR"
    
    848 855
           STPTR{} -> "STPTR"
    
    849 856
           PRELD{} -> "PRELD"
    
    857
    +      AMSWAPDB{} -> "AMSWAPDB"
    
    850 858
           DBAR{} -> "DBAR"
    
    851 859
           IBAR{} -> "IBAR"
    
    852 860
           FCVT{} -> "FCVT"
    

  • compiler/GHC/CmmToAsm/LA64/Ppr.hs
    ... ... @@ -1015,6 +1015,10 @@ pprInstr platform instr = case instr of
    1015 1015
         -- LD{GT/LE}.{B/H/W/D}, ST{GT/LE}.{B/H/W/D}
    
    1016 1016
       -- 7. Atomic Memory Access Instructions --------------------------------------
    
    1017 1017
         -- AM{SWAP/ADD/AND/OR/XOR/MAX/MIN}[DB].{W/D}, AM{MAX/MIN}[_DB].{WU/DU}
    
    1018
    +  AMSWAPDB II8 o1 o2 o3 -> op3 (text "\tamswap_db.b") o1 o2 o3
    
    1019
    +  AMSWAPDB II16 o1 o2 o3 -> op3 (text "\tamswap_db.h") o1 o2 o3
    
    1020
    +  AMSWAPDB II32 o1 o2 o3 -> op3 (text "\tamswap_db.w") o1 o2 o3
    
    1021
    +  AMSWAPDB II64 o1 o2 o3 -> op3 (text "\tamswap_db.d") o1 o2 o3
    
    1018 1022
         -- AM.{SWAP/ADD}[_DB].{B/H}
    
    1019 1023
         -- AMCAS[_DB].{B/H/W/D}
    
    1020 1024
         -- LL.{W/D}, SC.{W/D}
    
    ... ... @@ -1112,19 +1116,28 @@ pprInstr platform instr = case instr of
    1112 1116
             op3 op o1 o2 o3     = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3
    
    1113 1117
             op4 op o1 o2 o3 o4  = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4
    
    1114 1118
     {-
    
    1115
    -    -- TODO: Support dbar with different hints.
    
    1119
    +    Support dbar with different hints.
    
    1116 1120
         On LoongArch uses "dbar 0" (full completion barrier) for everything.
    
    1117 1121
         But the full completion barrier has no performance to tell, so
    
    1118 1122
         Loongson-3A6000 and newer processors have made finer granularity hints
    
    1119 1123
         available:
    
    1120 1124
     
    
    1125
    +    Hint 0x700: barrier for "read after read" from the same address.
    
    1121 1126
         Bit4: ordering or completion (0: completion, 1: ordering)
    
    1122 1127
         Bit3: barrier for previous read (0: true, 1: false)
    
    1123 1128
         Bit2: barrier for previous write (0: true, 1: false)
    
    1124 1129
         Bit1: barrier for succeeding read (0: true, 1: false)
    
    1125 1130
         Bit0: barrier for succeeding write (0: true, 1: false)
    
    1131
    +
    
    1132
    +    DBAR 0b10100: acquire
    
    1133
    +    DBAR 0b10010: release
    
    1134
    +    DBAR 0b10000: seqcst
    
    1126 1135
     -}
    
    1127 1136
             pprBarrierType Hint0 = text "0x0"
    
    1137
    +        pprBarrierType HintSeqcst  = text "0x10"
    
    1138
    +        pprBarrierType HintRelease = text "0x12"
    
    1139
    +        pprBarrierType HintAcquire = text "0x14"
    
    1140
    +        pprBarrierType Hint700 = text "0x700"
    
    1128 1141
             floatPrecission o | isSingleOp o = text "s"
    
    1129 1142
                               | isDoubleOp o = text "d"
    
    1130 1143
                               | otherwise  = pprPanic "Impossible floating point precission: " (pprOp platform o)
    

  • compiler/GHC/Core/LateCC/OverloadedCalls.hs
    ... ... @@ -20,7 +20,6 @@ import GHC.Core.Make
    20 20
     import GHC.Core.Predicate
    
    21 21
     import GHC.Core.Type
    
    22 22
     import GHC.Core.Utils
    
    23
    -import GHC.Tc.Utils.TcType
    
    24 23
     import GHC.Types.Id
    
    25 24
     import GHC.Types.Name
    
    26 25
     import GHC.Types.SrcLoc
    
    ... ... @@ -29,6 +28,41 @@ import GHC.Types.Var
    29 28
     
    
    30 29
     type OverloadedCallsCCState = Strict.Maybe SrcSpan
    
    31 30
     
    
    31
    +{- Note [Overloaded Calls and join points]
    
    32
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    33
    +Currently GHC considers cost centres as destructive to
    
    34
    +join contexts. Or in other words this is not considered valid:
    
    35
    +
    
    36
    +    join f x = ...
    
    37
    +    in
    
    38
    +              ... -> scc<tick> jmp
    
    39
    +
    
    40
    +This makes the functionality of `-fprof-late-overloaded-calls` not feasible
    
    41
    +for join points in general. We used to try to work around this by putting the
    
    42
    +ticks on the rhs of the join point rather than around the jump. However beyond
    
    43
    +the loss of accuracy this was broken for recursive join points as we ended up
    
    44
    +with something like:
    
    45
    +
    
    46
    +    rec-join f x = scc<tick> ... jmp f x
    
    47
    +
    
    48
    +Which similarly is not valid as the tick once again destroys the tail call.
    
    49
    +One might think we could limit ourselves to non-recursive tail calls and do
    
    50
    +something clever like:
    
    51
    +
    
    52
    +  join f x = scc<tick> ...
    
    53
    +  in ... jmp f x
    
    54
    +
    
    55
    +And sometimes this works! But sometimes the full rhs would look something like:
    
    56
    +
    
    57
    +  join g x = ....
    
    58
    +  join f x = scc<tick> ... -> jmp g x
    
    59
    +
    
    60
    +Which, would again no longer be valid. I believe in the long run we can make
    
    61
    +cost centre ticks non-destructive to join points. Or we could keep track of
    
    62
    +where we are/are not allowed to insert a cost centre. But in the short term I will
    
    63
    +simply disable the annotation of join calls under this flag.
    
    64
    +-}
    
    65
    +
    
    32 66
     -- | Insert cost centres on function applications with dictionary arguments. The
    
    33 67
     -- source locations attached to the cost centres is approximated based on the
    
    34 68
     -- "closest" source note encountered in the traversal.
    
    ... ... @@ -52,21 +86,10 @@ overloadedCallsCC =
    52 86
                  CoreBndr
    
    53 87
               -> LateCCM OverloadedCallsCCState CoreExpr
    
    54 88
               -> LateCCM OverloadedCallsCCState CoreExpr
    
    55
    -        wrap_if_join b pexpr = do
    
    89
    +        wrap_if_join _b pexpr = do
    
    90
    +            -- See Note [Overloaded Calls and join points]
    
    56 91
                 expr <- pexpr
    
    57
    -            if isJoinId b && isOverloadedTy (exprType expr) then do
    
    58
    -              let
    
    59
    -                cc_name :: FastString
    
    60
    -                cc_name = fsLit "join-rhs-" `appendFS` getOccFS b
    
    61
    -
    
    62
    -              cc_srcspan <-
    
    63
    -                fmap (Strict.fromMaybe (UnhelpfulSpan UnhelpfulNoLocationInfo)) $
    
    64
    -                  lift $ gets lateCCState_extra
    
    65
    -
    
    66
    -              insertCC cc_name cc_srcspan expr
    
    67
    -            else
    
    68
    -              return expr
    
    69
    -
    
    92
    +            return expr
    
    70 93
     
    
    71 94
         processExpr :: CoreExpr -> LateCCM OverloadedCallsCCState CoreExpr
    
    72 95
         processExpr expr =
    
    ... ... @@ -99,6 +122,7 @@ overloadedCallsCC =
    99 122
     
    
    100 123
                   -- Avoid instrumenting join points.
    
    101 124
                   -- (See comment in processBind above)
    
    125
    +              -- Also see Note [Overloaded Calls and join points]
    
    102 126
                 && not (isJoinVarExpr f)
    
    103 127
               then do
    
    104 128
                 -- Extract a name and source location from the function being
    

  • docs/users_guide/profiling.rst
    ... ... @@ -571,9 +571,7 @@ of your profiled program will be different to that of the unprofiled one.
    571 571
         Some overloaded calls may not be annotated, specifically in cases where the
    
    572 572
         optimizer turns an overloaded function into a join point. Calls to such
    
    573 573
         functions will not be wrapped in ``SCC`` annotations, since it would make
    
    574
    -    them non-tail calls, which is a requirement for join points. Instead,
    
    575
    -    ``SCC`` annotations are added around the body of overloaded join variables
    
    576
    -    and given distinct names (``join-rhs-<var>``) to avoid confusion.
    
    574
    +    them non-tail calls, which is a requirement for join points.
    
    577 575
     
    
    578 576
     .. ghc-flag:: -fprof-cafs
    
    579 577
         :shortdesc: Auto-add ``SCC``\\ s to all CAFs
    

  • rts/ProfHeap.c
    ... ... @@ -557,7 +557,7 @@ initHeapProfiling(void)
    557 557
     
    
    558 558
         restore_locale();
    
    559 559
     
    
    560
    -    traceHeapProfBegin(0);
    
    560
    +    traceInitEvent(traceHeapProfBegin);
    
    561 561
     }
    
    562 562
     
    
    563 563
     void
    
    ... ... @@ -896,17 +896,17 @@ dumpCensus( Census *census )
    896 896
     
    
    897 897
     
    
    898 898
             // Eventlog
    
    899
    -        traceHeapProfSampleString(0, "VOID",
    
    899
    +        traceHeapProfSampleString("VOID",
    
    900 900
                     (census->void_total * sizeof(W_)));
    
    901
    -        traceHeapProfSampleString(0, "LAG",
    
    901
    +        traceHeapProfSampleString("LAG",
    
    902 902
                     ((census->not_used - census->void_total) *
    
    903 903
                                          sizeof(W_)));
    
    904
    -        traceHeapProfSampleString(0, "USE",
    
    904
    +        traceHeapProfSampleString("USE",
    
    905 905
                     ((census->used - census->drag_total) *
    
    906 906
                                          sizeof(W_)));
    
    907
    -        traceHeapProfSampleString(0, "INHERENT_USE",
    
    907
    +        traceHeapProfSampleString("INHERENT_USE",
    
    908 908
                     (census->prim * sizeof(W_)));
    
    909
    -        traceHeapProfSampleString(0, "DRAG",
    
    909
    +        traceHeapProfSampleString("DRAG",
    
    910 910
                     (census->drag_total * sizeof(W_)));
    
    911 911
     
    
    912 912
             traceHeapProfSampleEnd(era);
    
    ... ... @@ -941,33 +941,33 @@ dumpCensus( Census *census )
    941 941
             switch (RtsFlags.ProfFlags.doHeapProfile) {
    
    942 942
             case HEAP_BY_CLOSURE_TYPE:
    
    943 943
                 fprintf(hp_file, "%s", (char *)ctr->identity);
    
    944
    -            traceHeapProfSampleString(0, (char *)ctr->identity,
    
    944
    +            traceHeapProfSampleString((char *)ctr->identity,
    
    945 945
                                           count * sizeof(W_));
    
    946 946
                 break;
    
    947 947
             case HEAP_BY_INFO_TABLE:
    
    948 948
                 fprintf(hp_file, "%p", ctr->identity);
    
    949 949
                 char str[100];
    
    950 950
                 sprintf(str, "%p", ctr->identity);
    
    951
    -            traceHeapProfSampleString(0, str, count * sizeof(W_));
    
    951
    +            traceHeapProfSampleString(str, count * sizeof(W_));
    
    952 952
                 break;
    
    953 953
     #if defined(PROFILING)
    
    954 954
             case HEAP_BY_CCS:
    
    955 955
                 fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
    
    956 956
                            RtsFlags.ProfFlags.ccsLength);
    
    957
    -            traceHeapProfSampleCostCentre(0, (CostCentreStack *)ctr->identity,
    
    957
    +            traceHeapProfSampleCostCentre((CostCentreStack *)ctr->identity,
    
    958 958
                                               count * sizeof(W_));
    
    959 959
                 break;
    
    960 960
             case HEAP_BY_ERA:
    
    961 961
                 fprintf(hp_file, "%" FMT_Word, (StgWord)ctr->identity);
    
    962 962
                 char str_era[100];
    
    963 963
                 sprintf(str_era, "%" FMT_Word, (StgWord)ctr->identity);
    
    964
    -            traceHeapProfSampleString(0, str_era, count * sizeof(W_));
    
    964
    +            traceHeapProfSampleString(str_era, count * sizeof(W_));
    
    965 965
                 break;
    
    966 966
             case HEAP_BY_MOD:
    
    967 967
             case HEAP_BY_DESCR:
    
    968 968
             case HEAP_BY_TYPE:
    
    969 969
                 fprintf(hp_file, "%s", (char *)ctr->identity);
    
    970
    -            traceHeapProfSampleString(0, (char *)ctr->identity,
    
    970
    +            traceHeapProfSampleString((char *)ctr->identity,
    
    971 971
                                           count * sizeof(W_));
    
    972 972
                 break;
    
    973 973
             case HEAP_BY_RETAINER:
    

  • rts/RetainerSet.c
    ... ... @@ -238,7 +238,7 @@ printRetainerSetShort(FILE *f, RetainerSet *rs, W_ total_size, uint32_t max_leng
    238 238
             }
    
    239 239
         }
    
    240 240
         fputs(tmp, f);
    
    241
    -    traceHeapProfSampleString(0, tmp, total_size);
    
    241
    +    traceHeapProfSampleString(tmp, total_size);
    
    242 242
     }
    
    243 243
     
    
    244 244
     /* -----------------------------------------------------------------------------
    

  • rts/Trace.c
    ... ... @@ -647,10 +647,10 @@ void traceTaskDelete_ (Task *task)
    647 647
         }
    
    648 648
     }
    
    649 649
     
    
    650
    -void traceHeapProfBegin(StgWord8 profile_id)
    
    650
    +void traceHeapProfBegin(void)
    
    651 651
     {
    
    652 652
         if (eventlog_enabled) {
    
    653
    -        postHeapProfBegin(profile_id);
    
    653
    +        postHeapProfBegin();
    
    654 654
         }
    
    655 655
     }
    
    656 656
     void traceHeapBioProfSampleBegin(StgInt era, StgWord64 time)
    
    ... ... @@ -674,11 +674,10 @@ void traceHeapProfSampleEnd(StgInt era)
    674 674
         }
    
    675 675
     }
    
    676 676
     
    
    677
    -void traceHeapProfSampleString(StgWord8 profile_id,
    
    678
    -                               const char *label, StgWord residency)
    
    677
    +void traceHeapProfSampleString(const char *label, StgWord residency)
    
    679 678
     {
    
    680 679
         if (eventlog_enabled) {
    
    681
    -        postHeapProfSampleString(profile_id, label, residency);
    
    680
    +        postHeapProfSampleString(label, residency);
    
    682 681
         }
    
    683 682
     }
    
    684 683
     
    
    ... ... @@ -718,11 +717,10 @@ void traceHeapProfCostCentre(StgWord32 ccID,
    718 717
     }
    
    719 718
     
    
    720 719
     // This one is for .hp samples
    
    721
    -void traceHeapProfSampleCostCentre(StgWord8 profile_id,
    
    722
    -                                   CostCentreStack *stack, StgWord residency)
    
    720
    +void traceHeapProfSampleCostCentre(CostCentreStack *stack, StgWord residency)
    
    723 721
     {
    
    724 722
         if (eventlog_enabled) {
    
    725
    -        postHeapProfSampleCostCentre(profile_id, stack, residency);
    
    723
    +        postHeapProfSampleCostCentre(stack, residency);
    
    726 724
         }
    
    727 725
     }
    
    728 726
     
    

  • rts/Trace.h
    ... ... @@ -303,20 +303,18 @@ void traceTaskMigrate_ (Task *task,
    303 303
     
    
    304 304
     void traceTaskDelete_ (Task       *task);
    
    305 305
     
    
    306
    -void traceHeapProfBegin(StgWord8 profile_id);
    
    306
    +void traceHeapProfBegin(void);
    
    307 307
     void traceHeapProfSampleBegin(StgInt era);
    
    308 308
     void traceHeapBioProfSampleBegin(StgInt era, StgWord64 time);
    
    309 309
     void traceHeapProfSampleEnd(StgInt era);
    
    310
    -void traceHeapProfSampleString(StgWord8 profile_id,
    
    311
    -                               const char *label, StgWord residency);
    
    310
    +void traceHeapProfSampleString(const char *label, StgWord residency);
    
    312 311
     #if defined(PROFILING)
    
    313 312
     void traceHeapProfCostCentre(StgWord32 ccID,
    
    314 313
                                  const char *label,
    
    315 314
                                  const char *module,
    
    316 315
                                  const char *srcloc,
    
    317 316
                                  StgBool is_caf);
    
    318
    -void traceHeapProfSampleCostCentre(StgWord8 profile_id,
    
    319
    -                                   CostCentreStack *stack, StgWord residency);
    
    317
    +void traceHeapProfSampleCostCentre(CostCentreStack *stack, StgWord residency);
    
    320 318
     
    
    321 319
     void traceProfSampleCostCentre(Capability *cap,
    
    322 320
                                    CostCentreStack *stack, StgWord ticks);
    
    ... ... @@ -369,14 +367,14 @@ void flushTrace(void);
    369 367
     #define traceTaskCreate_(taskID, cap) /* nothing */
    
    370 368
     #define traceTaskMigrate_(taskID, cap, new_cap) /* nothing */
    
    371 369
     #define traceTaskDelete_(taskID) /* nothing */
    
    372
    -#define traceHeapProfBegin(profile_id) /* nothing */
    
    370
    +#define traceHeapProfBegin() /* nothing */
    
    373 371
     #define traceHeapProfCostCentre(ccID, label, module, srcloc, is_caf) /* nothing */
    
    374 372
     #define traceIPE(ipe) /* nothing */
    
    375 373
     #define traceHeapProfSampleBegin(era) /* nothing */
    
    376 374
     #define traceHeapBioProfSampleBegin(era, time) /* nothing */
    
    377 375
     #define traceHeapProfSampleEnd(era) /* nothing */
    
    378
    -#define traceHeapProfSampleCostCentre(profile_id, stack, residency) /* nothing */
    
    379
    -#define traceHeapProfSampleString(profile_id, label, residency) /* nothing */
    
    376
    +#define traceHeapProfSampleCostCentre(stack, residency) /* nothing */
    
    377
    +#define traceHeapProfSampleString(label, residency) /* nothing */
    
    380 378
     
    
    381 379
     #define traceConcMarkBegin() /* nothing */
    
    382 380
     #define traceConcMarkEnd(marked_obj_count) /* nothing */
    

  • rts/eventlog/EventLog.c
    ... ... @@ -95,6 +95,13 @@ bool eventlog_enabled; // protected by state_change_mutex to ensure
    95 95
      * buffer size, EVENT_LOG_SIZE. We must ensure that no variable-length event
    
    96 96
      * exceeds this limit. For this reason we impose maximum length limits on
    
    97 97
      * fields which may have unbounded values.
    
    98
    + *
    
    99
    + * Note [Profile ID]
    
    100
    + * ~~~~~~~~~~~~~~~~~
    
    101
    + * The profile ID field of eventlog entries is reserved for future use,
    
    102
    + * with an eye towards supporting multiple parallel heap profiles.
    
    103
    + * In the current RTS, the profile ID is hardcoded to 0.
    
    104
    + *
    
    98 105
      */
    
    99 106
     
    
    100 107
     static const EventLogWriter *event_log_writer = NULL;
    
    ... ... @@ -1219,7 +1226,7 @@ static HeapProfBreakdown getHeapProfBreakdown(void)
    1219 1226
         }
    
    1220 1227
     }
    
    1221 1228
     
    
    1222
    -void postHeapProfBegin(StgWord8 profile_id)
    
    1229
    +void postHeapProfBegin(void)
    
    1223 1230
     {
    
    1224 1231
         ACQUIRE_LOCK(&eventBufMutex);
    
    1225 1232
         PROFILING_FLAGS *flags = &RtsFlags.ProfFlags;
    
    ... ... @@ -1244,7 +1251,8 @@ void postHeapProfBegin(StgWord8 profile_id)
    1244 1251
         CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
    
    1245 1252
         postEventHeader(&eventBuf, EVENT_HEAP_PROF_BEGIN);
    
    1246 1253
         postPayloadSize(&eventBuf, len);
    
    1247
    -    postWord8(&eventBuf, profile_id);
    
    1254
    +    // See Note [Profile ID].
    
    1255
    +    postWord8(&eventBuf, 0);
    
    1248 1256
         postWord64(&eventBuf, TimeToNS(flags->heapProfileInterval));
    
    1249 1257
         postWord32(&eventBuf, getHeapProfBreakdown());
    
    1250 1258
         postStringLen(&eventBuf, flags->modSelector, modSelector_len);
    
    ... ... @@ -1286,8 +1294,7 @@ void postHeapProfSampleEnd(StgInt era)
    1286 1294
         RELEASE_LOCK(&eventBufMutex);
    
    1287 1295
     }
    
    1288 1296
     
    
    1289
    -void postHeapProfSampleString(StgWord8 profile_id,
    
    1290
    -                              const char *label,
    
    1297
    +void postHeapProfSampleString(const char *label,
    
    1291 1298
                                   StgWord64 residency)
    
    1292 1299
     {
    
    1293 1300
         ACQUIRE_LOCK(&eventBufMutex);
    
    ... ... @@ -1296,7 +1303,8 @@ void postHeapProfSampleString(StgWord8 profile_id,
    1296 1303
         CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
    
    1297 1304
         postEventHeader(&eventBuf, EVENT_HEAP_PROF_SAMPLE_STRING);
    
    1298 1305
         postPayloadSize(&eventBuf, len);
    
    1299
    -    postWord8(&eventBuf, profile_id);
    
    1306
    +    // See Note [Profile ID].
    
    1307
    +    postWord8(&eventBuf, 0);
    
    1300 1308
         postWord64(&eventBuf, residency);
    
    1301 1309
         postStringLen(&eventBuf, label, label_len);
    
    1302 1310
         RELEASE_LOCK(&eventBufMutex);
    
    ... ... @@ -1325,8 +1333,7 @@ void postHeapProfCostCentre(StgWord32 ccID,
    1325 1333
         RELEASE_LOCK(&eventBufMutex);
    
    1326 1334
     }
    
    1327 1335
     
    
    1328
    -void postHeapProfSampleCostCentre(StgWord8 profile_id,
    
    1329
    -                                  CostCentreStack *stack,
    
    1336
    +void postHeapProfSampleCostCentre(CostCentreStack *stack,
    
    1330 1337
                                       StgWord64 residency)
    
    1331 1338
     {
    
    1332 1339
         ACQUIRE_LOCK(&eventBufMutex);
    
    ... ... @@ -1340,7 +1347,8 @@ void postHeapProfSampleCostCentre(StgWord8 profile_id,
    1340 1347
         CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
    
    1341 1348
         postEventHeader(&eventBuf, EVENT_HEAP_PROF_SAMPLE_COST_CENTRE);
    
    1342 1349
         postPayloadSize(&eventBuf, len);
    
    1343
    -    postWord8(&eventBuf, profile_id);
    
    1350
    +    // See Note [Profile ID].
    
    1351
    +    postWord8(&eventBuf, 0);
    
    1344 1352
         postWord64(&eventBuf, residency);
    
    1345 1353
         postWord8(&eventBuf, depth);
    
    1346 1354
         for (ccs = stack;
    

  • rts/eventlog/EventLog.h
    ... ... @@ -163,14 +163,13 @@ void postTaskMigrateEvent (EventTaskId taskId,
    163 163
     
    
    164 164
     void postTaskDeleteEvent (EventTaskId taskId);
    
    165 165
     
    
    166
    -void postHeapProfBegin(StgWord8 profile_id);
    
    166
    +void postHeapProfBegin(void);
    
    167 167
     
    
    168 168
     void postHeapProfSampleBegin(StgInt era);
    
    169 169
     void postHeapBioProfSampleBegin(StgInt era, StgWord64 time_ns);
    
    170 170
     void postHeapProfSampleEnd(StgInt era);
    
    171 171
     
    
    172
    -void postHeapProfSampleString(StgWord8 profile_id,
    
    173
    -                              const char *label,
    
    172
    +void postHeapProfSampleString(const char *label,
    
    174 173
                                   StgWord64 residency);
    
    175 174
     
    
    176 175
     #if defined(PROFILING)
    
    ... ... @@ -180,8 +179,7 @@ void postHeapProfCostCentre(StgWord32 ccID,
    180 179
                                 const char *srcloc,
    
    181 180
                                 StgBool is_caf);
    
    182 181
     
    
    183
    -void postHeapProfSampleCostCentre(StgWord8 profile_id,
    
    184
    -                                  CostCentreStack *stack,
    
    182
    +void postHeapProfSampleCostCentre(CostCentreStack *stack,
    
    185 183
                                       StgWord64 residency);
    
    186 184
     
    
    187 185
     void postProfSampleCostCentre(Capability *cap,