Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC
Commits:
-
e8acd2e7
by Wen Kokke at 2025-07-16T08:37:04-04:00
-
76d392a2
by Wen Kokke at 2025-07-16T08:37:04-04:00
-
e2196818
by Peng Fan at 2025-07-16T09:09:57-04:00
-
847209fa
by Andreas Klebinger at 2025-07-16T09:09:58-04:00
11 changed files:
- compiler/GHC/CmmToAsm/LA64/CodeGen.hs
- compiler/GHC/CmmToAsm/LA64/Instr.hs
- compiler/GHC/CmmToAsm/LA64/Ppr.hs
- compiler/GHC/Core/LateCC/OverloadedCalls.hs
- docs/users_guide/profiling.rst
- rts/ProfHeap.c
- rts/RetainerSet.c
- rts/Trace.c
- rts/Trace.h
- rts/eventlog/EventLog.c
- rts/eventlog/EventLog.h
Changes:
| ... | ... | @@ -1910,13 +1910,12 @@ genCCall target dest_regs arg_regs = do |
| 1910 | 1910 | MO_W64X2_Max -> unsupported mop
|
| 1911 | 1911 | |
| 1912 | 1912 | -- Memory Ordering
|
| 1913 | - -- A hint value of 0 is mandatory by default, and it indicates a fully functional synchronization barrier.
|
|
| 1914 | - -- Only after all previous load/store access operations are completely executed, the DBAR 0 instruction can be executed;
|
|
| 1915 | - -- and only after the execution of DBAR 0 is completed, all subsequent load/store access operations can be executed.
|
|
| 1916 | - |
|
| 1917 | - MO_AcquireFence -> pure (unitOL (DBAR Hint0))
|
|
| 1918 | - MO_ReleaseFence -> pure (unitOL (DBAR Hint0))
|
|
| 1919 | - MO_SeqCstFence -> pure (unitOL (DBAR Hint0))
|
|
| 1913 | + -- Support finer-grained DBAR hints for LA664 and newer uarchs.
|
|
| 1914 | + -- These are treated as DBAR 0 on older uarchs, so we can start
|
|
| 1915 | + -- to unconditionally emit the new hints right away.
|
|
| 1916 | + MO_AcquireFence -> pure (unitOL (DBAR HintAcquire))
|
|
| 1917 | + MO_ReleaseFence -> pure (unitOL (DBAR HintRelease))
|
|
| 1918 | + MO_SeqCstFence -> pure (unitOL (DBAR HintSeqcst))
|
|
| 1920 | 1919 | |
| 1921 | 1920 | MO_Touch -> pure nilOL -- Keep variables live (when using interior pointers)
|
| 1922 | 1921 | -- Prefetch
|
| ... | ... | @@ -1954,12 +1953,11 @@ genCCall target dest_regs arg_regs = do |
| 1954 | 1953 | |
| 1955 | 1954 | MemOrderAcquire -> toOL [
|
| 1956 | 1955 | ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
|
| 1957 | - DBAR Hint0
|
|
| 1956 | + DBAR HintAcquire
|
|
| 1958 | 1957 | ]
|
| 1959 | - MemOrderSeqCst -> toOL [
|
|
| 1960 | - ann moDescr (DBAR Hint0),
|
|
| 1961 | - LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p),
|
|
| 1962 | - DBAR Hint0
|
|
| 1958 | + MemOrderSeqCst -> toOL [
|
|
| 1959 | + ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
|
|
| 1960 | + DBAR HintSeqcst
|
|
| 1963 | 1961 | ]
|
| 1964 | 1962 | _ -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo
|
| 1965 | 1963 | dst = getRegisterReg platform (CmmLocal dst_reg)
|
| ... | ... | @@ -1974,15 +1972,9 @@ genCCall target dest_regs arg_regs = do |
| 1974 | 1972 | (val, fmt_val, code_val) <- getSomeReg val_reg
|
| 1975 | 1973 | let instrs = case ord of
|
| 1976 | 1974 | MemOrderRelaxed -> unitOL $ ann moDescr (ST fmt_val (OpReg w val) (OpAddr $ AddrReg p))
|
| 1977 | - MemOrderRelease -> toOL [
|
|
| 1978 | - ann moDescr (DBAR Hint0),
|
|
| 1979 | - ST fmt_val (OpReg w val) (OpAddr $ AddrReg p)
|
|
| 1980 | - ]
|
|
| 1981 | - MemOrderSeqCst -> toOL [
|
|
| 1982 | - ann moDescr (DBAR Hint0),
|
|
| 1983 | - ST fmt_val (OpReg w val) (OpAddr $ AddrReg p),
|
|
| 1984 | - DBAR Hint0
|
|
| 1985 | - ]
|
|
| 1975 | + -- implement with AMSWAPDB
|
|
| 1976 | + MemOrderRelease -> unitOL $ ann moDescr (AMSWAPDB fmt_val (OpReg w zeroReg) (OpReg w val) (OpReg w p))
|
|
| 1977 | + MemOrderSeqCst -> unitOL $ ann moDescr (AMSWAPDB fmt_val (OpReg w zeroReg) (OpReg w val) (OpReg w p))
|
|
| 1986 | 1978 | _ -> panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
|
| 1987 | 1979 | moDescr = (text . show) mo
|
| 1988 | 1980 | code =
|
| ... | ... | @@ -169,6 +169,7 @@ regUsageOfInstr platform instr = case instr of |
| 169 | 169 | -- LDCOND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 170 | 170 | -- STCOND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 171 | 171 | -- 7. Atomic Memory Access Instructions --------------------------------------
|
| 172 | + AMSWAPDB _ dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
|
| 172 | 173 | -- 8. Barrier Instructions ---------------------------------------------------
|
| 173 | 174 | DBAR _hint -> usage ([], [])
|
| 174 | 175 | IBAR _hint -> usage ([], [])
|
| ... | ... | @@ -343,13 +344,13 @@ patchRegsOfInstr instr env = case instr of |
| 343 | 344 | STX f o1 o2 -> STX f (patchOp o1) (patchOp o2)
|
| 344 | 345 | LDPTR f o1 o2 -> LDPTR f (patchOp o1) (patchOp o2)
|
| 345 | 346 | STPTR f o1 o2 -> STPTR f (patchOp o1) (patchOp o2)
|
| 346 | - PRELD o1 o2 -> PRELD (patchOp o1) (patchOp o2)
|
|
| 347 | + PRELD o1 o2 -> PRELD (patchOp o1) (patchOp o2)
|
|
| 347 | 348 | -- 6. Bound Check Memory Access Instructions ---------------------------------
|
| 348 | 349 | -- LDCOND o1 o2 o3 -> LDCOND (patchOp o1) (patchOp o2) (patchOp o3)
|
| 349 | 350 | -- STCOND o1 o2 o3 -> STCOND (patchOp o1) (patchOp o2) (patchOp o3)
|
| 350 | 351 | -- 7. Atomic Memory Access Instructions --------------------------------------
|
| 352 | + AMSWAPDB f o1 o2 o3 -> AMSWAPDB f (patchOp o1) (patchOp o2) (patchOp o3)
|
|
| 351 | 353 | -- 8. Barrier Instructions ---------------------------------------------------
|
| 352 | - -- TODO: need fix
|
|
| 353 | 354 | DBAR o1 -> DBAR o1
|
| 354 | 355 | IBAR o1 -> IBAR o1
|
| 355 | 356 | -- 11. Floating Point Instructions -------------------------------------------
|
| ... | ... | @@ -734,6 +735,7 @@ data Instr |
| 734 | 735 | | PRELD Operand Operand
|
| 735 | 736 | -- 6. Bound Check Memory Access Instructions ---------------------------------
|
| 736 | 737 | -- 7. Atomic Memory Access Instructions --------------------------------------
|
| 738 | + | AMSWAPDB Format Operand Operand Operand
|
|
| 737 | 739 | -- 8. Barrier Instructions ---------------------------------------------------
|
| 738 | 740 | | DBAR BarrierType
|
| 739 | 741 | | IBAR BarrierType
|
| ... | ... | @@ -755,8 +757,13 @@ data Instr |
| 755 | 757 | -- fnmadd: d = - r1 * r2 - r3
|
| 756 | 758 | | FMA FMASign Operand Operand Operand Operand
|
| 757 | 759 | |
| 758 | --- TODO: Not complete.
|
|
| 759 | -data BarrierType = Hint0
|
|
| 760 | +data BarrierType
|
|
| 761 | + = Hint0
|
|
| 762 | + | Hint700
|
|
| 763 | + | HintAcquire
|
|
| 764 | + | HintRelease
|
|
| 765 | + | HintSeqcst
|
|
| 766 | + deriving (Eq, Show)
|
|
| 760 | 767 | |
| 761 | 768 | instrCon :: Instr -> String
|
| 762 | 769 | instrCon i =
|
| ... | ... | @@ -847,6 +854,7 @@ instrCon i = |
| 847 | 854 | LDPTR{} -> "LDPTR"
|
| 848 | 855 | STPTR{} -> "STPTR"
|
| 849 | 856 | PRELD{} -> "PRELD"
|
| 857 | + AMSWAPDB{} -> "AMSWAPDB"
|
|
| 850 | 858 | DBAR{} -> "DBAR"
|
| 851 | 859 | IBAR{} -> "IBAR"
|
| 852 | 860 | FCVT{} -> "FCVT"
|
| ... | ... | @@ -1015,6 +1015,10 @@ pprInstr platform instr = case instr of |
| 1015 | 1015 | -- LD{GT/LE}.{B/H/W/D}, ST{GT/LE}.{B/H/W/D}
|
| 1016 | 1016 | -- 7. Atomic Memory Access Instructions --------------------------------------
|
| 1017 | 1017 | -- AM{SWAP/ADD/AND/OR/XOR/MAX/MIN}[DB].{W/D}, AM{MAX/MIN}[_DB].{WU/DU}
|
| 1018 | + AMSWAPDB II8 o1 o2 o3 -> op3 (text "\tamswap_db.b") o1 o2 o3
|
|
| 1019 | + AMSWAPDB II16 o1 o2 o3 -> op3 (text "\tamswap_db.h") o1 o2 o3
|
|
| 1020 | + AMSWAPDB II32 o1 o2 o3 -> op3 (text "\tamswap_db.w") o1 o2 o3
|
|
| 1021 | + AMSWAPDB II64 o1 o2 o3 -> op3 (text "\tamswap_db.d") o1 o2 o3
|
|
| 1018 | 1022 | -- AM.{SWAP/ADD}[_DB].{B/H}
|
| 1019 | 1023 | -- AMCAS[_DB].{B/H/W/D}
|
| 1020 | 1024 | -- LL.{W/D}, SC.{W/D}
|
| ... | ... | @@ -1112,19 +1116,28 @@ pprInstr platform instr = case instr of |
| 1112 | 1116 | op3 op o1 o2 o3 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3
|
| 1113 | 1117 | op4 op o1 o2 o3 o4 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4
|
| 1114 | 1118 | {-
|
| 1115 | - -- TODO: Support dbar with different hints.
|
|
| 1119 | + Support dbar with different hints.
|
|
| 1116 | 1120 | On LoongArch uses "dbar 0" (full completion barrier) for everything.
|
| 1117 | 1121 | But the full completion barrier has no performance to tell, so
|
| 1118 | 1122 | Loongson-3A6000 and newer processors have made finer granularity hints
|
| 1119 | 1123 | available:
|
| 1120 | 1124 | |
| 1125 | + Hint 0x700: barrier for "read after read" from the same address.
|
|
| 1121 | 1126 | Bit4: ordering or completion (0: completion, 1: ordering)
|
| 1122 | 1127 | Bit3: barrier for previous read (0: true, 1: false)
|
| 1123 | 1128 | Bit2: barrier for previous write (0: true, 1: false)
|
| 1124 | 1129 | Bit1: barrier for succeeding read (0: true, 1: false)
|
| 1125 | 1130 | Bit0: barrier for succeeding write (0: true, 1: false)
|
| 1131 | + |
|
| 1132 | + DBAR 0b10100: acquire
|
|
| 1133 | + DBAR 0b10010: release
|
|
| 1134 | + DBAR 0b10000: seqcst
|
|
| 1126 | 1135 | -}
|
| 1127 | 1136 | pprBarrierType Hint0 = text "0x0"
|
| 1137 | + pprBarrierType HintSeqcst = text "0x10"
|
|
| 1138 | + pprBarrierType HintRelease = text "0x12"
|
|
| 1139 | + pprBarrierType HintAcquire = text "0x14"
|
|
| 1140 | + pprBarrierType Hint700 = text "0x700"
|
|
| 1128 | 1141 | floatPrecission o | isSingleOp o = text "s"
|
| 1129 | 1142 | | isDoubleOp o = text "d"
|
| 1130 | 1143 | | otherwise = pprPanic "Impossible floating point precission: " (pprOp platform o)
|
| ... | ... | @@ -20,7 +20,6 @@ import GHC.Core.Make |
| 20 | 20 | import GHC.Core.Predicate
|
| 21 | 21 | import GHC.Core.Type
|
| 22 | 22 | import GHC.Core.Utils
|
| 23 | -import GHC.Tc.Utils.TcType
|
|
| 24 | 23 | import GHC.Types.Id
|
| 25 | 24 | import GHC.Types.Name
|
| 26 | 25 | import GHC.Types.SrcLoc
|
| ... | ... | @@ -29,6 +28,41 @@ import GHC.Types.Var |
| 29 | 28 | |
| 30 | 29 | type OverloadedCallsCCState = Strict.Maybe SrcSpan
|
| 31 | 30 | |
| 31 | +{- Note [Overloaded Calls and join points]
|
|
| 32 | +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
| 33 | +Currently GHC considers cost centres as destructive to
|
|
| 34 | +join contexts. Or in other words this is not considered valid:
|
|
| 35 | + |
|
| 36 | + join f x = ...
|
|
| 37 | + in
|
|
| 38 | + ... -> scc<tick> jmp
|
|
| 39 | + |
|
| 40 | +This makes the functionality of `-fprof-late-overloaded-calls` not feasible
|
|
| 41 | +for join points in general. We used to try to work around this by putting the
|
|
| 42 | +ticks on the rhs of the join point rather than around the jump. However beyond
|
|
| 43 | +the loss of accuracy this was broken for recursive join points as we ended up
|
|
| 44 | +with something like:
|
|
| 45 | + |
|
| 46 | + rec-join f x = scc<tick> ... jmp f x
|
|
| 47 | + |
|
| 48 | +Which similarly is not valid as the tick once again destroys the tail call.
|
|
| 49 | +One might think we could limit ourselves to non-recursive tail calls and do
|
|
| 50 | +something clever like:
|
|
| 51 | + |
|
| 52 | + join f x = scc<tick> ...
|
|
| 53 | + in ... jmp f x
|
|
| 54 | + |
|
| 55 | +And sometimes this works! But sometimes the full rhs would look something like:
|
|
| 56 | + |
|
| 57 | + join g x = ....
|
|
| 58 | + join f x = scc<tick> ... -> jmp g x
|
|
| 59 | + |
|
| 60 | +Which, would again no longer be valid. I believe in the long run we can make
|
|
| 61 | +cost centre ticks non-destructive to join points. Or we could keep track of
|
|
| 62 | +where we are/are not allowed to insert a cost centre. But in the short term I will
|
|
| 63 | +simply disable the annotation of join calls under this flag.
|
|
| 64 | +-}
|
|
| 65 | + |
|
| 32 | 66 | -- | Insert cost centres on function applications with dictionary arguments. The
|
| 33 | 67 | -- source locations attached to the cost centres is approximated based on the
|
| 34 | 68 | -- "closest" source note encountered in the traversal.
|
| ... | ... | @@ -52,21 +86,10 @@ overloadedCallsCC = |
| 52 | 86 | CoreBndr
|
| 53 | 87 | -> LateCCM OverloadedCallsCCState CoreExpr
|
| 54 | 88 | -> LateCCM OverloadedCallsCCState CoreExpr
|
| 55 | - wrap_if_join b pexpr = do
|
|
| 89 | + wrap_if_join _b pexpr = do
|
|
| 90 | + -- See Note [Overloaded Calls and join points]
|
|
| 56 | 91 | expr <- pexpr
|
| 57 | - if isJoinId b && isOverloadedTy (exprType expr) then do
|
|
| 58 | - let
|
|
| 59 | - cc_name :: FastString
|
|
| 60 | - cc_name = fsLit "join-rhs-" `appendFS` getOccFS b
|
|
| 61 | - |
|
| 62 | - cc_srcspan <-
|
|
| 63 | - fmap (Strict.fromMaybe (UnhelpfulSpan UnhelpfulNoLocationInfo)) $
|
|
| 64 | - lift $ gets lateCCState_extra
|
|
| 65 | - |
|
| 66 | - insertCC cc_name cc_srcspan expr
|
|
| 67 | - else
|
|
| 68 | - return expr
|
|
| 69 | - |
|
| 92 | + return expr
|
|
| 70 | 93 | |
| 71 | 94 | processExpr :: CoreExpr -> LateCCM OverloadedCallsCCState CoreExpr
|
| 72 | 95 | processExpr expr =
|
| ... | ... | @@ -99,6 +122,7 @@ overloadedCallsCC = |
| 99 | 122 | |
| 100 | 123 | -- Avoid instrumenting join points.
|
| 101 | 124 | -- (See comment in processBind above)
|
| 125 | + -- Also see Note [Overloaded Calls and join points]
|
|
| 102 | 126 | && not (isJoinVarExpr f)
|
| 103 | 127 | then do
|
| 104 | 128 | -- Extract a name and source location from the function being
|
| ... | ... | @@ -571,9 +571,7 @@ of your profiled program will be different to that of the unprofiled one. |
| 571 | 571 | Some overloaded calls may not be annotated, specifically in cases where the
|
| 572 | 572 | optimizer turns an overloaded function into a join point. Calls to such
|
| 573 | 573 | functions will not be wrapped in ``SCC`` annotations, since it would make
|
| 574 | - them non-tail calls, which is a requirement for join points. Instead,
|
|
| 575 | - ``SCC`` annotations are added around the body of overloaded join variables
|
|
| 576 | - and given distinct names (``join-rhs-<var>``) to avoid confusion.
|
|
| 574 | + them non-tail calls, which is a requirement for join points.
|
|
| 577 | 575 | |
| 578 | 576 | .. ghc-flag:: -fprof-cafs
|
| 579 | 577 | :shortdesc: Auto-add ``SCC``\\ s to all CAFs
|
| ... | ... | @@ -557,7 +557,7 @@ initHeapProfiling(void) |
| 557 | 557 | |
| 558 | 558 | restore_locale();
|
| 559 | 559 | |
| 560 | - traceHeapProfBegin(0);
|
|
| 560 | + traceInitEvent(traceHeapProfBegin);
|
|
| 561 | 561 | }
|
| 562 | 562 | |
| 563 | 563 | void
|
| ... | ... | @@ -896,17 +896,17 @@ dumpCensus( Census *census ) |
| 896 | 896 | |
| 897 | 897 | |
| 898 | 898 | // Eventlog
|
| 899 | - traceHeapProfSampleString(0, "VOID",
|
|
| 899 | + traceHeapProfSampleString("VOID",
|
|
| 900 | 900 | (census->void_total * sizeof(W_)));
|
| 901 | - traceHeapProfSampleString(0, "LAG",
|
|
| 901 | + traceHeapProfSampleString("LAG",
|
|
| 902 | 902 | ((census->not_used - census->void_total) *
|
| 903 | 903 | sizeof(W_)));
|
| 904 | - traceHeapProfSampleString(0, "USE",
|
|
| 904 | + traceHeapProfSampleString("USE",
|
|
| 905 | 905 | ((census->used - census->drag_total) *
|
| 906 | 906 | sizeof(W_)));
|
| 907 | - traceHeapProfSampleString(0, "INHERENT_USE",
|
|
| 907 | + traceHeapProfSampleString("INHERENT_USE",
|
|
| 908 | 908 | (census->prim * sizeof(W_)));
|
| 909 | - traceHeapProfSampleString(0, "DRAG",
|
|
| 909 | + traceHeapProfSampleString("DRAG",
|
|
| 910 | 910 | (census->drag_total * sizeof(W_)));
|
| 911 | 911 | |
| 912 | 912 | traceHeapProfSampleEnd(era);
|
| ... | ... | @@ -941,33 +941,33 @@ dumpCensus( Census *census ) |
| 941 | 941 | switch (RtsFlags.ProfFlags.doHeapProfile) {
|
| 942 | 942 | case HEAP_BY_CLOSURE_TYPE:
|
| 943 | 943 | fprintf(hp_file, "%s", (char *)ctr->identity);
|
| 944 | - traceHeapProfSampleString(0, (char *)ctr->identity,
|
|
| 944 | + traceHeapProfSampleString((char *)ctr->identity,
|
|
| 945 | 945 | count * sizeof(W_));
|
| 946 | 946 | break;
|
| 947 | 947 | case HEAP_BY_INFO_TABLE:
|
| 948 | 948 | fprintf(hp_file, "%p", ctr->identity);
|
| 949 | 949 | char str[100];
|
| 950 | 950 | sprintf(str, "%p", ctr->identity);
|
| 951 | - traceHeapProfSampleString(0, str, count * sizeof(W_));
|
|
| 951 | + traceHeapProfSampleString(str, count * sizeof(W_));
|
|
| 952 | 952 | break;
|
| 953 | 953 | #if defined(PROFILING)
|
| 954 | 954 | case HEAP_BY_CCS:
|
| 955 | 955 | fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
|
| 956 | 956 | RtsFlags.ProfFlags.ccsLength);
|
| 957 | - traceHeapProfSampleCostCentre(0, (CostCentreStack *)ctr->identity,
|
|
| 957 | + traceHeapProfSampleCostCentre((CostCentreStack *)ctr->identity,
|
|
| 958 | 958 | count * sizeof(W_));
|
| 959 | 959 | break;
|
| 960 | 960 | case HEAP_BY_ERA:
|
| 961 | 961 | fprintf(hp_file, "%" FMT_Word, (StgWord)ctr->identity);
|
| 962 | 962 | char str_era[100];
|
| 963 | 963 | sprintf(str_era, "%" FMT_Word, (StgWord)ctr->identity);
|
| 964 | - traceHeapProfSampleString(0, str_era, count * sizeof(W_));
|
|
| 964 | + traceHeapProfSampleString(str_era, count * sizeof(W_));
|
|
| 965 | 965 | break;
|
| 966 | 966 | case HEAP_BY_MOD:
|
| 967 | 967 | case HEAP_BY_DESCR:
|
| 968 | 968 | case HEAP_BY_TYPE:
|
| 969 | 969 | fprintf(hp_file, "%s", (char *)ctr->identity);
|
| 970 | - traceHeapProfSampleString(0, (char *)ctr->identity,
|
|
| 970 | + traceHeapProfSampleString((char *)ctr->identity,
|
|
| 971 | 971 | count * sizeof(W_));
|
| 972 | 972 | break;
|
| 973 | 973 | case HEAP_BY_RETAINER:
|
| ... | ... | @@ -238,7 +238,7 @@ printRetainerSetShort(FILE *f, RetainerSet *rs, W_ total_size, uint32_t max_leng |
| 238 | 238 | }
|
| 239 | 239 | }
|
| 240 | 240 | fputs(tmp, f);
|
| 241 | - traceHeapProfSampleString(0, tmp, total_size);
|
|
| 241 | + traceHeapProfSampleString(tmp, total_size);
|
|
| 242 | 242 | }
|
| 243 | 243 | |
| 244 | 244 | /* -----------------------------------------------------------------------------
|
| ... | ... | @@ -647,10 +647,10 @@ void traceTaskDelete_ (Task *task) |
| 647 | 647 | }
|
| 648 | 648 | }
|
| 649 | 649 | |
| 650 | -void traceHeapProfBegin(StgWord8 profile_id)
|
|
| 650 | +void traceHeapProfBegin(void)
|
|
| 651 | 651 | {
|
| 652 | 652 | if (eventlog_enabled) {
|
| 653 | - postHeapProfBegin(profile_id);
|
|
| 653 | + postHeapProfBegin();
|
|
| 654 | 654 | }
|
| 655 | 655 | }
|
| 656 | 656 | void traceHeapBioProfSampleBegin(StgInt era, StgWord64 time)
|
| ... | ... | @@ -674,11 +674,10 @@ void traceHeapProfSampleEnd(StgInt era) |
| 674 | 674 | }
|
| 675 | 675 | }
|
| 676 | 676 | |
| 677 | -void traceHeapProfSampleString(StgWord8 profile_id,
|
|
| 678 | - const char *label, StgWord residency)
|
|
| 677 | +void traceHeapProfSampleString(const char *label, StgWord residency)
|
|
| 679 | 678 | {
|
| 680 | 679 | if (eventlog_enabled) {
|
| 681 | - postHeapProfSampleString(profile_id, label, residency);
|
|
| 680 | + postHeapProfSampleString(label, residency);
|
|
| 682 | 681 | }
|
| 683 | 682 | }
|
| 684 | 683 | |
| ... | ... | @@ -718,11 +717,10 @@ void traceHeapProfCostCentre(StgWord32 ccID, |
| 718 | 717 | }
|
| 719 | 718 | |
| 720 | 719 | // This one is for .hp samples
|
| 721 | -void traceHeapProfSampleCostCentre(StgWord8 profile_id,
|
|
| 722 | - CostCentreStack *stack, StgWord residency)
|
|
| 720 | +void traceHeapProfSampleCostCentre(CostCentreStack *stack, StgWord residency)
|
|
| 723 | 721 | {
|
| 724 | 722 | if (eventlog_enabled) {
|
| 725 | - postHeapProfSampleCostCentre(profile_id, stack, residency);
|
|
| 723 | + postHeapProfSampleCostCentre(stack, residency);
|
|
| 726 | 724 | }
|
| 727 | 725 | }
|
| 728 | 726 |
| ... | ... | @@ -303,20 +303,18 @@ void traceTaskMigrate_ (Task *task, |
| 303 | 303 | |
| 304 | 304 | void traceTaskDelete_ (Task *task);
|
| 305 | 305 | |
| 306 | -void traceHeapProfBegin(StgWord8 profile_id);
|
|
| 306 | +void traceHeapProfBegin(void);
|
|
| 307 | 307 | void traceHeapProfSampleBegin(StgInt era);
|
| 308 | 308 | void traceHeapBioProfSampleBegin(StgInt era, StgWord64 time);
|
| 309 | 309 | void traceHeapProfSampleEnd(StgInt era);
|
| 310 | -void traceHeapProfSampleString(StgWord8 profile_id,
|
|
| 311 | - const char *label, StgWord residency);
|
|
| 310 | +void traceHeapProfSampleString(const char *label, StgWord residency);
|
|
| 312 | 311 | #if defined(PROFILING)
|
| 313 | 312 | void traceHeapProfCostCentre(StgWord32 ccID,
|
| 314 | 313 | const char *label,
|
| 315 | 314 | const char *module,
|
| 316 | 315 | const char *srcloc,
|
| 317 | 316 | StgBool is_caf);
|
| 318 | -void traceHeapProfSampleCostCentre(StgWord8 profile_id,
|
|
| 319 | - CostCentreStack *stack, StgWord residency);
|
|
| 317 | +void traceHeapProfSampleCostCentre(CostCentreStack *stack, StgWord residency);
|
|
| 320 | 318 | |
| 321 | 319 | void traceProfSampleCostCentre(Capability *cap,
|
| 322 | 320 | CostCentreStack *stack, StgWord ticks);
|
| ... | ... | @@ -369,14 +367,14 @@ void flushTrace(void); |
| 369 | 367 | #define traceTaskCreate_(taskID, cap) /* nothing */
|
| 370 | 368 | #define traceTaskMigrate_(taskID, cap, new_cap) /* nothing */
|
| 371 | 369 | #define traceTaskDelete_(taskID) /* nothing */
|
| 372 | -#define traceHeapProfBegin(profile_id) /* nothing */
|
|
| 370 | +#define traceHeapProfBegin() /* nothing */
|
|
| 373 | 371 | #define traceHeapProfCostCentre(ccID, label, module, srcloc, is_caf) /* nothing */
|
| 374 | 372 | #define traceIPE(ipe) /* nothing */
|
| 375 | 373 | #define traceHeapProfSampleBegin(era) /* nothing */
|
| 376 | 374 | #define traceHeapBioProfSampleBegin(era, time) /* nothing */
|
| 377 | 375 | #define traceHeapProfSampleEnd(era) /* nothing */
|
| 378 | -#define traceHeapProfSampleCostCentre(profile_id, stack, residency) /* nothing */
|
|
| 379 | -#define traceHeapProfSampleString(profile_id, label, residency) /* nothing */
|
|
| 376 | +#define traceHeapProfSampleCostCentre(stack, residency) /* nothing */
|
|
| 377 | +#define traceHeapProfSampleString(label, residency) /* nothing */
|
|
| 380 | 378 | |
| 381 | 379 | #define traceConcMarkBegin() /* nothing */
|
| 382 | 380 | #define traceConcMarkEnd(marked_obj_count) /* nothing */
|
| ... | ... | @@ -95,6 +95,13 @@ bool eventlog_enabled; // protected by state_change_mutex to ensure |
| 95 | 95 | * buffer size, EVENT_LOG_SIZE. We must ensure that no variable-length event
|
| 96 | 96 | * exceeds this limit. For this reason we impose maximum length limits on
|
| 97 | 97 | * fields which may have unbounded values.
|
| 98 | + *
|
|
| 99 | + * Note [Profile ID]
|
|
| 100 | + * ~~~~~~~~~~~~~~~~~
|
|
| 101 | + * The profile ID field of eventlog entries is reserved for future use,
|
|
| 102 | + * with an eye towards supporting multiple parallel heap profiles.
|
|
| 103 | + * In the current RTS, the profile ID is hardcoded to 0.
|
|
| 104 | + *
|
|
| 98 | 105 | */
|
| 99 | 106 | |
| 100 | 107 | static const EventLogWriter *event_log_writer = NULL;
|
| ... | ... | @@ -1219,7 +1226,7 @@ static HeapProfBreakdown getHeapProfBreakdown(void) |
| 1219 | 1226 | }
|
| 1220 | 1227 | }
|
| 1221 | 1228 | |
| 1222 | -void postHeapProfBegin(StgWord8 profile_id)
|
|
| 1229 | +void postHeapProfBegin(void)
|
|
| 1223 | 1230 | {
|
| 1224 | 1231 | ACQUIRE_LOCK(&eventBufMutex);
|
| 1225 | 1232 | PROFILING_FLAGS *flags = &RtsFlags.ProfFlags;
|
| ... | ... | @@ -1244,7 +1251,8 @@ void postHeapProfBegin(StgWord8 profile_id) |
| 1244 | 1251 | CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
|
| 1245 | 1252 | postEventHeader(&eventBuf, EVENT_HEAP_PROF_BEGIN);
|
| 1246 | 1253 | postPayloadSize(&eventBuf, len);
|
| 1247 | - postWord8(&eventBuf, profile_id);
|
|
| 1254 | + // See Note [Profile ID].
|
|
| 1255 | + postWord8(&eventBuf, 0);
|
|
| 1248 | 1256 | postWord64(&eventBuf, TimeToNS(flags->heapProfileInterval));
|
| 1249 | 1257 | postWord32(&eventBuf, getHeapProfBreakdown());
|
| 1250 | 1258 | postStringLen(&eventBuf, flags->modSelector, modSelector_len);
|
| ... | ... | @@ -1286,8 +1294,7 @@ void postHeapProfSampleEnd(StgInt era) |
| 1286 | 1294 | RELEASE_LOCK(&eventBufMutex);
|
| 1287 | 1295 | }
|
| 1288 | 1296 | |
| 1289 | -void postHeapProfSampleString(StgWord8 profile_id,
|
|
| 1290 | - const char *label,
|
|
| 1297 | +void postHeapProfSampleString(const char *label,
|
|
| 1291 | 1298 | StgWord64 residency)
|
| 1292 | 1299 | {
|
| 1293 | 1300 | ACQUIRE_LOCK(&eventBufMutex);
|
| ... | ... | @@ -1296,7 +1303,8 @@ void postHeapProfSampleString(StgWord8 profile_id, |
| 1296 | 1303 | CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
|
| 1297 | 1304 | postEventHeader(&eventBuf, EVENT_HEAP_PROF_SAMPLE_STRING);
|
| 1298 | 1305 | postPayloadSize(&eventBuf, len);
|
| 1299 | - postWord8(&eventBuf, profile_id);
|
|
| 1306 | + // See Note [Profile ID].
|
|
| 1307 | + postWord8(&eventBuf, 0);
|
|
| 1300 | 1308 | postWord64(&eventBuf, residency);
|
| 1301 | 1309 | postStringLen(&eventBuf, label, label_len);
|
| 1302 | 1310 | RELEASE_LOCK(&eventBufMutex);
|
| ... | ... | @@ -1325,8 +1333,7 @@ void postHeapProfCostCentre(StgWord32 ccID, |
| 1325 | 1333 | RELEASE_LOCK(&eventBufMutex);
|
| 1326 | 1334 | }
|
| 1327 | 1335 | |
| 1328 | -void postHeapProfSampleCostCentre(StgWord8 profile_id,
|
|
| 1329 | - CostCentreStack *stack,
|
|
| 1336 | +void postHeapProfSampleCostCentre(CostCentreStack *stack,
|
|
| 1330 | 1337 | StgWord64 residency)
|
| 1331 | 1338 | {
|
| 1332 | 1339 | ACQUIRE_LOCK(&eventBufMutex);
|
| ... | ... | @@ -1340,7 +1347,8 @@ void postHeapProfSampleCostCentre(StgWord8 profile_id, |
| 1340 | 1347 | CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
|
| 1341 | 1348 | postEventHeader(&eventBuf, EVENT_HEAP_PROF_SAMPLE_COST_CENTRE);
|
| 1342 | 1349 | postPayloadSize(&eventBuf, len);
|
| 1343 | - postWord8(&eventBuf, profile_id);
|
|
| 1350 | + // See Note [Profile ID].
|
|
| 1351 | + postWord8(&eventBuf, 0);
|
|
| 1344 | 1352 | postWord64(&eventBuf, residency);
|
| 1345 | 1353 | postWord8(&eventBuf, depth);
|
| 1346 | 1354 | for (ccs = stack;
|
| ... | ... | @@ -163,14 +163,13 @@ void postTaskMigrateEvent (EventTaskId taskId, |
| 163 | 163 | |
| 164 | 164 | void postTaskDeleteEvent (EventTaskId taskId);
|
| 165 | 165 | |
| 166 | -void postHeapProfBegin(StgWord8 profile_id);
|
|
| 166 | +void postHeapProfBegin(void);
|
|
| 167 | 167 | |
| 168 | 168 | void postHeapProfSampleBegin(StgInt era);
|
| 169 | 169 | void postHeapBioProfSampleBegin(StgInt era, StgWord64 time_ns);
|
| 170 | 170 | void postHeapProfSampleEnd(StgInt era);
|
| 171 | 171 | |
| 172 | -void postHeapProfSampleString(StgWord8 profile_id,
|
|
| 173 | - const char *label,
|
|
| 172 | +void postHeapProfSampleString(const char *label,
|
|
| 174 | 173 | StgWord64 residency);
|
| 175 | 174 | |
| 176 | 175 | #if defined(PROFILING)
|
| ... | ... | @@ -180,8 +179,7 @@ void postHeapProfCostCentre(StgWord32 ccID, |
| 180 | 179 | const char *srcloc,
|
| 181 | 180 | StgBool is_caf);
|
| 182 | 181 | |
| 183 | -void postHeapProfSampleCostCentre(StgWord8 profile_id,
|
|
| 184 | - CostCentreStack *stack,
|
|
| 182 | +void postHeapProfSampleCostCentre(CostCentreStack *stack,
|
|
| 185 | 183 | StgWord64 residency);
|
| 186 | 184 | |
| 187 | 185 | void postProfSampleCostCentre(Capability *cap,
|