Matthew Pickering pushed to branch wip/stable-ipe-info at Glasgow Haskell Compiler / GHC
Commits:
-
59f62065
by Matthew Pickering at 2025-07-03T16:01:26+01:00
5 changed files:
- compiler/GHC/StgToCmm/InfoTableProv.hs
- rts/IPE.c
- rts/ProfHeap.c
- rts/eventlog/EventLog.c
- rts/include/rts/IPE.h
Changes:
| ... | ... | @@ -158,6 +158,9 @@ emitIpeBufferListNode this_mod ents dus0 = do |
| 158 | 158 | [ -- 'next' field
|
| 159 | 159 | zeroCLit platform
|
| 160 | 160 | |
| 161 | + -- 'node_id' field
|
|
| 162 | + , zeroCLit platform
|
|
| 163 | + |
|
| 161 | 164 | -- 'compressed' field
|
| 162 | 165 | , int do_compress
|
| 163 | 166 |
| ... | ... | @@ -62,6 +62,22 @@ entry's containing IpeBufferListNode and its index in that node. |
| 62 | 62 | When the user looks up an IPE entry, we convert it to the user-facing
|
| 63 | 63 | InfoProvEnt representation.
|
| 64 | 64 | |
| 65 | +Note [Stable identifiers for IPE entries]
|
|
| 66 | +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
| 67 | + |
|
| 68 | +Each IPE entry is given a stable identifier which remains the same across
|
|
| 69 | +different runs of the executable (unlike the address of the info table).
|
|
| 70 | + |
|
| 71 | +The identifier is a 64-bit word which consists of two parts.
|
|
| 72 | + |
|
| 73 | +* The high 32 bits are a per-node identifier.
|
|
| 74 | +* The low 32 bits are the index of the entry in the node.
|
|
| 75 | + |
|
| 76 | +When a node is queued in the pending list by `registerInfoProvList` it is
|
|
| 77 | +given a unique identifier from an incrementing global variable.
|
|
| 78 | + |
|
| 79 | +The unique key can be computed by using the `IPE_ENTRY_KEY` macro.
|
|
| 80 | + |
|
| 65 | 81 | */
|
| 66 | 82 | |
| 67 | 83 | typedef struct {
|
| ... | ... | @@ -69,6 +85,13 @@ typedef struct { |
| 69 | 85 | uint32_t idx;
|
| 70 | 86 | } IpeMapEntry;
|
| 71 | 87 | |
| 88 | +// See Note [Stable identifiers for IPE entries]
|
|
| 89 | +#define IPE_ENTRY_KEY(entry) \
|
|
| 90 | + MAKE_IPE_KEY((entry).node->node_id, (entry).idx)
|
|
| 91 | + |
|
| 92 | +#define MAKE_IPE_KEY(module_id, idx) \
|
|
| 93 | + ((((uint64_t)(module_id)) << 32) | ((uint64_t)(idx)))
|
|
| 94 | + |
|
| 72 | 95 | #if defined(THREADED_RTS)
|
| 73 | 96 | static Mutex ipeMapLock;
|
| 74 | 97 | #endif
|
| ... | ... | @@ -78,6 +101,9 @@ static HashTable *ipeMap = NULL; |
| 78 | 101 | // Accessed atomically
|
| 79 | 102 | static IpeBufferListNode *ipeBufferList = NULL;
|
| 80 | 103 | |
| 104 | +// A global counter from which each registered IpeBufferListNode is given its node_id; see Note [Stable identifiers for IPE entries].
|
|
| 105 | +static StgWord next_module_id = 1; // Start at 1 to reserve 0 as "invalid"
|
|
| 106 | + |
|
| 81 | 107 | static void decompressIPEBufferListNodeIfCompressed(IpeBufferListNode*);
|
| 82 | 108 | static void updateIpeMap(void);
|
| 83 | 109 | |
| ... | ... | @@ -114,6 +140,7 @@ static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, uint32_t i |
| 114 | 140 | return (InfoProvEnt) {
|
| 115 | 141 | .info = node->tables[idx],
|
| 116 | 142 | .prov = {
|
| 143 | + .info_prov_id = MAKE_IPE_KEY(node->node_id, idx),
|
|
| 117 | 144 | .table_name = &strings[ent->table_name],
|
| 118 | 145 | .closure_desc = ent->closure_desc,
|
| 119 | 146 | .ty_desc = &strings[ent->ty_desc],
|
| ... | ... | @@ -181,9 +208,22 @@ A performance test for IPE registration and lookup can be found here: |
| 181 | 208 | https://gitlab.haskell.org/ghc/ghc/-/merge_requests/5724#note_370806
|
| 182 | 209 | */
|
| 183 | 210 | void registerInfoProvList(IpeBufferListNode *node) {
|
| 211 | + |
|
| 212 | + // Grab a fresh module_id
|
|
| 213 | + uint32_t module_id;
|
|
| 214 | + StgWord temp_module_id;
|
|
| 215 | + while (true) {
|
|
| 216 | + temp_module_id = next_module_id;
|
|
| 217 | + if (cas(&next_module_id, temp_module_id, temp_module_id+1) == temp_module_id) {
|
|
| 218 | + module_id = (uint32_t) temp_module_id;
|
|
| 219 | + break;
|
|
| 220 | + }
|
|
| 221 | + |
|
| 222 | + }
|
|
| 184 | 223 | while (true) {
|
| 185 | 224 | IpeBufferListNode *old = RELAXED_LOAD(&ipeBufferList);
|
| 186 | 225 | node->next = old;
|
| 226 | + node->node_id = module_id;
|
|
| 187 | 227 | if (cas_ptr((volatile void **) &ipeBufferList, old, node) == (void *) old) {
|
| 188 | 228 | return;
|
| 189 | 229 | }
|
| ... | ... | @@ -205,6 +245,18 @@ bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out) { |
| 205 | 245 | }
|
| 206 | 246 | }
|
| 207 | 247 | |
| 248 | +// Returns 0 when the info table is not present in the info table map.
|
|
| 249 | +// See Note [Stable identifiers for IPE entries]
|
|
| 250 | +uint64_t lookupIPEId(const StgInfoTable *info) {
|
|
| 251 | + updateIpeMap();
|
|
| 252 | + IpeMapEntry *map_ent = (IpeMapEntry *) lookupHashTable(ipeMap, (StgWord)(info));
|
|
| 253 | + if (map_ent){
|
|
| 254 | + return IPE_ENTRY_KEY(*map_ent);
|
|
| 255 | + } else {
|
|
| 256 | + return 0;
|
|
| 257 | + }
|
|
| 258 | +}
|
|
| 259 | + |
|
| 208 | 260 | void updateIpeMap(void) {
|
| 209 | 261 | // Check if there's any work at all. If not so, we can circumvent locking,
|
| 210 | 262 | // which decreases performance.
|
| ... | ... | @@ -23,6 +23,7 @@ |
| 23 | 23 | #include "Printer.h"
|
| 24 | 24 | #include "Trace.h"
|
| 25 | 25 | #include "sm/GCThread.h"
|
| 26 | +#include "IPE.h"
|
|
| 26 | 27 | |
| 27 | 28 | #include <fs_rts.h>
|
| 28 | 29 | #include <string.h>
|
| ... | ... | @@ -230,9 +231,10 @@ closureIdentity( const StgClosure *p ) |
| 230 | 231 | return closure_type_names[info->type];
|
| 231 | 232 | }
|
| 232 | 233 | }
|
| 233 | - case HEAP_BY_INFO_TABLE: {
|
|
| 234 | - return get_itbl(p);
|
|
| 235 | - }
|
|
| 234 | + case HEAP_BY_INFO_TABLE:
|
|
| 235 | + {
|
|
| 236 | + return (void *) (p->header.info);
|
|
| 237 | + }
|
|
| 236 | 238 | |
| 237 | 239 | default:
|
| 238 | 240 | barf("closureIdentity");
|
| ... | ... | @@ -853,6 +855,20 @@ aggregateCensusInfo( void ) |
| 853 | 855 | }
|
| 854 | 856 | #endif
|
| 855 | 857 | |
| 858 | +static void
|
|
| 859 | +recordIPEHeapSample(FILE *hp_file, uint64_t table_id, size_t count)
|
|
| 860 | +{
|
|
| 861 | + // Print to heap profile file
|
|
| 862 | + fprintf(hp_file, "0x%" PRIx64, table_id);
|
|
| 863 | + |
|
| 864 | + // Create label string for tracing
|
|
| 865 | + char str[100];
|
|
| 866 | + sprintf(str, "0x%" PRIx64, table_id);
|
|
| 867 | + |
|
| 868 | + // Emit the profiling sample (convert count to bytes)
|
|
| 869 | + traceHeapProfSampleString(0, str, count * sizeof(W_));
|
|
| 870 | +}
|
|
| 871 | + |
|
| 856 | 872 | /* -----------------------------------------------------------------------------
|
| 857 | 873 | * Print out the results of a heap census.
|
| 858 | 874 | * -------------------------------------------------------------------------- */
|
| ... | ... | @@ -915,6 +931,11 @@ dumpCensus( Census *census ) |
| 915 | 931 | }
|
| 916 | 932 | #endif
|
| 917 | 933 | |
| 934 | + // Census entries which we need to group together.
|
|
| 935 | + // Used by IPE profiling to group together bands which don't have IPE information.
|
|
| 936 | + // Printed at the end in the 0 band.
|
|
| 937 | + uint64_t uncategorised_count = 0;
|
|
| 938 | + |
|
| 918 | 939 | for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
|
| 919 | 940 | |
| 920 | 941 | #if defined(PROFILING)
|
| ... | ... | @@ -944,12 +965,15 @@ dumpCensus( Census *census ) |
| 944 | 965 | traceHeapProfSampleString(0, (char *)ctr->identity,
|
| 945 | 966 | count * sizeof(W_));
|
| 946 | 967 | break;
|
| 947 | - case HEAP_BY_INFO_TABLE:
|
|
| 948 | - fprintf(hp_file, "%p", ctr->identity);
|
|
| 949 | - char str[100];
|
|
| 950 | - sprintf(str, "%p", ctr->identity);
|
|
| 951 | - traceHeapProfSampleString(0, str, count * sizeof(W_));
|
|
| 968 | + case HEAP_BY_INFO_TABLE: {
|
|
| 969 | + uint64_t table_id = lookupIPEId(ctr->identity);
|
|
| 970 | + if (! table_id) {
|
|
| 971 | + uncategorised_count += count;
|
|
| 972 | + continue;
|
|
| 973 | + }
|
|
| 974 | + recordIPEHeapSample(hp_file, table_id, count);
|
|
| 952 | 975 | break;
|
| 976 | + }
|
|
| 953 | 977 | #if defined(PROFILING)
|
| 954 | 978 | case HEAP_BY_CCS:
|
| 955 | 979 | fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
|
| ... | ... | @@ -999,9 +1023,21 @@ dumpCensus( Census *census ) |
| 999 | 1023 | barf("dumpCensus; doHeapProfile");
|
| 1000 | 1024 | }
|
| 1001 | 1025 | |
| 1026 | + |
|
| 1027 | + |
|
| 1002 | 1028 | fprintf(hp_file, "\t%" FMT_Word "\n", (W_)count * sizeof(W_));
|
| 1003 | 1029 | }
|
| 1004 | 1030 | |
| 1031 | + // Print the uncategorised data into the 0 band for info table profiling.
|
|
| 1032 | + switch (RtsFlags.ProfFlags.doHeapProfile) {
|
|
| 1033 | + case HEAP_BY_INFO_TABLE:
|
|
| 1034 | + recordIPEHeapSample(hp_file, 0, uncategorised_count);
|
|
| 1035 | + break;
|
|
| 1036 | + default:
|
|
| 1037 | + ASSERT(uncategorised_count == 0);
|
|
| 1038 | + break;
|
|
| 1039 | + }
|
|
| 1040 | + |
|
| 1005 | 1041 | traceHeapProfSampleEnd(era);
|
| 1006 | 1042 | printSample(false, census->time);
|
| 1007 | 1043 |
| ... | ... | @@ -1472,7 +1472,7 @@ void postIPE(const InfoProvEnt *ipe) |
| 1472 | 1472 | CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
|
| 1473 | 1473 | postEventHeader(&eventBuf, EVENT_IPE);
|
| 1474 | 1474 | postPayloadSize(&eventBuf, len);
|
| 1475 | - postWord64(&eventBuf, (StgWord) INFO_PTR_TO_STRUCT(ipe->info));
|
|
| 1475 | + postWord64(&eventBuf, (StgWord) (ipe->prov.info_prov_id));
|
|
| 1476 | 1476 | postStringLen(&eventBuf, ipe->prov.table_name, table_name_len);
|
| 1477 | 1477 | postStringLen(&eventBuf, closure_desc_buf, closure_desc_len);
|
| 1478 | 1478 | postStringLen(&eventBuf, ipe->prov.ty_desc, ty_desc_len);
|
| ... | ... | @@ -14,6 +14,7 @@ |
| 14 | 14 | #pragma once
|
| 15 | 15 | |
| 16 | 16 | typedef struct InfoProv_ {
|
| 17 | + uint64_t info_prov_id;
|
|
| 17 | 18 | const char *table_name;
|
| 18 | 19 | uint32_t closure_desc; // closure type
|
| 19 | 20 | const char *ty_desc;
|
| ... | ... | @@ -67,19 +68,33 @@ GHC_STATIC_ASSERT(sizeof(IpeBufferEntry) % (WORD_SIZE_IN_BITS / 8) == 0, "sizeof |
| 67 | 68 | // See Note [IPE Stripping and magic words]
|
| 68 | 69 | #define IPE_MAGIC_WORD 0x4950450049504500UL
|
| 69 | 70 | |
| 71 | +// Heap profiling currently requires a 32 bit pointer, so for now just truncate
|
|
| 72 | +// the key to fit. It should still be big enough.
|
|
| 73 | +#if SIZEOF_VOID_P == 4
|
|
| 74 | +// On 32-bit systems: keep lower 16 bits of module_id and idx
|
|
| 75 | +#define IPE_PROF_KEY(key64) \
|
|
| 76 | + (uint32_t)((((key64) >> 16) & 0xFFFF0000) | ((key64) & 0x0000FFFF))
|
|
| 77 | +#else
|
|
| 78 | +// On 64-bit systems: use full key
|
|
| 79 | +#define IPE_PROF_KEY(key64) (key64)
|
|
| 80 | +#endif
|
|
| 81 | + |
|
| 70 | 82 | typedef struct {
|
| 71 | - StgWord magic; // Must be IPE_MAGIC_WORD
|
|
| 83 | + StgWord64 magic; // Must be IPE_MAGIC_WORD
|
|
| 72 | 84 | IpeBufferEntry entries[]; // Flexible array member
|
| 73 | 85 | } IpeBufferEntryBlock;
|
| 74 | 86 | |
| 75 | 87 | typedef struct {
|
| 76 | - StgWord magic; // Must be IPE_MAGIC_WORD
|
|
| 88 | + StgWord64 magic; // Must be IPE_MAGIC_WORD
|
|
| 77 | 89 | char string_table[]; // Flexible array member for string table
|
| 78 | 90 | } IpeStringTableBlock;
|
| 79 | 91 | |
| 80 | 92 | typedef struct IpeBufferListNode_ {
|
| 81 | 93 | struct IpeBufferListNode_ *next;
|
| 82 | 94 | |
| 95 | + // This field is filled in when the node is registered.
|
|
| 96 | + uint32_t node_id;
|
|
| 97 | + |
|
| 83 | 98 | // Everything below is read-only and generated by the codegen
|
| 84 | 99 | |
| 85 | 100 | // This flag should be treated as a boolean
|
| ... | ... | @@ -112,6 +127,8 @@ void formatClosureDescIpe(const InfoProvEnt *ipe_buf, char *str_buf); |
| 112 | 127 | // Returns true on success, initializes `out`.
|
| 113 | 128 | bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out);
|
| 114 | 129 | |
| 130 | +uint64_t lookupIPEId(const StgInfoTable *info);
|
|
| 131 | + |
|
| 115 | 132 | #if defined(DEBUG)
|
| 116 | 133 | void printIPE(const StgInfoTable *info);
|
| 117 | 134 | #endif |