Matthew Pickering pushed to branch wip/stable-ipe-info at Glasgow Haskell Compiler / GHC
Commits:
-
59f62065
by Matthew Pickering at 2025-07-03T16:01:26+01:00
5 changed files:
- compiler/GHC/StgToCmm/InfoTableProv.hs
- rts/IPE.c
- rts/ProfHeap.c
- rts/eventlog/EventLog.c
- rts/include/rts/IPE.h
Changes:
... | ... | @@ -158,6 +158,9 @@ emitIpeBufferListNode this_mod ents dus0 = do |
158 | 158 | [ -- 'next' field
|
159 | 159 | zeroCLit platform
|
160 | 160 | |
161 | + -- 'node_id' field
|
|
162 | + , zeroCLit platform
|
|
163 | + |
|
161 | 164 | -- 'compressed' field
|
162 | 165 | , int do_compress
|
163 | 166 |
... | ... | @@ -62,6 +62,22 @@ entry's containing IpeBufferListNode and its index in that node. |
62 | 62 | When the user looks up an IPE entry, we convert it to the user-facing
|
63 | 63 | InfoProvEnt representation.
|
64 | 64 | |
65 | +Note [Stable identifiers for IPE entries]
|
|
66 | +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
67 | + |
|
68 | +Each IPE entry is given a stable identifier which remains the same across
|
|
69 | +different runs of the executable (unlike the address of the info table).
|
|
70 | + |
|
71 | +The identifier is a 64-bit word which consists of two parts.
|
|
72 | + |
|
73 | +* The high 32 bits are a per-node identifier.
|
|
74 | +* The low 32 bits are the index of the entry in the node.
|
|
75 | + |
|
76 | +When a node is queued in the pending list by `registerInfoProvList` it is
|
|
77 | +given a unique identifier from an incrementing global variable, so an identifier is only stable across runs if nodes are registered in the same order each time.
|
|
78 | + |
|
79 | +The unique key can be computed by using the `IPE_ENTRY_KEY` macro.
|
|
80 | + |
|
65 | 81 | */
|
66 | 82 | |
67 | 83 | typedef struct {
|
... | ... | @@ -69,6 +85,13 @@ typedef struct { |
69 | 85 | uint32_t idx;
|
70 | 86 | } IpeMapEntry;
|
71 | 87 | |
// See Note [Stable identifiers for IPE entries]
//
// MAKE_IPE_KEY packs a node identifier and an entry index into one 64-bit key:
// bits 63..32 hold `module_id`, bits 31..0 hold `idx`. Both operands are
// narrowed to 32 bits before being widened, so a signed or over-wide `idx`
// cannot spill into the node-identifier half of the key.
#define MAKE_IPE_KEY(module_id, idx) \
    ((((uint64_t)(uint32_t)(module_id)) << 32) | ((uint64_t)(uint32_t)(idx)))

// Key of a map entry: reads `(entry).node->node_id` and `(entry).idx`.
// `entry` is expanded twice, so pass an expression without side effects.
#define IPE_ENTRY_KEY(entry) \
    MAKE_IPE_KEY((entry).node->node_id, (entry).idx)
|
|
94 | + |
|
72 | 95 | #if defined(THREADED_RTS)
|
73 | 96 | static Mutex ipeMapLock;
|
74 | 97 | #endif
|
... | ... | @@ -78,6 +101,9 @@ static HashTable *ipeMap = NULL; |
78 | 101 | // Accessed atomically
|
79 | 102 | static IpeBufferListNode *ipeBufferList = NULL;
|
80 | 103 | |
104 | +// A global counter from which each registered IpeBufferListNode is given its node_id.
|
|
105 | +static StgWord next_module_id = 1; // Start at 1 to reserve 0 as "invalid"
|
|
106 | + |
|
81 | 107 | static void decompressIPEBufferListNodeIfCompressed(IpeBufferListNode*);
|
82 | 108 | static void updateIpeMap(void);
|
83 | 109 | |
... | ... | @@ -114,6 +140,7 @@ static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, uint32_t i |
114 | 140 | return (InfoProvEnt) {
|
115 | 141 | .info = node->tables[idx],
|
116 | 142 | .prov = {
|
143 | + .info_prov_id = MAKE_IPE_KEY(node->node_id, idx),
|
|
117 | 144 | .table_name = &strings[ent->table_name],
|
118 | 145 | .closure_desc = ent->closure_desc,
|
119 | 146 | .ty_desc = &strings[ent->ty_desc],
|
... | ... | @@ -181,9 +208,22 @@ A performance test for IPE registration and lookup can be found here: |
181 | 208 | https://gitlab.haskell.org/ghc/ghc/-/merge_requests/5724#note_370806
|
182 | 209 | */
|
183 | 210 | void registerInfoProvList(IpeBufferListNode *node) {
|
211 | + |
|
212 | + // Grab a fresh module_id
|
|
213 | + uint32_t module_id;
|
|
214 | + StgWord temp_module_id;
|
|
215 | + while (true) {
|
|
216 | + temp_module_id = next_module_id;
|
|
217 | + if (cas(&next_module_id, temp_module_id, temp_module_id+1) == temp_module_id) {
|
|
218 | + module_id = (uint32_t) temp_module_id;
|
|
219 | + break;
|
|
220 | + }
|
|
221 | + |
|
222 | + }
|
|
184 | 223 | while (true) {
|
185 | 224 | IpeBufferListNode *old = RELAXED_LOAD(&ipeBufferList);
|
186 | 225 | node->next = old;
|
226 | + node->node_id = module_id;
|
|
187 | 227 | if (cas_ptr((volatile void **) &ipeBufferList, old, node) == (void *) old) {
|
188 | 228 | return;
|
189 | 229 | }
|
... | ... | @@ -205,6 +245,18 @@ bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out) { |
205 | 245 | }
|
206 | 246 | }
|
207 | 247 | |
248 | +// Returns 0 when the info table is not present in the info table map.
|
|
249 | +// See Note [Stable identifiers for IPE entries]
|
|
250 | +uint64_t lookupIPEId(const StgInfoTable *info) {
|
|
251 | + updateIpeMap();
|
|
252 | + IpeMapEntry *map_ent = (IpeMapEntry *) lookupHashTable(ipeMap, (StgWord)(info));
|
|
253 | + if (map_ent){
|
|
254 | + return IPE_ENTRY_KEY(*map_ent);
|
|
255 | + } else {
|
|
256 | + return 0;
|
|
257 | + }
|
|
258 | +}
|
|
259 | + |
|
208 | 260 | void updateIpeMap(void) {
|
209 | 261 | // Check if there's any work at all. If not so, we can circumvent locking,
|
210 | 262 | // which decreases performance.
|
... | ... | @@ -23,6 +23,7 @@ |
23 | 23 | #include "Printer.h"
|
24 | 24 | #include "Trace.h"
|
25 | 25 | #include "sm/GCThread.h"
|
26 | +#include "IPE.h"
|
|
26 | 27 | |
27 | 28 | #include <fs_rts.h>
|
28 | 29 | #include <string.h>
|
... | ... | @@ -230,9 +231,10 @@ closureIdentity( const StgClosure *p ) |
230 | 231 | return closure_type_names[info->type];
|
231 | 232 | }
|
232 | 233 | }
|
233 | - case HEAP_BY_INFO_TABLE: {
|
|
234 | - return get_itbl(p);
|
|
235 | - }
|
|
234 | + case HEAP_BY_INFO_TABLE:
|
|
235 | + {
|
|
236 | + return (void *) (p->header.info);
|
|
237 | + }
|
|
236 | 238 | |
237 | 239 | default:
|
238 | 240 | barf("closureIdentity");
|
... | ... | @@ -853,6 +855,20 @@ aggregateCensusInfo( void ) |
853 | 855 | }
|
854 | 856 | #endif
|
855 | 857 | |
858 | +static void
|
|
859 | +recordIPEHeapSample(FILE *hp_file, uint64_t table_id, size_t count)
|
|
860 | +{
|
|
861 | + // Print to heap profile file
|
|
862 | + fprintf(hp_file, "0x%" PRIx64, table_id);
|
|
863 | + |
|
864 | + // Create label string for tracing
|
|
865 | + char str[100];
|
|
866 | + sprintf(str, "0x%" PRIx64, table_id);
|
|
867 | + |
|
868 | + // Emit the profiling sample (convert count to bytes)
|
|
869 | + traceHeapProfSampleString(0, str, count * sizeof(W_));
|
|
870 | +}
|
|
871 | + |
|
856 | 872 | /* -----------------------------------------------------------------------------
|
857 | 873 | * Print out the results of a heap census.
|
858 | 874 | * -------------------------------------------------------------------------- */
|
... | ... | @@ -915,6 +931,11 @@ dumpCensus( Census *census ) |
915 | 931 | }
|
916 | 932 | #endif
|
917 | 933 | |
934 | + // Census entries which we need to group together.
|
|
935 | + // Used by IPE profiling to group together bands which don't have IPE information.
|
|
936 | + // Printed at the end in the 0 band.
|
|
937 | + uint64_t uncategorised_count = 0;
|
|
938 | + |
|
918 | 939 | for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
|
919 | 940 | |
920 | 941 | #if defined(PROFILING)
|
... | ... | @@ -944,12 +965,15 @@ dumpCensus( Census *census ) |
944 | 965 | traceHeapProfSampleString(0, (char *)ctr->identity,
|
945 | 966 | count * sizeof(W_));
|
946 | 967 | break;
|
947 | - case HEAP_BY_INFO_TABLE:
|
|
948 | - fprintf(hp_file, "%p", ctr->identity);
|
|
949 | - char str[100];
|
|
950 | - sprintf(str, "%p", ctr->identity);
|
|
951 | - traceHeapProfSampleString(0, str, count * sizeof(W_));
|
|
968 | + case HEAP_BY_INFO_TABLE: {
|
|
969 | + uint64_t table_id = lookupIPEId(ctr->identity);
|
|
970 | + if (! table_id) {
|
|
971 | + uncategorised_count += count;
|
|
972 | + continue;
|
|
973 | + }
|
|
974 | + recordIPEHeapSample(hp_file, table_id, count);
|
|
952 | 975 | break;
|
976 | + }
|
|
953 | 977 | #if defined(PROFILING)
|
954 | 978 | case HEAP_BY_CCS:
|
955 | 979 | fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
|
... | ... | @@ -999,9 +1023,21 @@ dumpCensus( Census *census ) |
999 | 1023 | barf("dumpCensus; doHeapProfile");
|
1000 | 1024 | }
|
1001 | 1025 | |
1026 | + |
|
1027 | + |
|
1002 | 1028 | fprintf(hp_file, "\t%" FMT_Word "\n", (W_)count * sizeof(W_));
|
1003 | 1029 | }
|
1004 | 1030 | |
1031 | + // Print the uncategorised data into the 0 band for info table profiling.
|
|
1032 | + switch (RtsFlags.ProfFlags.doHeapProfile) {
|
|
1033 | + case HEAP_BY_INFO_TABLE:
|
|
1034 | + recordIPEHeapSample(hp_file, 0, uncategorised_count);
|
|
1035 | + break;
|
|
1036 | + default:
|
|
1037 | + ASSERT(uncategorised_count == 0);
|
|
1038 | + break;
|
|
1039 | + }
|
|
1040 | + |
|
1005 | 1041 | traceHeapProfSampleEnd(era);
|
1006 | 1042 | printSample(false, census->time);
|
1007 | 1043 |
... | ... | @@ -1472,7 +1472,7 @@ void postIPE(const InfoProvEnt *ipe) |
1472 | 1472 | CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
|
1473 | 1473 | postEventHeader(&eventBuf, EVENT_IPE);
|
1474 | 1474 | postPayloadSize(&eventBuf, len);
|
1475 | - postWord64(&eventBuf, (StgWord) INFO_PTR_TO_STRUCT(ipe->info));
|
|
1475 | + postWord64(&eventBuf, (StgWord) (ipe->prov.info_prov_id));
|
|
1476 | 1476 | postStringLen(&eventBuf, ipe->prov.table_name, table_name_len);
|
1477 | 1477 | postStringLen(&eventBuf, closure_desc_buf, closure_desc_len);
|
1478 | 1478 | postStringLen(&eventBuf, ipe->prov.ty_desc, ty_desc_len);
|
... | ... | @@ -14,6 +14,7 @@ |
14 | 14 | #pragma once
|
15 | 15 | |
16 | 16 | typedef struct InfoProv_ {
|
17 | + uint64_t info_prov_id;
|
|
17 | 18 | const char *table_name;
|
18 | 19 | uint32_t closure_desc; // closure type
|
19 | 20 | const char *ty_desc;
|
... | ... | @@ -67,19 +68,33 @@ GHC_STATIC_ASSERT(sizeof(IpeBufferEntry) % (WORD_SIZE_IN_BITS / 8) == 0, "sizeof |
67 | 68 | // See Note [IPE Stripping and magic words]
|
68 | 69 | #define IPE_MAGIC_WORD 0x4950450049504500UL
|
69 | 70 | |
// IPE_PROF_KEY(key64): the form of a 64-bit IPE key handed to heap profiling.
// Heap profiling currently needs a key that fits in a pointer, so on 32-bit
// platforms the key is truncated to fit; it should still be big enough.
#if SIZEOF_VOID_P != 4
// Pointers are wide enough: the full key is used unchanged.
#define IPE_PROF_KEY(key64) (key64)
#else
// 32-bit pointers: keep the lower 16 bits of module_id (moved into the upper
// half of the result) and the lower 16 bits of idx.
#define IPE_PROF_KEY(key64) \
    (uint32_t)((((key64) >> 16) & 0xFFFF0000) | ((key64) & 0x0000FFFF))
#endif
|
|
81 | + |
|
70 | 82 | typedef struct {
|
71 | - StgWord magic; // Must be IPE_MAGIC_WORD
|
|
83 | + StgWord64 magic; // Must be IPE_MAGIC_WORD
|
|
72 | 84 | IpeBufferEntry entries[]; // Flexible array member
|
73 | 85 | } IpeBufferEntryBlock;
|
74 | 86 | |
75 | 87 | typedef struct {
|
76 | - StgWord magic; // Must be IPE_MAGIC_WORD
|
|
88 | + StgWord64 magic; // Must be IPE_MAGIC_WORD
|
|
77 | 89 | char string_table[]; // Flexible array member for string table
|
78 | 90 | } IpeStringTableBlock;
|
79 | 91 | |
80 | 92 | typedef struct IpeBufferListNode_ {
|
81 | 93 | struct IpeBufferListNode_ *next;
|
82 | 94 | |
95 | + // This field is filled in when the node is registered.
|
|
96 | + uint32_t node_id;
|
|
97 | + |
|
83 | 98 | // Everything below is read-only and generated by the codegen
|
84 | 99 | |
85 | 100 | // This flag should be treated as a boolean
|
... | ... | @@ -112,6 +127,8 @@ void formatClosureDescIpe(const InfoProvEnt *ipe_buf, char *str_buf); |
112 | 127 | // Returns true on success, initializes `out`.
|
113 | 128 | bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out);
|
114 | 129 | |
130 | +uint64_t lookupIPEId(const StgInfoTable *info);
|
|
131 | + |
|
115 | 132 | #if defined(DEBUG)
|
116 | 133 | void printIPE(const StgInfoTable *info);
|
117 | 134 | #endif |