[Git][ghc/ghc][wip/stable-ipe-info] ipe: Use stable IDs for IPE entries

Matthew Pickering pushed to branch wip/stable-ipe-info at Glasgow Haskell Compiler / GHC Commits: 3d4ce6f7 by Matthew Pickering at 2025-07-02T12:53:18+01:00 ipe: Use stable IDs for IPE entries IPEs have historically been indexed and reported by their address. This makes it impossible to compare profiles between runs, since the addresses may change (due to ASLR) and also makes it tricky to separate out the IPE map from the binary. This small patch adds a stable identifier for each IPE entry. The stable identifier is a single 64-bit word. The high bits are a per-module identifier and the low bits identify which entry in each module. 1. When a node is added into the IPE buffer it is assigned a unique identifier from an incrementing global counter. 2. Each entry already has an index by its position in the `IpeBufferListNode`. The two are combined together by the `IPE_ENTRY_KEY` macro. Info table profiling uses the stable identifier rather than the address of the info table. The benefits of this change are: * Profiles from different runs can be easily compared * The metadata can be extracted from the binary (via the eventlog for example) and then stripped from the executable. Fixes #21766 - - - - - 5 changed files: - compiler/GHC/StgToCmm/InfoTableProv.hs - rts/IPE.c - rts/ProfHeap.c - rts/eventlog/EventLog.c - rts/include/rts/IPE.h Changes: ===================================== compiler/GHC/StgToCmm/InfoTableProv.hs ===================================== @@ -158,6 +158,9 @@ emitIpeBufferListNode this_mod ents dus0 = do [ -- 'next' field zeroCLit platform + -- 'node_id' field + , zeroCLit platform + -- 'compressed' field , int do_compress ===================================== rts/IPE.c ===================================== @@ -62,6 +62,22 @@ entry's containing IpeBufferListNode and its index in that node. When the user looks up an IPE entry, we convert it to the user-facing InfoProvEnt representation. 
+Note [Stable identifiers for IPE entries] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Each IPE entry is given a stable identifier which remains the same across +different runs of the executable (unlike the address of the info table). + +The identifier is a 64-bit word which consists of two parts. + +* The high 32-bits are a per-node identifier. +* The low 32-bits are the index of the entry in the node. + +When a node is queued in the pending list by `registerInfoProvList` it is +given a unique identifier from an incrementing global variable. + +The unique key can be computed by using the `IPE_ENTRY_KEY` macro. + */ typedef struct { @@ -69,6 +85,13 @@ typedef struct { uint32_t idx; } IpeMapEntry; +// See Note [Stable identifiers for IPE entries] +#define IPE_ENTRY_KEY(entry) \ + MAKE_IPE_KEY((entry).node->node_id, (entry).idx) + +#define MAKE_IPE_KEY(module_id, idx) \ + ((((uint64_t)(module_id)) << 32) | ((uint64_t)(idx))) + #if defined(THREADED_RTS) static Mutex ipeMapLock; #endif @@ -78,6 +101,9 @@ static HashTable *ipeMap = NULL; // Accessed atomically static IpeBufferListNode *ipeBufferList = NULL; +// A global counter which is used to give an IPE entry a unique value across runs. 
+static StgWord next_module_id = 1; // Start at 1 to reserve 0 as "invalid" + static void decompressIPEBufferListNodeIfCompressed(IpeBufferListNode*); static void updateIpeMap(void); @@ -114,6 +140,7 @@ static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, uint32_t i return (InfoProvEnt) { .info = node->tables[idx], .prov = { + .info_prov_id = MAKE_IPE_KEY(node->node_id, idx), .table_name = &strings[ent->table_name], .closure_desc = ent->closure_desc, .ty_desc = &strings[ent->ty_desc], @@ -181,9 +208,22 @@ A performance test for IPE registration and lookup can be found here: https://gitlab.haskell.org/ghc/ghc/-/merge_requests/5724#note_370806 */ void registerInfoProvList(IpeBufferListNode *node) { + + // Grab a fresh module_id + uint32_t module_id; + StgWord temp_module_id; + while (true) { + temp_module_id = next_module_id; + if (cas(&next_module_id, temp_module_id, temp_module_id+1) == temp_module_id) { + module_id = (uint32_t) temp_module_id; + break; + } + + } while (true) { IpeBufferListNode *old = RELAXED_LOAD(&ipeBufferList); node->next = old; + node->node_id = module_id; if (cas_ptr((volatile void **) &ipeBufferList, old, node) == (void *) old) { return; } @@ -205,6 +245,18 @@ bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out) { } } +// Returns 0 when the info table is not present in the info table map. +// See Note [Stable identifiers for IPE entries] +uint64_t lookupIPEId(const StgInfoTable *info) { + updateIpeMap(); + IpeMapEntry *map_ent = (IpeMapEntry *) lookupHashTable(ipeMap, (StgWord)(info)); + if (map_ent){ + return IPE_ENTRY_KEY(*map_ent); + } else { + return 0; + } +} + void updateIpeMap(void) { // Check if there's any work at all. If not so, we can circumvent locking, // which decreases performance. 
===================================== rts/ProfHeap.c ===================================== @@ -230,9 +230,15 @@ closureIdentity( const StgClosure *p ) return closure_type_names[info->type]; } } - case HEAP_BY_INFO_TABLE: { - return get_itbl(p); + case HEAP_BY_INFO_TABLE: + { + uint64_t table_id = lookupIPEId(p->header.info); + if (table_id) { + return (void *) table_id; + } else { + return (void *) 0xffffffff; } + } default: barf("closureIdentity"); ===================================== rts/eventlog/EventLog.c ===================================== @@ -1472,7 +1472,7 @@ void postIPE(const InfoProvEnt *ipe) CHECK(!ensureRoomForVariableEvent(&eventBuf, len)); postEventHeader(&eventBuf, EVENT_IPE); postPayloadSize(&eventBuf, len); - postWord64(&eventBuf, (StgWord) INFO_PTR_TO_STRUCT(ipe->info)); + postWord64(&eventBuf, (StgWord) (ipe->prov.info_prov_id)); postStringLen(&eventBuf, ipe->prov.table_name, table_name_len); postStringLen(&eventBuf, closure_desc_buf, closure_desc_len); postStringLen(&eventBuf, ipe->prov.ty_desc, ty_desc_len); ===================================== rts/include/rts/IPE.h ===================================== @@ -14,6 +14,7 @@ #pragma once typedef struct InfoProv_ { + uint64_t info_prov_id; const char *table_name; uint32_t closure_desc; // closure type const char *ty_desc; @@ -68,18 +69,21 @@ GHC_STATIC_ASSERT(sizeof(IpeBufferEntry) % (WORD_SIZE_IN_BITS / 8) == 0, "sizeof #define IPE_MAGIC_WORD 0x4950450049504500UL typedef struct { - StgWord magic; // Must be IPE_MAGIC_WORD + StgWord64 magic; // Must be IPE_MAGIC_WORD IpeBufferEntry entries[]; // Flexible array member } IpeBufferEntryBlock; typedef struct { - StgWord magic; // Must be IPE_MAGIC_WORD + StgWord64 magic; // Must be IPE_MAGIC_WORD char string_table[]; // Flexible array member for string table } IpeStringTableBlock; typedef struct IpeBufferListNode_ { struct IpeBufferListNode_ *next; + // This field is filled in when the node is registered. 
+ uint32_t node_id; + // Everything below is read-only and generated by the codegen // This flag should be treated as a boolean @@ -112,6 +116,8 @@ void formatClosureDescIpe(const InfoProvEnt *ipe_buf, char *str_buf); // Returns true on success, initializes `out`. bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out); +uint64_t lookupIPEId(const StgInfoTable *info); + #if defined(DEBUG) void printIPE(const StgInfoTable *info); #endif View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3d4ce6f7aaf2cf8ac431a77c4151d9af... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3d4ce6f7aaf2cf8ac431a77c4151d9af... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Matthew Pickering (@mpickering)