
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
1903ae35 by Matthew Pickering at 2025-08-07T12:21:10+01:00
ipe: Place strings and metadata into specific .ipe section
By placing the .ipe metadata into a specific section it can be stripped
from the final binary if desired.
```
objcopy --remove-section .ipe <binary>
upx <binary>
```
Towards #21766
- - - - -
c80dd91c by Matthew Pickering at 2025-08-07T12:22:42+01:00
ipe: Place magic word at the start of entries in the .ipe section
The magic word "IPE\nIPE\n" is placed at the start of .ipe sections,
then if the section is stripped, we can check whether the section starts
with the magic word or not to determine whether there is metadata
present or not.
Towards #21766
- - - - -
cab42666 by Matthew Pickering at 2025-08-07T12:22:42+01:00
ipe: Use stable IDs for IPE entries
IPEs have historically been indexed and reported by their address.
This makes it impossible to compare profiles between runs, since the
addresses may change (due to ASLR) and also makes it tricky to separate
out the IPE map from the binary.
This small patch adds a stable identifier for each IPE entry.
The stable identifier is a single 64 bit word. The high-bits are a
per-module identifier and the low bits identify which entry in each
module.
1. When a node is added into the IPE buffer it is assigned a unique
identifier from an incrementing global counter.
2. Each entry already has an index by it's position in the
`IpeBufferListNode`.
The two are combined together by the `IPE_ENTRY_KEY` macro.
Info table profiling uses the stable identifier rather than the address
of the info table.
The benefits of this change are:
* Profiles from different runs can be easily compared
* The metadata can be extracted from the binary (via the eventlog for
example) and then stripped from the executable.
Fixes #21766
- - - - -
12 changed files:
- compiler/GHC/Cmm.hs
- compiler/GHC/CmmToAsm/PPC/Ppr.hs
- compiler/GHC/CmmToAsm/Ppr.hs
- compiler/GHC/CmmToLlvm/Data.hs
- compiler/GHC/StgToCmm/InfoTableProv.hs
- docs/users_guide/debug-info.rst
- rts/IPE.c
- rts/ProfHeap.c
- rts/eventlog/EventLog.c
- rts/include/rts/IPE.h
- testsuite/tests/rts/ipe/ipeMap.c
- testsuite/tests/rts/ipe/ipe_lib.c
Changes:
=====================================
compiler/GHC/Cmm.hs
=====================================
@@ -278,6 +278,7 @@ data SectionType
| InitArray -- .init_array on ELF, .ctor on Windows
| FiniArray -- .fini_array on ELF, .dtor on Windows
| CString
+ | IPE
| OtherSection String
deriving (Show)
@@ -298,6 +299,7 @@ sectionProtection (Section t _) = case t of
CString -> ReadOnlySection
Data -> ReadWriteSection
UninitialisedData -> ReadWriteSection
+ IPE -> ReadWriteSection
(OtherSection _) -> ReadWriteSection
{-
@@ -557,4 +559,5 @@ pprSectionType s = doubleQuotes $ case s of
InitArray -> text "initarray"
FiniArray -> text "finiarray"
CString -> text "cstring"
+ IPE -> text "ipe"
OtherSection s' -> text s'
=====================================
compiler/GHC/CmmToAsm/PPC/Ppr.hs
=====================================
@@ -285,6 +285,9 @@ pprAlignForSection platform seg = line $
Data
| ppc64 -> text ".align 3"
| otherwise -> text ".align 2"
+ IPE
+ | ppc64 -> text ".align 3"
+ | otherwise -> text ".align 2"
ReadOnlyData
| ppc64 -> text ".align 3"
| otherwise -> text ".align 2"
=====================================
compiler/GHC/CmmToAsm/Ppr.hs
=====================================
@@ -236,6 +236,10 @@ pprGNUSectionHeader config t suffix =
| OSMinGW32 <- platformOS platform
-> text ".rdata"
| otherwise -> text ".rodata.str"
+ IPE
+ | OSMinGW32 <- platformOS platform
+ -> text ".rdata"
+ | otherwise -> text ".ipe"
OtherSection _ ->
panic "PprBase.pprGNUSectionHeader: unknown section type"
flags = case t of
@@ -248,6 +252,10 @@ pprGNUSectionHeader config t suffix =
| OSMinGW32 <- platformOS platform
-> empty
| otherwise -> text ",\"aMS\"," <> sectionType platform "progbits" <> text ",1"
+ IPE
+ | OSMinGW32 <- platformOS platform
+ -> empty
+ | otherwise -> text ",\"a\"," <> sectionType platform "progbits"
_ -> empty
{-# SPECIALIZE pprGNUSectionHeader :: NCGConfig -> SectionType -> CLabel -> SDoc #-}
{-# SPECIALIZE pprGNUSectionHeader :: NCGConfig -> SectionType -> CLabel -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
@@ -262,6 +270,7 @@ pprXcoffSectionHeader t = case t of
RelocatableReadOnlyData -> text ".csect .text[PR] # RelocatableReadOnlyData"
CString -> text ".csect .text[PR] # CString"
UninitialisedData -> text ".csect .data[BS]"
+ IPE -> text ".csect .text[PR] #IPE"
_ -> panic "pprXcoffSectionHeader: unknown section type"
{-# SPECIALIZE pprXcoffSectionHeader :: SectionType -> SDoc #-}
{-# SPECIALIZE pprXcoffSectionHeader :: SectionType -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
@@ -276,6 +285,7 @@ pprDarwinSectionHeader t = case t of
InitArray -> text ".section\t__DATA,__mod_init_func,mod_init_funcs"
FiniArray -> panic "pprDarwinSectionHeader: fini not supported"
CString -> text ".section\t__TEXT,__cstring,cstring_literals"
+ IPE -> text ".const"
OtherSection _ -> panic "pprDarwinSectionHeader: unknown section type"
{-# SPECIALIZE pprDarwinSectionHeader :: SectionType -> SDoc #-}
{-# SPECIALIZE pprDarwinSectionHeader :: SectionType -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
=====================================
compiler/GHC/CmmToLlvm/Data.hs
=====================================
@@ -145,7 +145,7 @@ llvmSectionType p t = case t of
CString -> case platformOS p of
OSMinGW32 -> fsLit ".rdata$str"
_ -> fsLit ".rodata.str"
-
+ IPE -> fsLit ".ipe"
InitArray -> panic "llvmSectionType: InitArray"
FiniArray -> panic "llvmSectionType: FiniArray"
OtherSection _ -> panic "llvmSectionType: unknown section type"
=====================================
compiler/GHC/StgToCmm/InfoTableProv.hs
=====================================
@@ -10,6 +10,7 @@ import qualified Data.ByteString.Internal as BSI
import GHC.IO (unsafePerformIO)
#endif
+import Data.Char
import GHC.Prelude
import GHC.Platform
import GHC.Types.SrcLoc (pprUserRealSpan, srcSpanFile)
@@ -66,6 +67,28 @@ construction, the 'compressed' field of each IPE buffer list node is examined.
If the field indicates that the data has been compressed, the entry data and
strings table are decompressed before continuing with the normal IPE map
construction.
+
+Note [IPE Stripping and magic words]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For systems which support ELF executables:
+
+The metadata part of IPE info is placed into a separate ELF section (.ipe).
+This can then be stripped afterwards if you don't require the metadata
+
+```
+-- Remove the section
+objcopy --remove-section .ipe <your-exe>
+-- Repack and compress the executable
+upx <your-exe>
+```
+
+The .ipe section starts with a magic 64-bit word "IPE\0IPE\0`, encoded as ascii.
+
+The RTS checks to see if the .ipe section starts with the magic word. If the
+section has been stripped then it won't start with the magic word and the
+metadata won't be accessible for the info tables.
+
-}
emitIpeBufferListNode ::
@@ -110,7 +133,7 @@ emitIpeBufferListNode this_mod ents dus0 = do
strings_bytes = compress defaultCompressionLevel uncompressed_strings
strings :: [CmmStatic]
- strings = [CmmString strings_bytes]
+ strings = [CmmString (ipe_header `mappend` strings_bytes)]
uncompressed_entries :: BS.ByteString
uncompressed_entries = toIpeBufferEntries (platformByteOrder platform) cg_ipes
@@ -119,16 +142,42 @@ emitIpeBufferListNode this_mod ents dus0 = do
entries_bytes = compress defaultCompressionLevel uncompressed_entries
entries :: [CmmStatic]
- entries = [CmmString entries_bytes]
+ entries = [CmmString (ipe_header `mappend` entries_bytes)]
ipe_buffer_lbl :: CLabel
ipe_buffer_lbl = mkIPELabel this_mod
+ -- A string which fits into one 64-bit word.
+ ipe_header_word :: Word64
+ ipe_header_word = stringToWord64BE "IPE\0IPE\0"
+
+ -- Convert 8 bytes to Word64 using big-endian interpretation
+ stringToWord64BE :: String -> Word64
+ stringToWord64BE = foldl' (\acc b -> GHC.Prelude.shiftL acc 8 .|. fromIntegral (ord b)) 0
+
+ -- A magic word we can use to see if the IPE information has been stripped
+ -- or not
+ -- See Note [IPE Stripping and magic words]
+ -- When read then literally the string should read IPE\0IPE\0 in hex dumps.
+ --
+ -- There is some complexity here to turn this into a ByteString rather than
+ -- a simpler CmmStaticLit, since the unregistered backend does not cope well
+ -- with CmmStaticsRaw being a mixure of CmmStaticLit and CmmString.
+ ipe_header :: BS.ByteString
+ ipe_header = BSL.toStrict . BSB.toLazyByteString $
+ case platformByteOrder platform of
+ LittleEndian -> BSB.word64LE ipe_header_word
+ BigEndian -> BSB.word64BE ipe_header_word
+
+
ipe_buffer_node :: [CmmStatic]
ipe_buffer_node = map CmmStaticLit
[ -- 'next' field
zeroCLit platform
+ -- 'node_id' field
+ , zeroCLit platform
+
-- 'compressed' field
, int do_compress
@@ -164,12 +213,12 @@ emitIpeBufferListNode this_mod ents dus0 = do
-- Emit the strings table
emitDecl $ CmmData
- (Section Data strings_lbl)
+ (Section IPE strings_lbl)
(CmmStaticsRaw strings_lbl strings)
-- Emit the list of IPE buffer entries
emitDecl $ CmmData
- (Section Data entries_lbl)
+ (Section IPE entries_lbl)
(CmmStaticsRaw entries_lbl entries)
-- Emit the IPE buffer list node
=====================================
docs/users_guide/debug-info.rst
=====================================
@@ -391,6 +391,17 @@ to a source location. This lookup table is generated by using the ``-finfo-table
In a test compiling GHC itself, the size of the :ghc-flag:`-finfo-table-map`
enabled build results was reduced by over 20% when compression was enabled.
+ The metadata for ``-finfo-table-map`` is stored in the ``.ipe`` section on
+ ELF platforms. The ``.ipe`` section can be removed from the binary after compilation::
+
+ objcopy --remove-section .ipe <binary>
+ upx <binary>
+
+ You can first compile your application with ``-finfo-table-map``, extract
+ the contents of the map (by using the eventlog), strip the ``.ipe`` section
+ and then use the extracted data to interpret a ``-hi`` profile from the stripped
+ binary.
+
:since: 9.10
:implies: :ghc-flag:`-finfo-table-map-with-stack`
:implies: :ghc-flag:`-finfo-table-map-with-fallback`
=====================================
rts/IPE.c
=====================================
@@ -62,6 +62,22 @@ entry's containing IpeBufferListNode and its index in that node.
When the user looks up an IPE entry, we convert it to the user-facing
InfoProvEnt representation.
+Note [Stable identifiers for IPE entries]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Each IPE entry is given a stable identifier which remains the same across
+different runs of the executable (unlike the address of the info table).
+
+The identifier is a 64-bit word which consists of two parts.
+
+* The high 32-bits are a per-node identifier.
+* The low 32-bits are the index of the entry in the node.
+
+When a node is queued in the pending list by `registerInfoProvList` it is
+given a unique identifier from an incrementing global variable.
+
+The unique key can be computed by using the `IPE_ENTRY_KEY` macro.
+
*/
typedef struct {
@@ -69,6 +85,13 @@ typedef struct {
uint32_t idx;
} IpeMapEntry;
+// See Note [Stable identifiers for IPE entries]
+#define IPE_ENTRY_KEY(entry) \
+ MAKE_IPE_KEY((entry).node->node_id, (entry).idx)
+
+#define MAKE_IPE_KEY(module_id, idx) \
+ ((((uint64_t)(module_id)) << 32) | ((uint64_t)(idx)))
+
#if defined(THREADED_RTS)
static Mutex ipeMapLock;
#endif
@@ -78,9 +101,22 @@ static HashTable *ipeMap = NULL;
// Accessed atomically
static IpeBufferListNode *ipeBufferList = NULL;
+// A global counter which is used to give an IPE entry a unique value across runs.
+static StgWord next_module_id = 1; // Start at 1 to reserve 0 as "invalid"
+
static void decompressIPEBufferListNodeIfCompressed(IpeBufferListNode*);
static void updateIpeMap(void);
+// Check whether the IpeBufferListNode has the relevant magic words.
+// See Note [IPE Stripping and magic words]
+static inline bool ipe_node_valid(const IpeBufferListNode *node) {
+ return node &&
+ node->entries_block &&
+ node->string_table_block &&
+ node->entries_block->magic == IPE_MAGIC_WORD &&
+ node->string_table_block->magic == IPE_MAGIC_WORD;
+}
+
#if defined(THREADED_RTS)
void initIpe(void) { initMutex(&ipeMapLock); }
@@ -99,11 +135,12 @@ static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, uint32_t i
{
CHECK(idx < node->count);
CHECK(!node->compressed);
- const char *strings = node->string_table;
- const IpeBufferEntry *ent = &node->entries[idx];
+ const char *strings = node->string_table_block->string_table;
+ const IpeBufferEntry *ent = &node->entries_block->entries[idx];
return (InfoProvEnt) {
.info = node->tables[idx],
.prov = {
+ .info_prov_id = MAKE_IPE_KEY(node->node_id, idx),
.table_name = &strings[ent->table_name],
.closure_desc = ent->closure_desc,
.ty_desc = &strings[ent->ty_desc],
@@ -121,19 +158,23 @@ static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, uint32_t i
static void traceIPEFromHashTable(void *data STG_UNUSED, StgWord key STG_UNUSED,
const void *value) {
const IpeMapEntry *map_ent = (const IpeMapEntry *)value;
- const InfoProvEnt ipe = ipeBufferEntryToIpe(map_ent->node, map_ent->idx);
- traceIPE(&ipe);
+ if (ipe_node_valid(map_ent->node)){
+ const InfoProvEnt ipe = ipeBufferEntryToIpe(map_ent->node, map_ent->idx);
+ traceIPE(&ipe);
+ }
}
void dumpIPEToEventLog(void) {
// Dump pending entries
IpeBufferListNode *node = RELAXED_LOAD(&ipeBufferList);
while (node != NULL) {
- decompressIPEBufferListNodeIfCompressed(node);
+ if (ipe_node_valid(node)){
+ decompressIPEBufferListNodeIfCompressed(node);
- for (uint32_t i = 0; i < node->count; i++) {
- const InfoProvEnt ent = ipeBufferEntryToIpe(node, i);
- traceIPE(&ent);
+ for (uint32_t i = 0; i < node->count; i++) {
+ const InfoProvEnt ent = ipeBufferEntryToIpe(node, i);
+ traceIPE(&ent);
+ }
}
node = node->next;
}
@@ -165,11 +206,30 @@ ipeMapLock; we instead use atomic CAS operations to add to the list.
A performance test for IPE registration and lookup can be found here:
https://gitlab.haskell.org/ghc/ghc/-/merge_requests/5724#note_370806
+
+Note that IPEs are still regiestered even if the .ipe section is stripped. That's
+because you may still want to query what the unique identifier for an info table is
+so it can be reconciled with previously extracted metadata information. For example,
+when `-hi` profiling or using `whereFrom`.
+
*/
void registerInfoProvList(IpeBufferListNode *node) {
+
+ // Grab a fresh module_id
+ uint32_t module_id;
+ StgWord temp_module_id;
+ while (true) {
+ temp_module_id = next_module_id;
+ if (cas(&next_module_id, temp_module_id, temp_module_id+1) == temp_module_id) {
+ module_id = (uint32_t) temp_module_id;
+ break;
+ }
+
+ }
while (true) {
IpeBufferListNode *old = RELAXED_LOAD(&ipeBufferList);
node->next = old;
+ node->node_id = module_id;
if (cas_ptr((volatile void **) &ipeBufferList, old, node) == (void *) old) {
return;
}
@@ -183,7 +243,7 @@ void formatClosureDescIpe(const InfoProvEnt *ipe_buf, char *str_buf) {
bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out) {
updateIpeMap();
IpeMapEntry *map_ent = (IpeMapEntry *) lookupHashTable(ipeMap, (StgWord)info);
- if (map_ent) {
+ if (map_ent && ipe_node_valid(map_ent->node)) {
*out = ipeBufferEntryToIpe(map_ent->node, map_ent->idx);
return true;
} else {
@@ -191,6 +251,18 @@ bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out) {
}
}
+// Returns 0 when the info table is not present in the info table map.
+// See Note [Stable identifiers for IPE entries]
+uint64_t lookupIPEId(const StgInfoTable *info) {
+ updateIpeMap();
+ IpeMapEntry *map_ent = (IpeMapEntry *) lookupHashTable(ipeMap, (StgWord)(info));
+ if (map_ent){
+ return IPE_ENTRY_KEY(*map_ent);
+ } else {
+ return 0;
+ }
+}
+
void updateIpeMap(void) {
// Check if there's any work at all. If not so, we can circumvent locking,
// which decreases performance.
=====================================
rts/ProfHeap.c
=====================================
@@ -23,6 +23,7 @@
#include "Printer.h"
#include "Trace.h"
#include "sm/GCThread.h"
+#include "IPE.h"
#include