Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC

Commits:

12 changed files:

Changes:

  • compiler/GHC/Cmm.hs
    ... ... @@ -278,6 +278,7 @@ data SectionType
    278 278
       | InitArray           -- .init_array on ELF, .ctor on Windows
    
    279 279
       | FiniArray           -- .fini_array on ELF, .dtor on Windows
    
    280 280
       | CString
    
    281
    +  | IPE
    
    281 282
       | OtherSection String
    
    282 283
       deriving (Show)
    
    283 284
     
    
    ... ... @@ -298,6 +299,7 @@ sectionProtection (Section t _) = case t of
    298 299
         CString                 -> ReadOnlySection
    
    299 300
         Data                    -> ReadWriteSection
    
    300 301
         UninitialisedData       -> ReadWriteSection
    
    302
    +    IPE                     -> ReadWriteSection
    
    301 303
         (OtherSection _)        -> ReadWriteSection
    
    302 304
     
    
    303 305
     {-
    
    ... ... @@ -557,4 +559,5 @@ pprSectionType s = doubleQuotes $ case s of
    557 559
       InitArray               -> text "initarray"
    
    558 560
       FiniArray               -> text "finiarray"
    
    559 561
       CString                 -> text "cstring"
    
    562
    +  IPE                     -> text "ipe"
    
    560 563
       OtherSection s'         -> text s'

  • compiler/GHC/CmmToAsm/PPC/Ppr.hs
    ... ... @@ -285,6 +285,9 @@ pprAlignForSection platform seg = line $
    285 285
            Data
    
    286 286
             | ppc64          -> text ".align 3"
    
    287 287
             | otherwise      -> text ".align 2"
    
    288
    +       IPE
    
    289
    +        | ppc64          -> text ".align 3"
    
    290
    +        | otherwise      -> text ".align 2"
    
    288 291
            ReadOnlyData
    
    289 292
             | ppc64          -> text ".align 3"
    
    290 293
             | otherwise      -> text ".align 2"
    

  • compiler/GHC/CmmToAsm/Ppr.hs
    ... ... @@ -236,6 +236,10 @@ pprGNUSectionHeader config t suffix =
    236 236
             | OSMinGW32 <- platformOS platform
    
    237 237
                         -> text ".rdata"
    
    238 238
             | otherwise -> text ".rodata.str"
    
    239
    +      IPE
    
    240
    +        | OSMinGW32 <- platformOS platform
    
    241
    +                    -> text ".rdata"
    
    242
    +        | otherwise -> text ".ipe"
    
    239 243
           OtherSection _ ->
    
    240 244
             panic "PprBase.pprGNUSectionHeader: unknown section type"
    
    241 245
         flags = case t of
    
    ... ... @@ -248,6 +252,10 @@ pprGNUSectionHeader config t suffix =
    248 252
             | OSMinGW32 <- platformOS platform
    
    249 253
                         -> empty
    
    250 254
             | otherwise -> text ",\"aMS\"," <> sectionType platform "progbits" <> text ",1"
    
    255
    +      IPE
    
    256
    +        | OSMinGW32 <- platformOS platform
    
    257
    +                    -> empty
    
    258
    +        | otherwise -> text ",\"a\"," <> sectionType platform "progbits"
    
    251 259
           _ -> empty
    
    252 260
     {-# SPECIALIZE pprGNUSectionHeader :: NCGConfig -> SectionType -> CLabel -> SDoc #-}
    
    253 261
     {-# SPECIALIZE pprGNUSectionHeader :: NCGConfig -> SectionType -> CLabel -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
    
    ... ... @@ -262,6 +270,7 @@ pprXcoffSectionHeader t = case t of
    262 270
       RelocatableReadOnlyData -> text ".csect .text[PR] # RelocatableReadOnlyData"
    
    263 271
       CString                 -> text ".csect .text[PR] # CString"
    
    264 272
       UninitialisedData       -> text ".csect .data[BS]"
    
    273
    +  IPE                     -> text ".csect .text[PR] #IPE"
    
    265 274
       _                       -> panic "pprXcoffSectionHeader: unknown section type"
    
    266 275
     {-# SPECIALIZE pprXcoffSectionHeader :: SectionType -> SDoc #-}
    
    267 276
     {-# SPECIALIZE pprXcoffSectionHeader :: SectionType -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
    
    ... ... @@ -276,6 +285,7 @@ pprDarwinSectionHeader t = case t of
    276 285
       InitArray               -> text ".section\t__DATA,__mod_init_func,mod_init_funcs"
    
    277 286
       FiniArray               -> panic "pprDarwinSectionHeader: fini not supported"
    
    278 287
       CString                 -> text ".section\t__TEXT,__cstring,cstring_literals"
    
    288
    +  IPE                     -> text ".const"
    
    279 289
       OtherSection _          -> panic "pprDarwinSectionHeader: unknown section type"
    
    280 290
     {-# SPECIALIZE pprDarwinSectionHeader :: SectionType -> SDoc #-}
    
    281 291
     {-# SPECIALIZE pprDarwinSectionHeader :: SectionType -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable

  • compiler/GHC/CmmToLlvm/Data.hs
    ... ... @@ -145,7 +145,7 @@ llvmSectionType p t = case t of
    145 145
         CString                 -> case platformOS p of
    
    146 146
                                      OSMinGW32 -> fsLit ".rdata$str"
    
    147 147
                                      _         -> fsLit ".rodata.str"
    
    148
    -
    
    148
    +    IPE                     -> fsLit ".ipe"
    
    149 149
         InitArray               -> panic "llvmSectionType: InitArray"
    
    150 150
         FiniArray               -> panic "llvmSectionType: FiniArray"
    
    151 151
         OtherSection _          -> panic "llvmSectionType: unknown section type"
    

  • compiler/GHC/StgToCmm/InfoTableProv.hs
    ... ... @@ -10,6 +10,7 @@ import qualified Data.ByteString.Internal as BSI
    10 10
     import GHC.IO (unsafePerformIO)
    
    11 11
     #endif
    
    12 12
     
    
    13
    +import Data.Char
    
    13 14
     import GHC.Prelude
    
    14 15
     import GHC.Platform
    
    15 16
     import GHC.Types.SrcLoc (pprUserRealSpan, srcSpanFile)
    
    ... ... @@ -66,6 +67,28 @@ construction, the 'compressed' field of each IPE buffer list node is examined.
    66 67
     If the field indicates that the data has been compressed, the entry data and
    
    67 68
     strings table are decompressed before continuing with the normal IPE map
    
    68 69
     construction.
    
    70
    +
    
    71
    +Note [IPE Stripping and magic words]
    
    72
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    73
    +
    
    74
    +For systems which support ELF executables:
    
    75
    +
    
    76
    +The metadata part of IPE info is placed into a separate ELF section (.ipe).
    
    77
    +This can then be stripped afterwards if you don't require the metadata
    
    78
    +
    
    79
    +```
    
    80
    +-- Remove the section
    
    81
    +objcopy --remove-section .ipe <your-exe>
    
    82
    +-- Repack and compress the executable
    
    83
    +upx <your-exe>
    
    84
    +```
    
    85
    +
    
    86
    +The .ipe section starts with a magic 64-bit word "IPE\0IPE\0`, encoded as ascii.
    
    87
    +
    
    88
    +The RTS checks to see if the .ipe section starts with the magic word. If the
    
    89
    +section has been stripped then it won't start with the magic word and the
    
    90
    +metadata won't be accessible for the info tables.
    
    91
    +
    
    69 92
     -}
    
    70 93
     
    
    71 94
     emitIpeBufferListNode ::
    
    ... ... @@ -110,7 +133,7 @@ emitIpeBufferListNode this_mod ents dus0 = do
    110 133
             strings_bytes = compress defaultCompressionLevel uncompressed_strings
    
    111 134
     
    
    112 135
             strings :: [CmmStatic]
    
    113
    -        strings = [CmmString strings_bytes]
    
    136
    +        strings = [CmmString (ipe_header `mappend` strings_bytes)]
    
    114 137
     
    
    115 138
             uncompressed_entries :: BS.ByteString
    
    116 139
             uncompressed_entries = toIpeBufferEntries (platformByteOrder platform) cg_ipes
    
    ... ... @@ -119,16 +142,42 @@ emitIpeBufferListNode this_mod ents dus0 = do
    119 142
             entries_bytes = compress defaultCompressionLevel uncompressed_entries
    
    120 143
     
    
    121 144
             entries :: [CmmStatic]
    
    122
    -        entries = [CmmString entries_bytes]
    
    145
    +        entries = [CmmString (ipe_header `mappend` entries_bytes)]
    
    123 146
     
    
    124 147
             ipe_buffer_lbl :: CLabel
    
    125 148
             ipe_buffer_lbl = mkIPELabel this_mod
    
    126 149
     
    
    150
    +        -- A string which fits into one 64-bit word.
    
    151
    +        ipe_header_word :: Word64
    
    152
    +        ipe_header_word = stringToWord64BE "IPE\0IPE\0"
    
    153
    +
    
    154
    +        -- Convert 8 bytes to Word64 using big-endian interpretation
    
    155
    +        stringToWord64BE :: String -> Word64
    
    156
    +        stringToWord64BE = foldl' (\acc b -> GHC.Prelude.shiftL acc 8 .|. fromIntegral (ord b)) 0
    
    157
    +
    
    158
    +        -- A magic word we can use to see if the IPE information has been stripped
    
    159
    +        -- or not
    
    160
    +        -- See Note [IPE Stripping and magic words]
    
    161
    +        -- When read then literally the string should read IPE\0IPE\0 in hex dumps.
    
    162
    +        --
    
    163
    +        -- There is some complexity here to turn this into a ByteString rather than
    
    164
    +        -- a simpler CmmStaticLit, since the unregistered backend does not cope well
    
    165
    +        -- with CmmStaticsRaw being a mixure of CmmStaticLit and CmmString.
    
    166
    +        ipe_header :: BS.ByteString
    
    167
    +        ipe_header = BSL.toStrict . BSB.toLazyByteString $
    
    168
    +                       case platformByteOrder platform of
    
    169
    +                         LittleEndian -> BSB.word64LE ipe_header_word
    
    170
    +                         BigEndian -> BSB.word64BE ipe_header_word
    
    171
    +
    
    172
    +
    
    127 173
             ipe_buffer_node :: [CmmStatic]
    
    128 174
             ipe_buffer_node = map CmmStaticLit
    
    129 175
               [ -- 'next' field
    
    130 176
                 zeroCLit platform
    
    131 177
     
    
    178
    +            -- 'node_id' field
    
    179
    +          , zeroCLit platform
    
    180
    +
    
    132 181
                 -- 'compressed' field
    
    133 182
               , int do_compress
    
    134 183
     
    
    ... ... @@ -164,12 +213,12 @@ emitIpeBufferListNode this_mod ents dus0 = do
    164 213
     
    
    165 214
         -- Emit the strings table
    
    166 215
         emitDecl $ CmmData
    
    167
    -      (Section Data strings_lbl)
    
    216
    +      (Section IPE strings_lbl)
    
    168 217
           (CmmStaticsRaw strings_lbl strings)
    
    169 218
     
    
    170 219
         -- Emit the list of IPE buffer entries
    
    171 220
         emitDecl $ CmmData
    
    172
    -      (Section Data entries_lbl)
    
    221
    +      (Section IPE entries_lbl)
    
    173 222
           (CmmStaticsRaw entries_lbl entries)
    
    174 223
     
    
    175 224
         -- Emit the IPE buffer list node
    

  • docs/users_guide/debug-info.rst
    ... ... @@ -391,6 +391,17 @@ to a source location. This lookup table is generated by using the ``-finfo-table
    391 391
         In a test compiling GHC itself, the size of the :ghc-flag:`-finfo-table-map`
    
    392 392
         enabled build results was reduced by over 20% when compression was enabled.
    
    393 393
     
    
    394
    +    The metadata for ``-finfo-table-map`` is stored in the ``.ipe`` section on
    
    395
    +    ELF platforms. The ``.ipe`` section can be removed from the binary after compilation::
    
    396
    +
    
    397
    +      objcopy --remove-section .ipe <binary>
    
    398
    +      upx <binary>
    
    399
    +
    
    400
    +    You can first compile your application with ``-finfo-table-map``, extract
    
    401
    +    the contents of the map (by using the eventlog), strip the ``.ipe`` section
    
    402
    +    and then use the extracted data to interpret a ``-hi`` profile from the stripped
    
    403
    +    binary.
    
    404
    +
    
    394 405
         :since: 9.10
    
    395 406
         :implies: :ghc-flag:`-finfo-table-map-with-stack`
    
    396 407
         :implies: :ghc-flag:`-finfo-table-map-with-fallback`
    

  • rts/IPE.c
    ... ... @@ -62,6 +62,22 @@ entry's containing IpeBufferListNode and its index in that node.
    62 62
     When the user looks up an IPE entry, we convert it to the user-facing
    
    63 63
     InfoProvEnt representation.
    
    64 64
     
    
    65
    +Note [Stable identifiers for IPE entries]
    
    66
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    67
    +
    
    68
    +Each IPE entry is given a stable identifier which remains the same across
    
    69
    +different runs of the executable (unlike the address of the info table).
    
    70
    +
    
    71
    +The identifier is a 64-bit word which consists of two parts.
    
    72
    +
    
    73
    +* The high 32-bits are a per-node identifier.
    
    74
    +* The low 32-bits are the index of the entry in the node.
    
    75
    +
    
    76
    +When a node is queued in the pending list by `registerInfoProvList` it is
    
    77
    +given a unique identifier from an incrementing global variable.
    
    78
    +
    
    79
    +The unique key can be computed by using the `IPE_ENTRY_KEY` macro.
    
    80
    +
    
    65 81
     */
    
    66 82
     
    
    67 83
     typedef struct {
    
    ... ... @@ -69,6 +85,13 @@ typedef struct {
    69 85
         uint32_t idx;
    
    70 86
     } IpeMapEntry;
    
    71 87
     
    
    88
    +// See Note [Stable identifiers for IPE entries]
    
    89
    +#define IPE_ENTRY_KEY(entry) \
    
    90
    +    MAKE_IPE_KEY((entry).node->node_id, (entry).idx)
    
    91
    +
    
    92
    +#define MAKE_IPE_KEY(module_id, idx) \
    
    93
    +    ((((uint64_t)(module_id)) << 32) | ((uint64_t)(idx)))
    
    94
    +
    
    72 95
     #if defined(THREADED_RTS)
    
    73 96
     static Mutex ipeMapLock;
    
    74 97
     #endif
    
    ... ... @@ -78,9 +101,22 @@ static HashTable *ipeMap = NULL;
    78 101
     // Accessed atomically
    
    79 102
     static IpeBufferListNode *ipeBufferList = NULL;
    
    80 103
     
    
    104
    +// A global counter which is used to give an IPE entry a unique value across runs.
    
    105
    +static StgWord next_module_id = 1; // Start at 1 to reserve 0 as "invalid"
    
    106
    +
    
    81 107
     static void decompressIPEBufferListNodeIfCompressed(IpeBufferListNode*);
    
    82 108
     static void updateIpeMap(void);
    
    83 109
     
    
    110
    +// Check whether the IpeBufferListNode has the relevant magic words.
    
    111
    +// See Note [IPE Stripping and magic words]
    
    112
    +static inline bool ipe_node_valid(const IpeBufferListNode *node) {
    
    113
    +    return node &&
    
    114
    +           node->entries_block &&
    
    115
    +           node->string_table_block &&
    
    116
    +           node->entries_block->magic == IPE_MAGIC_WORD &&
    
    117
    +           node->string_table_block->magic == IPE_MAGIC_WORD;
    
    118
    +}
    
    119
    +
    
    84 120
     #if defined(THREADED_RTS)
    
    85 121
     
    
    86 122
     void initIpe(void) { initMutex(&ipeMapLock); }
    
    ... ... @@ -99,11 +135,12 @@ static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, uint32_t i
    99 135
     {
    
    100 136
         CHECK(idx < node->count);
    
    101 137
         CHECK(!node->compressed);
    
    102
    -    const char *strings = node->string_table;
    
    103
    -    const IpeBufferEntry *ent = &node->entries[idx];
    
    138
    +    const char *strings = node->string_table_block->string_table;
    
    139
    +    const IpeBufferEntry *ent = &node->entries_block->entries[idx];
    
    104 140
         return (InfoProvEnt) {
    
    105 141
                 .info = node->tables[idx],
    
    106 142
                 .prov = {
    
    143
    +                .info_prov_id  = MAKE_IPE_KEY(node->node_id, idx),
    
    107 144
                     .table_name = &strings[ent->table_name],
    
    108 145
                     .closure_desc = ent->closure_desc,
    
    109 146
                     .ty_desc = &strings[ent->ty_desc],
    
    ... ... @@ -121,19 +158,23 @@ static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, uint32_t i
    121 158
     static void traceIPEFromHashTable(void *data STG_UNUSED, StgWord key STG_UNUSED,
    
    122 159
                                       const void *value) {
    
    123 160
         const IpeMapEntry *map_ent = (const IpeMapEntry *)value;
    
    124
    -    const InfoProvEnt ipe = ipeBufferEntryToIpe(map_ent->node, map_ent->idx);
    
    125
    -    traceIPE(&ipe);
    
    161
    +    if (ipe_node_valid(map_ent->node)){
    
    162
    +      const InfoProvEnt ipe = ipeBufferEntryToIpe(map_ent->node, map_ent->idx);
    
    163
    +      traceIPE(&ipe);
    
    164
    +    }
    
    126 165
     }
    
    127 166
     
    
    128 167
     void dumpIPEToEventLog(void) {
    
    129 168
         // Dump pending entries
    
    130 169
         IpeBufferListNode *node = RELAXED_LOAD(&ipeBufferList);
    
    131 170
         while (node != NULL) {
    
    132
    -        decompressIPEBufferListNodeIfCompressed(node);
    
    171
    +        if (ipe_node_valid(node)){
    
    172
    +          decompressIPEBufferListNodeIfCompressed(node);
    
    133 173
     
    
    134
    -        for (uint32_t i = 0; i < node->count; i++) {
    
    135
    -            const InfoProvEnt ent = ipeBufferEntryToIpe(node, i);
    
    136
    -            traceIPE(&ent);
    
    174
    +          for (uint32_t i = 0; i < node->count; i++) {
    
    175
    +              const InfoProvEnt ent = ipeBufferEntryToIpe(node, i);
    
    176
    +              traceIPE(&ent);
    
    177
    +          }
    
    137 178
             }
    
    138 179
             node = node->next;
    
    139 180
         }
    
    ... ... @@ -165,11 +206,30 @@ ipeMapLock; we instead use atomic CAS operations to add to the list.
    165 206
     
    
    166 207
     A performance test for IPE registration and lookup can be found here:
    
    167 208
     https://gitlab.haskell.org/ghc/ghc/-/merge_requests/5724#note_370806
    
    209
    +
    
    210
    +Note that IPEs are still regiestered even if the .ipe section is stripped. That's
    
    211
    +because you may still want to query what the unique identifier for an info table is
    
    212
    +so it can be reconciled with previously extracted metadata information. For example,
    
    213
    +when `-hi` profiling or using `whereFrom`.
    
    214
    +
    
    168 215
     */
    
    169 216
     void registerInfoProvList(IpeBufferListNode *node) {
    
    217
    +
    
    218
    +        // Grab a fresh module_id
    
    219
    +    uint32_t module_id;
    
    220
    +    StgWord temp_module_id;
    
    221
    +    while (true) {
    
    222
    +        temp_module_id = next_module_id;
    
    223
    +        if (cas(&next_module_id, temp_module_id, temp_module_id+1) == temp_module_id) {
    
    224
    +            module_id = (uint32_t) temp_module_id;
    
    225
    +            break;
    
    226
    +        }
    
    227
    +
    
    228
    +    }
    
    170 229
         while (true) {
    
    171 230
             IpeBufferListNode *old = RELAXED_LOAD(&ipeBufferList);
    
    172 231
             node->next = old;
    
    232
    +        node->node_id = module_id;
    
    173 233
             if (cas_ptr((volatile void **) &ipeBufferList, old, node) == (void *) old) {
    
    174 234
                 return;
    
    175 235
             }
    
    ... ... @@ -183,7 +243,7 @@ void formatClosureDescIpe(const InfoProvEnt *ipe_buf, char *str_buf) {
    183 243
     bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out) {
    
    184 244
         updateIpeMap();
    
    185 245
         IpeMapEntry *map_ent = (IpeMapEntry *) lookupHashTable(ipeMap, (StgWord)info);
    
    186
    -    if (map_ent) {
    
    246
    +    if (map_ent && ipe_node_valid(map_ent->node)) {
    
    187 247
             *out = ipeBufferEntryToIpe(map_ent->node, map_ent->idx);
    
    188 248
             return true;
    
    189 249
         } else {
    
    ... ... @@ -191,6 +251,18 @@ bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out) {
    191 251
         }
    
    192 252
     }
    
    193 253
     
    
    254
    +// Returns 0 when the info table is not present in the info table map.
    
    255
    +// See Note [Stable identifiers for IPE entries]
    
    256
    +uint64_t lookupIPEId(const StgInfoTable *info) {
    
    257
    +    updateIpeMap();
    
    258
    +    IpeMapEntry *map_ent = (IpeMapEntry *) lookupHashTable(ipeMap, (StgWord)(info));
    
    259
    +    if (map_ent){
    
    260
    +        return IPE_ENTRY_KEY(*map_ent);
    
    261
    +    } else {
    
    262
    +        return 0;
    
    263
    +    }
    
    264
    +}
    
    265
    +
    
    194 266
     void updateIpeMap(void) {
    
    195 267
         // Check if there's any work at all. If not so, we can circumvent locking,
    
    196 268
         // which decreases performance.
    

  • rts/ProfHeap.c
    ... ... @@ -23,6 +23,7 @@
    23 23
     #include "Printer.h"
    
    24 24
     #include "Trace.h"
    
    25 25
     #include "sm/GCThread.h"
    
    26
    +#include "IPE.h"
    
    26 27
     
    
    27 28
     #include <fs_rts.h>
    
    28 29
     #include <string.h>
    
    ... ... @@ -230,9 +231,10 @@ closureIdentity( const StgClosure *p )
    230 231
                 return closure_type_names[info->type];
    
    231 232
             }
    
    232 233
         }
    
    233
    -    case HEAP_BY_INFO_TABLE: {
    
    234
    -        return get_itbl(p);
    
    235
    -        }
    
    234
    +    case HEAP_BY_INFO_TABLE:
    
    235
    +    {
    
    236
    +        return (void *) (p->header.info);
    
    237
    +    }
    
    236 238
     
    
    237 239
         default:
    
    238 240
             barf("closureIdentity");
    
    ... ... @@ -853,6 +855,20 @@ aggregateCensusInfo( void )
    853 855
     }
    
    854 856
     #endif
    
    855 857
     
    
    858
    +static void
    
    859
    +recordIPEHeapSample(FILE *hp_file, uint64_t table_id, size_t count)
    
    860
    +{
    
    861
    +    // Print to heap profile file
    
    862
    +    fprintf(hp_file, "0x%" PRIx64, table_id);
    
    863
    +
    
    864
    +    // Create label string for tracing
    
    865
    +    char str[100];
    
    866
    +    sprintf(str, "0x%" PRIx64, table_id);
    
    867
    +
    
    868
    +    // Emit the profiling sample (convert count to bytes)
    
    869
    +    traceHeapProfSampleString(str, count * sizeof(W_));
    
    870
    +}
    
    871
    +
    
    856 872
     /* -----------------------------------------------------------------------------
    
    857 873
      * Print out the results of a heap census.
    
    858 874
      * -------------------------------------------------------------------------- */
    
    ... ... @@ -915,6 +931,11 @@ dumpCensus( Census *census )
    915 931
         }
    
    916 932
     #endif
    
    917 933
     
    
    934
    +    // Census entries which we need to group together.
    
    935
    +    // Used by IPE profiling to group together bands which don't have IPE information.
    
    936
    +    // Printing at the end in the 0 band
    
    937
    +    uint64_t uncategorised_count = 0;
    
    938
    +
    
    918 939
         for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
    
    919 940
     
    
    920 941
     #if defined(PROFILING)
    
    ... ... @@ -945,11 +966,15 @@ dumpCensus( Census *census )
    945 966
                                           count * sizeof(W_));
    
    946 967
                 break;
    
    947 968
             case HEAP_BY_INFO_TABLE:
    
    948
    -            fprintf(hp_file, "%p", ctr->identity);
    
    949
    -            char str[100];
    
    950
    -            sprintf(str, "%p", ctr->identity);
    
    951
    -            traceHeapProfSampleString(str, count * sizeof(W_));
    
    969
    +        {
    
    970
    +            uint64_t table_id = lookupIPEId(ctr->identity);
    
    971
    +            if (! table_id) {
    
    972
    +              uncategorised_count += count;
    
    973
    +              continue;
    
    974
    +            }
    
    975
    +            recordIPEHeapSample(hp_file, table_id, count);
    
    952 976
                 break;
    
    977
    +        }
    
    953 978
     #if defined(PROFILING)
    
    954 979
             case HEAP_BY_CCS:
    
    955 980
                 fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
    
    ... ... @@ -1002,6 +1027,16 @@ dumpCensus( Census *census )
    1002 1027
             fprintf(hp_file, "\t%" FMT_Word "\n", (W_)count * sizeof(W_));
    
    1003 1028
         }
    
    1004 1029
     
    
    1030
    +    // Print the unallocated data into the 0 band for info table profiling.
    
    1031
    +    switch (RtsFlags.ProfFlags.doHeapProfile) {
    
    1032
    +        case HEAP_BY_INFO_TABLE:
    
    1033
    +            recordIPEHeapSample(hp_file, 0, uncategorised_count);
    
    1034
    +            break;
    
    1035
    +        default:
    
    1036
    +            ASSERT(uncategorised_count == 0);
    
    1037
    +            break;
    
    1038
    +    }
    
    1039
    +
    
    1005 1040
         traceHeapProfSampleEnd(era);
    
    1006 1041
         printSample(false, census->time);
    
    1007 1042
     
    

  • rts/eventlog/EventLog.c
    ... ... @@ -1480,7 +1480,7 @@ void postIPE(const InfoProvEnt *ipe)
    1480 1480
         CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
    
    1481 1481
         postEventHeader(&eventBuf, EVENT_IPE);
    
    1482 1482
         postPayloadSize(&eventBuf, len);
    
    1483
    -    postWord64(&eventBuf, (StgWord) INFO_PTR_TO_STRUCT(ipe->info));
    
    1483
    +    postWord64(&eventBuf, (StgWord) (ipe->prov.info_prov_id));
    
    1484 1484
         postStringLen(&eventBuf, ipe->prov.table_name, table_name_len);
    
    1485 1485
         postStringLen(&eventBuf, closure_desc_buf, closure_desc_len);
    
    1486 1486
         postStringLen(&eventBuf, ipe->prov.ty_desc, ty_desc_len);
    

  • rts/include/rts/IPE.h
    ... ... @@ -14,6 +14,7 @@
    14 14
     #pragma once
    
    15 15
     
    
    16 16
     typedef struct InfoProv_ {
    
    17
    +    uint64_t   info_prov_id;
    
    17 18
         const char *table_name;
    
    18 19
         uint32_t closure_desc; // closure type
    
    19 20
         const char *ty_desc;
    
    ... ... @@ -63,9 +64,37 @@ typedef struct {
    63 64
     
    
    64 65
     GHC_STATIC_ASSERT(sizeof(IpeBufferEntry) % (WORD_SIZE_IN_BITS / 8) == 0, "sizeof(IpeBufferEntry) must be a multiple of the word size");
    
    65 66
     
    
    67
    +// The magic word is IPE\nIPE\n, which occupies the full 64 bit width of a word.
    
    68
    +// See Note [IPE Stripping and magic words]
    
    69
    +#define IPE_MAGIC_WORD 0x4950450049504500UL
    
    70
    +
    
    71
    +// Heap profiling currently requires a 32 bit pointer.. so for now just truncate
    
    72
    +// the key to fit. It should still be big enough.
    
    73
    +#if SIZEOF_VOID_P == 4
    
    74
    +// On 32-bit systems: keep lower 16 bits of module_id and idx
    
    75
    +#define IPE_PROF_KEY(key64) \
    
    76
    +    (uint32_t)((((key64) >> 16) & 0xFFFF0000) | ((key64) & 0x0000FFFF))
    
    77
    +#else
    
    78
    +// On 64-bit systems: use full key
    
    79
    +#define IPE_PROF_KEY(key64) (key64)
    
    80
    +#endif
    
    81
    +
    
    82
    +typedef struct {
    
    83
    +    StgWord64 magic;          // Must be IPE_MAGIC_WORD
    
    84
    +    IpeBufferEntry entries[]; // Flexible array member
    
    85
    +} IpeBufferEntryBlock;
    
    86
    +
    
    87
    +typedef struct {
    
    88
    +    StgWord64 magic;          // Must be IPE_MAGIC_WORD
    
    89
    +    char string_table[];    // Flexible array member for string table
    
    90
    +} IpeStringTableBlock;
    
    91
    +
    
    66 92
     typedef struct IpeBufferListNode_ {
    
    67 93
         struct IpeBufferListNode_ *next;
    
    68 94
     
    
    95
    +    // This field is filled in when the node is registered.
    
    96
    +    uint32_t node_id;
    
    97
    +
    
    69 98
         // Everything below is read-only and generated by the codegen
    
    70 99
     
    
    71 100
         // This flag should be treated as a boolean
    
    ... ... @@ -76,10 +105,10 @@ typedef struct IpeBufferListNode_ {
    76 105
         // When TNTC is enabled, these will point to the entry code
    
    77 106
         // not the info table itself.
    
    78 107
         const StgInfoTable **tables;
    
    79
    -    IpeBufferEntry *entries;
    
    108
    +    IpeBufferEntryBlock *entries_block;
    
    80 109
         StgWord entries_size; // decompressed size
    
    81 110
     
    
    82
    -    const char *string_table;
    
    111
    +    const IpeStringTableBlock *string_table_block;
    
    83 112
         StgWord string_table_size; // decompressed size
    
    84 113
     
    
    85 114
         // Shared by all entries
    
    ... ... @@ -98,6 +127,8 @@ void formatClosureDescIpe(const InfoProvEnt *ipe_buf, char *str_buf);
    98 127
     // Returns true on success, initializes `out`.
    
    99 128
     bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out);
    
    100 129
     
    
    130
    +uint64_t lookupIPEId(const StgInfoTable *info);
    
    131
    +
    
    101 132
     #if defined(DEBUG)
    
    102 133
     void printIPE(const StgInfoTable *info);
    
    103 134
     #endif

  • testsuite/tests/rts/ipe/ipeMap.c
    ... ... @@ -48,7 +48,8 @@ HaskellObj shouldFindOneIfItHasBeenRegistered(Capability *cap) {
    48 48
         // Allocate buffers for IPE buffer list node
    
    49 49
         IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
    
    50 50
         node->tables = malloc(sizeof(StgInfoTable *));
    
    51
    -    node->entries = malloc(sizeof(IpeBufferEntry));
    
    51
    +    node->entries_block = malloc(sizeof(StgWord64) + sizeof(IpeBufferEntry));
    
    52
    +    node->entries_block->magic = IPE_MAGIC_WORD;
    
    52 53
     
    
    53 54
         StringTable st;
    
    54 55
         init_string_table(&st);
    
    ... ... @@ -61,9 +62,13 @@ HaskellObj shouldFindOneIfItHasBeenRegistered(Capability *cap) {
    61 62
         node->compressed = 0;
    
    62 63
         node->count = 1;
    
    63 64
         node->tables[0] = get_itbl(fortyTwo);
    
    64
    -    node->entries[0] = makeAnyProvEntry(cap, &st, 42);
    
    65
    +    node->entries_block->entries[0] = makeAnyProvEntry(cap, &st, 42);
    
    65 66
         node->entries_size = sizeof(IpeBufferEntry);
    
    66
    -    node->string_table = st.buffer;
    
    67
    +
    
    68
    +    IpeStringTableBlock *string_table_block = malloc(sizeof(StgWord64) + st.size);
    
    69
    +    string_table_block->magic = IPE_MAGIC_WORD;
    
    70
    +    memcpy(string_table_block->string_table, st.buffer, st.size);
    
    71
    +    node->string_table_block = string_table_block;
    
    67 72
         node->string_table_size = st.size;
    
    68 73
     
    
    69 74
         registerInfoProvList(node);
    
    ... ... @@ -90,7 +95,8 @@ void shouldFindTwoIfTwoHaveBeenRegistered(Capability *cap,
    90 95
         // Allocate buffers for IPE buffer list node
    
    91 96
         IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
    
    92 97
         node->tables = malloc(sizeof(StgInfoTable *));
    
    93
    -    node->entries = malloc(sizeof(IpeBufferEntry));
    
    98
    +    node->entries_block = malloc(sizeof(StgWord64) + sizeof(IpeBufferEntry));
    
    99
    +    node->entries_block->magic = IPE_MAGIC_WORD;
    
    94 100
     
    
    95 101
         StringTable st;
    
    96 102
         init_string_table(&st);
    
    ... ... @@ -103,9 +109,12 @@ void shouldFindTwoIfTwoHaveBeenRegistered(Capability *cap,
    103 109
         node->compressed = 0;
    
    104 110
         node->count = 1;
    
    105 111
         node->tables[0] = get_itbl(twentyThree);
    
    106
    -    node->entries[0] = makeAnyProvEntry(cap, &st, 23);
    
    112
    +    node->entries_block->entries[0] = makeAnyProvEntry(cap, &st, 23);
    
    107 113
         node->entries_size = sizeof(IpeBufferEntry);
    
    108
    -    node->string_table = st.buffer;
    
    114
    +    IpeStringTableBlock *string_table_block = malloc(sizeof(StgWord64) + st.size);
    
    115
    +    string_table_block->magic = IPE_MAGIC_WORD;
    
    116
    +    memcpy(string_table_block->string_table, st.buffer, st.size);
    
    117
    +    node->string_table_block = string_table_block;
    
    109 118
         node->string_table_size = st.size;
    
    110 119
     
    
    111 120
         registerInfoProvList(node);
    
    ... ... @@ -121,7 +130,8 @@ void shouldFindTwoFromTheSameList(Capability *cap) {
    121 130
         // Allocate buffers for IPE buffer list node
    
    122 131
         IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
    
    123 132
         node->tables = malloc(sizeof(StgInfoTable *) * 2);
    
    124
    -    node->entries = malloc(sizeof(IpeBufferEntry) * 2);
    
    133
    +    node->entries_block = malloc(sizeof(StgWord64) + sizeof(IpeBufferEntry) * 2);
    
    134
    +    node->entries_block->magic = IPE_MAGIC_WORD;
    
    125 135
     
    
    126 136
         StringTable st;
    
    127 137
         init_string_table(&st);
    
    ... ... @@ -133,10 +143,13 @@ void shouldFindTwoFromTheSameList(Capability *cap) {
    133 143
         node->count = 2;
    
    134 144
         node->tables[0] = get_itbl(one);
    
    135 145
         node->tables[1] = get_itbl(two);
    
    136
    -    node->entries[0] = makeAnyProvEntry(cap, &st, 1);
    
    137
    -    node->entries[1] = makeAnyProvEntry(cap, &st, 2);
    
    146
    +    node->entries_block->entries[0] = makeAnyProvEntry(cap, &st, 1);
    
    147
    +    node->entries_block->entries[1] = makeAnyProvEntry(cap, &st, 2);
    
    138 148
         node->entries_size = sizeof(IpeBufferEntry) * 2;
    
    139
    -    node->string_table = st.buffer;
    
    149
    +    IpeStringTableBlock *string_table_block = malloc(sizeof(StgWord64) + st.size);
    
    150
    +    string_table_block->magic = IPE_MAGIC_WORD;
    
    151
    +    memcpy(string_table_block->string_table, st.buffer, st.size);
    
    152
    +    node->string_table_block = string_table_block;
    
    140 153
         node->string_table_size = st.size;
    
    141 154
     
    
    142 155
         registerInfoProvList(node);
    
    ... ... @@ -152,7 +165,11 @@ void shouldDealWithAnEmptyList(Capability *cap, HaskellObj fortyTwo) {
    152 165
         IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
    
    153 166
         node->count = 0;
    
    154 167
         node->next = NULL;
    
    155
    -    node->string_table = "";
    
    168
    +    IpeStringTableBlock *string_table_block = malloc(sizeof(StgWord64));
    
    169
    +    string_table_block->magic = IPE_MAGIC_WORD;
    
    170
    +
    
    171
    +    node->entries_block = malloc(sizeof(StgWord64));
    
    172
    +    node->entries_block->magic = IPE_MAGIC_WORD;
    
    156 173
     
    
    157 174
         registerInfoProvList(node);
    
    158 175
     
    

  • testsuite/tests/rts/ipe/ipe_lib.c
    ... ... @@ -64,7 +64,8 @@ IpeBufferListNode *makeAnyProvEntries(Capability *cap, int start, int end) {
    64 64
         // Allocate buffers for IpeBufferListNode
    
    65 65
         IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
    
    66 66
         node->tables = malloc(sizeof(StgInfoTable *) * n);
    
    67
    -    node->entries = malloc(sizeof(IpeBufferEntry) * n);
    
    67
    +    node->entries_block = malloc(sizeof(StgWord64) + sizeof(IpeBufferEntry) * n);
    
    68
    +    node->entries_block->magic = IPE_MAGIC_WORD;
    
    68 69
     
    
    69 70
         StringTable st;
    
    70 71
         init_string_table(&st);
    
    ... ... @@ -83,14 +84,19 @@ IpeBufferListNode *makeAnyProvEntries(Capability *cap, int start, int end) {
    83 84
         for (int i=start; i < end; i++) {
    
    84 85
             HaskellObj closure = rts_mkInt(cap, 42);
    
    85 86
             node->tables[i]  = get_itbl(closure);
    
    86
    -        node->entries[i] = makeAnyProvEntry(cap, &st, i);
    
    87
    +        node->entries_block->entries[i] = makeAnyProvEntry(cap, &st, i);
    
    87 88
         }
    
    88 89
     
    
    89 90
         // Set the rest of the fields
    
    90 91
         node->next = NULL;
    
    91 92
         node->compressed = 0;
    
    92 93
         node->count = n;
    
    93
    -    node->string_table = st.buffer;
    
    94
    +
    
    95
    +    IpeStringTableBlock *string_table_block =
    
    96
    +      malloc(sizeof(StgWord64) + st.size);
    
    97
    +    string_table_block->magic = IPE_MAGIC_WORD;
    
    98
    +    memcpy(string_table_block->string_table, st.buffer, st.size);
    
    99
    +    node->string_table_block = string_table_block;
    
    94 100
     
    
    95 101
         return node;
    
    96 102
     }