Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC

Commits:

16 changed files:

Changes:

  • .gitlab/darwin/toolchain.nix
    ... ... @@ -16,18 +16,17 @@ let
    16 16
       ghcBindists = let version = ghc.version; in {
    
    17 17
         aarch64-darwin = hostPkgs.fetchurl {
    
    18 18
           url = "https://downloads.haskell.org/ghc/${version}/ghc-${version}-aarch64-apple-darwin.tar.xz";
    
    19
    -      sha256 = "sha256-c1GTMJf3/yiW/t4QL532EswD5JVlgA4getkfsxj4TaA=";
    
    19
    +      sha256 = "sha256-/6+DtdeossBJIMbjkJwL4h3eJ7rzgNCV+ifoQKOi6AQ=";
    
    20 20
         };
    
    21 21
         x86_64-darwin = hostPkgs.fetchurl {
    
    22 22
           url = "https://downloads.haskell.org/ghc/${version}/ghc-${version}-x86_64-apple-darwin.tar.xz";
    
    23
    -      sha256 = "sha256-LrYniMG0phsvyW6dhQC+3ompvzcxnwAe6GezEqqzoTQ=";
    
    23
    +      sha256 = "sha256-jPIhiJMOENesUnDUJeIaPatgavc6ZVSTY5NFIAxlC+k=";
    
    24 24
         };
    
    25 25
     
    
    26 26
       };
    
    27 27
     
    
    28 28
       ghc = pkgs.stdenv.mkDerivation rec {
    
    29
    -    # Using 9.6.2 because of #24050
    
    30
    -    version = "9.6.2";
    
    29
    +    version = "9.10.1";
    
    31 30
         name = "ghc";
    
    32 31
         src = ghcBindists.${pkgs.stdenv.hostPlatform.system};
    
    33 32
         configureFlags = [
    

  • README.md
    ... ... @@ -81,6 +81,10 @@ These steps give you the default build, which includes everything
    81 81
     optimised and built in various ways (eg. profiling libs are built).
    
    82 82
     It can take a long time.  To customise the build, see the file `HACKING.md`.
    
    83 83
     
    
    84
    +## Nix
    
    85
    +
    
    86
    +If you are looking to use nix to develop on GHC, [check out the wiki for instructions](https://gitlab.haskell.org/ghc/ghc/-/wikis/building/preparation).
    
    87
    +
    
    84 88
     Filing bugs and feature requests
    
    85 89
     ================================
    
    86 90
     
    

  • compiler/GHC/Cmm.hs
    ... ... @@ -278,6 +278,7 @@ data SectionType
    278 278
       | InitArray           -- .init_array on ELF, .ctor on Windows
    
    279 279
       | FiniArray           -- .fini_array on ELF, .dtor on Windows
    
    280 280
       | CString
    
    281
    +  | IPE
    
    281 282
       | OtherSection String
    
    282 283
       deriving (Show)
    
    283 284
     
    
    ... ... @@ -298,6 +299,7 @@ sectionProtection (Section t _) = case t of
    298 299
         CString                 -> ReadOnlySection
    
    299 300
         Data                    -> ReadWriteSection
    
    300 301
         UninitialisedData       -> ReadWriteSection
    
    302
    +    IPE                     -> ReadWriteSection
    
    301 303
         (OtherSection _)        -> ReadWriteSection
    
    302 304
     
    
    303 305
     {-
    
    ... ... @@ -557,4 +559,5 @@ pprSectionType s = doubleQuotes $ case s of
    557 559
       InitArray               -> text "initarray"
    
    558 560
       FiniArray               -> text "finiarray"
    
    559 561
       CString                 -> text "cstring"
    
    562
    +  IPE                     -> text "ipe"
    
    560 563
       OtherSection s'         -> text s'

  • compiler/GHC/CmmToAsm/PPC/Ppr.hs
    ... ... @@ -285,6 +285,9 @@ pprAlignForSection platform seg = line $
    285 285
            Data
    
    286 286
             | ppc64          -> text ".align 3"
    
    287 287
             | otherwise      -> text ".align 2"
    
    288
    +       IPE
    
    289
    +        | ppc64          -> text ".align 3"
    
    290
    +        | otherwise      -> text ".align 2"
    
    288 291
            ReadOnlyData
    
    289 292
             | ppc64          -> text ".align 3"
    
    290 293
             | otherwise      -> text ".align 2"
    

  • compiler/GHC/CmmToAsm/Ppr.hs
    ... ... @@ -236,6 +236,10 @@ pprGNUSectionHeader config t suffix =
    236 236
             | OSMinGW32 <- platformOS platform
    
    237 237
                         -> text ".rdata"
    
    238 238
             | otherwise -> text ".rodata.str"
    
    239
    +      IPE
    
    240
    +        | OSMinGW32 <- platformOS platform
    
    241
    +                    -> text ".rdata"
    
    242
    +        | otherwise -> text ".ipe"
    
    239 243
           OtherSection _ ->
    
    240 244
             panic "PprBase.pprGNUSectionHeader: unknown section type"
    
    241 245
         flags = case t of
    
    ... ... @@ -248,6 +252,10 @@ pprGNUSectionHeader config t suffix =
    248 252
             | OSMinGW32 <- platformOS platform
    
    249 253
                         -> empty
    
    250 254
             | otherwise -> text ",\"aMS\"," <> sectionType platform "progbits" <> text ",1"
    
    255
    +      IPE
    
    256
    +        | OSMinGW32 <- platformOS platform
    
    257
    +                    -> empty
    
    258
    +        | otherwise -> text ",\"a\"," <> sectionType platform "progbits"
    
    251 259
           _ -> empty
    
    252 260
     {-# SPECIALIZE pprGNUSectionHeader :: NCGConfig -> SectionType -> CLabel -> SDoc #-}
    
    253 261
     {-# SPECIALIZE pprGNUSectionHeader :: NCGConfig -> SectionType -> CLabel -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
    
    ... ... @@ -262,6 +270,7 @@ pprXcoffSectionHeader t = case t of
    262 270
       RelocatableReadOnlyData -> text ".csect .text[PR] # RelocatableReadOnlyData"
    
    263 271
       CString                 -> text ".csect .text[PR] # CString"
    
    264 272
       UninitialisedData       -> text ".csect .data[BS]"
    
    273
    +  IPE                     -> text ".csect .text[PR] #IPE"
    
    265 274
       _                       -> panic "pprXcoffSectionHeader: unknown section type"
    
    266 275
     {-# SPECIALIZE pprXcoffSectionHeader :: SectionType -> SDoc #-}
    
    267 276
     {-# SPECIALIZE pprXcoffSectionHeader :: SectionType -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
    
    ... ... @@ -276,6 +285,7 @@ pprDarwinSectionHeader t = case t of
    276 285
       InitArray               -> text ".section\t__DATA,__mod_init_func,mod_init_funcs"
    
    277 286
       FiniArray               -> panic "pprDarwinSectionHeader: fini not supported"
    
    278 287
       CString                 -> text ".section\t__TEXT,__cstring,cstring_literals"
    
    288
    +  IPE                     -> text ".const"
    
    279 289
       OtherSection _          -> panic "pprDarwinSectionHeader: unknown section type"
    
    280 290
     {-# SPECIALIZE pprDarwinSectionHeader :: SectionType -> SDoc #-}
    
    281 291
     {-# SPECIALIZE pprDarwinSectionHeader :: SectionType -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable

  • compiler/GHC/CmmToLlvm/Data.hs
    ... ... @@ -145,7 +145,7 @@ llvmSectionType p t = case t of
    145 145
         CString                 -> case platformOS p of
    
    146 146
                                      OSMinGW32 -> fsLit ".rdata$str"
    
    147 147
                                      _         -> fsLit ".rodata.str"
    
    148
    -
    
    148
    +    IPE                     -> fsLit ".ipe"
    
    149 149
         InitArray               -> panic "llvmSectionType: InitArray"
    
    150 150
         FiniArray               -> panic "llvmSectionType: FiniArray"
    
    151 151
         OtherSection _          -> panic "llvmSectionType: unknown section type"
    

  • compiler/GHC/StgToCmm/InfoTableProv.hs
    ... ... @@ -66,6 +66,28 @@ construction, the 'compressed' field of each IPE buffer list node is examined.
    66 66
     If the field indicates that the data has been compressed, the entry data and
    
    67 67
     strings table are decompressed before continuing with the normal IPE map
    
    68 68
     construction.
    
    69
    +
    
    70
    +Note [IPE Stripping and magic words]
    
    71
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    72
    +
    
    73
    +For systems which support ELF executables:
    
    74
    +
    
    75
    +The metadata part of IPE info is placed into a separate ELF section (.ipe).
    
    76
    +This can then be stripped afterwards if you don't require the metadata
    
    77
    +
    
    78
    +```
    
    79
    +-- Remove the section
    
    80
    +objcopy --remove-section .ipe <your-exe>
    
    81
    +-- Repack and compress the executable
    
    82
    +upx <your-exe>
    
    83
    +```
    
    84
    +
    
    85
    +The .ipe section starts with a magic 64-bit word "IPE\nIPE\n`, encoded as ascii.
    
    86
    +
    
    87
    +The RTS checks to see if the .ipe section starts with the magic word. If the
    
    88
    +section has been stripped then it won't start with the magic word and the
    
    89
    +metadata won't be accessible for the info tables.
    
    90
    +
    
    69 91
     -}
    
    70 92
     
    
    71 93
     emitIpeBufferListNode ::
    
    ... ... @@ -124,11 +146,21 @@ emitIpeBufferListNode this_mod ents dus0 = do
    124 146
             ipe_buffer_lbl :: CLabel
    
    125 147
             ipe_buffer_lbl = mkIPELabel this_mod
    
    126 148
     
    
    149
    +        -- A magic word we can use to see if the IPE information has been stripped
    
    150
    +        -- or not
    
    151
    +        -- See Note [IPE Stripping and magic words]
    
    152
    +        -- "IPE\nIPE\n", null terminated.
    
    153
    +        ipe_header :: CmmStatic
    
    154
    +        ipe_header = CmmStaticLit (CmmInt 0x4950450049504500 W64)
    
    155
    +
    
    127 156
             ipe_buffer_node :: [CmmStatic]
    
    128 157
             ipe_buffer_node = map CmmStaticLit
    
    129 158
               [ -- 'next' field
    
    130 159
                 zeroCLit platform
    
    131 160
     
    
    161
    +            -- 'node_id' field
    
    162
    +          , zeroCLit platform
    
    163
    +
    
    132 164
                 -- 'compressed' field
    
    133 165
               , int do_compress
    
    134 166
     
    
    ... ... @@ -164,13 +196,13 @@ emitIpeBufferListNode this_mod ents dus0 = do
    164 196
     
    
    165 197
         -- Emit the strings table
    
    166 198
         emitDecl $ CmmData
    
    167
    -      (Section Data strings_lbl)
    
    168
    -      (CmmStaticsRaw strings_lbl strings)
    
    199
    +      (Section IPE strings_lbl)
    
    200
    +      (CmmStaticsRaw strings_lbl (ipe_header : strings))
    
    169 201
     
    
    170 202
         -- Emit the list of IPE buffer entries
    
    171 203
         emitDecl $ CmmData
    
    172
    -      (Section Data entries_lbl)
    
    173
    -      (CmmStaticsRaw entries_lbl entries)
    
    204
    +      (Section IPE entries_lbl)
    
    205
    +      (CmmStaticsRaw entries_lbl (ipe_header : entries))
    
    174 206
     
    
    175 207
         -- Emit the IPE buffer list node
    
    176 208
         emitDecl $ CmmData
    

  • docs/users_guide/debug-info.rst
    ... ... @@ -391,6 +391,17 @@ to a source location. This lookup table is generated by using the ``-finfo-table
    391 391
         In a test compiling GHC itself, the size of the :ghc-flag:`-finfo-table-map`
    
    392 392
         enabled build results was reduced by over 20% when compression was enabled.
    
    393 393
     
    
    394
    +    The metadata for ``-finfo-table-map`` is stored in the ``.ipe`` section on
    
    395
    +    ELF platforms. The ``.ipe`` section can be removed from the binary after compilation::
    
    396
    +
    
    397
    +      objcopy --remove-section .ipe <binary>
    
    398
    +      upx <binary>
    
    399
    +
    
    400
    +    You can first compile your application with ``-finfo-table-map``, extract
    
    401
    +    the contents of the map (by using the eventlog), strip the ``.ipe`` section
    
    402
    +    and then use the extracted data to interpret a ``-hi`` profile from the stripped
    
    403
    +    binary.
    
    404
    +
    
    394 405
         :since: 9.10
    
    395 406
         :implies: :ghc-flag:`-finfo-table-map-with-stack`
    
    396 407
         :implies: :ghc-flag:`-finfo-table-map-with-fallback`
    

  • docs/users_guide/exts/linear_types.rst
    ... ... @@ -213,6 +213,8 @@ With ``-XStrict``::
    213 213
        -- inferred unrestricted
    
    214 214
        let ~(x, y) = u in …
    
    215 215
     
    
    216
    +(See :ref:`strict-bindings`).
    
    217
    +
    
    216 218
     Data types
    
    217 219
     ----------
    
    218 220
     By default, all fields in algebraic data types are linear (even if
    

  • docs/users_guide/exts/strict.rst
    ... ... @@ -103,6 +103,9 @@ Note the following points:
    103 103
       See `GHC Proposal #229 <https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0229-whitespace-bang-patterns.rst>`__
    
    104 104
       for the precise rules.
    
    105 105
     
    
    106
    +
    
    107
    +.. _strict-bindings:
    
    108
    +
    
    106 109
     Strict bindings
    
    107 110
     ~~~~~~~~~~~~~~~
    
    108 111
     
    

  • rts/IPE.c
    ... ... @@ -62,6 +62,22 @@ entry's containing IpeBufferListNode and its index in that node.
    62 62
     When the user looks up an IPE entry, we convert it to the user-facing
    
    63 63
     InfoProvEnt representation.
    
    64 64
     
    
    65
    +Note [Stable identifiers for IPE entries]
    
    66
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    67
    +
    
    68
    +Each IPE entry is given a stable identifier which remains the same across
    
    69
    +different runs of the executable (unlike the address of the info table).
    
    70
    +
    
    71
    +The identifier is a 64-bit word which consists of two parts.
    
    72
    +
    
    73
    +* The high 32-bits are a per-node identifier.
    
    74
    +* The low 32-bits are the index of the entry in the node.
    
    75
    +
    
    76
    +When a node is queued in the pending list by `registerInfoProvList` it is
    
    77
    +given a unique identifier from an incrementing global variable.
    
    78
    +
    
    79
    +The unique key can be computed by using the `IPE_ENTRY_KEY` macro.
    
    80
    +
    
    65 81
     */
    
    66 82
     
    
    67 83
     typedef struct {
    
    ... ... @@ -69,6 +85,13 @@ typedef struct {
    69 85
         uint32_t idx;
    
    70 86
     } IpeMapEntry;
    
    71 87
     
    
    88
    +// See Note [Stable identifiers for IPE entries]
    
    89
    +#define IPE_ENTRY_KEY(entry) \
    
    90
    +    MAKE_IPE_KEY((entry).node->node_id, (entry).idx)
    
    91
    +
    
    92
    +#define MAKE_IPE_KEY(module_id, idx) \
    
    93
    +    ((((uint64_t)(module_id)) << 32) | ((uint64_t)(idx)))
    
    94
    +
    
    72 95
     #if defined(THREADED_RTS)
    
    73 96
     static Mutex ipeMapLock;
    
    74 97
     #endif
    
    ... ... @@ -78,9 +101,22 @@ static HashTable *ipeMap = NULL;
    78 101
     // Accessed atomically
    
    79 102
     static IpeBufferListNode *ipeBufferList = NULL;
    
    80 103
     
    
    104
    +// A global counter which is used to give an IPE entry a unique value across runs.
    
    105
    +static StgWord next_module_id = 1; // Start at 1 to reserve 0 as "invalid"
    
    106
    +
    
    81 107
     static void decompressIPEBufferListNodeIfCompressed(IpeBufferListNode*);
    
    82 108
     static void updateIpeMap(void);
    
    83 109
     
    
    110
    +// Check whether the IpeBufferListNode has the relevant magic words.
    
    111
    +// See Note [IPE Stripping and magic words]
    
    112
    +static inline bool ipe_node_valid(const IpeBufferListNode *node) {
    
    113
    +    return node &&
    
    114
    +           node->entries_block &&
    
    115
    +           node->string_table_block &&
    
    116
    +           node->entries_block->magic == IPE_MAGIC_WORD &&
    
    117
    +           node->string_table_block->magic == IPE_MAGIC_WORD;
    
    118
    +}
    
    119
    +
    
    84 120
     #if defined(THREADED_RTS)
    
    85 121
     
    
    86 122
     void initIpe(void) { initMutex(&ipeMapLock); }
    
    ... ... @@ -99,11 +135,12 @@ static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, uint32_t i
    99 135
     {
    
    100 136
         CHECK(idx < node->count);
    
    101 137
         CHECK(!node->compressed);
    
    102
    -    const char *strings = node->string_table;
    
    103
    -    const IpeBufferEntry *ent = &node->entries[idx];
    
    138
    +    const char *strings = node->string_table_block->string_table;
    
    139
    +    const IpeBufferEntry *ent = &node->entries_block->entries[idx];
    
    104 140
         return (InfoProvEnt) {
    
    105 141
                 .info = node->tables[idx],
    
    106 142
                 .prov = {
    
    143
    +                .info_prov_id  = MAKE_IPE_KEY(node->node_id, idx),
    
    107 144
                     .table_name = &strings[ent->table_name],
    
    108 145
                     .closure_desc = ent->closure_desc,
    
    109 146
                     .ty_desc = &strings[ent->ty_desc],
    
    ... ... @@ -121,19 +158,23 @@ static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, uint32_t i
    121 158
     static void traceIPEFromHashTable(void *data STG_UNUSED, StgWord key STG_UNUSED,
    
    122 159
                                       const void *value) {
    
    123 160
         const IpeMapEntry *map_ent = (const IpeMapEntry *)value;
    
    124
    -    const InfoProvEnt ipe = ipeBufferEntryToIpe(map_ent->node, map_ent->idx);
    
    125
    -    traceIPE(&ipe);
    
    161
    +    if (ipe_node_valid(map_ent->node)){
    
    162
    +      const InfoProvEnt ipe = ipeBufferEntryToIpe(map_ent->node, map_ent->idx);
    
    163
    +      traceIPE(&ipe);
    
    164
    +    }
    
    126 165
     }
    
    127 166
     
    
    128 167
     void dumpIPEToEventLog(void) {
    
    129 168
         // Dump pending entries
    
    130 169
         IpeBufferListNode *node = RELAXED_LOAD(&ipeBufferList);
    
    131 170
         while (node != NULL) {
    
    132
    -        decompressIPEBufferListNodeIfCompressed(node);
    
    171
    +        if (ipe_node_valid(node)){
    
    172
    +          decompressIPEBufferListNodeIfCompressed(node);
    
    133 173
     
    
    134
    -        for (uint32_t i = 0; i < node->count; i++) {
    
    135
    -            const InfoProvEnt ent = ipeBufferEntryToIpe(node, i);
    
    136
    -            traceIPE(&ent);
    
    174
    +          for (uint32_t i = 0; i < node->count; i++) {
    
    175
    +              const InfoProvEnt ent = ipeBufferEntryToIpe(node, i);
    
    176
    +              traceIPE(&ent);
    
    177
    +          }
    
    137 178
             }
    
    138 179
             node = node->next;
    
    139 180
         }
    
    ... ... @@ -165,11 +206,30 @@ ipeMapLock; we instead use atomic CAS operations to add to the list.
    165 206
     
    
    166 207
     A performance test for IPE registration and lookup can be found here:
    
    167 208
     https://gitlab.haskell.org/ghc/ghc/-/merge_requests/5724#note_370806
    
    209
    +
    
    210
    +Note that IPEs are still regiestered even if the .ipe section is stripped. That's
    
    211
    +because you may still want to query what the unique identifier for an info table is
    
    212
    +so it can be reconciled with previously extracted metadata information. For example,
    
    213
    +when `-hi` profiling or using `whereFrom`.
    
    214
    +
    
    168 215
     */
    
    169 216
     void registerInfoProvList(IpeBufferListNode *node) {
    
    217
    +
    
    218
    +        // Grab a fresh module_id
    
    219
    +    uint32_t module_id;
    
    220
    +    StgWord temp_module_id;
    
    221
    +    while (true) {
    
    222
    +        temp_module_id = next_module_id;
    
    223
    +        if (cas(&next_module_id, temp_module_id, temp_module_id+1) == temp_module_id) {
    
    224
    +            module_id = (uint32_t) temp_module_id;
    
    225
    +            break;
    
    226
    +        }
    
    227
    +
    
    228
    +    }
    
    170 229
         while (true) {
    
    171 230
             IpeBufferListNode *old = RELAXED_LOAD(&ipeBufferList);
    
    172 231
             node->next = old;
    
    232
    +        node->node_id = module_id;
    
    173 233
             if (cas_ptr((volatile void **) &ipeBufferList, old, node) == (void *) old) {
    
    174 234
                 return;
    
    175 235
             }
    
    ... ... @@ -183,7 +243,7 @@ void formatClosureDescIpe(const InfoProvEnt *ipe_buf, char *str_buf) {
    183 243
     bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out) {
    
    184 244
         updateIpeMap();
    
    185 245
         IpeMapEntry *map_ent = (IpeMapEntry *) lookupHashTable(ipeMap, (StgWord)info);
    
    186
    -    if (map_ent) {
    
    246
    +    if (map_ent && ipe_node_valid(map_ent->node)) {
    
    187 247
             *out = ipeBufferEntryToIpe(map_ent->node, map_ent->idx);
    
    188 248
             return true;
    
    189 249
         } else {
    
    ... ... @@ -191,6 +251,18 @@ bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out) {
    191 251
         }
    
    192 252
     }
    
    193 253
     
    
    254
    +// Returns 0 when the info table is not present in the info table map.
    
    255
    +// See Note [Stable identifiers for IPE entries]
    
    256
    +uint64_t lookupIPEId(const StgInfoTable *info) {
    
    257
    +    updateIpeMap();
    
    258
    +    IpeMapEntry *map_ent = (IpeMapEntry *) lookupHashTable(ipeMap, (StgWord)(info));
    
    259
    +    if (map_ent){
    
    260
    +        return IPE_ENTRY_KEY(*map_ent);
    
    261
    +    } else {
    
    262
    +        return 0;
    
    263
    +    }
    
    264
    +}
    
    265
    +
    
    194 266
     void updateIpeMap(void) {
    
    195 267
         // Check if there's any work at all. If not so, we can circumvent locking,
    
    196 268
         // which decreases performance.
    

  • rts/ProfHeap.c
    ... ... @@ -23,6 +23,7 @@
    23 23
     #include "Printer.h"
    
    24 24
     #include "Trace.h"
    
    25 25
     #include "sm/GCThread.h"
    
    26
    +#include "IPE.h"
    
    26 27
     
    
    27 28
     #include <fs_rts.h>
    
    28 29
     #include <string.h>
    
    ... ... @@ -230,9 +231,10 @@ closureIdentity( const StgClosure *p )
    230 231
                 return closure_type_names[info->type];
    
    231 232
             }
    
    232 233
         }
    
    233
    -    case HEAP_BY_INFO_TABLE: {
    
    234
    -        return get_itbl(p);
    
    235
    -        }
    
    234
    +    case HEAP_BY_INFO_TABLE:
    
    235
    +    {
    
    236
    +        return (void *) (p->header.info);
    
    237
    +    }
    
    236 238
     
    
    237 239
         default:
    
    238 240
             barf("closureIdentity");
    
    ... ... @@ -853,6 +855,20 @@ aggregateCensusInfo( void )
    853 855
     }
    
    854 856
     #endif
    
    855 857
     
    
    858
    +static void
    
    859
    +recordIPEHeapSample(FILE *hp_file, uint64_t table_id, size_t count)
    
    860
    +{
    
    861
    +    // Print to heap profile file
    
    862
    +    fprintf(hp_file, "0x%" PRIx64, table_id);
    
    863
    +
    
    864
    +    // Create label string for tracing
    
    865
    +    char str[100];
    
    866
    +    sprintf(str, "0x%" PRIx64, table_id);
    
    867
    +
    
    868
    +    // Emit the profiling sample (convert count to bytes)
    
    869
    +    traceHeapProfSampleString(str, count * sizeof(W_));
    
    870
    +}
    
    871
    +
    
    856 872
     /* -----------------------------------------------------------------------------
    
    857 873
      * Print out the results of a heap census.
    
    858 874
      * -------------------------------------------------------------------------- */
    
    ... ... @@ -915,6 +931,11 @@ dumpCensus( Census *census )
    915 931
         }
    
    916 932
     #endif
    
    917 933
     
    
    934
    +    // Census entries which we need to group together.
    
    935
    +    // Used by IPE profiling to group together bands which don't have IPE information.
    
    936
    +    // Printing at the end in the 0 band
    
    937
    +    uint64_t uncategorised_count = 0;
    
    938
    +
    
    918 939
         for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
    
    919 940
     
    
    920 941
     #if defined(PROFILING)
    
    ... ... @@ -945,11 +966,15 @@ dumpCensus( Census *census )
    945 966
                                           count * sizeof(W_));
    
    946 967
                 break;
    
    947 968
             case HEAP_BY_INFO_TABLE:
    
    948
    -            fprintf(hp_file, "%p", ctr->identity);
    
    949
    -            char str[100];
    
    950
    -            sprintf(str, "%p", ctr->identity);
    
    951
    -            traceHeapProfSampleString(str, count * sizeof(W_));
    
    969
    +        {
    
    970
    +            uint64_t table_id = lookupIPEId(ctr->identity);
    
    971
    +            if (! table_id) {
    
    972
    +              uncategorised_count += count;
    
    973
    +              continue;
    
    974
    +            }
    
    975
    +            recordIPEHeapSample(hp_file, table_id, count);
    
    952 976
                 break;
    
    977
    +        }
    
    953 978
     #if defined(PROFILING)
    
    954 979
             case HEAP_BY_CCS:
    
    955 980
                 fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
    
    ... ... @@ -1002,6 +1027,16 @@ dumpCensus( Census *census )
    1002 1027
             fprintf(hp_file, "\t%" FMT_Word "\n", (W_)count * sizeof(W_));
    
    1003 1028
         }
    
    1004 1029
     
    
    1030
    +    // Print the unallocated data into the 0 band for info table profiling.
    
    1031
    +    switch (RtsFlags.ProfFlags.doHeapProfile) {
    
    1032
    +        case HEAP_BY_INFO_TABLE:
    
    1033
    +            recordIPEHeapSample(hp_file, 0, uncategorised_count);
    
    1034
    +            break;
    
    1035
    +        default:
    
    1036
    +            ASSERT(uncategorised_count == 0);
    
    1037
    +            break;
    
    1038
    +    }
    
    1039
    +
    
    1005 1040
         traceHeapProfSampleEnd(era);
    
    1006 1041
         printSample(false, census->time);
    
    1007 1042
     
    

  • rts/eventlog/EventLog.c
    ... ... @@ -1480,7 +1480,7 @@ void postIPE(const InfoProvEnt *ipe)
    1480 1480
         CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
    
    1481 1481
         postEventHeader(&eventBuf, EVENT_IPE);
    
    1482 1482
         postPayloadSize(&eventBuf, len);
    
    1483
    -    postWord64(&eventBuf, (StgWord) INFO_PTR_TO_STRUCT(ipe->info));
    
    1483
    +    postWord64(&eventBuf, (StgWord) (ipe->prov.info_prov_id));
    
    1484 1484
         postStringLen(&eventBuf, ipe->prov.table_name, table_name_len);
    
    1485 1485
         postStringLen(&eventBuf, closure_desc_buf, closure_desc_len);
    
    1486 1486
         postStringLen(&eventBuf, ipe->prov.ty_desc, ty_desc_len);
    

  • rts/include/rts/IPE.h
    ... ... @@ -14,6 +14,7 @@
    14 14
     #pragma once
    
    15 15
     
    
    16 16
     typedef struct InfoProv_ {
    
    17
    +    uint64_t   info_prov_id;
    
    17 18
         const char *table_name;
    
    18 19
         uint32_t closure_desc; // closure type
    
    19 20
         const char *ty_desc;
    
    ... ... @@ -63,9 +64,37 @@ typedef struct {
    63 64
     
    
    64 65
     GHC_STATIC_ASSERT(sizeof(IpeBufferEntry) % (WORD_SIZE_IN_BITS / 8) == 0, "sizeof(IpeBufferEntry) must be a multiple of the word size");
    
    65 66
     
    
    67
    +// The magic word is IPE\nIPE\n, which occupies the full 64 bit width of a word.
    
    68
    +// See Note [IPE Stripping and magic words]
    
    69
    +#define IPE_MAGIC_WORD 0x4950450049504500UL
    
    70
    +
    
    71
    +// Heap profiling currently requires a 32 bit pointer.. so for now just truncate
    
    72
    +// the key to fit. It should still be big enough.
    
    73
    +#if SIZEOF_VOID_P == 4
    
    74
    +// On 32-bit systems: keep lower 16 bits of module_id and idx
    
    75
    +#define IPE_PROF_KEY(key64) \
    
    76
    +    (uint32_t)((((key64) >> 16) & 0xFFFF0000) | ((key64) & 0x0000FFFF))
    
    77
    +#else
    
    78
    +// On 64-bit systems: use full key
    
    79
    +#define IPE_PROF_KEY(key64) (key64)
    
    80
    +#endif
    
    81
    +
    
    82
    +typedef struct {
    
    83
    +    StgWord64 magic;          // Must be IPE_MAGIC_WORD
    
    84
    +    IpeBufferEntry entries[]; // Flexible array member
    
    85
    +} IpeBufferEntryBlock;
    
    86
    +
    
    87
    +typedef struct {
    
    88
    +    StgWord64 magic;          // Must be IPE_MAGIC_WORD
    
    89
    +    char string_table[];    // Flexible array member for string table
    
    90
    +} IpeStringTableBlock;
    
    91
    +
    
    66 92
     typedef struct IpeBufferListNode_ {
    
    67 93
         struct IpeBufferListNode_ *next;
    
    68 94
     
    
    95
    +    // This field is filled in when the node is registered.
    
    96
    +    uint32_t node_id;
    
    97
    +
    
    69 98
         // Everything below is read-only and generated by the codegen
    
    70 99
     
    
    71 100
         // This flag should be treated as a boolean
    
    ... ... @@ -76,10 +105,10 @@ typedef struct IpeBufferListNode_ {
    76 105
         // When TNTC is enabled, these will point to the entry code
    
    77 106
         // not the info table itself.
    
    78 107
         const StgInfoTable **tables;
    
    79
    -    IpeBufferEntry *entries;
    
    108
    +    IpeBufferEntryBlock *entries_block;
    
    80 109
         StgWord entries_size; // decompressed size
    
    81 110
     
    
    82
    -    const char *string_table;
    
    111
    +    const IpeStringTableBlock *string_table_block;
    
    83 112
         StgWord string_table_size; // decompressed size
    
    84 113
     
    
    85 114
         // Shared by all entries
    
    ... ... @@ -98,6 +127,8 @@ void formatClosureDescIpe(const InfoProvEnt *ipe_buf, char *str_buf);
    98 127
     // Returns true on success, initializes `out`.
    
    99 128
     bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out);
    
    100 129
     
    
    130
    +uint64_t lookupIPEId(const StgInfoTable *info);
    
    131
    +
    
    101 132
     #if defined(DEBUG)
    
    102 133
     void printIPE(const StgInfoTable *info);
    
    103 134
     #endif

  • testsuite/tests/rts/ipe/ipeMap.c
    ... ... @@ -48,7 +48,8 @@ HaskellObj shouldFindOneIfItHasBeenRegistered(Capability *cap) {
    48 48
         // Allocate buffers for IPE buffer list node
    
    49 49
         IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
    
    50 50
         node->tables = malloc(sizeof(StgInfoTable *));
    
    51
    -    node->entries = malloc(sizeof(IpeBufferEntry));
    
    51
    +    node->entries_block = malloc(sizeof(StgWord64) + sizeof(IpeBufferEntry));
    
    52
    +    node->entries_block->magic = IPE_MAGIC_WORD;
    
    52 53
     
    
    53 54
         StringTable st;
    
    54 55
         init_string_table(&st);
    
    ... ... @@ -61,9 +62,13 @@ HaskellObj shouldFindOneIfItHasBeenRegistered(Capability *cap) {
    61 62
         node->compressed = 0;
    
    62 63
         node->count = 1;
    
    63 64
         node->tables[0] = get_itbl(fortyTwo);
    
    64
    -    node->entries[0] = makeAnyProvEntry(cap, &st, 42);
    
    65
    +    node->entries_block->entries[0] = makeAnyProvEntry(cap, &st, 42);
    
    65 66
         node->entries_size = sizeof(IpeBufferEntry);
    
    66
    -    node->string_table = st.buffer;
    
    67
    +
    
    68
    +    IpeStringTableBlock *string_table_block = malloc(sizeof(StgWord64) + st.size);
    
    69
    +    string_table_block->magic = IPE_MAGIC_WORD;
    
    70
    +    memcpy(string_table_block->string_table, st.buffer, st.size);
    
    71
    +    node->string_table_block = string_table_block;
    
    67 72
         node->string_table_size = st.size;
    
    68 73
     
    
    69 74
         registerInfoProvList(node);
    
    ... ... @@ -90,7 +95,8 @@ void shouldFindTwoIfTwoHaveBeenRegistered(Capability *cap,
    90 95
         // Allocate buffers for IPE buffer list node
    
    91 96
         IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
    
    92 97
         node->tables = malloc(sizeof(StgInfoTable *));
    
    93
    -    node->entries = malloc(sizeof(IpeBufferEntry));
    
    98
    +    node->entries_block = malloc(sizeof(StgWord64) + sizeof(IpeBufferEntry));
    
    99
    +    node->entries_block->magic = IPE_MAGIC_WORD;
    
    94 100
     
    
    95 101
         StringTable st;
    
    96 102
         init_string_table(&st);
    
    ... ... @@ -103,9 +109,12 @@ void shouldFindTwoIfTwoHaveBeenRegistered(Capability *cap,
    103 109
         node->compressed = 0;
    
    104 110
         node->count = 1;
    
    105 111
         node->tables[0] = get_itbl(twentyThree);
    
    106
    -    node->entries[0] = makeAnyProvEntry(cap, &st, 23);
    
    112
    +    node->entries_block->entries[0] = makeAnyProvEntry(cap, &st, 23);
    
    107 113
         node->entries_size = sizeof(IpeBufferEntry);
    
    108
    -    node->string_table = st.buffer;
    
    114
    +    IpeStringTableBlock *string_table_block = malloc(sizeof(StgWord64) + st.size);
    
    115
    +    string_table_block->magic = IPE_MAGIC_WORD;
    
    116
    +    memcpy(string_table_block->string_table, st.buffer, st.size);
    
    117
    +    node->string_table_block = string_table_block;
    
    109 118
         node->string_table_size = st.size;
    
    110 119
     
    
    111 120
         registerInfoProvList(node);
    
    ... ... @@ -121,7 +130,8 @@ void shouldFindTwoFromTheSameList(Capability *cap) {
    121 130
         // Allocate buffers for IPE buffer list node
    
    122 131
         IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
    
    123 132
         node->tables = malloc(sizeof(StgInfoTable *) * 2);
    
    124
    -    node->entries = malloc(sizeof(IpeBufferEntry) * 2);
    
    133
    +    node->entries_block = malloc(sizeof(StgWord64) + sizeof(IpeBufferEntry) * 2);
    
    134
    +    node->entries_block->magic = IPE_MAGIC_WORD;
    
    125 135
     
    
    126 136
         StringTable st;
    
    127 137
         init_string_table(&st);
    
    ... ... @@ -133,10 +143,13 @@ void shouldFindTwoFromTheSameList(Capability *cap) {
    133 143
         node->count = 2;
    
    134 144
         node->tables[0] = get_itbl(one);
    
    135 145
         node->tables[1] = get_itbl(two);
    
    136
    -    node->entries[0] = makeAnyProvEntry(cap, &st, 1);
    
    137
    -    node->entries[1] = makeAnyProvEntry(cap, &st, 2);
    
    146
    +    node->entries_block->entries[0] = makeAnyProvEntry(cap, &st, 1);
    
    147
    +    node->entries_block->entries[1] = makeAnyProvEntry(cap, &st, 2);
    
    138 148
         node->entries_size = sizeof(IpeBufferEntry) * 2;
    
    139
    -    node->string_table = st.buffer;
    
    149
    +    IpeStringTableBlock *string_table_block = malloc(sizeof(StgWord64) + st.size);
    
    150
    +    string_table_block->magic = IPE_MAGIC_WORD;
    
    151
    +    memcpy(string_table_block->string_table, st.buffer, st.size);
    
    152
    +    node->string_table_block = string_table_block;
    
    140 153
         node->string_table_size = st.size;
    
    141 154
     
    
    142 155
         registerInfoProvList(node);
    
    ... ... @@ -152,7 +165,11 @@ void shouldDealWithAnEmptyList(Capability *cap, HaskellObj fortyTwo) {
    152 165
         IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
    
    153 166
         node->count = 0;
    
    154 167
         node->next = NULL;
    
    155
    -    node->string_table = "";
    
    168
    +    IpeStringTableBlock *string_table_block = malloc(sizeof(StgWord64));
    
    169
    +    string_table_block->magic = IPE_MAGIC_WORD;
    
    170
    +
    
    171
    +    node->entries_block = malloc(sizeof(StgWord64));
    
    172
    +    node->entries_block->magic = IPE_MAGIC_WORD;
    
    156 173
     
    
    157 174
         registerInfoProvList(node);
    
    158 175
     
    

  • testsuite/tests/rts/ipe/ipe_lib.c
    ... ... @@ -64,7 +64,8 @@ IpeBufferListNode *makeAnyProvEntries(Capability *cap, int start, int end) {
    64 64
         // Allocate buffers for IpeBufferListNode
    
    65 65
         IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
    
    66 66
         node->tables = malloc(sizeof(StgInfoTable *) * n);
    
    67
    -    node->entries = malloc(sizeof(IpeBufferEntry) * n);
    
    67
    +    node->entries_block = malloc(sizeof(StgWord64) + sizeof(IpeBufferEntry) * n);
    
    68
    +    node->entries_block->magic = IPE_MAGIC_WORD;
    
    68 69
     
    
    69 70
         StringTable st;
    
    70 71
         init_string_table(&st);
    
    ... ... @@ -83,14 +84,19 @@ IpeBufferListNode *makeAnyProvEntries(Capability *cap, int start, int end) {
    83 84
         for (int i=start; i < end; i++) {
    
    84 85
             HaskellObj closure = rts_mkInt(cap, 42);
    
    85 86
             node->tables[i]  = get_itbl(closure);
    
    86
    -        node->entries[i] = makeAnyProvEntry(cap, &st, i);
    
    87
    +        node->entries_block->entries[i] = makeAnyProvEntry(cap, &st, i);
    
    87 88
         }
    
    88 89
     
    
    89 90
         // Set the rest of the fields
    
    90 91
         node->next = NULL;
    
    91 92
         node->compressed = 0;
    
    92 93
         node->count = n;
    
    93
    -    node->string_table = st.buffer;
    
    94
    +
    
    95
    +    IpeStringTableBlock *string_table_block =
    
    96
    +      malloc(sizeof(StgWord64) + st.size);
    
    97
    +    string_table_block->magic = IPE_MAGIC_WORD;
    
    98
    +    memcpy(string_table_block->string_table, st.buffer, st.size);
    
    99
    +    node->string_table_block = string_table_block;
    
    94 100
     
    
    95 101
         return node;
    
    96 102
     }