Matthew Pickering pushed to branch wip/stable-ipe-info at Glasgow Haskell Compiler / GHC

Commits:

5 changed files:

Changes:

  • compiler/GHC/StgToCmm/InfoTableProv.hs
    ... ... @@ -158,6 +158,9 @@ emitIpeBufferListNode this_mod ents dus0 = do
    158 158
               [ -- 'next' field
    
    159 159
                 zeroCLit platform
    
    160 160
     
    
    161
    +            -- 'node_id' field
    
    162
    +          , zeroCLit platform
    
    163
    +
    
    161 164
                 -- 'compressed' field
    
    162 165
               , int do_compress
    
    163 166
     
    

  • rts/IPE.c
    ... ... @@ -62,6 +62,22 @@ entry's containing IpeBufferListNode and its index in that node.
    62 62
     When the user looks up an IPE entry, we convert it to the user-facing
    
    63 63
     InfoProvEnt representation.
    
    64 64
     
    
    65
    +Note [Stable identifiers for IPE entries]
    
    66
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    67
    +
    
    68
    +Each IPE entry is given a stable identifier which remains the same across
    
    69
    +different runs of the executable (unlike the address of the info table).
    
    70
    +
    
    71
    +The identifier is a 64-bit word which consists of two parts.
    
    72
    +
    
    73
    +* The high 32 bits are a per-node identifier.
    
    74
    +* The low 32 bits are the index of the entry in the node.
    
    75
    +
    
    76
    +When a node is queued in the pending list by `registerInfoProvList` it is
    
    77
    +given a unique identifier from an incrementing global variable.
    
    78
    +
    
    79
    +The unique key can be computed by using the `IPE_ENTRY_KEY` macro.
    
    80
    +
    
    65 81
     */
    
    66 82
     
    
    67 83
     typedef struct {
    
    ... ... @@ -69,6 +85,13 @@ typedef struct {
    69 85
         uint32_t idx;
    
    70 86
     } IpeMapEntry;
    
    71 87
     
    
    88
    +// See Note [Stable identifiers for IPE entries]
    
    89
    +#define IPE_ENTRY_KEY(entry) \
    
    90
    +    MAKE_IPE_KEY((entry).node->node_id, (entry).idx)
    
    91
    +
    
    92
    +#define MAKE_IPE_KEY(module_id, idx) \
    
    93
    +    ((((uint64_t)(module_id)) << 32) | ((uint64_t)(idx)))
    
    94
    +
    
    72 95
     #if defined(THREADED_RTS)
    
    73 96
     static Mutex ipeMapLock;
    
    74 97
     #endif
    
    ... ... @@ -78,6 +101,9 @@ static HashTable *ipeMap = NULL;
    78 101
     // Accessed atomically
    
    79 102
     static IpeBufferListNode *ipeBufferList = NULL;
    
    80 103
     
    
    104
    +// A global counter used to give each registered IpeBufferListNode a unique node_id (the high half of an IPE entry's identifier).
    
    105
    +static StgWord next_module_id = 1; // Start at 1 to reserve 0 as "invalid"
    
    106
    +
    
    81 107
     static void decompressIPEBufferListNodeIfCompressed(IpeBufferListNode*);
    
    82 108
     static void updateIpeMap(void);
    
    83 109
     
    
    ... ... @@ -114,6 +140,7 @@ static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, uint32_t i
    114 140
         return (InfoProvEnt) {
    
    115 141
                 .info = node->tables[idx],
    
    116 142
                 .prov = {
    
    143
    +                .info_prov_id  = MAKE_IPE_KEY(node->node_id, idx),
    
    117 144
                     .table_name = &strings[ent->table_name],
    
    118 145
                     .closure_desc = ent->closure_desc,
    
    119 146
                     .ty_desc = &strings[ent->ty_desc],
    
    ... ... @@ -181,9 +208,22 @@ A performance test for IPE registration and lookup can be found here:
    181 208
     https://gitlab.haskell.org/ghc/ghc/-/merge_requests/5724#note_370806
    
    182 209
     */
    
    183 210
     void registerInfoProvList(IpeBufferListNode *node) {
    
    211
    +
    
    212
    +        // Grab a fresh module_id
    
    213
    +    uint32_t module_id;
    
    214
    +    StgWord temp_module_id;
    
    215
    +    while (true) {
    
    216
    +        temp_module_id = next_module_id;
    
    217
    +        if (cas(&next_module_id, temp_module_id, temp_module_id+1) == temp_module_id) {
    
    218
    +            module_id = (uint32_t) temp_module_id;
    
    219
    +            break;
    
    220
    +        }
    
    221
    +
    
    222
    +    }
    
    184 223
         while (true) {
    
    185 224
             IpeBufferListNode *old = RELAXED_LOAD(&ipeBufferList);
    
    186 225
             node->next = old;
    
    226
    +        node->node_id = module_id;
    
    187 227
             if (cas_ptr((volatile void **) &ipeBufferList, old, node) == (void *) old) {
    
    188 228
                 return;
    
    189 229
             }
    
    ... ... @@ -205,6 +245,18 @@ bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out) {
    205 245
         }
    
    206 246
     }
    
    207 247
     
    
    248
    +// Returns 0 when the info table is not present in the info table map.
    
    249
    +// See Note [Stable identifiers for IPE entries]
    
    250
    +uint64_t lookupIPEId(const StgInfoTable *info) {
    
    251
    +    updateIpeMap();
    
    252
    +    IpeMapEntry *map_ent = (IpeMapEntry *) lookupHashTable(ipeMap, (StgWord)(info));
    
    253
    +    if (map_ent){
    
    254
    +        return IPE_ENTRY_KEY(*map_ent);
    
    255
    +    } else {
    
    256
    +        return 0;
    
    257
    +    }
    
    258
    +}
    
    259
    +
    
    208 260
     void updateIpeMap(void) {
    
    209 261
         // Check if there's any work at all. If not so, we can circumvent locking,
    
    210 262
         // which would otherwise decrease performance.
    

  • rts/ProfHeap.c
    ... ... @@ -23,6 +23,7 @@
    23 23
     #include "Printer.h"
    
    24 24
     #include "Trace.h"
    
    25 25
     #include "sm/GCThread.h"
    
    26
    +#include "IPE.h"
    
    26 27
     
    
    27 28
     #include <fs_rts.h>
    
    28 29
     #include <string.h>
    
    ... ... @@ -230,9 +231,10 @@ closureIdentity( const StgClosure *p )
    230 231
                 return closure_type_names[info->type];
    
    231 232
             }
    
    232 233
         }
    
    233
    -    case HEAP_BY_INFO_TABLE: {
    
    234
    -        return get_itbl(p);
    
    235
    -        }
    
    234
    +    case HEAP_BY_INFO_TABLE:
    
    235
    +    {
    
    236
    +        return (void *) (p->header.info);
    
    237
    +    }
    
    236 238
     
    
    237 239
         default:
    
    238 240
             barf("closureIdentity");
    
    ... ... @@ -853,6 +855,20 @@ aggregateCensusInfo( void )
    853 855
     }
    
    854 856
     #endif
    
    855 857
     
    
    858
    +static void
    
    859
    +recordIPEHeapSample(FILE *hp_file, uint64_t table_id, size_t count)
    
    860
    +{
    
    861
    +    // Print to heap profile file
    
    862
    +    fprintf(hp_file, "0x%" PRIx64, table_id);
    
    863
    +
    
    864
    +    // Create label string for tracing
    
    865
    +    char str[100];
    
    866
    +    sprintf(str, "0x%" PRIx64, table_id);
    
    867
    +
    
    868
    +    // Emit the profiling sample (convert count to bytes)
    
    869
    +    traceHeapProfSampleString(0, str, count * sizeof(W_));
    
    870
    +}
    
    871
    +
    
    856 872
     /* -----------------------------------------------------------------------------
    
    857 873
      * Print out the results of a heap census.
    
    858 874
      * -------------------------------------------------------------------------- */
    
    ... ... @@ -915,6 +931,11 @@ dumpCensus( Census *census )
    915 931
         }
    
    916 932
     #endif
    
    917 933
     
    
    934
    +    // Census entries which we need to group together.
    
    935
    +    // Used by IPE profiling to group together bands which don't have IPE information.
    
    936
    +    // These are printed at the end, in the 0 band.
    
    937
    +    uint64_t uncategorised_count = 0;
    
    938
    +
    
    918 939
         for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
    
    919 940
     
    
    920 941
     #if defined(PROFILING)
    
    ... ... @@ -944,12 +965,15 @@ dumpCensus( Census *census )
    944 965
                 traceHeapProfSampleString(0, (char *)ctr->identity,
    
    945 966
                                           count * sizeof(W_));
    
    946 967
                 break;
    
    947
    -        case HEAP_BY_INFO_TABLE:
    
    948
    -            fprintf(hp_file, "%p", ctr->identity);
    
    949
    -            char str[100];
    
    950
    -            sprintf(str, "%p", ctr->identity);
    
    951
    -            traceHeapProfSampleString(0, str, count * sizeof(W_));
    
    968
    +        case HEAP_BY_INFO_TABLE: {
    
    969
    +            uint64_t table_id = lookupIPEId(ctr->identity);
    
    970
    +            if (! table_id) {
    
    971
    +              uncategorised_count += count;
    
    972
    +              continue;
    
    973
    +            }
    
    974
    +            recordIPEHeapSample(hp_file, table_id, count);
    
    952 975
                 break;
    
    976
    +            }
    
    953 977
     #if defined(PROFILING)
    
    954 978
             case HEAP_BY_CCS:
    
    955 979
                 fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
    
    ... ... @@ -999,9 +1023,21 @@ dumpCensus( Census *census )
    999 1023
                 barf("dumpCensus; doHeapProfile");
    
    1000 1024
             }
    
    1001 1025
     
    
    1026
    +
    
    1027
    +
    
    1002 1028
             fprintf(hp_file, "\t%" FMT_Word "\n", (W_)count * sizeof(W_));
    
    1003 1029
         }
    
    1004 1030
     
    
    1031
    +    // Print the uncategorised data (entries without an IPE id) into the 0 band for info table profiling.
    
    1032
    +    switch (RtsFlags.ProfFlags.doHeapProfile) {
    
    1033
    +        case HEAP_BY_INFO_TABLE:
    
    1034
    +            recordIPEHeapSample(hp_file, 0, uncategorised_count);
    
    1035
    +            break;
    
    1036
    +        default:
    
    1037
    +            ASSERT(uncategorised_count == 0);
    
    1038
    +            break;
    
    1039
    +    }
    
    1040
    +
    
    1005 1041
         traceHeapProfSampleEnd(era);
    
    1006 1042
         printSample(false, census->time);
    
    1007 1043
     
    

  • rts/eventlog/EventLog.c
    ... ... @@ -1472,7 +1472,7 @@ void postIPE(const InfoProvEnt *ipe)
    1472 1472
         CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
    
    1473 1473
         postEventHeader(&eventBuf, EVENT_IPE);
    
    1474 1474
         postPayloadSize(&eventBuf, len);
    
    1475
    -    postWord64(&eventBuf, (StgWord) INFO_PTR_TO_STRUCT(ipe->info));
    
    1475
    +    postWord64(&eventBuf, (StgWord) (ipe->prov.info_prov_id));
    
    1476 1476
         postStringLen(&eventBuf, ipe->prov.table_name, table_name_len);
    
    1477 1477
         postStringLen(&eventBuf, closure_desc_buf, closure_desc_len);
    
    1478 1478
         postStringLen(&eventBuf, ipe->prov.ty_desc, ty_desc_len);
    

  • rts/include/rts/IPE.h
    ... ... @@ -14,6 +14,7 @@
    14 14
     #pragma once
    
    15 15
     
    
    16 16
     typedef struct InfoProv_ {
    
    17
    +    uint64_t   info_prov_id;
    
    17 18
         const char *table_name;
    
    18 19
         uint32_t closure_desc; // closure type
    
    19 20
         const char *ty_desc;
    
    ... ... @@ -67,19 +68,33 @@ GHC_STATIC_ASSERT(sizeof(IpeBufferEntry) % (WORD_SIZE_IN_BITS / 8) == 0, "sizeof
    67 68
     // See Note [IPE Stripping and magic words]
    
    68 69
     #define IPE_MAGIC_WORD 0x4950450049504500UL
    
    69 70
     
    
    71
    +// Heap profiling currently requires a 32-bit pointer, so for now just truncate
    
    72
    +// the key to fit. It should still be big enough.
    
    73
    +#if SIZEOF_VOID_P == 4
    
    74
    +// On 32-bit systems: keep lower 16 bits of module_id and idx
    
    75
    +#define IPE_PROF_KEY(key64) \
    
    76
    +    (uint32_t)((((key64) >> 16) & 0xFFFF0000) | ((key64) & 0x0000FFFF))
    
    77
    +#else
    
    78
    +// On 64-bit systems: use full key
    
    79
    +#define IPE_PROF_KEY(key64) (key64)
    
    80
    +#endif
    
    81
    +
    
    70 82
     typedef struct {
    
    71
    -    StgWord magic;          // Must be IPE_MAGIC_WORD
    
    83
    +    StgWord64 magic;          // Must be IPE_MAGIC_WORD
    
    72 84
         IpeBufferEntry entries[]; // Flexible array member
    
    73 85
     } IpeBufferEntryBlock;
    
    74 86
     
    
    75 87
     typedef struct {
    
    76
    -    StgWord magic;          // Must be IPE_MAGIC_WORD
    
    88
    +    StgWord64 magic;          // Must be IPE_MAGIC_WORD
    
    77 89
         char string_table[];    // Flexible array member for string table
    
    78 90
     } IpeStringTableBlock;
    
    79 91
     
    
    80 92
     typedef struct IpeBufferListNode_ {
    
    81 93
         struct IpeBufferListNode_ *next;
    
    82 94
     
    
    95
    +    // This field is filled in when the node is registered.
    
    96
    +    uint32_t node_id;
    
    97
    +
    
    83 98
         // Everything below is read-only and generated by the codegen
    
    84 99
     
    
    85 100
         // This flag should be treated as a boolean
    
    ... ... @@ -112,6 +127,8 @@ void formatClosureDescIpe(const InfoProvEnt *ipe_buf, char *str_buf);
    112 127
     // Returns true on success, initializes `out`.
    
    113 128
     bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out);
    
    114 129
     
    
    130
    +uint64_t lookupIPEId(const StgInfoTable *info);
    
    131
    +
    
    115 132
     #if defined(DEBUG)
    
    116 133
     void printIPE(const StgInfoTable *info);
    
    117 134
     #endif