Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC
Commits:
-
69cc16ca
by Marc Scholten at 2025-08-06T15:51:28-04:00
-
93a2f450
by Daniel Díaz at 2025-08-06T15:52:14-04:00
-
6666fb41
by Matthew Pickering at 2025-08-06T16:24:56-04:00
-
39beb40b
by Matthew Pickering at 2025-08-06T16:24:56-04:00
-
30094971
by Matthew Pickering at 2025-08-06T16:24:56-04:00
-
61b099b6
by fendor at 2025-08-06T16:24:57-04:00
16 changed files:
- .gitlab/darwin/toolchain.nix
- README.md
- compiler/GHC/Cmm.hs
- compiler/GHC/CmmToAsm/PPC/Ppr.hs
- compiler/GHC/CmmToAsm/Ppr.hs
- compiler/GHC/CmmToLlvm/Data.hs
- compiler/GHC/StgToCmm/InfoTableProv.hs
- docs/users_guide/debug-info.rst
- docs/users_guide/exts/linear_types.rst
- docs/users_guide/exts/strict.rst
- rts/IPE.c
- rts/ProfHeap.c
- rts/eventlog/EventLog.c
- rts/include/rts/IPE.h
- testsuite/tests/rts/ipe/ipeMap.c
- testsuite/tests/rts/ipe/ipe_lib.c
Changes:
... | ... | @@ -16,18 +16,17 @@ let |
16 | 16 | ghcBindists = let version = ghc.version; in {
|
17 | 17 | aarch64-darwin = hostPkgs.fetchurl {
|
18 | 18 | url = "https://downloads.haskell.org/ghc/${version}/ghc-${version}-aarch64-apple-darwin.tar.xz";
|
19 | - sha256 = "sha256-c1GTMJf3/yiW/t4QL532EswD5JVlgA4getkfsxj4TaA=";
|
|
19 | + sha256 = "sha256-/6+DtdeossBJIMbjkJwL4h3eJ7rzgNCV+ifoQKOi6AQ=";
|
|
20 | 20 | };
|
21 | 21 | x86_64-darwin = hostPkgs.fetchurl {
|
22 | 22 | url = "https://downloads.haskell.org/ghc/${version}/ghc-${version}-x86_64-apple-darwin.tar.xz";
|
23 | - sha256 = "sha256-LrYniMG0phsvyW6dhQC+3ompvzcxnwAe6GezEqqzoTQ=";
|
|
23 | + sha256 = "sha256-jPIhiJMOENesUnDUJeIaPatgavc6ZVSTY5NFIAxlC+k=";
|
|
24 | 24 | };
|
25 | 25 | |
26 | 26 | };
|
27 | 27 | |
28 | 28 | ghc = pkgs.stdenv.mkDerivation rec {
|
29 | - # Using 9.6.2 because of #24050
|
|
30 | - version = "9.6.2";
|
|
29 | + version = "9.10.1";
|
|
31 | 30 | name = "ghc";
|
32 | 31 | src = ghcBindists.${pkgs.stdenv.hostPlatform.system};
|
33 | 32 | configureFlags = [
|
... | ... | @@ -81,6 +81,10 @@ These steps give you the default build, which includes everything |
81 | 81 | optimised and built in various ways (eg. profiling libs are built).
|
82 | 82 | It can take a long time. To customise the build, see the file `HACKING.md`.
|
83 | 83 | |
84 | +## Nix
|
|
85 | + |
|
86 | +If you are looking to use nix to develop on GHC, [check out the wiki for instructions](https://gitlab.haskell.org/ghc/ghc/-/wikis/building/preparation).
|
|
87 | + |
|
84 | 88 | Filing bugs and feature requests
|
85 | 89 | ================================
|
86 | 90 |
... | ... | @@ -278,6 +278,7 @@ data SectionType |
278 | 278 | | InitArray -- .init_array on ELF, .ctor on Windows
|
279 | 279 | | FiniArray -- .fini_array on ELF, .dtor on Windows
|
280 | 280 | | CString
|
281 | + | IPE
|
|
281 | 282 | | OtherSection String
|
282 | 283 | deriving (Show)
|
283 | 284 | |
... | ... | @@ -298,6 +299,7 @@ sectionProtection (Section t _) = case t of |
298 | 299 | CString -> ReadOnlySection
|
299 | 300 | Data -> ReadWriteSection
|
300 | 301 | UninitialisedData -> ReadWriteSection
|
302 | + IPE -> ReadWriteSection
|
|
301 | 303 | (OtherSection _) -> ReadWriteSection
|
302 | 304 | |
303 | 305 | {-
|
... | ... | @@ -557,4 +559,5 @@ pprSectionType s = doubleQuotes $ case s of |
557 | 559 | InitArray -> text "initarray"
|
558 | 560 | FiniArray -> text "finiarray"
|
559 | 561 | CString -> text "cstring"
|
562 | + IPE -> text "ipe"
|
|
560 | 563 | OtherSection s' -> text s' |
... | ... | @@ -285,6 +285,9 @@ pprAlignForSection platform seg = line $ |
285 | 285 | Data
|
286 | 286 | | ppc64 -> text ".align 3"
|
287 | 287 | | otherwise -> text ".align 2"
|
288 | + IPE
|
|
289 | + | ppc64 -> text ".align 3"
|
|
290 | + | otherwise -> text ".align 2"
|
|
288 | 291 | ReadOnlyData
|
289 | 292 | | ppc64 -> text ".align 3"
|
290 | 293 | | otherwise -> text ".align 2"
|
... | ... | @@ -236,6 +236,10 @@ pprGNUSectionHeader config t suffix = |
236 | 236 | | OSMinGW32 <- platformOS platform
|
237 | 237 | -> text ".rdata"
|
238 | 238 | | otherwise -> text ".rodata.str"
|
239 | + IPE
|
|
240 | + | OSMinGW32 <- platformOS platform
|
|
241 | + -> text ".rdata"
|
|
242 | + | otherwise -> text ".ipe"
|
|
239 | 243 | OtherSection _ ->
|
240 | 244 | panic "PprBase.pprGNUSectionHeader: unknown section type"
|
241 | 245 | flags = case t of
|
... | ... | @@ -248,6 +252,10 @@ pprGNUSectionHeader config t suffix = |
248 | 252 | | OSMinGW32 <- platformOS platform
|
249 | 253 | -> empty
|
250 | 254 | | otherwise -> text ",\"aMS\"," <> sectionType platform "progbits" <> text ",1"
|
255 | + IPE
|
|
256 | + | OSMinGW32 <- platformOS platform
|
|
257 | + -> empty
|
|
258 | + | otherwise -> text ",\"a\"," <> sectionType platform "progbits"
|
|
251 | 259 | _ -> empty
|
252 | 260 | {-# SPECIALIZE pprGNUSectionHeader :: NCGConfig -> SectionType -> CLabel -> SDoc #-}
|
253 | 261 | {-# SPECIALIZE pprGNUSectionHeader :: NCGConfig -> SectionType -> CLabel -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
|
... | ... | @@ -262,6 +270,7 @@ pprXcoffSectionHeader t = case t of |
262 | 270 | RelocatableReadOnlyData -> text ".csect .text[PR] # RelocatableReadOnlyData"
|
263 | 271 | CString -> text ".csect .text[PR] # CString"
|
264 | 272 | UninitialisedData -> text ".csect .data[BS]"
|
273 | + IPE -> text ".csect .text[PR] #IPE"
|
|
265 | 274 | _ -> panic "pprXcoffSectionHeader: unknown section type"
|
266 | 275 | {-# SPECIALIZE pprXcoffSectionHeader :: SectionType -> SDoc #-}
|
267 | 276 | {-# SPECIALIZE pprXcoffSectionHeader :: SectionType -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable
|
... | ... | @@ -276,6 +285,7 @@ pprDarwinSectionHeader t = case t of |
276 | 285 | InitArray -> text ".section\t__DATA,__mod_init_func,mod_init_funcs"
|
277 | 286 | FiniArray -> panic "pprDarwinSectionHeader: fini not supported"
|
278 | 287 | CString -> text ".section\t__TEXT,__cstring,cstring_literals"
|
288 | + IPE -> text ".const"
|
|
279 | 289 | OtherSection _ -> panic "pprDarwinSectionHeader: unknown section type"
|
280 | 290 | {-# SPECIALIZE pprDarwinSectionHeader :: SectionType -> SDoc #-}
|
281 | 291 | {-# SPECIALIZE pprDarwinSectionHeader :: SectionType -> HLine #-} -- see Note [SPECIALIZE to HDoc] in GHC.Utils.Outputable |
... | ... | @@ -145,7 +145,7 @@ llvmSectionType p t = case t of |
145 | 145 | CString -> case platformOS p of
|
146 | 146 | OSMinGW32 -> fsLit ".rdata$str"
|
147 | 147 | _ -> fsLit ".rodata.str"
|
148 | - |
|
148 | + IPE -> fsLit ".ipe"
|
|
149 | 149 | InitArray -> panic "llvmSectionType: InitArray"
|
150 | 150 | FiniArray -> panic "llvmSectionType: FiniArray"
|
151 | 151 | OtherSection _ -> panic "llvmSectionType: unknown section type"
|
... | ... | @@ -66,6 +66,28 @@ construction, the 'compressed' field of each IPE buffer list node is examined. |
66 | 66 | If the field indicates that the data has been compressed, the entry data and
|
67 | 67 | strings table are decompressed before continuing with the normal IPE map
|
68 | 68 | construction.
|
69 | + |
|
70 | +Note [IPE Stripping and magic words]
|
|
71 | +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
72 | + |
|
73 | +For systems which support ELF executables:
|
|
74 | + |
|
75 | +The metadata part of IPE info is placed into a separate ELF section (.ipe).
|
|
76 | +This can then be stripped afterwards if you don't require the metadata:
|
|
77 | + |
|
78 | +```
|
|
79 | +-- Remove the section
|
|
80 | +objcopy --remove-section .ipe <your-exe>
|
|
81 | +-- Repack and compress the executable
|
|
82 | +upx <your-exe>
|
|
83 | +```
|
|
84 | + |
|
85 | +The .ipe section starts with a magic 64-bit word "IPE\0IPE\0", encoded as ASCII.
|
|
86 | + |
|
87 | +The RTS checks to see if the .ipe section starts with the magic word. If the
|
|
88 | +section has been stripped then it won't start with the magic word and the
|
|
89 | +metadata won't be accessible for the info tables.
|
|
90 | + |
|
69 | 91 | -}
|
70 | 92 | |
71 | 93 | emitIpeBufferListNode ::
|
... | ... | @@ -124,11 +146,21 @@ emitIpeBufferListNode this_mod ents dus0 = do |
124 | 146 | ipe_buffer_lbl :: CLabel
|
125 | 147 | ipe_buffer_lbl = mkIPELabel this_mod
|
126 | 148 | |
149 | + -- A magic word we can use to see if the IPE information has been stripped
|
|
150 | + -- or not
|
|
151 | + -- See Note [IPE Stripping and magic words]
|
|
152 | + -- "IPE\0IPE\0": the ASCII string "IPE" followed by a NUL byte, twice.
|
|
153 | + ipe_header :: CmmStatic
|
|
154 | + ipe_header = CmmStaticLit (CmmInt 0x4950450049504500 W64)
|
|
155 | + |
|
127 | 156 | ipe_buffer_node :: [CmmStatic]
|
128 | 157 | ipe_buffer_node = map CmmStaticLit
|
129 | 158 | [ -- 'next' field
|
130 | 159 | zeroCLit platform
|
131 | 160 | |
161 | + -- 'node_id' field
|
|
162 | + , zeroCLit platform
|
|
163 | + |
|
132 | 164 | -- 'compressed' field
|
133 | 165 | , int do_compress
|
134 | 166 | |
... | ... | @@ -164,13 +196,13 @@ emitIpeBufferListNode this_mod ents dus0 = do |
164 | 196 | |
165 | 197 | -- Emit the strings table
|
166 | 198 | emitDecl $ CmmData
|
167 | - (Section Data strings_lbl)
|
|
168 | - (CmmStaticsRaw strings_lbl strings)
|
|
199 | + (Section IPE strings_lbl)
|
|
200 | + (CmmStaticsRaw strings_lbl (ipe_header : strings))
|
|
169 | 201 | |
170 | 202 | -- Emit the list of IPE buffer entries
|
171 | 203 | emitDecl $ CmmData
|
172 | - (Section Data entries_lbl)
|
|
173 | - (CmmStaticsRaw entries_lbl entries)
|
|
204 | + (Section IPE entries_lbl)
|
|
205 | + (CmmStaticsRaw entries_lbl (ipe_header : entries))
|
|
174 | 206 | |
175 | 207 | -- Emit the IPE buffer list node
|
176 | 208 | emitDecl $ CmmData
|
... | ... | @@ -391,6 +391,17 @@ to a source location. This lookup table is generated by using the ``-finfo-table |
391 | 391 | In a test compiling GHC itself, the size of the :ghc-flag:`-finfo-table-map`
|
392 | 392 | enabled build results was reduced by over 20% when compression was enabled.
|
393 | 393 | |
394 | + The metadata for ``-finfo-table-map`` is stored in the ``.ipe`` section on
|
|
395 | + ELF platforms. The ``.ipe`` section can be removed from the binary after compilation::
|
|
396 | + |
|
397 | + objcopy --remove-section .ipe <binary>
|
|
398 | + upx <binary>
|
|
399 | + |
|
400 | + You can first compile your application with ``-finfo-table-map``, extract
|
|
401 | + the contents of the map (by using the eventlog), strip the ``.ipe`` section
|
|
402 | + and then use the extracted data to interpret a ``-hi`` profile from the stripped
|
|
403 | + binary.
|
|
404 | + |
|
394 | 405 | :since: 9.10
|
395 | 406 | :implies: :ghc-flag:`-finfo-table-map-with-stack`
|
396 | 407 | :implies: :ghc-flag:`-finfo-table-map-with-fallback`
|
... | ... | @@ -213,6 +213,8 @@ With ``-XStrict``:: |
213 | 213 | -- inferred unrestricted
|
214 | 214 | let ~(x, y) = u in …
|
215 | 215 | |
216 | +(See :ref:`strict-bindings`).
|
|
217 | + |
|
216 | 218 | Data types
|
217 | 219 | ----------
|
218 | 220 | By default, all fields in algebraic data types are linear (even if
|
... | ... | @@ -103,6 +103,9 @@ Note the following points: |
103 | 103 | See `GHC Proposal #229 <https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0229-whitespace-bang-patterns.rst>`__
|
104 | 104 | for the precise rules.
|
105 | 105 | |
106 | + |
|
107 | +.. _strict-bindings:
|
|
108 | + |
|
106 | 109 | Strict bindings
|
107 | 110 | ~~~~~~~~~~~~~~~
|
108 | 111 |
... | ... | @@ -62,6 +62,22 @@ entry's containing IpeBufferListNode and its index in that node. |
62 | 62 | When the user looks up an IPE entry, we convert it to the user-facing
|
63 | 63 | InfoProvEnt representation.
|
64 | 64 | |
65 | +Note [Stable identifiers for IPE entries]
|
|
66 | +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
67 | + |
|
68 | +Each IPE entry is given a stable identifier which remains the same across
|
|
69 | +different runs of the executable (unlike the address of the info table).
|
|
70 | + |
|
71 | +The identifier is a 64-bit word which consists of two parts.
|
|
72 | + |
|
73 | +* The high 32-bits are a per-node identifier.
|
|
74 | +* The low 32-bits are the index of the entry in the node.
|
|
75 | + |
|
76 | +When a node is queued in the pending list by `registerInfoProvList` it is
|
|
77 | +given a unique identifier from an incrementing global variable.
|
|
78 | + |
|
79 | +The unique key can be computed by using the `IPE_ENTRY_KEY` macro.
|
|
80 | + |
|
65 | 81 | */
|
66 | 82 | |
67 | 83 | typedef struct {
|
... | ... | @@ -69,6 +85,13 @@ typedef struct { |
69 | 85 | uint32_t idx;
|
70 | 86 | } IpeMapEntry;
|
71 | 87 | |
88 | +// See Note [Stable identifiers for IPE entries]
|
|
89 | +#define IPE_ENTRY_KEY(entry) \
|
|
90 | + MAKE_IPE_KEY((entry).node->node_id, (entry).idx)
|
|
91 | + |
|
92 | +#define MAKE_IPE_KEY(module_id, idx) \
|
|
93 | + ((((uint64_t)(module_id)) << 32) | ((uint64_t)(idx)))
|
|
94 | + |
|
72 | 95 | #if defined(THREADED_RTS)
|
73 | 96 | static Mutex ipeMapLock;
|
74 | 97 | #endif
|
... | ... | @@ -78,9 +101,22 @@ static HashTable *ipeMap = NULL; |
78 | 101 | // Accessed atomically
|
79 | 102 | static IpeBufferListNode *ipeBufferList = NULL;
|
80 | 103 | |
104 | +// A global counter which is used to give an IPE entry a unique value across runs.
|
|
105 | +static StgWord next_module_id = 1; // Start at 1 to reserve 0 as "invalid"
|
|
106 | + |
|
81 | 107 | static void decompressIPEBufferListNodeIfCompressed(IpeBufferListNode*);
|
82 | 108 | static void updateIpeMap(void);
|
83 | 109 | |
110 | +// Check whether the IpeBufferListNode has the relevant magic words.
|
|
111 | +// See Note [IPE Stripping and magic words]
|
|
112 | +static inline bool ipe_node_valid(const IpeBufferListNode *node) {
|
|
113 | + return node &&
|
|
114 | + node->entries_block &&
|
|
115 | + node->string_table_block &&
|
|
116 | + node->entries_block->magic == IPE_MAGIC_WORD &&
|
|
117 | + node->string_table_block->magic == IPE_MAGIC_WORD;
|
|
118 | +}
|
|
119 | + |
|
84 | 120 | #if defined(THREADED_RTS)
|
85 | 121 | |
86 | 122 | void initIpe(void) { initMutex(&ipeMapLock); }
|
... | ... | @@ -99,11 +135,12 @@ static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, uint32_t i |
99 | 135 | {
|
100 | 136 | CHECK(idx < node->count);
|
101 | 137 | CHECK(!node->compressed);
|
102 | - const char *strings = node->string_table;
|
|
103 | - const IpeBufferEntry *ent = &node->entries[idx];
|
|
138 | + const char *strings = node->string_table_block->string_table;
|
|
139 | + const IpeBufferEntry *ent = &node->entries_block->entries[idx];
|
|
104 | 140 | return (InfoProvEnt) {
|
105 | 141 | .info = node->tables[idx],
|
106 | 142 | .prov = {
|
143 | + .info_prov_id = MAKE_IPE_KEY(node->node_id, idx),
|
|
107 | 144 | .table_name = &strings[ent->table_name],
|
108 | 145 | .closure_desc = ent->closure_desc,
|
109 | 146 | .ty_desc = &strings[ent->ty_desc],
|
... | ... | @@ -121,19 +158,23 @@ static InfoProvEnt ipeBufferEntryToIpe(const IpeBufferListNode *node, uint32_t i |
121 | 158 | static void traceIPEFromHashTable(void *data STG_UNUSED, StgWord key STG_UNUSED,
|
122 | 159 | const void *value) {
|
123 | 160 | const IpeMapEntry *map_ent = (const IpeMapEntry *)value;
|
124 | - const InfoProvEnt ipe = ipeBufferEntryToIpe(map_ent->node, map_ent->idx);
|
|
125 | - traceIPE(&ipe);
|
|
161 | + if (ipe_node_valid(map_ent->node)){
|
|
162 | + const InfoProvEnt ipe = ipeBufferEntryToIpe(map_ent->node, map_ent->idx);
|
|
163 | + traceIPE(&ipe);
|
|
164 | + }
|
|
126 | 165 | }
|
127 | 166 | |
128 | 167 | void dumpIPEToEventLog(void) {
|
129 | 168 | // Dump pending entries
|
130 | 169 | IpeBufferListNode *node = RELAXED_LOAD(&ipeBufferList);
|
131 | 170 | while (node != NULL) {
|
132 | - decompressIPEBufferListNodeIfCompressed(node);
|
|
171 | + if (ipe_node_valid(node)){
|
|
172 | + decompressIPEBufferListNodeIfCompressed(node);
|
|
133 | 173 | |
134 | - for (uint32_t i = 0; i < node->count; i++) {
|
|
135 | - const InfoProvEnt ent = ipeBufferEntryToIpe(node, i);
|
|
136 | - traceIPE(&ent);
|
|
174 | + for (uint32_t i = 0; i < node->count; i++) {
|
|
175 | + const InfoProvEnt ent = ipeBufferEntryToIpe(node, i);
|
|
176 | + traceIPE(&ent);
|
|
177 | + }
|
|
137 | 178 | }
|
138 | 179 | node = node->next;
|
139 | 180 | }
|
... | ... | @@ -165,11 +206,30 @@ ipeMapLock; we instead use atomic CAS operations to add to the list. |
165 | 206 | |
166 | 207 | A performance test for IPE registration and lookup can be found here:
|
167 | 208 | https://gitlab.haskell.org/ghc/ghc/-/merge_requests/5724#note_370806
|
209 | + |
|
210 | +Note that IPEs are still registered even if the .ipe section is stripped. That's
|
|
211 | +because you may still want to query what the unique identifier for an info table is
|
|
212 | +so it can be reconciled with previously extracted metadata information. For example,
|
|
213 | +when `-hi` profiling or using `whereFrom`.
|
|
214 | + |
|
168 | 215 | */
|
169 | 216 | void registerInfoProvList(IpeBufferListNode *node) {
|
217 | + |
|
218 | + // Grab a fresh module_id
|
|
219 | + uint32_t module_id;
|
|
220 | + StgWord temp_module_id;
|
|
221 | + while (true) {
|
|
222 | + temp_module_id = next_module_id;
|
|
223 | + if (cas(&next_module_id, temp_module_id, temp_module_id+1) == temp_module_id) {
|
|
224 | + module_id = (uint32_t) temp_module_id;
|
|
225 | + break;
|
|
226 | + }
|
|
227 | + |
|
228 | + }
|
|
170 | 229 | while (true) {
|
171 | 230 | IpeBufferListNode *old = RELAXED_LOAD(&ipeBufferList);
|
172 | 231 | node->next = old;
|
232 | + node->node_id = module_id;
|
|
173 | 233 | if (cas_ptr((volatile void **) &ipeBufferList, old, node) == (void *) old) {
|
174 | 234 | return;
|
175 | 235 | }
|
... | ... | @@ -183,7 +243,7 @@ void formatClosureDescIpe(const InfoProvEnt *ipe_buf, char *str_buf) { |
183 | 243 | bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out) {
|
184 | 244 | updateIpeMap();
|
185 | 245 | IpeMapEntry *map_ent = (IpeMapEntry *) lookupHashTable(ipeMap, (StgWord)info);
|
186 | - if (map_ent) {
|
|
246 | + if (map_ent && ipe_node_valid(map_ent->node)) {
|
|
187 | 247 | *out = ipeBufferEntryToIpe(map_ent->node, map_ent->idx);
|
188 | 248 | return true;
|
189 | 249 | } else {
|
... | ... | @@ -191,6 +251,18 @@ bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out) { |
191 | 251 | }
|
192 | 252 | }
|
193 | 253 | |
254 | +// Returns 0 when the info table is not present in the info table map.
|
|
255 | +// See Note [Stable identifiers for IPE entries]
|
|
256 | +uint64_t lookupIPEId(const StgInfoTable *info) {
|
|
257 | + updateIpeMap();
|
|
258 | + IpeMapEntry *map_ent = (IpeMapEntry *) lookupHashTable(ipeMap, (StgWord)(info));
|
|
259 | + if (map_ent){
|
|
260 | + return IPE_ENTRY_KEY(*map_ent);
|
|
261 | + } else {
|
|
262 | + return 0;
|
|
263 | + }
|
|
264 | +}
|
|
265 | + |
|
194 | 266 | void updateIpeMap(void) {
|
195 | 267 | // Check if there's any work at all. If not so, we can circumvent locking,
|
196 | 268 | // which decreases performance.
|
... | ... | @@ -23,6 +23,7 @@ |
23 | 23 | #include "Printer.h"
|
24 | 24 | #include "Trace.h"
|
25 | 25 | #include "sm/GCThread.h"
|
26 | +#include "IPE.h"
|
|
26 | 27 | |
27 | 28 | #include <fs_rts.h>
|
28 | 29 | #include <string.h>
|
... | ... | @@ -230,9 +231,10 @@ closureIdentity( const StgClosure *p ) |
230 | 231 | return closure_type_names[info->type];
|
231 | 232 | }
|
232 | 233 | }
|
233 | - case HEAP_BY_INFO_TABLE: {
|
|
234 | - return get_itbl(p);
|
|
235 | - }
|
|
234 | + case HEAP_BY_INFO_TABLE:
|
|
235 | + {
|
|
236 | + return (void *) (p->header.info);
|
|
237 | + }
|
|
236 | 238 | |
237 | 239 | default:
|
238 | 240 | barf("closureIdentity");
|
... | ... | @@ -853,6 +855,20 @@ aggregateCensusInfo( void ) |
853 | 855 | }
|
854 | 856 | #endif
|
855 | 857 | |
858 | +static void
|
|
859 | +recordIPEHeapSample(FILE *hp_file, uint64_t table_id, size_t count)
|
|
860 | +{
|
|
861 | + // Print to heap profile file
|
|
862 | + fprintf(hp_file, "0x%" PRIx64, table_id);
|
|
863 | + |
|
864 | + // Create label string for tracing
|
|
865 | + char str[100];
|
|
866 | + sprintf(str, "0x%" PRIx64, table_id);
|
|
867 | + |
|
868 | + // Emit the profiling sample (convert count to bytes)
|
|
869 | + traceHeapProfSampleString(str, count * sizeof(W_));
|
|
870 | +}
|
|
871 | + |
|
856 | 872 | /* -----------------------------------------------------------------------------
|
857 | 873 | * Print out the results of a heap census.
|
858 | 874 | * -------------------------------------------------------------------------- */
|
... | ... | @@ -915,6 +931,11 @@ dumpCensus( Census *census ) |
915 | 931 | }
|
916 | 932 | #endif
|
917 | 933 | |
934 | + // Census entries which we need to group together.
|
|
935 | + // Used by IPE profiling to group together bands which don't have IPE information.
|
|
936 | + // Printing at the end in the 0 band
|
|
937 | + uint64_t uncategorised_count = 0;
|
|
938 | + |
|
918 | 939 | for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
|
919 | 940 | |
920 | 941 | #if defined(PROFILING)
|
... | ... | @@ -945,11 +966,15 @@ dumpCensus( Census *census ) |
945 | 966 | count * sizeof(W_));
|
946 | 967 | break;
|
947 | 968 | case HEAP_BY_INFO_TABLE:
|
948 | - fprintf(hp_file, "%p", ctr->identity);
|
|
949 | - char str[100];
|
|
950 | - sprintf(str, "%p", ctr->identity);
|
|
951 | - traceHeapProfSampleString(str, count * sizeof(W_));
|
|
969 | + {
|
|
970 | + uint64_t table_id = lookupIPEId(ctr->identity);
|
|
971 | + if (! table_id) {
|
|
972 | + uncategorised_count += count;
|
|
973 | + continue;
|
|
974 | + }
|
|
975 | + recordIPEHeapSample(hp_file, table_id, count);
|
|
952 | 976 | break;
|
977 | + }
|
|
953 | 978 | #if defined(PROFILING)
|
954 | 979 | case HEAP_BY_CCS:
|
955 | 980 | fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
|
... | ... | @@ -1002,6 +1027,16 @@ dumpCensus( Census *census ) |
1002 | 1027 | fprintf(hp_file, "\t%" FMT_Word "\n", (W_)count * sizeof(W_));
|
1003 | 1028 | }
|
1004 | 1029 | |
1030 | + // Print the uncategorised data into the 0 band for info table profiling.
|
|
1031 | + switch (RtsFlags.ProfFlags.doHeapProfile) {
|
|
1032 | + case HEAP_BY_INFO_TABLE:
|
|
1033 | + recordIPEHeapSample(hp_file, 0, uncategorised_count);
|
|
1034 | + break;
|
|
1035 | + default:
|
|
1036 | + ASSERT(uncategorised_count == 0);
|
|
1037 | + break;
|
|
1038 | + }
|
|
1039 | + |
|
1005 | 1040 | traceHeapProfSampleEnd(era);
|
1006 | 1041 | printSample(false, census->time);
|
1007 | 1042 |
... | ... | @@ -1480,7 +1480,7 @@ void postIPE(const InfoProvEnt *ipe) |
1480 | 1480 | CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
|
1481 | 1481 | postEventHeader(&eventBuf, EVENT_IPE);
|
1482 | 1482 | postPayloadSize(&eventBuf, len);
|
1483 | - postWord64(&eventBuf, (StgWord) INFO_PTR_TO_STRUCT(ipe->info));
|
|
1483 | + postWord64(&eventBuf, (StgWord) (ipe->prov.info_prov_id));
|
|
1484 | 1484 | postStringLen(&eventBuf, ipe->prov.table_name, table_name_len);
|
1485 | 1485 | postStringLen(&eventBuf, closure_desc_buf, closure_desc_len);
|
1486 | 1486 | postStringLen(&eventBuf, ipe->prov.ty_desc, ty_desc_len);
|
... | ... | @@ -14,6 +14,7 @@ |
14 | 14 | #pragma once
|
15 | 15 | |
16 | 16 | typedef struct InfoProv_ {
|
17 | + uint64_t info_prov_id;
|
|
17 | 18 | const char *table_name;
|
18 | 19 | uint32_t closure_desc; // closure type
|
19 | 20 | const char *ty_desc;
|
... | ... | @@ -63,9 +64,37 @@ typedef struct { |
63 | 64 | |
64 | 65 | GHC_STATIC_ASSERT(sizeof(IpeBufferEntry) % (WORD_SIZE_IN_BITS / 8) == 0, "sizeof(IpeBufferEntry) must be a multiple of the word size");
|
65 | 66 | |
67 | +// The magic word is IPE\0IPE\0, which occupies the full 64 bit width of a word.
|
|
68 | +// See Note [IPE Stripping and magic words]
|
|
69 | +#define IPE_MAGIC_WORD 0x4950450049504500UL
|
|
70 | + |
|
71 | +// Heap profiling currently requires a 32 bit pointer, so for now just truncate
|
|
72 | +// the key to fit. It should still be big enough.
|
|
73 | +#if SIZEOF_VOID_P == 4
|
|
74 | +// On 32-bit systems: keep lower 16 bits of module_id and idx
|
|
75 | +#define IPE_PROF_KEY(key64) \
|
|
76 | + (uint32_t)((((key64) >> 16) & 0xFFFF0000) | ((key64) & 0x0000FFFF))
|
|
77 | +#else
|
|
78 | +// On 64-bit systems: use full key
|
|
79 | +#define IPE_PROF_KEY(key64) (key64)
|
|
80 | +#endif
|
|
81 | + |
|
82 | +typedef struct {
|
|
83 | + StgWord64 magic; // Must be IPE_MAGIC_WORD
|
|
84 | + IpeBufferEntry entries[]; // Flexible array member
|
|
85 | +} IpeBufferEntryBlock;
|
|
86 | + |
|
87 | +typedef struct {
|
|
88 | + StgWord64 magic; // Must be IPE_MAGIC_WORD
|
|
89 | + char string_table[]; // Flexible array member for string table
|
|
90 | +} IpeStringTableBlock;
|
|
91 | + |
|
66 | 92 | typedef struct IpeBufferListNode_ {
|
67 | 93 | struct IpeBufferListNode_ *next;
|
68 | 94 | |
95 | + // This field is filled in when the node is registered.
|
|
96 | + uint32_t node_id;
|
|
97 | + |
|
69 | 98 | // Everything below is read-only and generated by the codegen
|
70 | 99 | |
71 | 100 | // This flag should be treated as a boolean
|
... | ... | @@ -76,10 +105,10 @@ typedef struct IpeBufferListNode_ { |
76 | 105 | // When TNTC is enabled, these will point to the entry code
|
77 | 106 | // not the info table itself.
|
78 | 107 | const StgInfoTable **tables;
|
79 | - IpeBufferEntry *entries;
|
|
108 | + IpeBufferEntryBlock *entries_block;
|
|
80 | 109 | StgWord entries_size; // decompressed size
|
81 | 110 | |
82 | - const char *string_table;
|
|
111 | + const IpeStringTableBlock *string_table_block;
|
|
83 | 112 | StgWord string_table_size; // decompressed size
|
84 | 113 | |
85 | 114 | // Shared by all entries
|
... | ... | @@ -98,6 +127,8 @@ void formatClosureDescIpe(const InfoProvEnt *ipe_buf, char *str_buf); |
98 | 127 | // Returns true on success, initializes `out`.
|
99 | 128 | bool lookupIPE(const StgInfoTable *info, InfoProvEnt *out);
|
100 | 129 | |
130 | +uint64_t lookupIPEId(const StgInfoTable *info);
|
|
131 | + |
|
101 | 132 | #if defined(DEBUG)
|
102 | 133 | void printIPE(const StgInfoTable *info);
|
103 | 134 | #endif |
... | ... | @@ -48,7 +48,8 @@ HaskellObj shouldFindOneIfItHasBeenRegistered(Capability *cap) { |
48 | 48 | // Allocate buffers for IPE buffer list node
|
49 | 49 | IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
|
50 | 50 | node->tables = malloc(sizeof(StgInfoTable *));
|
51 | - node->entries = malloc(sizeof(IpeBufferEntry));
|
|
51 | + node->entries_block = malloc(sizeof(StgWord64) + sizeof(IpeBufferEntry));
|
|
52 | + node->entries_block->magic = IPE_MAGIC_WORD;
|
|
52 | 53 | |
53 | 54 | StringTable st;
|
54 | 55 | init_string_table(&st);
|
... | ... | @@ -61,9 +62,13 @@ HaskellObj shouldFindOneIfItHasBeenRegistered(Capability *cap) { |
61 | 62 | node->compressed = 0;
|
62 | 63 | node->count = 1;
|
63 | 64 | node->tables[0] = get_itbl(fortyTwo);
|
64 | - node->entries[0] = makeAnyProvEntry(cap, &st, 42);
|
|
65 | + node->entries_block->entries[0] = makeAnyProvEntry(cap, &st, 42);
|
|
65 | 66 | node->entries_size = sizeof(IpeBufferEntry);
|
66 | - node->string_table = st.buffer;
|
|
67 | + |
|
68 | + IpeStringTableBlock *string_table_block = malloc(sizeof(StgWord64) + st.size);
|
|
69 | + string_table_block->magic = IPE_MAGIC_WORD;
|
|
70 | + memcpy(string_table_block->string_table, st.buffer, st.size);
|
|
71 | + node->string_table_block = string_table_block;
|
|
67 | 72 | node->string_table_size = st.size;
|
68 | 73 | |
69 | 74 | registerInfoProvList(node);
|
... | ... | @@ -90,7 +95,8 @@ void shouldFindTwoIfTwoHaveBeenRegistered(Capability *cap, |
90 | 95 | // Allocate buffers for IPE buffer list node
|
91 | 96 | IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
|
92 | 97 | node->tables = malloc(sizeof(StgInfoTable *));
|
93 | - node->entries = malloc(sizeof(IpeBufferEntry));
|
|
98 | + node->entries_block = malloc(sizeof(StgWord64) + sizeof(IpeBufferEntry));
|
|
99 | + node->entries_block->magic = IPE_MAGIC_WORD;
|
|
94 | 100 | |
95 | 101 | StringTable st;
|
96 | 102 | init_string_table(&st);
|
... | ... | @@ -103,9 +109,12 @@ void shouldFindTwoIfTwoHaveBeenRegistered(Capability *cap, |
103 | 109 | node->compressed = 0;
|
104 | 110 | node->count = 1;
|
105 | 111 | node->tables[0] = get_itbl(twentyThree);
|
106 | - node->entries[0] = makeAnyProvEntry(cap, &st, 23);
|
|
112 | + node->entries_block->entries[0] = makeAnyProvEntry(cap, &st, 23);
|
|
107 | 113 | node->entries_size = sizeof(IpeBufferEntry);
|
108 | - node->string_table = st.buffer;
|
|
114 | + IpeStringTableBlock *string_table_block = malloc(sizeof(StgWord64) + st.size);
|
|
115 | + string_table_block->magic = IPE_MAGIC_WORD;
|
|
116 | + memcpy(string_table_block->string_table, st.buffer, st.size);
|
|
117 | + node->string_table_block = string_table_block;
|
|
109 | 118 | node->string_table_size = st.size;
|
110 | 119 | |
111 | 120 | registerInfoProvList(node);
|
... | ... | @@ -121,7 +130,8 @@ void shouldFindTwoFromTheSameList(Capability *cap) { |
121 | 130 | // Allocate buffers for IPE buffer list node
|
122 | 131 | IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
|
123 | 132 | node->tables = malloc(sizeof(StgInfoTable *) * 2);
|
124 | - node->entries = malloc(sizeof(IpeBufferEntry) * 2);
|
|
133 | + node->entries_block = malloc(sizeof(StgWord64) + sizeof(IpeBufferEntry) * 2);
|
|
134 | + node->entries_block->magic = IPE_MAGIC_WORD;
|
|
125 | 135 | |
126 | 136 | StringTable st;
|
127 | 137 | init_string_table(&st);
|
... | ... | @@ -133,10 +143,13 @@ void shouldFindTwoFromTheSameList(Capability *cap) { |
133 | 143 | node->count = 2;
|
134 | 144 | node->tables[0] = get_itbl(one);
|
135 | 145 | node->tables[1] = get_itbl(two);
|
136 | - node->entries[0] = makeAnyProvEntry(cap, &st, 1);
|
|
137 | - node->entries[1] = makeAnyProvEntry(cap, &st, 2);
|
|
146 | + node->entries_block->entries[0] = makeAnyProvEntry(cap, &st, 1);
|
|
147 | + node->entries_block->entries[1] = makeAnyProvEntry(cap, &st, 2);
|
|
138 | 148 | node->entries_size = sizeof(IpeBufferEntry) * 2;
|
139 | - node->string_table = st.buffer;
|
|
149 | + IpeStringTableBlock *string_table_block = malloc(sizeof(StgWord64) + st.size);
|
|
150 | + string_table_block->magic = IPE_MAGIC_WORD;
|
|
151 | + memcpy(string_table_block->string_table, st.buffer, st.size);
|
|
152 | + node->string_table_block = string_table_block;
|
|
140 | 153 | node->string_table_size = st.size;
|
141 | 154 | |
142 | 155 | registerInfoProvList(node);
|
... | ... | @@ -152,7 +165,11 @@ void shouldDealWithAnEmptyList(Capability *cap, HaskellObj fortyTwo) { |
152 | 165 | IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
|
153 | 166 | node->count = 0;
|
154 | 167 | node->next = NULL;
|
155 | - node->string_table = "";
|
|
168 | + IpeStringTableBlock *string_table_block = malloc(sizeof(StgWord64));
|
|
169 | + string_table_block->magic = IPE_MAGIC_WORD;
|
|
170 | + |
|
171 | + node->entries_block = malloc(sizeof(StgWord64));
|
|
172 | + node->entries_block->magic = IPE_MAGIC_WORD;
|
|
156 | 173 | |
157 | 174 | registerInfoProvList(node);
|
158 | 175 |
... | ... | @@ -64,7 +64,8 @@ IpeBufferListNode *makeAnyProvEntries(Capability *cap, int start, int end) { |
64 | 64 | // Allocate buffers for IpeBufferListNode
|
65 | 65 | IpeBufferListNode *node = malloc(sizeof(IpeBufferListNode));
|
66 | 66 | node->tables = malloc(sizeof(StgInfoTable *) * n);
|
67 | - node->entries = malloc(sizeof(IpeBufferEntry) * n);
|
|
67 | + node->entries_block = malloc(sizeof(StgWord64) + sizeof(IpeBufferEntry) * n);
|
|
68 | + node->entries_block->magic = IPE_MAGIC_WORD;
|
|
68 | 69 | |
69 | 70 | StringTable st;
|
70 | 71 | init_string_table(&st);
|
... | ... | @@ -83,14 +84,19 @@ IpeBufferListNode *makeAnyProvEntries(Capability *cap, int start, int end) { |
83 | 84 | for (int i=start; i < end; i++) {
|
84 | 85 | HaskellObj closure = rts_mkInt(cap, 42);
|
85 | 86 | node->tables[i] = get_itbl(closure);
|
86 | - node->entries[i] = makeAnyProvEntry(cap, &st, i);
|
|
87 | + node->entries_block->entries[i] = makeAnyProvEntry(cap, &st, i);
|
|
87 | 88 | }
|
88 | 89 | |
89 | 90 | // Set the rest of the fields
|
90 | 91 | node->next = NULL;
|
91 | 92 | node->compressed = 0;
|
92 | 93 | node->count = n;
|
93 | - node->string_table = st.buffer;
|
|
94 | + |
|
95 | + IpeStringTableBlock *string_table_block =
|
|
96 | + malloc(sizeof(StgWord64) + st.size);
|
|
97 | + string_table_block->magic = IPE_MAGIC_WORD;
|
|
98 | + memcpy(string_table_block->string_table, st.buffer, st.size);
|
|
99 | + node->string_table_block = string_table_block;
|
|
94 | 100 | |
95 | 101 | return node;
|
96 | 102 | } |