Ben Gamari pushed to branch wip/T26009 at Glasgow Haskell Compiler / GHC
Commits:
-
89031e59
by Ben Gamari at 2025-05-19T13:15:37-04:00
-
3b644180
by Ben Gamari at 2025-05-19T13:51:31-04:00
-
525894ab
by Ben Gamari at 2025-05-19T14:29:33-04:00
-
b5fc6607
by Ben Gamari at 2025-05-19T15:08:38-04:00
-
ff9b8733
by Ben Gamari at 2025-05-19T15:27:52-04:00
-
295ed26f
by Ben Gamari at 2025-05-19T15:28:36-04:00
7 changed files:
- rts/Hash.c
- rts/Hash.h
- rts/linker/PEi386.c
- rts/linker/ProddableBlocks.c
- rts/linker/ProddableBlocks.h
- + testsuite/tests/rts/TestProddableBlockSet.c
- testsuite/tests/rts/all.T
Changes:
... | ... | @@ -94,7 +94,7 @@ hashWord(const HashTable *table, StgWord key) |
94 | 94 | }
|
95 | 95 | |
96 | 96 | int
|
97 | -hashStr(const HashTable *table, StgWord w)
|
|
97 | +hashBuffer(const void *buf, size_t len)
|
|
98 | 98 | {
|
99 | 99 | const char *key = (char*) w;
|
100 | 100 | #if WORD_SIZE_IN_BITS == 64
|
... | ... | @@ -114,6 +114,12 @@ hashStr(const HashTable *table, StgWord w) |
114 | 114 | return bucket;
|
115 | 115 | }
|
116 | 116 | |
117 | +int
|
|
118 | +hashStr(const HashTable *table, StgWord w)
|
|
119 | +{
|
|
120 | + return hashBuffer(key, strlen(key));
|
|
121 | +}
|
|
122 | + |
|
117 | 123 | STATIC_INLINE int
|
118 | 124 | compareWord(StgWord key1, StgWord key2)
|
119 | 125 | {
|
... | ... | @@ -69,6 +69,10 @@ void * removeStrHashTable ( StrHashTable *table, const char * key, |
69 | 69 | */
|
70 | 70 | typedef int HashFunction(const HashTable *table, StgWord key);
|
71 | 71 | typedef int CompareFunction(StgWord key1, StgWord key2);
|
72 | + |
|
73 | +// Helper for implementing hash functions
|
|
74 | +int hashBuffer(const void *buf, size_t len);
|
|
75 | + |
|
72 | 76 | int hashWord(const HashTable *table, StgWord key);
|
73 | 77 | int hashStr(const HashTable *table, StgWord w);
|
74 | 78 | void insertHashTable_ ( HashTable *table, StgWord key,
|
... | ... | @@ -79,6 +83,7 @@ void * removeHashTable_ ( HashTable *table, StgWord key, |
79 | 83 | const void *data, HashFunction f,
|
80 | 84 | CompareFunction cmp );
|
81 | 85 | |
86 | + |
|
82 | 87 | /* Freeing hash tables
|
83 | 88 | */
|
84 | 89 | void freeHashTable ( HashTable *table, void (*freeDataFun)(void *) );
|
... | ... | @@ -427,8 +427,53 @@ const int default_alignment = 8; |
427 | 427 | the pointer as a redirect. Essentially it's a DATA DLL reference. */
|
428 | 428 | const void* __rts_iob_func = (void*)&__acrt_iob_func;
|
429 | 429 | |
430 | +/*
|
|
431 | + * Note [Avoiding repeated DLL loading]
|
|
432 | + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
433 | + * As LoadLibraryEx tends to be expensive and addDLL_PEi386 is called on every
|
|
434 | + * DLL-imported symbol, we use a hash-set to keep track of which DLLs have
|
|
435 | + * already been loaded. This hash-set is keyed on the dll_name passed to
|
|
436 | + * addDLL_PEi386 and serves as a quick check to avoid repeated calls to
|
|
437 | + * LoadLibraryEx for the identical DLL. See #26009.
|
|
438 | + */
|
|
439 | + |
|
440 | +typedef struct {
|
|
441 | + HashTable *hash;
|
|
442 | +} LoadedDllSet;
|
|
443 | + |
|
444 | +LoadedDllSet loaded_dll_set;
|
|
445 | + |
|
446 | +void initLoadedDllSet(LoadedDllSet *set) {
|
|
447 | + set->hash = allocHashTable();
|
|
448 | +}
|
|
449 | + |
|
450 | +int hash_path(const HashTable *table, StgWord key)
|
|
451 | +{
|
|
452 | + const pathchar *key = (pathchar*) w;
|
|
453 | + return hashBuffer(key, sizeof(pathchar) * wcslen(key));
|
|
454 | +}
|
|
455 | + |
|
456 | +int compare_path(StgWord key1, StgWord key2)
|
|
457 | +{
|
|
458 | + return wscmp((pathchar*) key1, (pathchar*) key2);
|
|
459 | +}
|
|
460 | + |
|
461 | +void addLoadedDll(LoadedDllSet *set, pathchar *dll_name)
|
|
462 | +{
|
|
463 | + insertHashTable_(set->hash, (StgWord) dll_name, (void*) 1, hash_path);
|
|
464 | +}
|
|
465 | + |
|
466 | +bool isDllLoaded(LoadedDllSet *set, pathchar *dll_name)
|
|
467 | +{
|
|
468 | + void * result = lookupHashTable_(set->hash, (StgWord) dll_name, hash_path, compare_path);
|
|
469 | + return result != NULL;
|
|
470 | +}
|
|
471 | + |
|
472 | + |
|
430 | 473 | void initLinker_PEi386(void)
|
431 | 474 | {
|
475 | + initLoadedDllSet(&loaded_dll_set);
|
|
476 | + |
|
432 | 477 | if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
|
433 | 478 | symhash, "__image_base__",
|
434 | 479 | GetModuleHandleW (NULL), HS_BOOL_TRUE,
|
... | ... | @@ -440,10 +485,11 @@ void initLinker_PEi386(void) |
440 | 485 | addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
|
441 | 486 | #endif
|
442 | 487 | |
443 | - /* Register the cleanup routine as an exit handler, this gives other exit handlers
|
|
444 | - a chance to run which may need linker information. Exit handlers are ran in
|
|
445 | - reverse registration order so this needs to be before the linker loads anything. */
|
|
446 | - atexit (exitLinker_PEi386);
|
|
488 | + /* Register the cleanup routine as an exit handler, this gives other exit handlers
|
|
489 | + * a chance to run which may need linker information. Exit handlers are ran in
|
|
490 | + * reverse registration order so this needs to be before the linker loads anything.
|
|
491 | + */
|
|
492 | + atexit (exitLinker_PEi386);
|
|
447 | 493 | }
|
448 | 494 | |
449 | 495 | void exitLinker_PEi386(void)
|
... | ... | @@ -798,12 +844,12 @@ uint8_t* getSymShortName ( COFF_HEADER_INFO *info, COFF_symbol* sym ) |
798 | 844 | const char *
|
799 | 845 | addDLL_PEi386( pathchar *dll_name, HINSTANCE *loaded )
|
800 | 846 | {
|
801 | - /* ------------------- Win32 DLL loader ------------------- */
|
|
802 | - |
|
803 | - pathchar* buf;
|
|
804 | - HINSTANCE instance;
|
|
847 | + /* ------------------- Win32 DLL loader ------------------- */
|
|
848 | + IF_DEBUG(linker, debugBelch("addDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
|
|
805 | 849 | |
806 | - IF_DEBUG(linker, debugBelch("addDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
|
|
850 | + if (isDllLoaded(loaded_dll_set)) {
|
|
851 | + return NULL;
|
|
852 | + }
|
|
807 | 853 | |
808 | 854 | /* The file name has no suffix (yet) so that we can try
|
809 | 855 | both foo.dll and foo.drv
|
... | ... | @@ -816,35 +862,23 @@ addDLL_PEi386( pathchar *dll_name, HINSTANCE *loaded ) |
816 | 862 | extension. */
|
817 | 863 | |
818 | 864 | size_t bufsize = pathlen(dll_name) + 10;
|
819 | - buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
|
|
865 | + pathchar *buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
|
|
820 | 866 | |
821 | 867 | /* These are ordered by probability of success and order we'd like them. */
|
822 | 868 | const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" };
|
823 | 869 | const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };
|
824 | 870 | |
825 | - int cFormat, cFlag;
|
|
826 | - int flags_start = 1; /* Assume we don't support the new API. */
|
|
827 | - |
|
828 | - /* Detect if newer API are available, if not, skip the first flags entry. */
|
|
829 | - if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
|
|
830 | - flags_start = 0;
|
|
831 | - }
|
|
832 | - |
|
833 | 871 | /* Iterate through the possible flags and formats. */
|
834 | - for (cFlag = flags_start; cFlag < 2; cFlag++)
|
|
835 | - {
|
|
836 | - for (cFormat = 0; cFormat < 4; cFormat++)
|
|
837 | - {
|
|
872 | + HINSTANCE instance;
|
|
873 | + for (int cFlag = 0; cFlag < 2; cFlag++) {
|
|
874 | + for (int cFormat = 0; cFormat < 4; cFormat++) {
|
|
838 | 875 | snwprintf(buf, bufsize, formats[cFormat], dll_name);
|
839 | 876 | instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
|
840 | 877 | if (instance == NULL) {
|
841 | - if (GetLastError() != ERROR_MOD_NOT_FOUND)
|
|
842 | - {
|
|
878 | + if (GetLastError() != ERROR_MOD_NOT_FOUND) {
|
|
843 | 879 | goto error;
|
844 | 880 | }
|
845 | - }
|
|
846 | - else
|
|
847 | - {
|
|
881 | + } else {
|
|
848 | 882 | break; /* We're done. DLL has been loaded. */
|
849 | 883 | }
|
850 | 884 | }
|
... | ... | @@ -855,6 +889,7 @@ addDLL_PEi386( pathchar *dll_name, HINSTANCE *loaded ) |
855 | 889 | goto error;
|
856 | 890 | }
|
857 | 891 | |
892 | + addLoadedDll(&loaded_dll_set, dll_name);
|
|
858 | 893 | addDLLHandle(buf, instance);
|
859 | 894 | if (loaded) {
|
860 | 895 | *loaded = instance;
|
... | ... | @@ -6,65 +6,125 @@ |
6 | 6 | *
|
7 | 7 | * ---------------------------------------------------------------------------*/
|
8 | 8 | |
9 | + |
|
10 | +/*
|
|
11 | + * Sanity checking. For each ObjectCode, maintain a list of address ranges
|
|
12 | + * which may be prodded during relocation, and abort if we try and write
|
|
13 | + * outside any of these.
|
|
14 | + */
|
|
15 | + |
|
9 | 16 | #include "Rts.h"
|
10 | 17 | #include "RtsUtils.h"
|
11 | 18 | #include "linker/ProddableBlocks.h"
|
12 | 19 | |
13 | -struct _ProddableBlock {
|
|
14 | - void* start;
|
|
15 | - int size;
|
|
16 | - struct _ProddableBlock* next;
|
|
17 | -};
|
|
20 | +#include <stdlib.h>
|
|
21 | +#include <string.h>
|
|
18 | 22 | |
19 | -typedef struct _ProddableBlock ProddableBlock;
|
|
23 | +typedef struct _ProddableBlock {
|
|
24 | + uintptr_t start; // inclusive
|
|
25 | + uintptr_t end; // inclusive
|
|
26 | +} ProddableBlock;
|
|
20 | 27 | |
21 | 28 | void
|
22 | 29 | initProddableBlockSet ( ProddableBlockSet* set )
|
23 | 30 | {
|
24 | - set->head = NULL;
|
|
31 | + set->data = NULL;
|
|
32 | + set->capacity = 0;
|
|
33 | + set->size = 0;
|
|
25 | 34 | }
|
26 | 35 | |
27 | -/* -----------------------------------------------------------------------------
|
|
28 | - * Sanity checking. For each ObjectCode, maintain a list of address ranges
|
|
29 | - * which may be prodded during relocation, and abort if we try and write
|
|
30 | - * outside any of these.
|
|
31 | - */
|
|
32 | 36 | void
|
33 | -addProddableBlock ( ProddableBlockSet* set, void* start, int size )
|
|
37 | +freeProddableBlocks (ProddableBlockSet *set)
|
|
38 | +{
|
|
39 | + stgFree(set->data);
|
|
40 | + set->data = NULL;
|
|
41 | + set->size = 0;
|
|
42 | + set->capacity = 0;
|
|
43 | +}
|
|
44 | + |
|
45 | +// Binary search for the first interval with start >= value. Returns index or
|
|
46 | +// size if none.
|
|
47 | +static size_t
|
|
48 | +findLower(const ProddableBlockSet *set, uintptr_t value)
|
|
34 | 49 | {
|
35 | - ProddableBlock* pb = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
|
|
36 | - |
|
37 | - IF_DEBUG(linker, debugBelch("addProddableBlock: %p %d\n", start, size));
|
|
38 | - ASSERT(size > 0);
|
|
39 | - pb->start = start;
|
|
40 | - pb->size = size;
|
|
41 | - pb->next = set->head;
|
|
42 | - set->head = pb;
|
|
50 | + size_t l = 0;
|
|
51 | + size_t r = set->size;
|
|
52 | + while (l < r) {
|
|
53 | + size_t mid = l + (r - l) / 2;
|
|
54 | + if (set->data[mid].start < value) {
|
|
55 | + l = mid + 1;
|
|
56 | + } else {
|
|
57 | + r = mid;
|
|
58 | + }
|
|
59 | + }
|
|
60 | + return l;
|
|
61 | +}
|
|
62 | + |
|
63 | +// Check whether a given value is a member of the set.
|
|
64 | +bool
|
|
65 | +containsSpan ( const ProddableBlockSet *set, uintptr_t start, uintptr_t end )
|
|
66 | +{
|
|
67 | + size_t i = findLower(set, start+1);
|
|
68 | + return i > 0
|
|
69 | + && set->data[i-1].start <= start
|
|
70 | + && end <= set->data[i-1].end;
|
|
43 | 71 | }
|
44 | 72 | |
45 | 73 | void
|
46 | -checkProddableBlock (ProddableBlockSet *set, void *addr, size_t size )
|
|
74 | +checkProddableBlock (const ProddableBlockSet *set, void *addr, size_t size )
|
|
47 | 75 | {
|
48 | - ProddableBlock* pb;
|
|
49 | - |
|
50 | - for (pb = set->head; pb != NULL; pb = pb->next) {
|
|
51 | - char* s = (char*)(pb->start);
|
|
52 | - char* e = s + pb->size;
|
|
53 | - char* a = (char*)addr;
|
|
54 | - if (a >= s && (a+size) <= e)
|
|
55 | - return;
|
|
76 | + if (! containsSpan(set, (uintptr_t) addr, (uintptr_t) addr+size)) {
|
|
77 | + barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
|
|
56 | 78 | }
|
57 | - barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
|
|
58 | 79 | }
|
59 | 80 | |
60 | -void freeProddableBlocks (ProddableBlockSet *set)
|
|
81 | +// Ensure capacity for at least new_capacity intervals
|
|
82 | +static void
|
|
83 | +ensureCapacity(ProddableBlockSet *set, size_t new_capacity) {
|
|
84 | + if (new_capacity > set->capacity) {
|
|
85 | + size_t cap = set->capacity ? set->capacity * 2 : 4;
|
|
86 | + if (cap < new_capacity) {
|
|
87 | + cap = new_capacity;
|
|
88 | + }
|
|
89 | + ProddableBlock *tmp = stgReallocBytes(set->data, cap * sizeof(ProddableBlock), "addProddableBlock");
|
|
90 | + set->data = tmp;
|
|
91 | + set->capacity = cap;
|
|
92 | + }
|
|
93 | +}
|
|
94 | + |
|
95 | +void
|
|
96 | +addProddableBlock ( ProddableBlockSet* set, void* start_ptr, size_t size )
|
|
61 | 97 | {
|
62 | - ProddableBlock *pb, *next;
|
|
98 | + const uintptr_t start = (uintptr_t) start_ptr;
|
|
99 | + const uintptr_t end = (uintptr_t) start + size;
|
|
100 | + size_t i = findLower(set, start);
|
|
101 | + |
|
102 | + // check previous interval if it is overlapping or adjacent
|
|
103 | + if (i > 0 && start <= set->data[i-1].end + 1) {
|
|
104 | + // merge with left interval
|
|
105 | + i--;
|
|
106 | + if (end > set->data[i].end) {
|
|
107 | + set->data[i].end = end;
|
|
108 | + }
|
|
109 | + } else {
|
|
110 | + // insert new interval
|
|
111 | + ensureCapacity(set, set->size + 1);
|
|
112 | + memmove(&set->data[i+1], &set->data[i], sizeof(ProddableBlock) * (set->size - i));
|
|
113 | + set->data[i].start = start;
|
|
114 | + set->data[i].end = end;
|
|
115 | + set->size++;
|
|
116 | + }
|
|
117 | + |
|
118 | + // coalesce overlaps on right
|
|
119 | + size_t j = i;
|
|
120 | + while (j < set->size && set->data[j].start <= set->data[i].end + 1) {
|
|
121 | + set->data[i].end = set->data[j].end;
|
|
122 | + j++;
|
|
123 | + }
|
|
63 | 124 | |
64 | - for (pb = set->head; pb != NULL; pb = next) {
|
|
65 | - next = pb->next;
|
|
66 | - stgFree(pb);
|
|
125 | + if (j != i) {
|
|
126 | + memmove(&set->data[i+1], &set->data[j], sizeof(ProddableBlock) * (set->size - j));
|
|
127 | + set->size -= j - i - 1;
|
|
67 | 128 | }
|
68 | - set->head = NULL;
|
|
69 | 129 | }
|
70 | 130 |
... | ... | @@ -8,20 +8,31 @@ |
8 | 8 | |
9 | 9 | #pragma once
|
10 | 10 | |
11 | +#include <stdbool.h>
|
|
12 | +#include <stddef.h>
|
|
13 | +#include <stdint.h>
|
|
14 | + |
|
11 | 15 | // An interval set on uintptr_t.
|
12 | 16 | struct _ProddableBlock;
|
13 | 17 | |
14 | 18 | typedef struct {
|
15 | - struct _ProddableBlock *head;
|
|
19 | + size_t size;
|
|
20 | + size_t capacity;
|
|
21 | + // sorted list of disjoint (start,end) pairs
|
|
22 | + struct _ProddableBlock *data;
|
|
16 | 23 | } ProddableBlockSet;
|
17 | 24 | |
18 | 25 | void initProddableBlockSet ( ProddableBlockSet* set );
|
19 | 26 | |
20 | 27 | // Insert an interval.
|
21 | -void addProddableBlock ( ProddableBlockSet* set, void* start, int size );
|
|
28 | +void addProddableBlock ( ProddableBlockSet* set, void* start, size_t size );
|
|
22 | 29 | |
23 | 30 | // Check that an address belongs to the set.
|
24 | -void checkProddableBlock (ProddableBlockSet *set, void *addr, size_t size );
|
|
31 | +void checkProddableBlock (const ProddableBlockSet *set, void *addr, size_t size );
|
|
32 | + |
|
25 | 33 | |
26 | 34 | // Free a set.
|
27 | 35 | void freeProddableBlocks (ProddableBlockSet *set);
|
36 | + |
|
37 | +// For testing.
|
|
38 | +bool containsSpan ( const ProddableBlockSet *set, uintptr_t start, uintptr_t end ); |
1 | +#include <assert.h>
|
|
2 | +#include <stdbool.h>
|
|
3 | +#include <stdint.h>
|
|
4 | +#include <stddef.h>
|
|
5 | + |
|
6 | +// Excerpted from ProddableBlocks.h
|
|
7 | +typedef struct {
|
|
8 | + size_t size;
|
|
9 | + size_t capacity;
|
|
10 | + // sorted list of disjoint (start,end) pairs
|
|
11 | + struct _ProddableBlock *data;
|
|
12 | +} ProddableBlockSet;
|
|
13 | + |
|
14 | +void initProddableBlockSet ( ProddableBlockSet* set );
|
|
15 | +void addProddableBlock ( ProddableBlockSet* set, void* start, size_t size );
|
|
16 | +bool containsSpan ( const ProddableBlockSet *set, uintptr_t start, uintptr_t end );
|
|
17 | + |
|
18 | +int main () {
|
|
19 | + ProddableBlockSet set;
|
|
20 | + initProddableBlockSet(&set);
|
|
21 | + addProddableBlock(&set, (void*) 0x20, 0x10);
|
|
22 | + addProddableBlock(&set, (void*) 0x30, 0x10);
|
|
23 | + addProddableBlock(&set, (void*) 0x100, 0x10);
|
|
24 | + |
|
25 | + assert( containsSpan(&set, 0x20, 0x30));
|
|
26 | + assert( containsSpan(&set, 0x30, 0x29));
|
|
27 | + assert(!containsSpan(&set, 0x30, 0x49));
|
|
28 | + assert(!containsSpan(&set, 0x60, 0x70));
|
|
29 | + assert(!containsSpan(&set, 0x90, 0x110));
|
|
30 | + assert( containsSpan(&set, 0x100, 0x101));
|
|
31 | + return 0;
|
|
32 | +}
|
|
33 | + |
... | ... | @@ -641,3 +641,5 @@ test('T25280', [unless(opsys('linux'),skip),req_process,js_skip], compile_and_ru |
641 | 641 | # N.B. This will likely issue a warning on stderr but we merely care that the
|
642 | 642 | # program doesn't crash.
|
643 | 643 | test('T25560', [req_c_rts, ignore_stderr], compile_and_run, [''])
|
644 | + |
|
645 | +test('TestProddableBlockSet', [req_c_rts], multimod_compile_and_run, ['TestProddableBlockSet.c', '-no-hs-main']) |