Ben Gamari pushed to branch wip/T26009 at Glasgow Haskell Compiler / GHC

Commits:

7 changed files:

Changes:

  • rts/Hash.c
    ... ... @@ -94,7 +94,7 @@ hashWord(const HashTable *table, StgWord key)
    94 94
     }
    
    95 95
     
    
    96 96
     int
    
    97
    -hashStr(const HashTable *table, StgWord w)
    
    97
    +hashBuffer(const void *buf, size_t len)
    
    98 98
     {
    
    99 99
         const char *key = (char*) w;
    
    100 100
     #if WORD_SIZE_IN_BITS == 64
    
    ... ... @@ -114,6 +114,12 @@ hashStr(const HashTable *table, StgWord w)
    114 114
         return bucket;
    
    115 115
     }
    
    116 116
     
    
    117
    +int
    
    118
    +hashStr(const HashTable *table, StgWord w)
    
    119
    +{
    
    120
    +    return hashBuffer(key, strlen(key));
    
    121
    +}
    
    122
    +
    
    117 123
     STATIC_INLINE int
    
    118 124
     compareWord(StgWord key1, StgWord key2)
    
    119 125
     {
    

  • rts/Hash.h
    ... ... @@ -69,6 +69,10 @@ void * removeStrHashTable ( StrHashTable *table, const char * key,
    69 69
      */
    
    70 70
     typedef int HashFunction(const HashTable *table, StgWord key);
    
    71 71
     typedef int CompareFunction(StgWord key1, StgWord key2);
    
    72
    +
    
    73
    +// Helper for implementing hash functions
    
    74
    +int hashBuffer(const void *buf, size_t len);
    
    75
    +
    
    72 76
     int hashWord(const HashTable *table, StgWord key);
    
    73 77
     int hashStr(const HashTable *table, StgWord w);
    
    74 78
     void        insertHashTable_ ( HashTable *table, StgWord key,
    
    ... ... @@ -79,6 +83,7 @@ void * removeHashTable_ ( HashTable *table, StgWord key,
    79 83
                                    const void *data, HashFunction f,
    
    80 84
                                    CompareFunction cmp );
    
    81 85
     
    
    86
    +
    
    82 87
     /* Freeing hash tables
    
    83 88
      */
    
    84 89
     void freeHashTable ( HashTable *table, void (*freeDataFun)(void *) );
    

  • rts/linker/PEi386.c
    ... ... @@ -427,8 +427,53 @@ const int default_alignment = 8;
    427 427
        the pointer as a redirect.  Essentially it's a DATA DLL reference.  */
    
    428 428
     const void* __rts_iob_func = (void*)&__acrt_iob_func;
    
    429 429
     
    
    430
    +/*
    
    431
    + * Note [Avoiding repeated DLL loading]
    
    432
    + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    433
    + * As LoadLibraryEx tends to be expensive and addDLL_PEi386 is called on every
    
    434
    + * DLL-imported symbol, we use a hash-set to keep track of which DLLs have
    
    435
    + * already been loaded. This hash-set is keyed on the dll_name passed to
    
    436
    + * addDLL_PEi386 and serves as a quick check to avoid repeated calls to
    
    437
    + * LoadLibraryEx for the identical DLL. See #26009.
    
    438
    + */
    
    439
    +
    
    440
    +typedef struct {
    
    441
    +    HashTable *hash;
    
    442
    +} LoadedDllSet;
    
    443
    +
    
    444
    +LoadedDllSet loaded_dll_set;
    
    445
    +
    
    446
    +void initLoadedDllSet(LoadedDllSet *set) {
    
    447
    +    set->hash = allocHashTable();
    
    448
    +}
    
    449
    +
    
    450
    +int hash_path(const HashTable *table, StgWord key)
    
    451
    +{
    
    452
    +    const pathchar *key = (pathchar*) w;
    
    453
    +    return hashBuffer(key, sizeof(pathchar) * wcslen(key));
    
    454
    +}
    
    455
    +
    
    456
    +int compare_path(StgWord key1, StgWord key2)
    
    457
    +{
    
    458
    +    return wscmp((pathchar*) key1, (pathchar*) key2);
    
    459
    +}
    
    460
    +
    
    461
    +void addLoadedDll(LoadedDllSet *set, pathchar *dll_name)
    
    462
    +{
    
    463
    +    insertHashTable_(set->hash, (StgWord) dll_name, (void*) 1, hash_path);
    
    464
    +}
    
    465
    +
    
    466
    +bool isDllLoaded(LoadedDllSet *set, pathchar *dll_name)
    
    467
    +{
    
    468
    +    void * result = lookupHashTable_(set->hash, (StgWord) dll_name, hash_path, compare_path);
    
    469
    +    return result != NULL;
    
    470
    +}
    
    471
    +
    
    472
    +
    
    430 473
     void initLinker_PEi386(void)
    
    431 474
     {
    
    475
    +    initLoadedDllSet(&loaded_dll_set);
    
    476
    +
    
    432 477
         if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
    
    433 478
                                    symhash, "__image_base__",
    
    434 479
                                    GetModuleHandleW (NULL), HS_BOOL_TRUE,
    
    ... ... @@ -440,10 +485,11 @@ void initLinker_PEi386(void)
    440 485
         addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
    
    441 486
     #endif
    
    442 487
     
    
    443
    -  /* Register the cleanup routine as an exit handler,  this gives other exit handlers
    
    444
    -     a chance to run which may need linker information.  Exit handlers are ran in
    
    445
    -     reverse registration order so this needs to be before the linker loads anything.  */
    
    446
    -  atexit (exitLinker_PEi386);
    
    488
    +    /* Register the cleanup routine as an exit handler,  this gives other exit handlers
    
    489
    +     * a chance to run which may need linker information.  Exit handlers are ran in
    
    490
    +     * reverse registration order so this needs to be before the linker loads anything.
    
    491
    +     */
    
    492
    +    atexit (exitLinker_PEi386);
    
    447 493
     }
    
    448 494
     
    
    449 495
     void exitLinker_PEi386(void)
    
    ... ... @@ -798,12 +844,12 @@ uint8_t* getSymShortName ( COFF_HEADER_INFO *info, COFF_symbol* sym )
    798 844
     const char *
    
    799 845
     addDLL_PEi386( pathchar *dll_name, HINSTANCE *loaded )
    
    800 846
     {
    
    801
    -   /* ------------------- Win32 DLL loader ------------------- */
    
    802
    -
    
    803
    -   pathchar*  buf;
    
    804
    -   HINSTANCE  instance;
    
    847
    +    /* ------------------- Win32 DLL loader ------------------- */
    
    848
    +    IF_DEBUG(linker, debugBelch("addDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
    
    805 849
     
    
    806
    -   IF_DEBUG(linker, debugBelch("addDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
    
    850
    +    if (isDllLoaded(loaded_dll_set)) {
    
    851
    +        return NULL;
    
    852
    +    }
    
    807 853
     
    
    808 854
         /* The file name has no suffix (yet) so that we can try
    
    809 855
            both foo.dll and foo.drv
    
    ... ... @@ -816,35 +862,23 @@ addDLL_PEi386( pathchar *dll_name, HINSTANCE *loaded )
    816 862
             extension. */
    
    817 863
     
    
    818 864
         size_t bufsize = pathlen(dll_name) + 10;
    
    819
    -    buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
    
    865
    +    pathchar *buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
    
    820 866
     
    
    821 867
         /* These are ordered by probability of success and order we'd like them.  */
    
    822 868
         const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" };
    
    823 869
         const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };
    
    824 870
     
    
    825
    -    int cFormat, cFlag;
    
    826
    -    int flags_start = 1; /* Assume we don't support the new API.  */
    
    827
    -
    
    828
    -    /* Detect if newer API are available, if not, skip the first flags entry.  */
    
    829
    -    if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
    
    830
    -        flags_start = 0;
    
    831
    -    }
    
    832
    -
    
    833 871
         /* Iterate through the possible flags and formats.  */
    
    834
    -    for (cFlag = flags_start; cFlag < 2; cFlag++)
    
    835
    -    {
    
    836
    -        for (cFormat = 0; cFormat < 4; cFormat++)
    
    837
    -        {
    
    872
    +    HINSTANCE instance;
    
    873
    +    for (int cFlag = 0; cFlag < 2; cFlag++) {
    
    874
    +        for (int cFormat = 0; cFormat < 4; cFormat++) {
    
    838 875
                 snwprintf(buf, bufsize, formats[cFormat], dll_name);
    
    839 876
                 instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
    
    840 877
                 if (instance == NULL) {
    
    841
    -                if (GetLastError() != ERROR_MOD_NOT_FOUND)
    
    842
    -                {
    
    878
    +                if (GetLastError() != ERROR_MOD_NOT_FOUND) {
    
    843 879
                         goto error;
    
    844 880
                     }
    
    845
    -            }
    
    846
    -            else
    
    847
    -            {
    
    881
    +            } else {
    
    848 882
                     break; /* We're done. DLL has been loaded.  */
    
    849 883
                 }
    
    850 884
             }
    
    ... ... @@ -855,6 +889,7 @@ addDLL_PEi386( pathchar *dll_name, HINSTANCE *loaded )
    855 889
             goto error;
    
    856 890
         }
    
    857 891
     
    
    892
    +    addLoadedDll(&loaded_dll_set, dll_name);
    
    858 893
         addDLLHandle(buf, instance);
    
    859 894
         if (loaded) {
    
    860 895
             *loaded = instance;
    

  • rts/linker/ProddableBlocks.c
    ... ... @@ -6,65 +6,125 @@
    6 6
      *
    
    7 7
      * ---------------------------------------------------------------------------*/
    
    8 8
     
    
    9
    +
    
    10
    +/*
    
    11
    + * Sanity checking.  For each ObjectCode, maintain a list of address ranges
    
    12
    + * which may be prodded during relocation, and abort if we try and write
    
    13
    + * outside any of these.
    
    14
    + */
    
    15
    +
    
    9 16
     #include "Rts.h"
    
    10 17
     #include "RtsUtils.h"
    
    11 18
     #include "linker/ProddableBlocks.h"
    
    12 19
     
    
    13
    -struct _ProddableBlock {
    
    14
    -  void* start;
    
    15
    -  int   size;
    
    16
    -  struct _ProddableBlock* next;
    
    17
    -};
    
    20
    +#include <stdlib.h>
    
    21
    +#include <string.h>
    
    18 22
     
    
    19
    -typedef struct _ProddableBlock ProddableBlock;
    
    23
    +typedef struct _ProddableBlock {
    
    24
    +    uintptr_t start;  // inclusive
    
    25
    +    uintptr_t end;    // inclusive
    
    26
    +} ProddableBlock;
    
    20 27
     
    
    21 28
     void
    
    22 29
     initProddableBlockSet ( ProddableBlockSet* set )
    
    23 30
     {
    
    24
    -    set->head = NULL;
    
    31
    +    set->data = NULL;
    
    32
    +    set->capacity = 0;
    
    33
    +    set->size = 0;
    
    25 34
     }
    
    26 35
     
    
    27
    -/* -----------------------------------------------------------------------------
    
    28
    - * Sanity checking.  For each ObjectCode, maintain a list of address ranges
    
    29
    - * which may be prodded during relocation, and abort if we try and write
    
    30
    - * outside any of these.
    
    31
    - */
    
    32 36
     void
    
    33
    -addProddableBlock ( ProddableBlockSet* set, void* start, int size )
    
    37
    +freeProddableBlocks (ProddableBlockSet *set)
    
    38
    +{
    
    39
    +    stgFree(set->data);
    
    40
    +    set->data = NULL;
    
    41
    +    set->size = 0;
    
    42
    +    set->capacity = 0;
    
    43
    +}
    
    44
    +
    
    45
    +// Binary search for the first interval with start >= value. Returns index or
    
    46
    +// size if none.
    
    47
    +static size_t
    
    48
    +findLower(const ProddableBlockSet *set, uintptr_t value)
    
    34 49
     {
    
    35
    -    ProddableBlock* pb = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
    
    36
    -
    
    37
    -    IF_DEBUG(linker, debugBelch("addProddableBlock: %p %d\n", start, size));
    
    38
    -    ASSERT(size > 0);
    
    39
    -    pb->start = start;
    
    40
    -    pb->size  = size;
    
    41
    -    pb->next  = set->head;
    
    42
    -    set->head = pb;
    
    50
    +    size_t l = 0;
    
    51
    +    size_t r = set->size;
    
    52
    +    while (l < r) {
    
    53
    +        size_t mid = l + (r - l) / 2;
    
    54
    +        if (set->data[mid].start < value) {
    
    55
    +            l = mid + 1;
    
    56
    +        } else {
    
    57
    +            r = mid;
    
    58
    +        }
    
    59
    +    }
    
    60
    +    return l;
    
    61
    +}
    
    62
    +
    
    63
    +// Check whether a given value is a member of the set.
    
    64
    +bool
    
    65
    +containsSpan ( const ProddableBlockSet *set, uintptr_t start, uintptr_t end )
    
    66
    +{
    
    67
    +    size_t i = findLower(set, start+1);
    
    68
    +    return i > 0
    
    69
    +      && set->data[i-1].start <= start
    
    70
    +      && end <= set->data[i-1].end;
    
    43 71
     }
    
    44 72
     
    
    45 73
     void
    
    46
    -checkProddableBlock (ProddableBlockSet *set, void *addr, size_t size )
    
    74
    +checkProddableBlock (const ProddableBlockSet *set, void *addr, size_t size )
    
    47 75
     {
    
    48
    -    ProddableBlock* pb;
    
    49
    -
    
    50
    -    for (pb = set->head; pb != NULL; pb = pb->next) {
    
    51
    -        char* s = (char*)(pb->start);
    
    52
    -        char* e = s + pb->size;
    
    53
    -        char* a = (char*)addr;
    
    54
    -        if (a >= s && (a+size) <= e)
    
    55
    -            return;
    
    76
    +    if (! containsSpan(set, (uintptr_t) addr, (uintptr_t) addr+size)) {
    
    77
    +        barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
    
    56 78
         }
    
    57
    -    barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
    
    58 79
     }
    
    59 80
     
    
    60
    -void freeProddableBlocks (ProddableBlockSet *set)
    
    81
    +// Ensure capacity for at least new_capacity intervals
    
    82
    +static void
    
    83
    +ensureCapacity(ProddableBlockSet *set, size_t new_capacity) {
    
    84
    +    if (new_capacity > set->capacity) {
    
    85
    +        size_t cap = set->capacity ? set->capacity * 2 : 4;
    
    86
    +        if (cap < new_capacity) {
    
    87
    +            cap = new_capacity;
    
    88
    +        }
    
    89
    +        ProddableBlock *tmp = stgReallocBytes(set->data, cap * sizeof(ProddableBlock), "addProddableBlock");
    
    90
    +        set->data = tmp;
    
    91
    +        set->capacity = cap;
    
    92
    +    }
    
    93
    +}
    
    94
    +
    
    95
    +void
    
    96
    +addProddableBlock ( ProddableBlockSet* set, void* start_ptr, size_t size )
    
    61 97
     {
    
    62
    -    ProddableBlock *pb, *next;
    
    98
    +    const uintptr_t start = (uintptr_t) start_ptr;
    
    99
    +    const uintptr_t end = (uintptr_t) start + size;
    
    100
    +    size_t i = findLower(set, start);
    
    101
    +
    
    102
    +    // check previous interval if it is overlapping or adjacent
    
    103
    +    if (i > 0 && start <= set->data[i-1].end + 1) {
    
    104
    +        // merge with left interval
    
    105
    +        i--;
    
    106
    +        if (end > set->data[i].end) {
    
    107
    +            set->data[i].end = end;
    
    108
    +        }
    
    109
    +    } else {
    
    110
    +        // insert new interval
    
    111
    +        ensureCapacity(set, set->size + 1);
    
    112
    +        memmove(&set->data[i+1], &set->data[i], sizeof(ProddableBlock) * (set->size - i));
    
    113
    +        set->data[i].start = start;
    
    114
    +        set->data[i].end = end;
    
    115
    +        set->size++;
    
    116
    +    }
    
    117
    +
    
    118
    +    // coalesce overlaps on right
    
    119
    +    size_t j = i;
    
    120
    +    while (j < set->size && set->data[j].start <= set->data[i].end + 1) {
    
    121
    +        set->data[i].end = set->data[j].end;
    
    122
    +        j++;
    
    123
    +    }
    
    63 124
     
    
    64
    -    for (pb = set->head; pb != NULL; pb = next) {
    
    65
    -        next = pb->next;
    
    66
    -        stgFree(pb);
    
    125
    +    if (j != i) {
    
    126
    +        memmove(&set->data[i+1], &set->data[j], sizeof(ProddableBlock) * (set->size - j));
    
    127
    +        set->size -= j - i - 1;
    
    67 128
         }
    
    68
    -    set->head = NULL;
    
    69 129
     }
    
    70 130
     

  • rts/linker/ProddableBlocks.h
    ... ... @@ -8,20 +8,31 @@
    8 8
     
    
    9 9
     #pragma once
    
    10 10
     
    
    11
    +#include <stdbool.h>
    
    12
    +#include <stddef.h>
    
    13
    +#include <stdint.h>
    
    14
    +
    
    11 15
     // An interval set on uintptr_t.
    
    12 16
     struct _ProddableBlock;
    
    13 17
     
    
    14 18
     typedef struct {
    
    15
    -    struct _ProddableBlock *head;
    
    19
    +    size_t size;
    
    20
    +    size_t capacity;
    
    21
    +    // sorted list of disjoint (start,end) pairs
    
    22
    +    struct _ProddableBlock *data;
    
    16 23
     } ProddableBlockSet;
    
    17 24
     
    
    18 25
     void initProddableBlockSet ( ProddableBlockSet* set );
    
    19 26
     
    
    20 27
     // Insert an interval.
    
    21
    -void addProddableBlock ( ProddableBlockSet* set, void* start, int size );
    
    28
    +void addProddableBlock ( ProddableBlockSet* set, void* start, size_t size );
    
    22 29
     
    
    23 30
     // Check that an address belongs to the set.
    
    24
    -void checkProddableBlock (ProddableBlockSet *set, void *addr, size_t size );
    
    31
    +void checkProddableBlock (const ProddableBlockSet *set, void *addr, size_t size );
    
    32
    +
    
    25 33
     
    
    26 34
     // Free a set.
    
    27 35
     void freeProddableBlocks (ProddableBlockSet *set);
    
    36
    +
    
    37
    +// For testing.
    
    38
    +bool containsSpan ( const ProddableBlockSet *set, uintptr_t start, uintptr_t end );

  • testsuite/tests/rts/TestProddableBlockSet.c
    1
    +#include <assert.h>
    
    2
    +#include <stdbool.h>
    
    3
    +#include <stdint.h>
    
    4
    +#include <stddef.h>
    
    5
    +
    
    6
    +// Excerpted from ProddableBlocks.h
    
    7
    +typedef struct {
    
    8
    +    size_t size;
    
    9
    +    size_t capacity;
    
    10
    +    // sorted list of disjoint (start,end) pairs
    
    11
    +    struct _ProddableBlock *data;
    
    12
    +} ProddableBlockSet;
    
    13
    +
    
    14
    +void initProddableBlockSet ( ProddableBlockSet* set );
    
    15
    +void addProddableBlock ( ProddableBlockSet* set, void* start, size_t size );
    
    16
    +bool containsSpan ( const ProddableBlockSet *set, uintptr_t start, uintptr_t end );
    
    17
    +
    
    18
    +int main () {
    
    19
    +  ProddableBlockSet set;
    
    20
    +  initProddableBlockSet(&set);
    
    21
    +  addProddableBlock(&set, (void*) 0x20, 0x10);
    
    22
    +  addProddableBlock(&set, (void*) 0x30, 0x10);
    
    23
    +  addProddableBlock(&set, (void*) 0x100, 0x10);
    
    24
    +
    
    25
    +  assert( containsSpan(&set, 0x20, 0x30));
    
    26
    +  assert( containsSpan(&set, 0x30, 0x29));
    
    27
    +  assert(!containsSpan(&set, 0x30, 0x49));
    
    28
    +  assert(!containsSpan(&set, 0x60, 0x70));
    
    29
    +  assert(!containsSpan(&set, 0x90, 0x110));
    
    30
    +  assert( containsSpan(&set, 0x100, 0x101));
    
    31
    +  return 0;
    
    32
    +}
    
    33
    +

  • testsuite/tests/rts/all.T
    ... ... @@ -641,3 +641,5 @@ test('T25280', [unless(opsys('linux'),skip),req_process,js_skip], compile_and_ru
    641 641
     # N.B. This will likely issue a warning on stderr but we merely care that the
    
    642 642
     # program doesn't crash.
    
    643 643
     test('T25560', [req_c_rts, ignore_stderr], compile_and_run, [''])
    
    644
    +
    
    645
    +test('TestProddableBlockSet', [req_c_rts], multimod_compile_and_run, ['TestProddableBlockSet.c', '-no-hs-main'])