Ben Gamari pushed to branch wip/bump-win32-tarballs at Glasgow Haskell Compiler / GHC

Commits:

2 changed files:

Changes:

  • rts/linker/LoadArchive.c
    ... ... @@ -223,21 +223,22 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
    223 223
         char* gnuFileIndex, pathchar* path, size_t* thisFileNameSize,
    
    224 224
         size_t* fileNameSize)
    
    225 225
     {
    
    226
    -    int n;
    
    227 226
         char *fileName = *fileName_;
    
    228 227
         if (isdigit(fileName[1])) {
    
    229
    -        int i;
    
    230
    -        for (n = 2; isdigit(fileName[n]); n++)
    
    231
    -            ;
    
    232
    -
    
    233
    -        fileName[n] = '\0';
    
    234
    -        n = atoi(fileName + 1);
    
    235 228
             if (gnuFileIndex == NULL) {
    
    236 229
                 errorBelch("loadArchive: GNU-variant filename "
    
    237 230
                         "without an index while reading from `%" PATH_FMT "'",
    
    238 231
                         path);
    
    239 232
                 return false;
    
    240 233
             }
    
    234
    +
    
    235
    +        int n;
    
    236
    +        for (n = 2; isdigit(fileName[n]); n++)
    
    237
    +            ;
    
    238
    +
    
    239
    +        char *end;
    
    240
    +        fileName[n] = '\0';
    
    241
    +        n = strtol(fileName + 1, &end, 10);
    
    241 242
             if (n < 0 || n > gnuFileIndexSize) {
    
    242 243
                 errorBelch("loadArchive: GNU-variant filename "
    
    243 244
                         "offset %d out of range [0..%d] "
    
    ... ... @@ -245,17 +246,27 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
    245 246
                         n, gnuFileIndexSize, path);
    
    246 247
                 return false;
    
    247 248
             }
    
    248
    -        if (n != 0 && gnuFileIndex[n - 1] != '\n') {
    
    249
    +
    
    250
    +        // Check that the previous entry ends with the expected
    
    251
    +        // end-of-string delimiter.
    
    252
    +#if defined(mingw32_HOST_OS)
    
    253
    +#define IS_SYMBOL_DELIMITER(STR) (STR =='\n' || STR == '\0')
    
    254
    +#else
    
    255
    +#define IS_SYMBOL_DELIMITER(STR) (STR =='\n')
    
    256
    +#endif
    
    257
    +        if (n != 0 && !IS_SYMBOL_DELIMITER(gnuFileIndex[n - 1])) {
    
    249 258
                 errorBelch("loadArchive: GNU-variant filename offset "
    
    250 259
                         "%d invalid (range [0..%d]) while reading "
    
    251 260
                         "filename from `%" PATH_FMT "'",
    
    252 261
                         n, gnuFileIndexSize, path);
    
    253 262
                 return false;
    
    254 263
             }
    
    255
    -        for (i = n; gnuFileIndex[i] != '\n'; i++)
    
    264
    +
    
    265
    +        int i;
    
    266
    +        for (i = n; !IS_SYMBOL_DELIMITER(gnuFileIndex[i]); i++)
    
    256 267
                 ;
    
    257 268
     
    
    258
    -        size_t FileNameSize = i - n - 1;
    
    269
    +        size_t FileNameSize = i - n;
    
    259 270
             if (FileNameSize >= *fileNameSize) {
    
    260 271
                 /* Double it to avoid potentially continually
    
    261 272
                  increasing it by 1 */
    

  • rts/linker/PEi386.c
    ... ... @@ -342,6 +342,98 @@
    342 342
        Finally, we enter `ocResolve`, where we resolve relocations and allocate
    
    343 343
        jump islands (using the m32 allocator for backing storage) as necessary.
    
    344 344
     
    
    345
    +   Note [Windows API Set]
    
    346
    +   ~~~~~~~~~~~~~~~~~~~~~~
    
    347
    +   Windows has a concept called API Sets [1][2] which is intended to be Windows's
    
    348
    +   equivalent to glibc's symbolic versioning.  It is also used to handle the API
    
    349
    +   surface difference between different device classes.  e.g. the API might be
    
    350
    +   handled differently between a desktop and tablet.
    
    351
    +
    
    352
    +   This is handled through two mechanisms:
    
    353
    +
    
    354
    +   1. Direct Forward:  These use import libraries to manage the first level
    
    355
    +      redirection.  So what used to be in ucrt.dll is now redirected based on
    
    356
    +      ucrt.lib.  Every API now points to a possibly different set of API sets
    
    357
    +      each following the API set contract:
    
    358
    +
    
    359
    +      * The name must begin either with the string api- or ext-.
    
    360
    +      * Names that begin with api- represent APIs that exist on all Windows
    
    361
    +        editions that satisfy the API's version requirements.
    
    362
    +      * Names that begin with ext- represent APIs that may not exist on all
    
    363
    +        Windows editions.
    
    364
    +      * The name must end with the sequence l<n>-<n>-<n>, where n consists of
    
    365
    +        decimal digits.
    
    366
    +      * The body of the name can be alphanumeric characters, or dashes (-).
    
    367
    +      * The name is case insensitive.
    
    368
    +
    
    369
    +      Here are some examples of API set contract names:
    
    370
    +
    
    371
    +        - api-ms-win-core-ums-l1-1-0
    
    372
    +        - ext-ms-win-com-ole32-l1-1-5
    
    373
    +        - ext-ms-win-ntuser-window-l1-1-0
    
    374
    +        - ext-ms-win-ntuser-window-l1-1-1
    
    375
    +
    
    376
    +      Forward references don't require anything special from the calling
    
    377
    +      application in that the Windows loader through "LoadLibrary" will
    
    378
    +      automatically load the right reference for you if given an API set
    
    379
    +      name including the ".dll" suffix.  For example:
    
    380
    +
    
    381
    +      INFO: DLL api-ms-win-eventing-provider-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    382
    +      INFO: DLL api-ms-win-core-apiquery-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\ntdll.dll by API set
    
    383
    +      INFO: DLL api-ms-win-core-processthreads-l1-1-3.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    384
    +      INFO: DLL api-ms-win-core-processthreads-l1-1-2.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    385
    +      INFO: DLL api-ms-win-core-processthreads-l1-1-1.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    386
    +      INFO: DLL api-ms-win-core-processthreads-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    387
    +      INFO: DLL api-ms-win-core-registry-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    388
    +      INFO: DLL api-ms-win-core-heap-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    389
    +      INFO: DLL api-ms-win-core-heap-l2-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    390
    +      INFO: DLL api-ms-win-core-memory-l1-1-1.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    391
    +      INFO: DLL api-ms-win-core-memory-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    392
    +      INFO: DLL api-ms-win-core-memory-l1-1-2.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    393
    +      INFO: DLL api-ms-win-core-handle-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    394
    +
    
    395
    +      Which shows how the loader has redirected some of the references used
    
    396
    +      by ghci.
    
    397
    +
    
    398
    +      Historically though we've treated shared libs lazily.  We would load
    
    399
    +      the shared library, but not resolve the symbol immediately and wait until
    
    400
    +      the symbol is requested to iterate in order through the shared libraries.
    
    401
    +
    
    402
    +      This assumes that you only ever had one version of a symbol.  i.e. we had
    
    403
    +      an assumption that all exported symbols in different shared libraries
    
    404
    +      should be the same, because most of the time they come from re-exporting
    
    405
    +      from a base library.  This is a bit of a weak assumption and doesn't hold
    
    406
    +      with API Sets.
    
    407
    +
    
    408
    +      For that reason the loader now resolves symbols immediately, and because
    
    409
    +      we now resolve using BIND_NOW we must make sure that a symbol loaded
    
    410
    +      through an OC takes precedence because the BIND_NOW reference was not asked
    
    411
    +      for.   For that reason we load the symbols for API sets with the
    
    412
    +      SYM_TYPE_DUP_DISCARD flag set.
    
    413
    +
    
    414
    +    2. Reverse forwarders:  This is when the application has a direct reference
    
    415
    +       to the old name of an API. e.g. if GHC still used "msvcrt.dll" or
    
    416
    +       "ucrt.dll" we would have had to deal with this case.  In this case the
    
    417
    +       loader intercepts the call and if it exists the dll is loaded.  There is
    
    418
    +       an extra indirection as you go from foo.dll => api-ms-foo-1.dll => foo_imp.dll
    
    419
    +
    
    420
    +       But if the API doesn't exist on the device it's resolved to a stub in the
    
    421
    +       API set that will result in an error should it be called [3].
    
    422
    +
    
    423
    +    This means that usages of GetProcAddress and LoadLibrary to check for the
    
    424
    +    existence of a function aren't safe, because they'll always succeed, but may
    
    425
    +    result in a pointer to the stub rather than the actual function.
    
    426
    +
    
    427
    +    What does this mean for the RTS linker? Nothing.  We don't have a fallback
    
    428
    +    for if the function doesn't exist.  The RTS is merely just executing what
    
    429
    +    it was told to run.  It's writers of libraries that have to be careful when
    
    430
    +    doing dlopen()/LoadLibrary.
    
    431
    +
    
    432
    +
    
    433
    +   [1] https://learn.microsoft.com/en-us/windows/win32/apiindex/windows-apisets
    
    434
    +   [2] https://mingwpy.github.io/ucrt.html#api-set-implementation
    
    435
    +   [3] https://learn.microsoft.com/en-us/windows/win32/apiindex/detect-api-set-availability
    
    436
    +
    
    345 437
     */
    
    346 438
     
    
    347 439
     #include "Rts.h"
    
    ... ... @@ -882,7 +974,7 @@ addDLL_PEi386( const pathchar *dll_name, HINSTANCE *loaded )
    882 974
                         goto error;
    
    883 975
                     }
    
    884 976
                 } else {
    
    885
    -                goto loaded; /* We're done. DLL has been loaded.  */
    
    977
    +                goto loaded_ok; /* We're done. DLL has been loaded.  */
    
    886 978
                 }
    
    887 979
             }
    
    888 980
         }
    
    ... ... @@ -890,7 +982,7 @@ addDLL_PEi386( const pathchar *dll_name, HINSTANCE *loaded )
    890 982
         // We failed to load
    
    891 983
         goto error;
    
    892 984
     
    
    893
    -loaded:
    
    985
    +loaded_ok:
    
    894 986
         addLoadedDll(&loaded_dll_cache, dll_name, instance);
    
    895 987
         addDLLHandle(buf, instance);
    
    896 988
         if (loaded) {
    
    ... ... @@ -1055,7 +1147,8 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f
    1055 1147
         // We must call `addDLL_PEi386` directly rather than `addDLL` because `addDLL`
    
    1056 1148
         // is now a wrapper around `loadNativeObj` which acquires a lock which we
    
    1057 1149
         // already have here.
    
    1058
    -    const char* result = addDLL_PEi386(dll, NULL);
    
    1150
    +    HINSTANCE instance;
    
    1151
    +    const char* result = addDLL_PEi386(dll, &instance);
    
    1059 1152
     
    
    1060 1153
         stgFree(image);
    
    1061 1154
     
    
    ... ... @@ -1069,6 +1162,24 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f
    1069 1162
         }
    
    1070 1163
     
    
    1071 1164
         stgFree(dll);
    
    1165
    +
    
    1166
    +    // See Note [Windows API Set]
    
    1167
    +    // We must immediately tie the symbol to the shared library.  The easiest
    
    1168
    +    // way is to load the symbol immediately. We already have all the
    
    1169
    +    // information so might as well
    
    1170
    +    SymbolAddr* sym = lookupSymbolInDLL_PEi386 (symbol, instance, dll, NULL);
    
    1171
    +    ASSERT(sym);
    
    1172
    +    // The symbol must have been found, and we can add it to the RTS symbol table
    
    1173
    +    IF_DEBUG(linker, debugBelch("checkAndLoadImportLibrary: resolved symbol %s to %p\n", symbol, sym));
    
    1174
    +    // Because the symbol has been loaded before we actually need it, if a
    
    1175
    +    // stronger reference wants to add a duplicate we should discard this
    
    1176
    +    // one to preserve link order.
    
    1177
    +    SymType symType = SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN;
    
    1178
    +    symType |= hdr.Type == IMPORT_OBJECT_CODE ? SYM_TYPE_CODE : SYM_TYPE_DATA;
    
    1179
    +
    
    1180
    +    if (!ghciInsertSymbolTable(dll, symhash, symbol, sym, false, symType, NULL))
    
    1181
    +      return false;
    
    1182
    +
    
    1072 1183
         return true;
    
    1073 1184
     }
    
    1074 1185