Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC

Commits:

6 changed files:

Changes:

  • mk/get-win32-tarballs.py
    ... ... @@ -8,7 +8,7 @@ import argparse
    8 8
     import sys
    
    9 9
     from sys import stderr
    
    10 10
     
    
    11
    -TARBALL_VERSION = '0.8'
    
    11
    +TARBALL_VERSION = '0.9'
    
    12 12
     BASE_URL = "https://downloads.haskell.org/ghc/mingw/{}".format(TARBALL_VERSION)
    
    13 13
     DEST = Path('ghc-tarballs/mingw-w64')
    
    14 14
     ARCHS = ['x86_64', 'sources']
    

  • rts/linker/LoadArchive.c
    ... ... @@ -223,21 +223,22 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
    223 223
         char* gnuFileIndex, pathchar* path, size_t* thisFileNameSize,
    
    224 224
         size_t* fileNameSize)
    
    225 225
     {
    
    226
    -    int n;
    
    227 226
         char *fileName = *fileName_;
    
    228 227
         if (isdigit(fileName[1])) {
    
    229
    -        int i;
    
    230
    -        for (n = 2; isdigit(fileName[n]); n++)
    
    231
    -            ;
    
    232
    -
    
    233
    -        fileName[n] = '\0';
    
    234
    -        n = atoi(fileName + 1);
    
    235 228
             if (gnuFileIndex == NULL) {
    
    236 229
                 errorBelch("loadArchive: GNU-variant filename "
    
    237 230
                         "without an index while reading from `%" PATH_FMT "'",
    
    238 231
                         path);
    
    239 232
                 return false;
    
    240 233
             }
    
    234
    +
    
    235
    +        int n;
    
    236
    +        for (n = 2; isdigit(fileName[n]); n++)
    
    237
    +            ;
    
    238
    +
    
    239
    +        char *end;
    
    240
    +        fileName[n] = '\0';
    
    241
    +        n = strtol(fileName + 1, &end, 10);
    
    241 242
             if (n < 0 || n > gnuFileIndexSize) {
    
    242 243
                 errorBelch("loadArchive: GNU-variant filename "
    
    243 244
                         "offset %d out of range [0..%d] "
    
    ... ... @@ -245,17 +246,27 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
    245 246
                         n, gnuFileIndexSize, path);
    
    246 247
                 return false;
    
    247 248
             }
    
    248
    -        if (n != 0 && gnuFileIndex[n - 1] != '\n') {
    
    249
    +
    
    250
    +        // Check that the previous entry ends with the expected
    
    251
    +        // end-of-string delimiter.
    
    252
    +#if defined(mingw32_HOST_OS)
    
    253
    +#define IS_SYMBOL_DELIMITER(STR) (STR =='\n' || STR == '\0')
    
    254
    +#else
    
    255
    +#define IS_SYMBOL_DELIMITER(STR) (STR =='\n')
    
    256
    +#endif
    
    257
    +        if (n != 0 && !IS_SYMBOL_DELIMITER(gnuFileIndex[n - 1])) {
    
    249 258
                 errorBelch("loadArchive: GNU-variant filename offset "
    
    250 259
                         "%d invalid (range [0..%d]) while reading "
    
    251 260
                         "filename from `%" PATH_FMT "'",
    
    252 261
                         n, gnuFileIndexSize, path);
    
    253 262
                 return false;
    
    254 263
             }
    
    255
    -        for (i = n; gnuFileIndex[i] != '\n'; i++)
    
    264
    +
    
    265
    +        int i;
    
    266
    +        for (i = n; !IS_SYMBOL_DELIMITER(gnuFileIndex[i]); i++)
    
    256 267
                 ;
    
    257 268
     
    
    258
    -        size_t FileNameSize = i - n - 1;
    
    269
    +        size_t FileNameSize = i - n;
    
    259 270
             if (FileNameSize >= *fileNameSize) {
    
    260 271
                 /* Double it to avoid potentially continually
    
    261 272
                  increasing it by 1 */
    
    ... ... @@ -264,6 +275,13 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
    264 275
                         "loadArchive(fileName)");
    
    265 276
             }
    
    266 277
             memcpy(fileName, gnuFileIndex + n, FileNameSize);
    
    278
    +
    
    279
    +
    
    280
    +       /* llvm-ar terminates string table entries with `/\n`. */
    
    281
    +       if (fileName[FileNameSize-1] == '/') {
    
    282
    +           FileNameSize--;
    
    283
    +       }
    
    284
    +
    
    267 285
             fileName[FileNameSize] = '\0';
    
    268 286
             *thisFileNameSize = FileNameSize;
    
    269 287
         }
    

  • rts/linker/PEi386.c
    ... ... @@ -342,6 +342,98 @@
    342 342
        Finally, we enter `ocResolve`, where we resolve relocations and allocate
    
    343 343
        jump islands (using the m32 allocator for backing storage) as necessary.
    
    344 344
     
    
    345
    +   Note [Windows API Set]
    
    346
    +   ~~~~~~~~~~~~~~~~~~~~~~
    
    347
    +   Windows has a concept called API Sets [1][2] which is intended to be Windows's
    
    348
    +   equivalent to glibc's symbolic versioning.  It is also used to handle the API
    
    349
    +   surface difference between different device classes.  e.g. the API might be
    
    350
    +   handled differently between a desktop and tablet.
    
    351
    +
    
    352
    +   This is handled through two mechanisms:
    
    353
    +
    
    354
    +   1. Direct Forward:  These use import libraries to manage the first level
    
    355
    +      redirection.  So what used to be in ucrt.dll is now redirected based on
    
    356
    +      ucrt.lib.  Every API now points to a possible different set of API sets
    
    357
    +      each following the API set contract:
    
    358
    +
    
    359
    +      * The name must begin either with the string api- or ext-.
    
    360
    +      * Names that begin with api- represent APIs that exist on all Windows
    
    361
    +        editions that satisfy the API's version requirements.
    
    362
    +      * Names that begin with ext- represent APIs that may not exist on all
    
    363
    +        Windows editions.
    
    364
    +      * The name must end with the sequence l<n>-<n>-<n>, where n consists of
    
    365
    +        decimal digits.
    
    366
    +      * The body of the name can be alphanumeric characters, or dashes (-).
    
    367
    +      * The name is case insensitive.
    
    368
    +
    
    369
    +      Here are some examples of API set contract names:
    
    370
    +
    
    371
    +        - api-ms-win-core-ums-l1-1-0
    
    372
    +        - ext-ms-win-com-ole32-l1-1-5
    
    373
    +        - ext-ms-win-ntuser-window-l1-1-0
    
    374
    +        - ext-ms-win-ntuser-window-l1-1-1
    
    375
    +
    
    376
    +      Forward references don't require anything special from the calling
    
    377
    +      application in that the Windows loader through "LoadLibrary" will
    
    378
    +      automatically load the right reference for you if given an API set
    
    379
    +      name including the ".dll" suffix.  For example:
    
    380
    +
    
    381
    +      INFO: DLL api-ms-win-eventing-provider-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    382
    +      INFO: DLL api-ms-win-core-apiquery-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\ntdll.dll by API set
    
    383
    +      INFO: DLL api-ms-win-core-processthreads-l1-1-3.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    384
    +      INFO: DLL api-ms-win-core-processthreads-l1-1-2.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    385
    +      INFO: DLL api-ms-win-core-processthreads-l1-1-1.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    386
    +      INFO: DLL api-ms-win-core-processthreads-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    387
    +      INFO: DLL api-ms-win-core-registry-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    388
    +      INFO: DLL api-ms-win-core-heap-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    389
    +      INFO: DLL api-ms-win-core-heap-l2-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    390
    +      INFO: DLL api-ms-win-core-memory-l1-1-1.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    391
    +      INFO: DLL api-ms-win-core-memory-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    392
    +      INFO: DLL api-ms-win-core-memory-l1-1-2.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    393
    +      INFO: DLL api-ms-win-core-handle-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    394
    +
    
    395
    +      Which shows how the loader has redirected some of the references used
    
    396
    +      by ghci.
    
    397
    +
    
    398
    +      Historically though we've treated shared libs lazily.  We would load
    
    399
    +      the shared library, but not resolve the symbol immediately and wait until
    
    400
    +      the symbol is requested to iterate in order through the shared libraries.
    
    401
    +
    
    402
    +      This assumes that you ever only had one version of a symbol.  i.e. we had
    
    403
    +      an assumption that all exported symbols in different shared libraries
    
    404
    +      should be the same, because most of the time they come from re-exporting
    
    405
    +      from a base library.  This is a bit of a weak assumption and doesn't hold
    
    406
    +      with API Sets.
    
    407
    +
    
    408
    +      For that reason the loader now resolves symbols immediately, and because
    
    409
    +      we now resolve using BIND_NOW we must make sure that a symbol loaded
    
    410
    +      through an OC has precedence because the BIND_NOW reference was not asked
    
    411
    +      for.   For that reason we load the symbols for API sets with the
    
    412
    +      SYM_TYPE_DUP_DISCARD flag set.
    
    413
    +
    
    414
    +    2. Reverse forwarders:  This is when the application has a direct reference
    
    415
    +       to the old name of an API. e.g. if GHC still used "msvcrt.dll" or
    
    416
    +       "ucrt.dll" we would have had to deal with this case.  In this case the
    
    417
    +       loader intercepts the call and if it exists the dll is loaded.  There is
    
    418
    +       an extra indirection as you go from foo.dll => api-ms-foo-1.dll => foo_imp.dll
    
    419
    +
    
    420
    +       But if the API doesn't exist on the device it's resolved to a stub in the
    
    421
    +       API set that will result in an error if it is called [3].
    
    422
    +
    
    423
    +    This means that usages of GetProcAddress and LoadLibrary to check for the
    
    424
    +    existence of a function aren't safe, because they'll always succeed, but may
    
    425
    +    result in a pointer to the stub rather than the actual function.
    
    426
    +
    
    427
    +    What does this mean for the RTS linker? Nothing.  We don't have a fallback
    
    428
    +    for if the function doesn't exist.  The RTS is merely just executing what
    
    429
    +    it was told to run.  It's writers of libraries that have to be careful when
    
    430
    +    doing dlopen()/LoadLibrary.
    
    431
    +
    
    432
    +
    
    433
    +   [1] https://learn.microsoft.com/en-us/windows/win32/apiindex/windows-apisets
    
    434
    +   [2] https://mingwpy.github.io/ucrt.html#api-set-implementation
    
    435
    +   [3] https://learn.microsoft.com/en-us/windows/win32/apiindex/detect-api-set-availability
    
    436
    +
    
    345 437
     */
    
    346 438
     
    
    347 439
     #include "Rts.h"
    
    ... ... @@ -882,7 +974,7 @@ addDLL_PEi386( const pathchar *dll_name, HINSTANCE *loaded )
    882 974
                         goto error;
    
    883 975
                     }
    
    884 976
                 } else {
    
    885
    -                goto loaded; /* We're done. DLL has been loaded.  */
    
    977
    +                goto loaded_ok; /* We're done. DLL has been loaded.  */
    
    886 978
                 }
    
    887 979
             }
    
    888 980
         }
    
    ... ... @@ -890,7 +982,7 @@ addDLL_PEi386( const pathchar *dll_name, HINSTANCE *loaded )
    890 982
         // We failed to load
    
    891 983
         goto error;
    
    892 984
     
    
    893
    -loaded:
    
    985
    +loaded_ok:
    
    894 986
         addLoadedDll(&loaded_dll_cache, dll_name, instance);
    
    895 987
         addDLLHandle(buf, instance);
    
    896 988
         if (loaded) {
    
    ... ... @@ -1055,7 +1147,8 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f
    1055 1147
         // We must call `addDLL_PEi386` directly rather than `addDLL` because `addDLL`
    
    1056 1148
         // is now a wrapper around `loadNativeObj` which acquires a lock which we
    
    1057 1149
         // already have here.
    
    1058
    -    const char* result = addDLL_PEi386(dll, NULL);
    
    1150
    +    HINSTANCE instance;
    
    1151
    +    const char* result = addDLL_PEi386(dll, &instance);
    
    1059 1152
     
    
    1060 1153
         stgFree(image);
    
    1061 1154
     
    
    ... ... @@ -1069,6 +1162,28 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f
    1069 1162
         }
    
    1070 1163
     
    
    1071 1164
         stgFree(dll);
    
    1165
    +
    
    1166
    +    // See Note [Windows API Set]
    
    1167
    +    // We must immediately tie the symbol to the shared library.  The easiest
    
    1168
    +    // way is to load the symbol immediately. We already have all the
    
    1169
    +    // information so might as well
    
    1170
    +    SymbolAddr* sym = lookupSymbolInDLL_PEi386 (symbol, instance, dll, NULL);
    
    1171
    +
    
    1172
    +    // Could be an import descriptor etc, skip if no symbol.
    
    1173
    +    if (!sym)
    
    1174
    +      return true;
    
    1175
    +
    
    1176
    +    // The symbol must have been found, and we can add it to the RTS symbol table
    
    1177
    +    IF_DEBUG(linker, debugBelch("checkAndLoadImportLibrary: resolved symbol %s to %p\n", symbol, sym));
    
    1178
    +    // Because the symbol has been loaded before we actually need it, if a
    
    1179
    +    // stronger reference wants to add a duplicate we should discard this
    
    1180
    +    // one to preserve link order.
    
    1181
    +    SymType symType = SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN;
    
    1182
    +    symType |= hdr.Type == IMPORT_OBJECT_CODE ? SYM_TYPE_CODE : SYM_TYPE_DATA;
    
    1183
    +
    
    1184
    +    if (!ghciInsertSymbolTable(dll, symhash, symbol, sym, false, symType, NULL))
    
    1185
    +      return false;
    
    1186
    +
    
    1072 1187
         return true;
    
    1073 1188
     }
    
    1074 1189
     
    
    ... ... @@ -1198,7 +1313,7 @@ lookupSymbolInDLL_PEi386 ( const SymbolName* lbl, HINSTANCE instance, pathchar*
    1198 1313
              it generates call *__imp_foo, and __imp_foo here has exactly
    
    1199 1314
              the same semantics as in __imp_foo = GetProcAddress(..., "foo")
    
    1200 1315
          */
    
    1201
    -    if (sym == NULL && strncmp (lbl, "__imp_", 6) == 0) {
    
    1316
    +    if (sym == NULL && dependent && strncmp (lbl, "__imp_", 6) == 0) {
    
    1202 1317
             sym = GetProcAddress(instance,
    
    1203 1318
                                  lbl + 6);
    
    1204 1319
             if (sym != NULL) {
    
    ... ... @@ -1214,12 +1329,6 @@ lookupSymbolInDLL_PEi386 ( const SymbolName* lbl, HINSTANCE instance, pathchar*
    1214 1329
                }
    
    1215 1330
         }
    
    1216 1331
     
    
    1217
    -    sym = GetProcAddress(instance, lbl);
    
    1218
    -    if (sym != NULL) {
    
    1219
    -        /*debugBelch("found %s in %s\n", lbl,dll_name);*/
    
    1220
    -        return sym;
    
    1221
    -       }
    
    1222
    -
    
    1223 1332
         return NULL;
    
    1224 1333
     }
    
    1225 1334
     
    
    ... ... @@ -1821,6 +1930,27 @@ ocGetNames_PEi386 ( ObjectCode* oc )
    1821 1930
               }
    
    1822 1931
               if(NULL != targetSection)
    
    1823 1932
                   addr = (SymbolAddr*) ((size_t) targetSection->start + getSymValue(info, targetSym));
    
    1933
    +          else
    
    1934
    +            {
    
    1935
    +                // Do the symbol lookup based on name; this follows Microsoft's weak externals
    
    1936
    +                // format 3 specifications.  Example header generated:
    
    1937
    +                // api-ms-win-crt-stdio-l1-1-0.dll:     file format pe-x86-64
    
    1938
    +                //
    
    1939
    +                // SYMBOL TABLE:
    
    1940
    +                // [  0](sec -1)(fl 0x00)(ty    0)(scl   3) (nx 0) 0x0000000000000000 @comp.id
    
    1941
    +                // [  1](sec -1)(fl 0x00)(ty    0)(scl   3) (nx 0) 0x0000000000000000 @feat.00
    
    1942
    +                // [  2](sec  0)(fl 0x00)(ty    0)(scl   2) (nx 0) 0x0000000000000000 _write
    
    1943
    +                // [  3](sec  0)(fl 0x00)(ty    0)(scl 105) (nx 1) 0x0000000000000000 write
    
    1944
    +                // AUX lnno 3 size 0x0 tagndx 2
    
    1945
    +                //
    
    1946
    +                // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#auxiliary-format-3-weak-externals
    
    1947
    +                SymbolName *target_sname = get_sym_name (getSymShortName (info, targetSym), oc);
    
    1948
    +                if (target_sname)
    
    1949
    +                  addr = lookupSymbol_PEi386 (target_sname, oc, &type);
    
    1950
    +
    
    1951
    +                IF_DEBUG(linker, debugBelch("weak external symbol @ %s => %s resolved to %p\n", \
    
    1952
    +                                            sname, target_sname, addr));
    
    1953
    +            }
    
    1824 1954
           }
    
    1825 1955
           else if (  secNumber == IMAGE_SYM_UNDEFINED && symValue > 0) {
    
    1826 1956
              /* This symbol isn't in any section at all, ie, global bss.
    
    ... ... @@ -2115,6 +2245,13 @@ ocResolve_PEi386 ( ObjectCode* oc )
    2115 2245
                        *(uint64_t *)pP = S + A;
    
    2116 2246
                        break;
    
    2117 2247
                    }
    
    2248
    +           case 11: /* IMAGE_REL_AMD64_SECREL (PE constant 11) */
    
    2249
    +              {
    
    2250
    +                  uint64_t offset = S - (uint64_t) section.start;
    
    2251
    +                  CHECK((uint32_t) offset == offset);
    
    2252
    +                   *(uint32_t *)pP = offset + A;
    
    2253
    +                  break;
    
    2254
    +              }
    
    2118 2255
                 case 2: /* R_X86_64_32 (ELF constant 10) - IMAGE_REL_AMD64_ADDR32 (PE constant 2) */
    
    2119 2256
                 case 3: /* IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */
    
    2120 2257
                 case 17: /* R_X86_64_32S ELF constant, no PE mapping. See note [ELF constant in PE file] */
    

  • testsuite/tests/rts/all.T
    ... ... @@ -426,9 +426,7 @@ test('T10296b', [only_ways(['threaded2'])], compile_and_run, [''])
    426 426
     test('numa001', [ extra_run_opts('8'), unless(unregisterised(), extra_ways(['debug_numa'])), req_ghc_with_threaded_rts ]
    
    427 427
                     , compile_and_run, [''])
    
    428 428
     
    
    429
    -test('T12497', [ unless(opsys('mingw32'), skip), expect_broken(22694)
    
    430
    -               ],
    
    431
    -               makefile_test, ['T12497'])
    
    429
    +test('T12497', unless(opsys('mingw32'), skip), makefile_test, ['T12497'])
    
    432 430
     
    
    433 431
     test('T13617', [ unless(opsys('mingw32'), skip)],
    
    434 432
                    makefile_test, ['T13617'])
    

  • testsuite/tests/rts/linker/T11223/T11223_link_order_a_b_2_fail.stderr-ws-32-mingw32
    ... ... @@ -3,7 +3,7 @@ GHC runtime linker: fatal error: I found a duplicate definition for symbol
    3 3
     whilst processing object file
    
    4 4
        E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libfoo_link_lib_3.a
    
    5 5
     The symbol was previously defined in
    
    6
    -   E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#2:bar_link_lib_3.o)
    
    6
    +   E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#3:bar_link_lib_3.o)
    
    7 7
     This could be caused by:
    
    8 8
        * Loading two different object files which export the same symbol
    
    9 9
        * Specifying the same object file twice on the GHCi command line
    

  • testsuite/tests/rts/linker/T11223/T11223_link_order_a_b_2_fail.stderr-ws-64-mingw32
    ... ... @@ -3,7 +3,7 @@ GHC runtime linker: fatal error: I found a duplicate definition for symbol
    3 3
     whilst processing object file
    
    4 4
        E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libfoo_link_lib_3.a
    
    5 5
     The symbol was previously defined in
    
    6
    -   E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#2:bar_link_lib_3.o)
    
    6
    +   E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#3:bar_link_lib_3.o)
    
    7 7
     This could be caused by:
    
    8 8
        * Loading two different object files which export the same symbol
    
    9 9
        * Specifying the same object file twice on the GHCi command line