Ben Gamari pushed to branch ghc-9.14 at Glasgow Haskell Compiler / GHC Commits: aefea783 by Ben Gamari at 2025-07-16T10:42:48-04:00 Bump win32-tarballs to v0.9 (cherry picked from commit 7308276916232706c8c196f929ef05fdffca4ddd) - - - - - cffed96e by Ben Gamari at 2025-07-16T10:42:48-04:00 rts/LoadArchive: Handle null terminated string tables As of `llvm-ar` now emits filename tables terminated with null characters instead of the usual POSIX `/\n` sequence. Fixes #26150. (cherry picked from commit 3b63b254cb8b1832f32751aed6d05ceaabdeb826) - - - - - c9435f9a by Tamar Christina at 2025-07-16T10:42:48-04:00 rts: rename label so name doesn't conflict with param (cherry picked from commit 195f6527264d2a32bacdb34953ec471f632ef320) - - - - - 584c3f4f by Tamar Christina at 2025-07-16T10:42:48-04:00 rts: Handle API set symbol versioning conflicts (cherry picked from commit 63373b95331f07c16e3eef511379fe3bed484839) - - - - - e38451bc by Tamar Christina at 2025-07-16T10:42:48-04:00 rts: Mark API set symbols as HIDDEN and correct symbol type (cherry picked from commit 48e9aa3ebf5acb950a94addc6e47bfebeabead70) - - - - - 81b988ea by Tamar Christina at 2025-07-16T10:42:48-04:00 rts: Implement WEAK EXTERNAL undef redirection by target symbol name (cherry picked from commit 959e827a878c6cd1e0b0702353d70dccd1f52c48) - - - - - 71de21eb by Ben Gamari at 2025-07-16T10:42:48-04:00 rts/LoadArchive: Handle string table entries terminated with / llvm-ar appears to terminate string table entries with `/\n` [1]. This matters in the case of thin archives, since the filename is used. In the past this worked since `llvm-ar` would produce archives with "small" filenames when possible. However, now it appears to always use the string table. [1] https://github.com/llvm/llvm-project/blob/bfb686bb5ba503e9386dc899e1ebbe2488... 
(cherry picked from commit 65f19293124acbc2c6493ca9c098fc74150e184a) - - - - - a5a936e3 by Ben Gamari at 2025-07-16T10:42:48-04:00 testsuite: Mark T12497 as fixed Thanks to the LLVM toolchain update. Closes #22694. (cherry picked from commit 9cbb3ef5254439eaaaeb3cc8f627dd9d86ccd9d6) - - - - - 9a121551 by Ben Gamari at 2025-07-16T10:42:48-04:00 testsuite: Accept new output of T11223_link_order_a_b_2_fail on Windows The archive member number changed due to the fact that llvm-ar now uses a string table. (cherry picked from commit 2854407eccb97a20a98bac370a0fb731f3776750) - - - - - 9ac00bef by Ben Gamari at 2025-07-16T10:42:48-04:00 rts/linker/PEi386: Implement IMAGE_REL_AMD64_SECREL This appears to now be used by libc++ as distributed by msys2. (cherry picked from commit 284395931894150441620c72cccbc046ffcfc177) - - - - - a0660df1 by Tamar Christina at 2025-07-16T10:42:48-04:00 rts: Cleanup merge resolution residue in lookupSymbolInDLL_PEi386 and make safe without dependent (cherry picked from commit 2b05375510674e6d6cdfffe68dca51a39f6dd869) - - - - - 6 changed files: - mk/get-win32-tarballs.py - rts/linker/LoadArchive.c - rts/linker/PEi386.c - testsuite/tests/rts/all.T - testsuite/tests/rts/linker/T11223/T11223_link_order_a_b_2_fail.stderr-ws-32-mingw32 - testsuite/tests/rts/linker/T11223/T11223_link_order_a_b_2_fail.stderr-ws-64-mingw32 Changes: ===================================== mk/get-win32-tarballs.py ===================================== @@ -8,7 +8,7 @@ import argparse import sys from sys import stderr -TARBALL_VERSION = '0.8' +TARBALL_VERSION = '0.9' BASE_URL = "https://downloads.haskell.org/ghc/mingw/{}".format(TARBALL_VERSION) DEST = Path('ghc-tarballs/mingw-w64') ARCHS = ['x86_64', 'sources'] ===================================== rts/linker/LoadArchive.c ===================================== @@ -223,21 +223,22 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_, char* gnuFileIndex, pathchar* path, size_t* thisFileNameSize, size_t* 
fileNameSize) { - int n; char *fileName = *fileName_; if (isdigit(fileName[1])) { - int i; - for (n = 2; isdigit(fileName[n]); n++) - ; - - fileName[n] = '\0'; - n = atoi(fileName + 1); if (gnuFileIndex == NULL) { errorBelch("loadArchive: GNU-variant filename " "without an index while reading from `%" PATH_FMT "'", path); return false; } + + int n; + for (n = 2; isdigit(fileName[n]); n++) + ; + + char *end; + fileName[n] = '\0'; + n = strtol(fileName + 1, &end, 10); if (n < 0 || n > gnuFileIndexSize) { errorBelch("loadArchive: GNU-variant filename " "offset %d out of range [0..%d] " @@ -245,17 +246,27 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_, n, gnuFileIndexSize, path); return false; } - if (n != 0 && gnuFileIndex[n - 1] != '\n') { + + // Check that the previous entry ends with the expected + // end-of-string delimiter. +#if defined(mingw32_HOST_OS) +#define IS_SYMBOL_DELIMITER(STR) (STR =='\n' || STR == '\0') +#else +#define IS_SYMBOL_DELIMITER(STR) (STR =='\n') +#endif + if (n != 0 && !IS_SYMBOL_DELIMITER(gnuFileIndex[n - 1])) { errorBelch("loadArchive: GNU-variant filename offset " "%d invalid (range [0..%d]) while reading " "filename from `%" PATH_FMT "'", n, gnuFileIndexSize, path); return false; } - for (i = n; gnuFileIndex[i] != '\n'; i++) + + int i; + for (i = n; !IS_SYMBOL_DELIMITER(gnuFileIndex[i]); i++) ; - size_t FileNameSize = i - n - 1; + size_t FileNameSize = i - n; if (FileNameSize >= *fileNameSize) { /* Double it to avoid potentially continually increasing it by 1 */ @@ -264,6 +275,13 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_, "loadArchive(fileName)"); } memcpy(fileName, gnuFileIndex + n, FileNameSize); + + + /* llvm-ar terminates string table entries with `/\n`. 
*/ + if (fileName[FileNameSize-1] == '/') { + FileNameSize--; + } + fileName[FileNameSize] = '\0'; *thisFileNameSize = FileNameSize; } ===================================== rts/linker/PEi386.c ===================================== @@ -342,6 +342,98 @@ Finally, we enter `ocResolve`, where we resolve relocations and and allocate jump islands (using the m32 allocator for backing storage) as necessary. + Note [Windows API Set] + ~~~~~~~~~~~~~~~~~~~~~~ + Windows has a concept called API Sets [1][2] which is intended to be Windows's + equivalent to glibc's symbolic versioning. It is also used to handle the API + surface difference between different device classes. e.g. the API might be + handled differently between a desktop and tablet. + + This is handled through two mechanisms: + + 1. Direct Forward: These use import libraries to manage to first level + redirection. So what used to be in ucrt.dll is now redirected based on + ucrt.lib. Every API now points to a possible different set of API sets + each following the API set contract: + + * The name must begin either with the string api- or ext-. + * Names that begin with api- represent APIs that exist on all Windows + editions that satisfy the API's version requirements. + * Names that begin with ext- represent APIs that may not exist on all + Windows editions. + * The name must end with the sequence l<n>-<n>-<n>, where n consists of + decimal digits. + * The body of the name can be alphanumeric characters, or dashes (-). + * The name is case insensitive. + + Here are some examples of API set contract names: + + - api-ms-win-core-ums-l1-1-0 + - ext-ms-win-com-ole32-l1-1-5 + - ext-ms-win-ntuser-window-l1-1-0 + - ext-ms-win-ntuser-window-l1-1-1 + + Forward references don't require anything special from the calling + application in that the Windows loader through "LoadLibrary" will + automatically load the right reference for you if given an API set + name including the ".dll" suffix. 
For example: + + INFO: DLL api-ms-win-eventing-provider-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-apiquery-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\ntdll.dll by API set + INFO: DLL api-ms-win-core-processthreads-l1-1-3.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-processthreads-l1-1-2.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-processthreads-l1-1-1.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-processthreads-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-registry-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-heap-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-heap-l2-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-memory-l1-1-1.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-memory-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-memory-l1-1-2.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-handle-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + + Which shows how the loader has redirected some of the references used + by ghci. + + Historically though we've treated shared libs lazily. We would load + the shared library, but not resolve the symbol immediately and wait until + the symbol is requested to iterate in order through the shared libraries. + + This assumes that you ever only had one version of a symbol. i.e. 
we had + an assumption that all exported symbols in different shared libraries + should be the same, because most of the time they come from re-exporting + from a base library. This is a bit of a weak assumption and doesn't hold + with API Sets. + + For that reason the loader now resolves symbols immediately, and because + we now resolve using BIND_NOW we must make sure that a symbol loaded + through an OC has precedence because the BIND_NOW reference was not asked + for. For that reason we load the symbols for API sets with the + SYM_TYPE_DUP_DISCARD flag set. + + 2. Reverse forwarders: This is when the application has a direct reference + to the old name of an API. e.g. if GHC still used "msvcrt.dll" or + "ucrt.dll" we would have had to deal with this case. In this case the + loader intercepts the call and if it exists the dll is loaded. There is + an extra indirection as you go from foo.dll => api-ms-foo-1.dll => foo_imp.dll + + But if the API doesn't exist on the device it's resolved to a stub in the + API set that if called will result in an error should it be called [3]. + + This means that usages of GetProcAddress and LoadLibrary to check for the + existence of a function aren't safe, because they'll always succeed, but may + result in a pointer to the stub rather than the actual function. + + What does this mean for the RTS linker? Nothing. We don't have a fallback + for if the function doesn't exist. The RTS is merely just executing what + it was told to run. It's writers of libraries that have to be careful when + doing dlopen()/LoadLibrary. + + + [1] https://learn.microsoft.com/en-us/windows/win32/apiindex/windows-apisets + [2] https://mingwpy.github.io/ucrt.html#api-set-implementation + [3] https://learn.microsoft.com/en-us/windows/win32/apiindex/detect-api-set-avai... + */ #include "Rts.h" @@ -882,7 +974,7 @@ addDLL_PEi386( const pathchar *dll_name, HINSTANCE *loaded ) goto error; } } else { - goto loaded; /* We're done. DLL has been loaded. 
*/ + goto loaded_ok; /* We're done. DLL has been loaded. */ } } } @@ -890,7 +982,7 @@ addDLL_PEi386( const pathchar *dll_name, HINSTANCE *loaded ) // We failed to load goto error; -loaded: +loaded_ok: addLoadedDll(&loaded_dll_cache, dll_name, instance); addDLLHandle(buf, instance); if (loaded) { @@ -1055,7 +1147,8 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f // We must call `addDLL_PEi386` directly rather than `addDLL` because `addDLL` // is now a wrapper around `loadNativeObj` which acquires a lock which we // already have here. - const char* result = addDLL_PEi386(dll, NULL); + HINSTANCE instance; + const char* result = addDLL_PEi386(dll, &instance); stgFree(image); @@ -1069,6 +1162,28 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f } stgFree(dll); + + // See Note [Windows API Set] + // We must immediately tie the symbol to the shared library. The easiest + // way is to load the symbol immediately. We already have all the + // information so might as well + SymbolAddr* sym = lookupSymbolInDLL_PEi386 (symbol, instance, dll, NULL); + + // Could be an import descriptor etc, skip if no symbol. + if (!sym) + return true; + + // The symbol must have been found, and we can add it to the RTS symbol table + IF_DEBUG(linker, debugBelch("checkAndLoadImportLibrary: resolved symbol %s to %p\n", symbol, sym)); + // Because the symbol has been loaded before we actually need it, if a + // stronger reference wants to add a duplicate we should discard this + // one to preserve link order. + SymType symType = SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN; + symType |= hdr.Type == IMPORT_OBJECT_CODE ? 
SYM_TYPE_CODE : SYM_TYPE_DATA; + + if (!ghciInsertSymbolTable(dll, symhash, symbol, sym, false, symType, NULL)) + return false; + return true; } @@ -1198,7 +1313,7 @@ lookupSymbolInDLL_PEi386 ( const SymbolName* lbl, HINSTANCE instance, pathchar* it generates call *__imp_foo, and __imp_foo here has exactly the same semantics as in __imp_foo = GetProcAddress(..., "foo") */ - if (sym == NULL && strncmp (lbl, "__imp_", 6) == 0) { + if (sym == NULL && dependent && strncmp (lbl, "__imp_", 6) == 0) { sym = GetProcAddress(instance, lbl + 6); if (sym != NULL) { @@ -1214,12 +1329,6 @@ lookupSymbolInDLL_PEi386 ( const SymbolName* lbl, HINSTANCE instance, pathchar* } } - sym = GetProcAddress(instance, lbl); - if (sym != NULL) { - /*debugBelch("found %s in %s\n", lbl,dll_name);*/ - return sym; - } - return NULL; } @@ -1821,6 +1930,27 @@ ocGetNames_PEi386 ( ObjectCode* oc ) } if(NULL != targetSection) addr = (SymbolAddr*) ((size_t) targetSection->start + getSymValue(info, targetSym)); + else + { + // Do the symbol lookup based on name, this follows Microsoft's weak external's + // format 3 specifications. Example header generated: + // api-ms-win-crt-stdio-l1-1-0.dll: file format pe-x86-64 + // + // SYMBOL TABLE: + // [ 0](sec -1)(fl 0x00)(ty 0)(scl 3) (nx 0) 0x0000000000000000 @comp.id + // [ 1](sec -1)(fl 0x00)(ty 0)(scl 3) (nx 0) 0x0000000000000000 @feat.00 + // [ 2](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x0000000000000000 _write + // [ 3](sec 0)(fl 0x00)(ty 0)(scl 105) (nx 1) 0x0000000000000000 write + // AUX lnno 3 size 0x0 tagndx 2 + // + // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#auxiliary-fo... 
+ SymbolName *target_sname = get_sym_name (getSymShortName (info, targetSym), oc); + if (target_sname) + addr = lookupSymbol_PEi386 (target_sname, oc, &type); + + IF_DEBUG(linker, debugBelch("weak external symbol @ %s => %s resolved to %p\n", \ + sname, target_sname, addr)); + } } else if ( secNumber == IMAGE_SYM_UNDEFINED && symValue > 0) { /* This symbol isn't in any section at all, ie, global bss. @@ -2115,6 +2245,13 @@ ocResolve_PEi386 ( ObjectCode* oc ) *(uint64_t *)pP = S + A; break; } + case 11: /* IMAGE_REL_AMD64_SECREL (PE constant 11) */ + { + uint64_t offset = S - (uint64_t) section.start; + CHECK((uint32_t) offset == offset); + *(uint32_t *)pP = offset + A; + break; + } case 2: /* R_X86_64_32 (ELF constant 10) - IMAGE_REL_AMD64_ADDR32 (PE constant 2) */ case 3: /* IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */ case 17: /* R_X86_64_32S ELF constant, no PE mapping. See note [ELF constant in PE file] */ ===================================== testsuite/tests/rts/all.T ===================================== @@ -426,9 +426,7 @@ test('T10296b', [only_ways(['threaded2'])], compile_and_run, ['']) test('numa001', [ extra_run_opts('8'), unless(unregisterised(), extra_ways(['debug_numa'])), req_ghc_with_threaded_rts ] , compile_and_run, ['']) -test('T12497', [ unless(opsys('mingw32'), skip), expect_broken(22694) - ], - makefile_test, ['T12497']) +test('T12497', unless(opsys('mingw32'), skip), makefile_test, ['T12497']) test('T13617', [ unless(opsys('mingw32'), skip)], makefile_test, ['T13617']) ===================================== testsuite/tests/rts/linker/T11223/T11223_link_order_a_b_2_fail.stderr-ws-32-mingw32 ===================================== @@ -3,7 +3,7 @@ GHC runtime linker: fatal error: I found a duplicate definition for symbol whilst processing object file E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libfoo_link_lib_3.a The symbol was previously defined in - 
E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#2:bar_link_lib_3.o) + E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#3:bar_link_lib_3.o) This could be caused by: * Loading two different object files which export the same symbol * Specifying the same object file twice on the GHCi command line ===================================== testsuite/tests/rts/linker/T11223/T11223_link_order_a_b_2_fail.stderr-ws-64-mingw32 ===================================== @@ -3,7 +3,7 @@ GHC runtime linker: fatal error: I found a duplicate definition for symbol whilst processing object file E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libfoo_link_lib_3.a The symbol was previously defined in - E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#2:bar_link_lib_3.o) + E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#3:bar_link_lib_3.o) This could be caused by: * Loading two different object files which export the same symbol * Specifying the same object file twice on the GHCi command line View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/7dafa40c27ebc5dd5882f7dea734fcb... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/7dafa40c27ebc5dd5882f7dea734fcb... You're receiving this email because of your account on gitlab.haskell.org.