
Ben Gamari pushed to branch wip/bump-win32-tarballs at Glasgow Haskell Compiler / GHC Commits: ce93395d by Ben Gamari at 2025-06-30T17:06:54-04:00 rts/LoadArchive: Handle null terminated string tables As of `llvm-ar` now emits filename tables terminated with null characters instead of the usual POSIX `/\n` sequence. Fixes #26150. - - - - - a7334916 by Tamar Christina at 2025-06-30T17:08:26-04:00 rts: rename label so name doesn't conflict with param - - - - - d409d6ae by Tamar Christina at 2025-06-30T17:08:26-04:00 rts: Handle API set symbol versioning conflicts - - - - - e82f64b8 by Tamar Christina at 2025-06-30T17:08:26-04:00 rts: Mark API set symbols as HIDDEN and correct symbol type - - - - - 2 changed files: - rts/linker/LoadArchive.c - rts/linker/PEi386.c Changes: ===================================== rts/linker/LoadArchive.c ===================================== @@ -223,21 +223,22 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_, char* gnuFileIndex, pathchar* path, size_t* thisFileNameSize, size_t* fileNameSize) { - int n; char *fileName = *fileName_; if (isdigit(fileName[1])) { - int i; - for (n = 2; isdigit(fileName[n]); n++) - ; - - fileName[n] = '\0'; - n = atoi(fileName + 1); if (gnuFileIndex == NULL) { errorBelch("loadArchive: GNU-variant filename " "without an index while reading from `%" PATH_FMT "'", path); return false; } + + int n; + for (n = 2; isdigit(fileName[n]); n++) + ; + + char *end; + fileName[n] = '\0'; + n = strtol(fileName + 1, &end, 10); if (n < 0 || n > gnuFileIndexSize) { errorBelch("loadArchive: GNU-variant filename " "offset %d out of range [0..%d] " @@ -245,17 +246,27 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_, n, gnuFileIndexSize, path); return false; } - if (n != 0 && gnuFileIndex[n - 1] != '\n') { + + // Check that the previous entry ends with the expected + // end-of-string delimiter. 
+#if defined(mingw32_HOST_OS) +#define IS_SYMBOL_DELIMITER(STR) (STR =='\n' || STR == '\0') +#else +#define IS_SYMBOL_DELIMITER(STR) (STR =='\n') +#endif + if (n != 0 && !IS_SYMBOL_DELIMITER(gnuFileIndex[n - 1])) { errorBelch("loadArchive: GNU-variant filename offset " "%d invalid (range [0..%d]) while reading " "filename from `%" PATH_FMT "'", n, gnuFileIndexSize, path); return false; } - for (i = n; gnuFileIndex[i] != '\n'; i++) + + int i; + for (i = n; !IS_SYMBOL_DELIMITER(gnuFileIndex[i]); i++) ; - size_t FileNameSize = i - n - 1; + size_t FileNameSize = i - n; if (FileNameSize >= *fileNameSize) { /* Double it to avoid potentially continually increasing it by 1 */ ===================================== rts/linker/PEi386.c ===================================== @@ -342,6 +342,98 @@ Finally, we enter `ocResolve`, where we resolve relocations and and allocate jump islands (using the m32 allocator for backing storage) as necessary. + Note [Windows API Set] + ~~~~~~~~~~~~~~~~~~~~~~ + Windows has a concept called API Sets [1][2] which is intended to be Windows's + equivalent to glibc's symbolic versioning. It is also used to handle the API + surface difference between different device classes, e.g. the API might be + handled differently between a desktop and tablet. + + This is handled through two mechanisms: + + 1. Direct Forward: These use import libraries to manage the first level of + redirection. So what used to be in ucrt.dll is now redirected based on + ucrt.lib. Every API now points to a possibly different set of API sets + each following the API set contract: + + * The name must begin either with the string api- or ext-. + * Names that begin with api- represent APIs that exist on all Windows + editions that satisfy the API's version requirements. + * Names that begin with ext- represent APIs that may not exist on all + Windows editions. + * The name must end with the sequence l<n>-<n>-<n>, where n consists of + decimal digits. 
+ * The body of the name can be alphanumeric characters, or dashes (-). + * The name is case insensitive. + + Here are some examples of API set contract names: + + - api-ms-win-core-ums-l1-1-0 + - ext-ms-win-com-ole32-l1-1-5 + - ext-ms-win-ntuser-window-l1-1-0 + - ext-ms-win-ntuser-window-l1-1-1 + + Forward references don't require anything special from the calling + application in that the Windows loader through "LoadLibrary" will + automatically load the right reference for you if given an API set + name including the ".dll" suffix. For example: + + INFO: DLL api-ms-win-eventing-provider-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-apiquery-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\ntdll.dll by API set + INFO: DLL api-ms-win-core-processthreads-l1-1-3.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-processthreads-l1-1-2.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-processthreads-l1-1-1.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-processthreads-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-registry-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-heap-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-heap-l2-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-memory-l1-1-1.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-memory-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-memory-l1-1-2.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + INFO: DLL api-ms-win-core-handle-l1-1-0.dll was redirected to 
C:\WINDOWS\SYSTEM32\kernelbase.dll by API set + + Which shows how the loader has redirected some of the references used + by ghci. + + Historically though we've treated shared libs lazily. We would load + the shared library, but not resolve the symbol immediately and wait until + the symbol is requested to iterate in order through the shared libraries. + + This assumes that you only ever had one version of a symbol. i.e. we had + an assumption that all exported symbols in different shared libraries + should be the same, because most of the time they come from re-exporting + from a base library. This is a bit of a weak assumption and doesn't hold + with API Sets. + + For that reason the loader now resolves symbols immediately, and because + we now resolve using BIND_NOW we must make sure that a symbol loaded + through an OC takes precedence because the BIND_NOW reference was not asked + for. For that reason we load the symbols for API sets with the + SYM_TYPE_DUP_DISCARD flag set. + + 2. Reverse forwarders: This is when the application has a direct reference + to the old name of an API. e.g. if GHC still used "msvcrt.dll" or + "ucrt.dll" we would have had to deal with this case. In this case the + loader intercepts the call and if it exists the dll is loaded. There is + an extra indirection as you go from foo.dll => api-ms-foo-1.dll => foo_imp.dll + + But if the API doesn't exist on the device it's resolved to a stub in the + API set that will result in an error if called [3]. + + This means that usages of GetProcAddress and LoadLibrary to check for the + existence of a function aren't safe, because they'll always succeed, but may + result in a pointer to the stub rather than the actual function. + + What does this mean for the RTS linker? Nothing. We don't have a fallback + for if the function doesn't exist. The RTS is merely executing what + it was told to run. 
It's writers of libraries that have to be careful when + doing dlopen()/LoadLibrary. + + + [1] https://learn.microsoft.com/en-us/windows/win32/apiindex/windows-apisets + [2] https://mingwpy.github.io/ucrt.html#api-set-implementation + [3] https://learn.microsoft.com/en-us/windows/win32/apiindex/detect-api-set-avai... + */ #include "Rts.h" @@ -882,7 +974,7 @@ addDLL_PEi386( const pathchar *dll_name, HINSTANCE *loaded ) goto error; } } else { - goto loaded; /* We're done. DLL has been loaded. */ + goto loaded_ok; /* We're done. DLL has been loaded. */ } } } @@ -890,7 +982,7 @@ addDLL_PEi386( const pathchar *dll_name, HINSTANCE *loaded ) // We failed to load goto error; -loaded: +loaded_ok: addLoadedDll(&loaded_dll_cache, dll_name, instance); addDLLHandle(buf, instance); if (loaded) { @@ -1055,7 +1147,8 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f // We must call `addDLL_PEi386` directly rather than `addDLL` because `addDLL` // is now a wrapper around `loadNativeObj` which acquires a lock which we // already have here. - const char* result = addDLL_PEi386(dll, NULL); + HINSTANCE instance; + const char* result = addDLL_PEi386(dll, &instance); stgFree(image); @@ -1069,6 +1162,24 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f } stgFree(dll); + + // See Note [Windows API Set] + // We must immediately tie the symbol to the shared library. The easiest + // way is to load the symbol immediately. We already have all the + // information so might as well + SymbolAddr* sym = lookupSymbolInDLL_PEi386 (symbol, instance, dll, NULL); + ASSERT(sym); + // The symbol must have been found, and we can add it to the RTS symbol table + IF_DEBUG(linker, debugBelch("checkAndLoadImportLibrary: resolved symbol %s to %p\n", symbol, sym)); + // Because the symbol has been loaded before we actually need it, if a + // stronger reference wants to add a duplicate we should discard this + // one to preserve link order. 
+ SymType symType = SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN; + symType |= hdr.Type == IMPORT_OBJECT_CODE ? SYM_TYPE_CODE : SYM_TYPE_DATA; + + if (!ghciInsertSymbolTable(dll, symhash, symbol, sym, false, symType, NULL)) + return false; + return true; } View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e16baf6209c4ff0f1f1883bfed65f2f... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e16baf6209c4ff0f1f1883bfed65f2f... You're receiving this email because of your account on gitlab.haskell.org.