[Git][ghc/ghc][wip/T25943] rts/linker: Don't fail due to RTLD_NOW

Ben Gamari pushed to branch wip/T25943 at Glasgow Haskell Compiler / GHC Commits: 3c6551e6 by Ben Gamari at 2025-05-02T11:22:05-04:00 rts/linker: Don't fail due to RTLD_NOW In !12264 we started using the NativeObj machinery introduced some time ago for loading of shared objects. One of the side-effects of this change is that shared objects are now loaded eagerly (i.e. with `RTLD_NOW`). This is needed by NativeObj to ensure full visibility of the mappings of the loaded object, which is in turn needed for safe shared object unloading. Unfortunately, this change introduced a subtle regression, causing compilation failures in some programs. Specifically, shared objects which refer to undefined symbols (e.g. symbols which are usually provided by either the executable image or libraries loaded via `dlopen`) will fail to load with eager binding. This is problematic as GHC loads all package dependencies while, e.g., evaluating TemplateHaskell splices. This results in compilation failures in programs depending upon (but not using at compile-time) packages with undefined symbol references. To mitigate this, NativeObj now first attempts to load an object via eager binding, reverting to lazy binding (and disabling unloading) on failure. See Note [Don't fail due to RTLD_NOW]. Fixes #25943. - - - - - 1 changed file: - rts/linker/LoadNativeObjPosix.c Changes: ===================================== rts/linker/LoadNativeObjPosix.c ===================================== @@ -88,6 +88,27 @@ void freeNativeCode_POSIX (ObjectCode *nc) { } } +/* + * Note [Don't fail due to RTLD_NOW] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * If possible we want to load dynamic objects immediately (e.g. using + * RTLD_NOW) so that we can query their mappings and therefore be able to + * safely unload them. However, there are some cases where an object cannot be + * successfully eagerly loaded yet execution can nevertheless succeed with lazy + * binding. 
+ * + * One such instance was found in #25943, where a library referenced undefined + * symbols. While this pattern is quite dodgy (really, these symbol references + * should be weakly bound in the library), previous GHC versions accepted such + * programs. Moreover, it is important that we are able to load such libraries + * since GHC insists on loading all package dependencies when, e.g., evaluating + * TemplateHaskell splices. + * + * To ensure that we don't fail to load such programs, we first attempt loading + * with RTLD_NOW and, if this fails, attempt to load again with lazy binding + * (taking care to mark the object as not unloadable in this case). + */ + void * loadNativeObj_POSIX (pathchar *path, char **errmsg) { ObjectCode* nc; @@ -99,7 +120,6 @@ void * loadNativeObj_POSIX (pathchar *path, char **errmsg) retval = NULL; - /* If we load the same object multiple times, just return the * already-loaded handle. Note that this is broken if unloadNativeObj * is used, as we don’t do any reference counting; see #24345. @@ -116,6 +136,23 @@ void * loadNativeObj_POSIX (pathchar *path, char **errmsg) nc = mkOc(DYNAMIC_OBJECT, path, NULL, 0, false, NULL, 0); + // If we HAVE_DLINFO, we use RTLD_NOW rather than RTLD_LAZY because we want + // to learn eagerly about all external functions. Otherwise, there is no + // additional advantage to being eager, so it is better to be lazy and only + // bind functions when needed for better performance. + // + // Moreover, it is possible that loading will fail (e.g. if the library + // being loaded depends upon symbols from a library which is not available); + // in this case we will retry loading with load_now=false. See + // Note [Don't fail due to RTLD_NOW].. 
+ bool load_now; +#if defined(HAVE_DLINFO) + load_now = true; +#else + load_now = false; +#endif + +try_again: foreignExportsLoadingObject(nc); // When dlopen() loads a profiled dynamic library, it calls the ctors which @@ -129,17 +166,7 @@ void * loadNativeObj_POSIX (pathchar *path, char **errmsg) ACQUIRE_LOCK(&ccs_mutex); #endif - // If we HAVE_DLINFO, we use RTLD_NOW rather than RTLD_LAZY because we want - // to learn eagerly about all external functions. Otherwise, there is no - // additional advantage to being eager, so it is better to be lazy and only bind - // functions when needed for better performance. - int dlopen_mode; -#if defined(HAVE_DLINFO) - dlopen_mode = RTLD_NOW; -#else - dlopen_mode = RTLD_LAZY; -#endif - + const int dlopen_mode = load_now ? RTLD_NOW : RTLD_LAZY; hdl = dlopen(path, dlopen_mode|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */ nc->dlopen_handle = hdl; nc->status = OBJECT_READY; @@ -151,31 +178,42 @@ void * loadNativeObj_POSIX (pathchar *path, char **errmsg) foreignExportsFinishedLoadingObject(); if (hdl == NULL) { - /* dlopen failed; save the message in errmsg */ - copyErrmsg(errmsg, dlerror()); - goto dlopen_fail; + if (load_now) { + // See Note [Don't fail due to RTLD_NOW] + load_now = false; + goto try_again; + } else { + /* dlopen failed; save the message in errmsg */ + copyErrmsg(errmsg, dlerror()); + goto dlopen_fail; + } } #if defined(HAVE_DLINFO) - struct link_map *map; - if (dlinfo(hdl, RTLD_DI_LINKMAP, &map) == -1) { - /* dlinfo failed; save the message in errmsg */ - copyErrmsg(errmsg, dlerror()); - goto dlinfo_fail; - } + if (load_now) { + struct link_map *map; + if (dlinfo(hdl, RTLD_DI_LINKMAP, &map) == -1) { + /* dlinfo failed; save the message in errmsg */ + copyErrmsg(errmsg, dlerror()); + goto dlinfo_fail; + } - hdl = NULL; // pass handle ownership to nc + hdl = NULL; // pass handle ownership to nc - struct piterate_cb_info piterate_info = { - .nc = nc, - .l_addr = (void *) map->l_addr - }; - 
dl_iterate_phdr(loadNativeObjCb_, &piterate_info); - if (!nc->nc_ranges) { - copyErrmsg(errmsg, "dl_iterate_phdr failed to find obj"); - goto dl_iterate_phdr_fail; + struct piterate_cb_info piterate_info = { + .nc = nc, + .l_addr = (void *) map->l_addr + }; + dl_iterate_phdr(loadNativeObjCb_, &piterate_info); + if (!nc->nc_ranges) { + copyErrmsg(errmsg, "dl_iterate_phdr failed to find obj"); + goto dl_iterate_phdr_fail; + } + nc->unloadable = true; + } else { + nc->nc_ranges = NULL; + nc->unloadable = false; } - nc->unloadable = true; #else nc->nc_ranges = NULL; nc->unloadable = false; View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3c6551e66a8b04ccae68b182f89b0b43... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3c6551e66a8b04ccae68b182f89b0b43... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Ben Gamari (@bgamari)