Cheng Shao pushed to branch wip/batch-loaddll at Glasgow Haskell Compiler / GHC
Commits:
22f896cc by Cheng Shao at 2025-08-21T00:52:46+02:00
ghci: LoadDLL -> LoadDLLs
Closes #25407.
Co-authored-by: Codex
- - - - -
d9a2d03b by Cheng Shao at 2025-08-21T00:52:50+02:00
loadPackages': separate downsweep/upsweep
- - - - -
8 changed files:
- compiler/GHC/Driver/Plugins.hs
- compiler/GHC/Linker/Loader.hs
- compiler/GHC/Linker/MacOS.hs
- compiler/GHC/Runtime/Interpreter.hs
- libraries/ghci/GHCi/Message.hs
- libraries/ghci/GHCi/ObjLink.hs
- libraries/ghci/GHCi/Run.hs
- utils/jsffi/dyld.mjs
Changes:
=====================================
compiler/GHC/Driver/Plugins.hs
=====================================
@@ -421,7 +421,7 @@ loadExternalPlugins ps = do
loadExternalPluginLib :: FilePath -> IO ()
loadExternalPluginLib path = do
-- load library
- loadDLL path >>= \case
+ loadDLLs [path] >>= \case
Left errmsg -> pprPanic "loadExternalPluginLib"
(vcat [ text "Can't load plugin library"
, text " Library path: " <> text path
=====================================
compiler/GHC/Linker/Loader.hs
=====================================
@@ -1,6 +1,7 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE ViewPatterns #-}
--
-- (c) The University of Glasgow 2002-2006
@@ -534,9 +535,10 @@ preloadLib interp hsc_env lib_paths framework_paths pls lib_spec = do
return pls
DLL dll_unadorned -> do
- maybe_errstr <- loadDLL interp (platformSOName platform dll_unadorned)
- case maybe_errstr of
- Right _ -> maybePutStrLn logger "done"
+ res_e <- loadDLLs interp [platformSOName platform dll_unadorned]
+ case res_e of
+ Right [_] -> maybePutStrLn logger "done"
+ Right _ -> preloadFailed "unexpected LoadDLLs response" lib_paths lib_spec
Left mm | platformOS platform /= OSDarwin ->
preloadFailed mm lib_paths lib_spec
Left mm | otherwise -> do
@@ -544,17 +546,19 @@ preloadLib interp hsc_env lib_paths framework_paths pls lib_spec = do
-- since (apparently) some things install that way - see
-- ticket #8770.
let libfile = ("lib" ++ dll_unadorned) <.> "so"
- err2 <- loadDLL interp libfile
- case err2 of
- Right _ -> maybePutStrLn logger "done"
- Left _ -> preloadFailed mm lib_paths lib_spec
+ err2_e <- loadDLLs interp [libfile]
+ case err2_e of
+ Right [_] -> maybePutStrLn logger "done"
+ Right _ -> preloadFailed mm lib_paths lib_spec
+ Left _ -> preloadFailed mm lib_paths lib_spec
return pls
DLLPath dll_path -> do
- do maybe_errstr <- loadDLL interp dll_path
- case maybe_errstr of
- Right _ -> maybePutStrLn logger "done"
- Left mm -> preloadFailed mm lib_paths lib_spec
+ do res_e <- loadDLLs interp [dll_path]
+ case res_e of
+ Right [_] -> maybePutStrLn logger "done"
+ Left mm -> preloadFailed mm lib_paths lib_spec
+ _ -> preloadFailed "unexpected LoadDLLs response" lib_paths lib_spec
return pls
Framework framework ->
@@ -891,10 +895,11 @@ dynLoadObjs interp hsc_env pls@LoaderState{..} objs = do
-- if we got this far, extend the lifetime of the library file
changeTempFilesLifetime tmpfs TFL_GhcSession [soFile]
- m <- loadDLL interp soFile
- case m of
- Right _ -> return $! pls { temp_sos = (libPath, libName) : temp_sos }
- Left err -> linkFail msg (text err)
+ res <- loadDLLs interp [soFile]
+ case res of
+ Right [_] -> return $! pls { temp_sos = (libPath, libName) : temp_sos }
+ Left err -> linkFail msg (text err)
+ _ -> linkFail msg (text "unexpected LoadDLLs response")
where
msg = "GHC.Linker.Loader.dynLoadObjs: Loading temp shared object failed"
@@ -1128,33 +1133,57 @@ loadPackages interp hsc_env new_pkgs = do
loadPackages' :: Interp -> HscEnv -> [UnitId] -> LoaderState -> IO LoaderState
loadPackages' interp hsc_env new_pks pls = do
- pkgs' <- link (pkgs_loaded pls) new_pks
- return $! pls { pkgs_loaded = pkgs'
+ (reverse -> pkgs_info_list, pkgs_almost_loaded) <-
+ downsweep
+ ([], pkgs_loaded pls)
+ new_pks
+ let link_one pkgs new_pkg_info = do
+ (hs_cls, extra_cls, loaded_dlls) <-
+ loadPackage
+ interp
+ hsc_env
+ new_pkg_info
+ evaluate $
+ adjustUDFM
+ ( \old_pkg_info ->
+ old_pkg_info
+ { loaded_pkg_hs_objs = hs_cls,
+ loaded_pkg_non_hs_objs = extra_cls,
+ loaded_pkg_hs_dlls = loaded_dlls
}
+ )
+ pkgs
+ (Packages.unitId new_pkg_info)
+ pkgs_loaded' <- foldlM link_one pkgs_almost_loaded pkgs_info_list
+ evaluate $ pls {pkgs_loaded = pkgs_loaded'}
where
- link :: PkgsLoaded -> [UnitId] -> IO PkgsLoaded
- link pkgs new_pkgs =
- foldM link_one pkgs new_pkgs
-
- link_one pkgs new_pkg
- | new_pkg `elemUDFM` pkgs -- Already linked
- = return pkgs
-
- | Just pkg_cfg <- lookupUnitId (hsc_units hsc_env) new_pkg
- = do { let deps = unitDepends pkg_cfg
- -- Link dependents first
- ; pkgs' <- link pkgs deps
- -- Now link the package itself
- ; (hs_cls, extra_cls, loaded_dlls) <- loadPackage interp hsc_env pkg_cfg
- ; let trans_deps = unionManyUniqDSets [ addOneToUniqDSet (loaded_pkg_trans_deps loaded_pkg_info) dep_pkg
- | dep_pkg <- deps
- , Just loaded_pkg_info <- pure (lookupUDFM pkgs' dep_pkg)
- ]
- ; return (addToUDFM pkgs' new_pkg (LoadedPkgInfo new_pkg hs_cls extra_cls loaded_dlls trans_deps)) }
-
- | otherwise
- = throwGhcExceptionIO (CmdLineError ("unknown package: " ++ unpackFS (unitIdFS new_pkg)))
-
+ downsweep = foldlM downsweep_one
+
+ downsweep_one (pkgs_info_list, pkgs) new_pkg
+ | new_pkg `elemUDFM` pkgs = pure (pkgs_info_list, pkgs)
+ | Just new_pkg_info <- lookupUnitId (hsc_units hsc_env) new_pkg = do
+ let new_pkg_deps = unitDepends new_pkg_info
+ (pkgs_info_list', pkgs') <- downsweep (pkgs_info_list, pkgs) new_pkg_deps
+ let new_pkg_trans_deps =
+ unionManyUniqDSets
+ [ addOneToUniqDSet (loaded_pkg_trans_deps loaded_pkg_info) dep_pkg
+ | dep_pkg <- new_pkg_deps,
+ loaded_pkg_info <- maybeToList $ pkgs' `lookupUDFM` dep_pkg
+ ]
+ pure
+ ( new_pkg_info : pkgs_info_list',
+ addToUDFM pkgs' new_pkg $
+ LoadedPkgInfo
+ { loaded_pkg_uid = new_pkg,
+ loaded_pkg_hs_objs = [],
+ loaded_pkg_non_hs_objs = [],
+ loaded_pkg_hs_dlls = [],
+ loaded_pkg_trans_deps = new_pkg_trans_deps
+ }
+ )
+ | otherwise =
+ throwGhcExceptionIO
+ (CmdLineError ("unknown package: " ++ unpackFS (unitIdFS new_pkg)))
loadPackage :: Interp -> HscEnv -> UnitInfo -> IO ([LibrarySpec], [LibrarySpec], [RemotePtr LoadedDLL])
loadPackage interp hsc_env pkg
@@ -1221,11 +1250,16 @@ loadPackage interp hsc_env pkg
loadFrameworks interp platform pkg
-- See Note [Crash early load_dyn and locateLib]
-- Crash early if can't load any of `known_dlls`
- mapM_ (load_dyn interp hsc_env True) known_extra_dlls
- loaded_dlls <- mapMaybeM (load_dyn interp hsc_env True) known_hs_dlls
+ _ <- load_dyn interp hsc_env True known_extra_dlls
+ -- Batch-load Haskell package DLLs to allow downstream
+ -- interpreters/linkers to parallelize work (#25407).
+ res_loaded_dlls <- loadDLLs interp known_hs_dlls
+ loaded_dlls <- case res_loaded_dlls of
+ Right ds -> pure ds
+ Left e -> cmdLineErrorIO e
-- For remaining `dlls` crash early only when there is surely
-- no package's DLL around ... (not is_dyn)
- mapM_ (load_dyn interp hsc_env (not is_dyn) . platformSOName platform) dlls
+ _ <- load_dyn interp hsc_env (not is_dyn) $ map (platformSOName platform) dlls
#else
let loaded_dlls = []
#endif
@@ -1300,11 +1334,11 @@ restriction very easily.
-- can be passed directly to loadDLL. They are either fully-qualified
-- ("/usr/lib/libfoo.so"), or unqualified ("libfoo.so"). In the latter case,
-- loadDLL is going to search the system paths to find the library.
-load_dyn :: Interp -> HscEnv -> Bool -> FilePath -> IO (Maybe (RemotePtr LoadedDLL))
-load_dyn interp hsc_env crash_early dll = do
- r <- loadDLL interp dll
- case r of
- Right loaded_dll -> pure (Just loaded_dll)
+load_dyn :: Interp -> HscEnv -> Bool -> [FilePath] -> IO (Maybe [RemotePtr LoadedDLL])
+load_dyn interp hsc_env crash_early dlls = do
+ rs <- loadDLLs interp dlls
+ case rs of
+ Right loaded_dlls -> pure (Just loaded_dlls)
Left err ->
if crash_early
then cmdLineErrorIO err
=====================================
compiler/GHC/Linker/MacOS.hs
=====================================
@@ -162,7 +162,7 @@ loadFramework interp extraPaths rootname
-- sorry for the hardcoded paths, I hope they won't change anytime soon:
defaultFrameworkPaths = ["/Library/Frameworks", "/System/Library/Frameworks"]
- -- Try to call loadDLL for each candidate path.
+ -- Try to call loadDLLs for each candidate path.
--
-- See Note [macOS Big Sur dynamic libraries]
findLoadDLL [] errs =
@@ -170,8 +170,9 @@ loadFramework interp extraPaths rootname
-- has no built-in paths for frameworks: give up
return $ Just errs
findLoadDLL (p:ps) errs =
- do { dll <- loadDLL interp (p > fwk_file)
- ; case dll of
- Right _ -> return Nothing
- Left err -> findLoadDLL ps ((p ++ ": " ++ err):errs)
+ do { res <- loadDLLs interp [p > fwk_file]
+ ; case res of
+ Right [_] -> return Nothing
+ Left err -> findLoadDLL ps ((p ++ ": " ++ err):errs)
+ _ -> findLoadDLL ps ((p ++ ": unexpected LoadDLLs response"):errs)
}
=====================================
compiler/GHC/Runtime/Interpreter.hs
=====================================
@@ -38,7 +38,7 @@ module GHC.Runtime.Interpreter
, lookupSymbol
, lookupSymbolInDLL
, lookupClosure
- , loadDLL
+ , loadDLLs
, loadArchive
, loadObj
, unloadObj
@@ -559,13 +559,10 @@ withSymbolCache interp str determine_addr = do
purgeLookupSymbolCache :: Interp -> IO ()
purgeLookupSymbolCache interp = purgeInterpSymbolCache (interpSymbolCache interp)
--- | loadDLL loads a dynamic library using the OS's native linker
--- (i.e. dlopen() on Unix, LoadLibrary() on Windows). It takes either
--- an absolute pathname to the file, or a relative filename
--- (e.g. "libfoo.so" or "foo.dll"). In the latter case, loadDLL
--- searches the standard locations for the appropriate library.
-loadDLL :: Interp -> String -> IO (Either String (RemotePtr LoadedDLL))
-loadDLL interp str = interpCmd interp (LoadDLL str)
+-- | Load multiple DLLs in one batch. Returns either an error (if any DLL fails)
+-- or the list of successfully loaded DLL handles.
+loadDLLs :: Interp -> [String] -> IO (Either String [RemotePtr LoadedDLL])
+loadDLLs interp strs = interpCmd interp (LoadDLLs strs)
loadArchive :: Interp -> String -> IO ()
loadArchive interp path = do
@@ -761,4 +758,3 @@ readIModModBreaks hug mod = imodBreaks_modBreaks . expectJust <$> readIModBreaks
fromEvalResult :: EvalResult a -> IO a
fromEvalResult (EvalException e) = throwIO (fromSerializableException e)
fromEvalResult (EvalSuccess a) = return a
-
=====================================
libraries/ghci/GHCi/Message.hs
=====================================
@@ -84,7 +84,8 @@ data Message a where
LookupSymbol :: String -> Message (Maybe (RemotePtr ()))
LookupSymbolInDLL :: RemotePtr LoadedDLL -> String -> Message (Maybe (RemotePtr ()))
LookupClosure :: String -> Message (Maybe HValueRef)
- LoadDLL :: String -> Message (Either String (RemotePtr LoadedDLL))
+ -- | Load multiple DLLs in one batch
+ LoadDLLs :: [String] -> Message (Either String [RemotePtr LoadedDLL])
LoadArchive :: String -> Message () -- error?
LoadObj :: String -> Message () -- error?
UnloadObj :: String -> Message () -- error?
@@ -441,7 +442,7 @@ data BreakModule
-- that type isn't available here.
data BreakUnitId
--- | A dummy type that tags pointers returned by 'LoadDLL'.
+-- | A dummy type that tags pointers returned by 'LoadDLLs'.
data LoadedDLL
-- SomeException can't be serialized because it contains dynamic
@@ -555,7 +556,7 @@ getMessage = do
1 -> Msg <$> return InitLinker
2 -> Msg <$> LookupSymbol <$> get
3 -> Msg <$> LookupClosure <$> get
- 4 -> Msg <$> LoadDLL <$> get
+ 4 -> Msg <$> LoadDLLs <$> get
5 -> Msg <$> LoadArchive <$> get
6 -> Msg <$> LoadObj <$> get
7 -> Msg <$> UnloadObj <$> get
@@ -601,7 +602,7 @@ putMessage m = case m of
InitLinker -> putWord8 1
LookupSymbol str -> putWord8 2 >> put str
LookupClosure str -> putWord8 3 >> put str
- LoadDLL str -> putWord8 4 >> put str
+ LoadDLLs strs -> putWord8 4 >> put strs
LoadArchive str -> putWord8 5 >> put str
LoadObj str -> putWord8 6 >> put str
UnloadObj str -> putWord8 7 >> put str
=====================================
libraries/ghci/GHCi/ObjLink.hs
=====================================
@@ -12,7 +12,7 @@
-- dynamic linker.
module GHCi.ObjLink
( initObjLinker, ShouldRetainCAFs(..)
- , loadDLL
+ , loadDLLs
, loadArchive
, loadObj
, unloadObj
@@ -43,6 +43,10 @@ import Control.Exception (catch, evaluate)
import GHC.Wasm.Prim
#endif
+#if defined(wasm32_HOST_ARCH)
+import Data.List (intercalate)
+#endif
+
-- ---------------------------------------------------------------------------
-- RTS Linker Interface
-- ---------------------------------------------------------------------------
@@ -67,20 +71,25 @@ data ShouldRetainCAFs
initObjLinker :: ShouldRetainCAFs -> IO ()
initObjLinker _ = pure ()
-loadDLL :: String -> IO (Either String (Ptr LoadedDLL))
-loadDLL f =
+-- Batch load multiple DLLs at once via dyld to enable a single
+-- dependency resolution and more parallel compilation. We pass a
+-- NUL-delimited JSString to avoid array marshalling on wasm.
+loadDLLs :: [String] -> IO (Either String [Ptr LoadedDLL])
+loadDLLs fs =
m `catch` \(err :: JSException) ->
- pure $ Left $ "loadDLL failed for " <> f <> ": " <> show err
+ pure $ Left $ "loadDLLs failed: " <> show err
where
+ packed :: JSString
+ packed = toJSString (intercalate ['\0'] fs)
m = do
- evaluate =<< js_loadDLL (toJSString f)
- pure $ Right nullPtr
+ evaluate =<< js_loadDLLs packed
+ pure $ Right (replicate (length fs) nullPtr)
-- See Note [Variable passing in JSFFI] for where
-- __ghc_wasm_jsffi_dyld comes from
-foreign import javascript safe "__ghc_wasm_jsffi_dyld.loadDLL($1)"
- js_loadDLL :: JSString -> IO ()
+foreign import javascript safe "__ghc_wasm_jsffi_dyld.loadDLLs($1)"
+ js_loadDLLs :: JSString -> IO ()
loadArchive :: String -> IO ()
loadArchive f = throwIO $ ErrorCall $ "loadArchive: unsupported on wasm for " <> f
@@ -241,6 +250,17 @@ resolveObjs = do
r <- c_resolveObjs
return (r /= 0)
+-- | Portable batch load: map 'loadDLL' and collect results.
+loadDLLs :: [String] -> IO (Either String [Ptr LoadedDLL])
+loadDLLs = go []
+ where
+ go acc [] = pure (Right (reverse acc))
+ go acc (p:ps) = do
+ r <- loadDLL p
+ case r of
+ Left err -> pure (Left err)
+ Right h -> go (h:acc) ps
+
-- ---------------------------------------------------------------------------
-- Foreign declarations to RTS entry points which does the real work;
-- ---------------------------------------------------------------------------
=====================================
libraries/ghci/GHCi/Run.hs
=====================================
@@ -57,7 +57,7 @@ run m = case m of
#if defined(javascript_HOST_ARCH)
LoadObj p -> withCString p loadJS
InitLinker -> notSupportedJS m
- LoadDLL {} -> notSupportedJS m
+ LoadDLLs {} -> notSupportedJS m
LoadArchive {} -> notSupportedJS m
UnloadObj {} -> notSupportedJS m
AddLibrarySearchPath {} -> notSupportedJS m
@@ -69,7 +69,9 @@ run m = case m of
LookupClosure str -> lookupJSClosure str
#else
InitLinker -> initObjLinker RetainCAFs
- LoadDLL str -> fmap toRemotePtr <$> loadDLL str
+ LoadDLLs strs -> do
+ -- Always go through batch API. On non-wasm this falls back to per-DLL loads.
+ fmap (map toRemotePtr) <$> loadDLLs strs
LoadArchive str -> loadArchive str
LoadObj str -> loadObj str
UnloadObj str -> unloadObj str
=====================================
utils/jsffi/dyld.mjs
=====================================
@@ -9,7 +9,7 @@
// iserv (GHCi.Server.defaultServer). This part only runs in
// nodejs.
// 2. Dynamic linker: provide RTS linker interfaces like
-// loadDLL/lookupSymbol etc which are imported by wasm iserv. This
+// loadDLLs/lookupSymbol etc which are imported by wasm iserv. This
// part can run in browsers as well.
//
// When GHC starts external interpreter for the wasm target, it starts
@@ -50,7 +50,7 @@
//
// *** What works right now and what doesn't work yet?
//
-// loadDLL & bytecode interpreter work. Template Haskell & ghci work.
+// loadDLLs & bytecode interpreter work. Template Haskell & ghci work.
// Profiled dynamic code works. Compiled code and bytecode can all be
// loaded, though the side effects are constrained to what's supported
// by wasi preview1: we map the full host filesystem into wasm cause
@@ -801,17 +801,17 @@ class DyLD {
return this.#rpc.findSystemLibrary(f);
}
- // When we do loadDLL, we first perform "downsweep" which return a
+ // When we do loadDLLs, we first perform "downsweep" which return a
// toposorted array of dependencies up to itself, then sequentially
// load the downsweep result.
//
// The rationale of a separate downsweep phase, instead of a simple
- // recursive loadDLL function is: V8 delegates async
+ // recursive loadDLLs function is: V8 delegates async
// WebAssembly.compile to a background worker thread pool. To
// maintain consistent internal linker state, we *must* load each so
// file sequentially, but it's okay to kick off compilation asap,
// store the Promise in downsweep result and await for the actual
- // WebAssembly.Module in loadDLL logic. This way we can harness some
+ // WebAssembly.Module in loadDLLs logic. This way we can harness some
// background parallelism.
async #downsweep(p) {
const toks = p.split("/");
@@ -852,8 +852,26 @@ class DyLD {
return acc;
}
- // The real stuff
- async loadDLL(p) {
+ // Batch load multiple DLLs in one go.
+ // Accepts a NUL-delimited string of paths to avoid array marshalling.
+ // Each path can be absolute or a soname; dependency resolution is
+ // performed across the full set to enable maximal parallel compile
+ // while maintaining sequential instantiation order.
+ async loadDLLs(packed) {
+ // Normalize input to an array of strings. When called from Haskell
+ // we pass a single JSString containing NUL-separated paths.
+ const paths = (typeof packed === "string"
+ ? (packed.length === 0 ? [] : packed.split("\0"))
+ : [packed] // tolerate an accidental single path object
+ ).filter((s) => s.length > 0);
+
+ // Compute a single downsweep plan for the whole batch.
+ // Note: #downsweep mutates #loadedSos to break cycles and dedup.
+ const plan = [];
+ for (const p of paths) {
+ plan.push(...(await this.#downsweep(p)));
+ }
+
for (const {
memSize,
memP2Align,
@@ -861,7 +879,7 @@ class DyLD {
tableP2Align,
modp,
soname,
- } of await this.#downsweep(p)) {
+ } of plan) {
const import_obj = {
wasi_snapshot_preview1: this.#wasi.wasiImport,
env: {
@@ -1138,7 +1156,7 @@ export async function main({ rpc, libdir, ghciSoPath, args }) {
rpc,
});
await dyld.addLibrarySearchPath(libdir);
- await dyld.loadDLL(ghciSoPath);
+ await dyld.loadDLLs(ghciSoPath);
const reader = rpc.readStream.getReader();
const writer = rpc.writeStream.getWriter();
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/98c839ddd9ea9beeef0fe2dda49ac45...
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/98c839ddd9ea9beeef0fe2dda49ac45...
You're receiving this email because of your account on gitlab.haskell.org.