Wolfgang Jeltsch pushed to branch wip/jeltsch/more-efficient-home-unit-imports-finding at Glasgow Haskell Compiler / GHC
Commits:
5d1d513c by Wolfgang Jeltsch at 2026-05-06T15:25:01+03:00
Introduce a cache of home module name providers
This contribution introduces to the module graph a cache that maps home
module names to sets of units providing them and changes the finder to
use that cache. This is a performance optimization, especially for
multi-home-unit builds.
The particular changes are as follows:
* In `GHC.Unit.Module.Graph`, `ModuleGraph` is extended with a new
field `mg_home_module_name_providers_map`, exposed as
`mgHomeModuleNameProvidersMap`. This is a cache that assigns to each
home module name the set of IDs of home units that define it.
Operations that construct module graphs are updated such that this
cache stays synchronized.
* In `GHC.Unit.Finder`, `findImportedModule` is changed to pull
`mgHomeModuleNameProvidersMap` from `hsc_mod_graph` and pass it to
`findImportedModuleNoHsc`, which now does not search home units in
arbitrary order but prioritizes those units that the cache mentions
as potential providers of the requested module.
Resolves #27055.
Co-authored-by: Wolfgang Jeltsch
- - - - -
2 changed files:
- compiler/GHC/Unit/Finder.hs
- compiler/GHC/Unit/Module/Graph.hs
Changes:
=====================================
compiler/GHC/Unit/Finder.hs
=====================================
@@ -44,6 +44,11 @@ import GHC.Data.OsPath
import GHC.Unit.Env
import GHC.Unit.Types
import GHC.Unit.Module
+import GHC.Unit.Module.Graph
+ (
+ HomeModuleNameProvidersMap,
+ mgHomeModuleNameProvidersMap
+ )
import GHC.Unit.Home
import GHC.Unit.Home.Graph (UnitEnvGraph)
import qualified GHC.Unit.Home.Graph as HUG
@@ -72,7 +77,8 @@ import GHC.Driver.Config.Finder
import GHC.Types.Unique.Set
import qualified Data.List as L(sort)
import Data.List.NonEmpty ( NonEmpty (..) )
-import qualified Data.Set as Set (toList)
+import Data.Set (Set)
+import qualified Data.Set as Set (empty, intersection, difference, null, toList)
import qualified System.Directory as SD
import qualified System.OsPath as OsPath
import qualified Data.List.NonEmpty as NE
@@ -177,12 +183,13 @@ getDirHash dir = do
findImportedModule :: HscEnv -> ModuleName -> PkgQual -> IO FindResult
findImportedModule hsc_env mod pkg_qual =
- let fc = hsc_FC hsc_env
- mhome_unit = hsc_home_unit_maybe hsc_env
- dflags = hsc_dflags hsc_env
- fopts = initFinderOpts dflags
+ let fc = hsc_FC hsc_env
+ mb_home_unit = hsc_home_unit_maybe hsc_env
+ dflags = hsc_dflags hsc_env
+ fopts = initFinderOpts dflags
in do
- findImportedModuleNoHsc fc fopts (hsc_unit_env hsc_env) mhome_unit mod pkg_qual
+ let home_module_name_providers_map = mgHomeModuleNameProvidersMap (hsc_mod_graph hsc_env)
+ findImportedModuleNoHsc fc fopts (hsc_unit_env hsc_env) home_module_name_providers_map mb_home_unit mod pkg_qual
findImportedModuleWithIsBoot :: HscEnv -> ModuleName -> IsBootInterface -> PkgQual -> IO FindResult
findImportedModuleWithIsBoot hsc_env mod is_boot pkg_qual = do
@@ -195,55 +202,118 @@ findImportedModuleNoHsc
:: FinderCache
-> FinderOpts
-> UnitEnv
+ -> HomeModuleNameProvidersMap
-> Maybe HomeUnit
-> ModuleName
-> PkgQual
-> IO FindResult
-findImportedModuleNoHsc fc fopts ue mhome_unit mod_name mb_pkg =
+findImportedModuleNoHsc fc fopts ue home_module_name_providers_map mb_home_unit mod_name mb_pkg =
case mb_pkg of
NoPkgQual -> unqual_import
- ThisPkg uid | (homeUnitId <$> mhome_unit) == Just uid -> home_import
+ ThisPkg uid | (homeUnitId <$> mb_home_unit) == Just uid -> home_import
| Just os <- lookup uid other_fopts -> home_pkg_import (uid, os)
- | otherwise -> pprPanic "findImportModule" (ppr mod_name $$ ppr mb_pkg $$ ppr (homeUnitId <$> mhome_unit) $$ ppr uid $$ ppr (map fst all_opts))
+ | otherwise -> pprPanic "findImportModule" (ppr mod_name $$ ppr mb_pkg $$ ppr (homeUnitId <$> mb_home_unit) $$ ppr uid $$ ppr (map fst all_opts))
OtherPkg _ -> pkg_import
where
- all_opts = case mhome_unit of
- Nothing -> other_fopts
- Just home_unit -> (homeUnitId home_unit, fopts) : other_fopts
-
-
- home_import = case mhome_unit of
- Just home_unit -> findHomeModule fc fopts home_unit mod_name
- Nothing -> pure $ NoPackage (panic "findImportedModule: no home-unit")
-
+ mb_home_unit_id :: Maybe UnitId
+ mb_home_unit_id = homeUnitId <$> mb_home_unit
+
+ all_opts :: [(UnitId, FinderOpts)]
+ all_opts = case mb_home_unit_id of
+ Nothing -> other_fopts
+ Just home_unit_id -> (home_unit_id, fopts) : other_fopts
+
+ home_import :: IO FindResult
+ home_import = case mb_home_unit of
+ Just home_unit -> findHomeModule fc fopts home_unit mod_name
+ Nothing -> pure $
+ NoPackage (panic "findImportedModule: no home-unit")
+
+ {-
+ If the module is reexported, then look for it as if it was from the
+ perspective of that package which reexports it.
+ -}
+ home_pkg_import :: (UnitId, FinderOpts) -> IO FindResult
home_pkg_import (uid, opts)
- -- If the module is reexported, then look for it as if it was from the perspective
- -- of that package which reexports it.
- | Just real_mod_name <- lookupUniqMap (finder_reexportedModules opts) mod_name =
- findImportedModuleNoHsc fc opts ue (Just $ DefiniteHomeUnit uid Nothing) real_mod_name NoPkgQual
- | elementOfUniqSet mod_name (finder_hiddenModules opts) =
- return (mkHomeHidden uid)
- | otherwise =
- findHomePackageModule fc opts uid mod_name
-
- -- Do not be smart and change this to `foldr orIfNotFound home_import hs` as
- -- that is not the same!! home_import is first because we need to look within ourselves
- -- first before looking at the packages in order.
- any_home_import = foldr1 orIfNotFound (home_import:| map home_pkg_import other_fopts)
-
- pkg_import = findExposedPackageModule fc fopts units mod_name mb_pkg
-
- unqual_import = any_home_import
- `orIfNotFound`
- findExposedPackageModule fc fopts units mod_name NoPkgQual
-
- units = case mhome_unit of
- Nothing -> ue_homeUnitState ue
- Just home_unit -> HUG.homeUnitEnv_units $ ue_findHomeUnitEnv (homeUnitId home_unit) ue
- hpt_deps :: [UnitId]
- hpt_deps = Set.toList (homeUnitDepends units)
- other_fopts = map (\uid -> (uid, initFinderOpts (homeUnitEnv_dflags (ue_findHomeUnitEnv uid ue)))) hpt_deps
+ | Just real_mod_name
+ <- lookupUniqMap (finder_reexportedModules opts) mod_name
+ = findImportedModuleNoHsc fc opts ue home_module_name_providers_map
+ (Just $ DefiniteHomeUnit uid Nothing)
+ real_mod_name
+ NoPkgQual
+ | elementOfUniqSet mod_name (finder_hiddenModules opts)
+ = return (mkHomeHidden uid)
+ | otherwise
+ = findHomePackageModule fc opts uid mod_name
+
+ any_home_import :: IO FindResult
+ any_home_import = foldr1 orIfNotFound $
+ home_import :| map home_pkg_import other_fopts
+ {-
+ Do not try to be smart and change this to `foldr orIfNotFound
+ home_import (map home_pkg_import other_fopts)`, as that would not be the
+ same. `home_import` is first because we need to first look within the
+ current unit before looking at the other units in order.
+ -}
+
+ pkg_import :: IO FindResult
+ pkg_import = findExposedPackageModule fc fopts unit_state mod_name mb_pkg
+
+ unqual_import :: IO FindResult
+ unqual_import
+ = any_home_import
+ `orIfNotFound`
+ findExposedPackageModule fc fopts unit_state mod_name NoPkgQual
+
+ unit_state :: UnitState
+ unit_state = case mb_home_unit_id of
+ Nothing -> ue_homeUnitState ue
+ Just home_unit_id -> HUG.homeUnitEnv_units $
+ ue_findHomeUnitEnv home_unit_id ue
+
+ home_unit_deps :: Set UnitId
+ home_unit_deps = homeUnitDepends unit_state
+
+ ranked_home_unit_deps :: [UnitId]
+ ranked_home_unit_deps = rankedHomeUnitDeps home_module_name_providers_map
+ mod_name
+ home_unit_deps
+
+ other_fopts :: [(UnitId, FinderOpts)]
+ other_fopts
+ = [
+ (uid, opts) |
+ uid <- ranked_home_unit_deps,
+ let opts = initFinderOpts $
+ homeUnitEnv_dflags (ue_findHomeUnitEnv uid ue)
+ ]
+
+rankedHomeUnitDeps :: HomeModuleNameProvidersMap
+ -> ModuleName
+ -> Set UnitId
+ -> [UnitId]
+rankedHomeUnitDeps _ _ home_unit_deps | Set.null home_unit_deps
+ = []
+rankedHomeUnitDeps home_module_name_providers_map mod_name home_unit_deps
+ = Set.toList cached_deps ++ Set.toList uncached_deps
+ where
+
+ cached_providers :: Set UnitId
+ cached_providers = lookupWithDefaultUniqMap home_module_name_providers_map
+ Set.empty
+ mod_name
+
+ cached_deps :: Set UnitId
+ cached_deps = Set.intersection home_unit_deps cached_providers
+
+ uncached_deps :: Set UnitId
+ uncached_deps = Set.difference home_unit_deps cached_providers
+{-
+ The special handling of the situation where the dependency set is empty does
+ not change the result, but it avoids triggering evaluation of the module
+ graph.
+-}
-- | Locate a plugin module requested by the user, for a compiler
-- plugin. This consults the same set of exposed packages as
@@ -261,15 +331,15 @@ findPluginModule :: HscEnv -> ModuleName -> IO FindResult
findPluginModule hsc_env mod_name = do
let fc = hsc_FC hsc_env
let units = hsc_units hsc_env
- let mhome_unit = hsc_home_unit_maybe hsc_env
- findPluginModuleNoHsc fc (initFinderOpts (hsc_dflags hsc_env)) units mhome_unit mod_name
+ let mb_home_unit = hsc_home_unit_maybe hsc_env
+ findPluginModuleNoHsc fc (initFinderOpts (hsc_dflags hsc_env)) units mb_home_unit mod_name
-- | A version of findExactModule which takes the exact parts of the HscEnv it needs
-- directly.
findExactModuleNoHsc :: FinderCache -> FinderOpts -> UnitEnvGraph FinderOpts -> UnitState -> Maybe HomeUnit -> InstalledModule -> IsBootInterface -> IO InstalledFindResult
-findExactModuleNoHsc fc fopts other_fopts unit_state mhome_unit mod is_boot = do
- res <- case mhome_unit of
+findExactModuleNoHsc fc fopts other_fopts unit_state mb_home_unit mod is_boot = do
+ res <- case mb_home_unit of
Just home_unit
| isHomeInstalledModule home_unit mod
-> findInstalledHomeModule fc fopts (homeUnitId home_unit) (moduleName mod)
=====================================
compiler/GHC/Unit/Module/Graph.hs
=====================================
@@ -67,6 +67,8 @@ module GHC.Unit.Module.Graph
, mgLookupModule
, mgLookupModuleName
, mgHasHoles
+ , HomeModuleNameProvidersMap
+ , mgHomeModuleNameProvidersMap
, showModMsg
-- ** Reachability queries
@@ -156,10 +158,12 @@ import GHC.Unit.Module.ModIface
import GHC.Utils.Misc ( partitionWith )
import System.FilePath
+import Data.Set (Set)
+import qualified Data.Set as Set
+import Data.Map (Map)
import qualified Data.Map as Map
import GHC.Types.Unique.DSet
-import qualified Data.Set as Set
-import Data.Set (Set)
+import GHC.Types.Unique.Map (UniqMap, emptyUniqMap, listToUniqMap_C)
import GHC.Unit.Module
import GHC.Unit.Module.ModNodeKey
import GHC.Unit.Module.Stage
@@ -202,14 +206,32 @@ data ModuleGraph = ModuleGraph
-- Cached computation, whether any of the ModuleGraphNode are isHoleModule,
-- This is only used for a hack in GHC.Iface.Load to do with backpack, please
-- remove this at the earliest opportunity.
+ , mg_home_module_name_providers_map :: HomeModuleNameProvidersMap
+ -- ^ For each module name, which home units provide it.
}
+type HomeModuleNameProvidersMap = UniqMap ModuleName (Set UnitId)
+
+mkHomeModuleNameProvidersMap :: [ModuleGraphNode] -> HomeModuleNameProvidersMap
+mkHomeModuleNameProvidersMap nodes
+ = listToUniqMap_C Set.union $
+ [
+ (moduleName, Set.singleton unitID) |
+ ModuleNode _ moduleNodeInfo <- nodes,
+ let moduleName = moduleNodeInfoModuleName moduleNodeInfo,
+ let unitID = moduleNodeInfoUnitId moduleNodeInfo
+ ]
+
+mgHomeModuleNameProvidersMap :: ModuleGraph -> HomeModuleNameProvidersMap
+mgHomeModuleNameProvidersMap = mg_home_module_name_providers_map
+
-- | Why do we ever need to construct empty graphs? Is it because of one shot mode?
emptyMG :: ModuleGraph
emptyMG = ModuleGraph [] (graphReachability emptyGraph, const Nothing)
(graphReachability emptyGraph, const Nothing)
(graphReachability emptyGraph, const Nothing)
False
+ emptyUniqMap
-- | Construct a module graph. This function should be the only entry point for
-- building a 'ModuleGraph', since it is supposed to be built once and never modified.
@@ -308,7 +330,7 @@ checkModuleGraph ModuleGraph{..} =
where
duplicate_errs = rights (Map.elems node_types)
- node_types :: Map.Map NodeKey (Either ModuleNodeType ModuleGraphInvariantError)
+ node_types :: Map NodeKey (Either ModuleNodeType ModuleGraphInvariantError)
node_types = Map.fromListWithKey go [ (mkNodeKey n, Left (moduleNodeType n)) | n <- mg_mss ]
where
-- Multiple nodes with the same key are not allowed.
@@ -319,7 +341,7 @@ checkModuleGraph ModuleGraph{..} =
-- | Check that all dependencies in the graph are present in the node_types map.
-- This is a helper function used by checkModuleGraph.
-checkAllDependenciesInGraph :: Map.Map NodeKey (Either ModuleNodeType ModuleGraphInvariantError)
+checkAllDependenciesInGraph :: Map NodeKey (Either ModuleNodeType ModuleGraphInvariantError)
-> ModuleGraphNode
-> Maybe ModuleGraphInvariantError
checkAllDependenciesInGraph node_types node =
@@ -334,7 +356,7 @@ checkAllDependenciesInGraph node_types node =
-- | Check if for the fixed module node invariant:
--
-- Fixed nodes can only depend on other fixed nodes.
-checkFixedModuleInvariant :: Map.Map NodeKey (Either ModuleNodeType ModuleGraphInvariantError)
+checkFixedModuleInvariant :: Map NodeKey (Either ModuleNodeType ModuleGraphInvariantError)
-> ModuleGraphNode
-> Maybe ModuleGraphInvariantError
checkFixedModuleInvariant node_types node = case node of
@@ -484,13 +506,17 @@ isEmptyMG = null . mg_mss
-- To preserve invariants, 'f' can't change the isBoot status.
mapMG :: (ModSummary -> ModSummary) -> ModuleGraph -> ModuleGraph
mapMG f mg@ModuleGraph{..} = mg
- { mg_mss = flip fmap mg_mss $ \case
- InstantiationNode uid iuid -> InstantiationNode uid iuid
- LinkNode uid nks -> LinkNode uid nks
- ModuleNode deps (ModuleNodeFixed key loc) -> ModuleNode deps (ModuleNodeFixed key loc)
- ModuleNode deps (ModuleNodeCompile ms) -> ModuleNode deps (ModuleNodeCompile (f ms))
- UnitNode deps uid -> UnitNode deps uid
+ { mg_mss = new_mss
+ , mg_home_module_name_providers_map = mkHomeModuleNameProvidersMap new_mss
}
+ where
+ new_mss =
+ flip fmap mg_mss $ \case
+ InstantiationNode uid iuid -> InstantiationNode uid iuid
+ LinkNode uid nks -> LinkNode uid nks
+ ModuleNode deps (ModuleNodeFixed key loc) -> ModuleNode deps (ModuleNodeFixed key loc)
+ ModuleNode deps (ModuleNodeCompile ms) -> ModuleNode deps (ModuleNodeCompile (f ms))
+ UnitNode deps uid -> UnitNode deps uid
-- | Map a function 'f' over all the 'ModSummaries', in 'IO'.
-- To preserve invariants, 'f' can't change the isBoot status.
@@ -856,7 +882,7 @@ moduleNodeInfoBootString mn@(ModuleNodeFixed {}) =
-- described in the export list haddocks.
--------------------------------------------------------------------------------
-newtype NodeMap a = NodeMap { unNodeMap :: Map.Map NodeKey a }
+newtype NodeMap a = NodeMap { unNodeMap :: Map NodeKey a }
deriving (Functor, Traversable, Foldable)
-- | Transitive dependencies, including SOURCE edges
@@ -932,7 +958,7 @@ moduleGraphNodesZero summaries =
lookup_key :: ZeroScopeKey -> Maybe Int
lookup_key = fmap zeroSummaryNodeKey . lookup_node
- node_map :: Map.Map ZeroScopeKey ZeroSummaryNode
+ node_map :: Map ZeroScopeKey ZeroSummaryNode
node_map =
Map.fromList [ (s, node)
| node <- nodes
@@ -1031,7 +1057,7 @@ moduleGraphNodesStages summaries =
lookup_key :: (NodeKey, ModuleStage) -> Maybe Int
lookup_key = fmap stageSummaryNodeKey . lookup_node
- node_map :: Map.Map (NodeKey, ModuleStage) StageSummaryNode
+ node_map :: Map (NodeKey, ModuleStage) StageSummaryNode
node_map =
Map.fromList [ (s, node)
| node <- nodes
@@ -1049,10 +1075,13 @@ moduleGraphNodesStages summaries =
extendMG :: ModuleGraph -> ModuleGraphNode -> ModuleGraph
extendMG ModuleGraph{..} node =
ModuleGraph
- { mg_mss = node : mg_mss
- , mg_graph = mkTransDeps (node : mg_mss)
- , mg_loop_graph = mkTransLoopDeps (node : mg_mss)
- , mg_zero_graph = mkTransZeroDeps (node : mg_mss)
+ { mg_mss = new_mss
+ , mg_graph = mkTransDeps new_mss
+ , mg_loop_graph = mkTransLoopDeps new_mss
+ , mg_zero_graph = mkTransZeroDeps new_mss
, mg_has_holes = mg_has_holes || maybe False isHsigFile (moduleNodeInfoHscSource =<< mgNodeIsModule node)
+ , mg_home_module_name_providers_map = mkHomeModuleNameProvidersMap new_mss
}
+ where
+ new_mss = node : mg_mss
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/5d1d513cc16976aea5efae2880cb6555...
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/5d1d513cc16976aea5efae2880cb6555...
You're receiving this email because of your account on gitlab.haskell.org.