Torsten Schmits pushed to branch wip/torsten.schmits/unit-index-debug at Glasgow Haskell Compiler / GHC Commits: 36c660e0 by Matthew Pickering at 2025-12-03T20:19:51+01:00 Use ModuleGraph for cache - - - - - d93b96b1 by Matthew Pickering at 2025-12-03T20:19:51+01:00 OsPath for Map - - - - - 45884d99 by Matthew Pickering at 2025-12-03T20:19:51+01:00 Set hpt deps - - - - - 17191a26 by Matthew Pickering at 2025-12-03T20:19:51+01:00 HomeUnitMap - - - - - 0278068c by Matthew Pickering at 2025-12-03T20:20:43+01:00 Use a name provider map for home packages - - - - - 10 changed files: - compiler/GHC/Driver/Make.hs - compiler/GHC/Linker/Loader.hs - compiler/GHC/Rename/Names.hs - compiler/GHC/Unit/Env.hs - compiler/GHC/Unit/Finder.hs - compiler/GHC/Unit/Module/Graph.hs - compiler/GHC/Unit/State.hs - ghc/Main.hs - testsuite/tests/ghc-api/downsweep/OldModLocation.hs - testsuite/tests/ghc-api/downsweep/PartialDownsweep.hs Changes: ===================================== compiler/GHC/Driver/Make.hs ===================================== @@ -114,6 +114,8 @@ import Data.Either ( rights, partitionEithers, lefts ) import qualified Data.Map as Map import qualified Data.Set as Set +import GHC.Data.OsPath (OsPath) +import qualified GHC.Data.OsPath as OsPath import Control.Concurrent ( newQSem, waitQSem, signalQSem, ThreadId, killThread, forkIOWithUnmask ) import qualified GHC.Conc as CC import Control.Concurrent.MVar @@ -247,7 +249,7 @@ depanalPartial excluded_mods allow_dup_roots = do liftIO $ flushFinderCaches (hsc_FC hsc_env) (hsc_unit_env hsc_env) (errs, graph_nodes) <- liftIO $ downsweep - hsc_env (mgModSummaries old_graph) + hsc_env (mgModSummaries old_graph) (Just old_graph) excluded_mods allow_dup_roots let mod_graph = mkModuleGraph graph_nodes @@ -1541,6 +1543,10 @@ warnUnnecessarySourceImports sccs = do -- an import of this module mean. type DownsweepCache = M.Map (UnitId, PkgQual, ModuleNameWithIsBoot) [Either DriverMessages ModSummary] +moduleGraphNodeMap :: ModuleGraph -> M.Map NodeKey ModuleGraphNode +moduleGraphNodeMap graph = + M.fromList [(mkNodeKey node, node) | node <- mgModSummaries' graph] + ----------------------------------------------------------------------------- -- -- | Downsweep (dependency analysis) @@ -1559,6 +1565,8 @@ type DownsweepCache = M.Map (UnitId, PkgQual, ModuleNameWithIsBoot) [Either Driv downsweep :: HscEnv -> [ModSummary] -- ^ Old summaries + -> Maybe ModuleGraph + -- ^ Existing module graph to reuse cached nodes from -> [ModuleName] -- Ignore dependencies on these; treat -- them as if they were package modules -> Bool -- True <=> allow multiple targets to have @@ -1568,10 +1576,10 @@ downsweep :: HscEnv -- The non-error elements of the returned list all have distinct -- (Modules, IsBoot) identifiers, unless the Bool is true in -- which case there can be repeats -downsweep hsc_env old_summaries excl_mods allow_dup_roots = do +downsweep hsc_env old_summaries old_graph excl_mods allow_dup_roots = do n_jobs <- mkWorkerLimit (hsc_dflags hsc_env) new <- rootSummariesParallel n_jobs hsc_env summary - downsweep_imports hsc_env old_summary_map excl_mods allow_dup_roots new + downsweep_imports hsc_env old_summary_map old_graph excl_mods allow_dup_roots new where summary = getRootSummary excl_mods old_summary_map @@ -1580,21 +1588,23 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots = do -- file was used in. -- Reuse these if we can because the most expensive part of downsweep is -- reading the headers. - old_summary_map :: M.Map (UnitId, FilePath) ModSummary + old_summary_map :: M.Map (UnitId, OsPath) ModSummary old_summary_map = - M.fromList [((ms_unitid ms, msHsFilePath ms), ms) | ms <- old_summaries] + M.fromList [((ms_unitid ms, OsPath.unsafeEncodeUtf (msHsFilePath ms)), ms) | ms <- old_summaries] downsweep_imports :: HscEnv - -> M.Map (UnitId, FilePath) ModSummary + -> M.Map (UnitId, OsPath) ModSummary + -> Maybe ModuleGraph -> [ModuleName] -> Bool -> ([(UnitId, DriverMessages)], [ModSummary]) -> IO ([DriverMessages], [ModuleGraphNode]) -downsweep_imports hsc_env old_summaries excl_mods allow_dup_roots (root_errs, rootSummariesOk) +downsweep_imports hsc_env old_summaries old_graph excl_mods allow_dup_roots (root_errs, rootSummariesOk) = do let root_map = mkRootMap rootSummariesOk checkDuplicates root_map - (deps, map0) <- loopSummaries rootSummariesOk (M.empty, root_map) + let done0 = maybe M.empty moduleGraphNodeMap old_graph + (deps, map0) <- loopSummaries rootSummariesOk (done0, root_map) let closure_errs = checkHomeUnitsClosed (hsc_unit_env hsc_env) let unit_env = hsc_unit_env hsc_env let tmpfs = hsc_tmpfs hsc_env @@ -1725,7 +1735,7 @@ downsweep_imports hsc_env old_summaries excl_mods allow_dup_roots (root_errs, ro getRootSummary :: [ModuleName] -> - M.Map (UnitId, FilePath) ModSummary -> + M.Map (UnitId, OsPath) ModSummary -> HscEnv -> Target -> IO (Either (UnitId, DriverMessages) ModSummary) @@ -2071,7 +2081,7 @@ mkRootMap summaries = Map.fromListWith (flip (++)) summariseFile :: HscEnv -> HomeUnit - -> M.Map (UnitId, FilePath) ModSummary -- old summaries + -> M.Map (UnitId, OsPath) ModSummary -- old summaries -> FilePath -- source file name -> Maybe Phase -- start phase -> Maybe (StringBuffer,UTCTime) @@ -2080,7 +2090,7 @@ summariseFile summariseFile hsc_env' home_unit old_summaries src_fn mb_phase maybe_buf -- we can use a cached summary if one is available and the -- source file hasn't changed, - | Just old_summary <- M.lookup (homeUnitId home_unit, src_fn) old_summaries + | Just old_summary <- M.lookup (homeUnitId home_unit, src_fn_os) old_summaries = do let location = ms_location $ old_summary @@ -2101,6 +2111,7 @@ summariseFile hsc_env' home_unit old_summaries src_fn mb_phase maybe_buf where -- change the main active unit so all operations happen relative to the given unit hsc_env = hscSetActiveHomeUnit home_unit hsc_env' + src_fn_os = OsPath.unsafeEncodeUtf src_fn -- src_fn does not necessarily exist on the filesystem, so we need to -- check what kind of target we are dealing with get_src_hash = case maybe_buf of @@ -2190,7 +2201,7 @@ data SummariseResult = summariseModule :: HscEnv -> HomeUnit - -> M.Map (UnitId, FilePath) ModSummary + -> M.Map (UnitId, OsPath) ModSummary -- ^ Map of old summaries -> IsBootInterface -- True <=> a {-# SOURCE #-} import -> Located ModuleName -- Imported module to be summarised @@ -2251,7 +2262,7 @@ summariseModule hsc_env' home_unit old_summary_map is_boot (L _ wanted_mod) mb_p Right ms -> FoundHome ms new_summary_cache_check loc mod src_fn h - | Just old_summary <- Map.lookup ((toUnitId (moduleUnit mod), src_fn)) old_summary_map = + | Just old_summary <- Map.lookup ((toUnitId (moduleUnit mod), src_fn_os)) old_summary_map = -- check the hash on the source file, and -- return the cached summary if it hasn't changed. If the @@ -2262,6 +2273,8 @@ summariseModule hsc_env' home_unit old_summary_map is_boot (L _ wanted_mod) mb_p Nothing -> checkSummaryHash hsc_env (new_summary loc mod src_fn) old_summary loc h | otherwise = new_summary loc mod src_fn h + where + src_fn_os = OsPath.unsafeEncodeUtf src_fn new_summary :: ModLocation -> Module ===================================== compiler/GHC/Linker/Loader.hs ===================================== @@ -358,7 +358,7 @@ loadCmdLineLibs' interp hsc_env pls = snd <$> let hsc' = hscSetActiveUnitId uid hsc_env -- Load potential dependencies first (done', pls') <- foldM (\(done', pls') uid -> load done' uid pls') (done, pls) - (homeUnitDepends (hsc_units hsc')) + (Set.toList (homeUnitDepends (hsc_units hsc'))) pls'' <- loadCmdLineLibs'' interp hsc' pls' return $ (Set.insert uid done', pls'') ===================================== compiler/GHC/Rename/Names.hs ===================================== @@ -475,11 +475,14 @@ renamePkgQual unit_env query mn mb_pkg = case mb_pkg of -- not really correct as pkg_fs is unlikely to be a valid unit-id but -- we will report the failure later... where - home_names = map (\uid -> (uid, mkFastString <$> thisPackageName (homeUnitEnv_dflags (ue_findHomeUnitEnv uid unit_env)))) hpt_deps + home_names = + [ (uid, mkFastString <$> thisPackageName (homeUnitEnv_dflags (ue_findHomeUnitEnv uid unit_env))) + | uid <- S.toList hpt_deps + ] units = ue_units unit_env - hpt_deps :: [UnitId] + hpt_deps :: S.Set UnitId hpt_deps = homeUnitDepends units hscRenameRawPkgQual :: ===================================== compiler/GHC/Unit/Env.hs ===================================== @@ -142,7 +142,7 @@ ue_transitiveHomeDeps uid unit_env = Set.toList (loop Set.empty [uid]) loop acc (uid:uids) | uid `Set.member` acc = loop acc uids | otherwise = - let hue = homeUnitDepends (homeUnitEnv_units (ue_findHomeUnitEnv uid unit_env)) + let hue = Set.toList (homeUnitDepends (homeUnitEnv_units (ue_findHomeUnitEnv uid unit_env))) in loop (Set.insert uid acc) (hue ++ uids) ===================================== compiler/GHC/Unit/Finder.hs ===================================== @@ -68,8 +68,9 @@ import Control.Monad import Data.Time import qualified Data.Map as M import GHC.Driver.Env - ( hsc_home_unit_maybe, HscEnv(hsc_FC, hsc_dflags, hsc_unit_env), hscUnitIndexQuery ) + ( hsc_home_unit_maybe, HscEnv(hsc_FC, hsc_dflags, hsc_unit_env, hsc_mod_graph), hscUnitIndexQuery ) import GHC.Driver.Config.Finder +import GHC.Unit.Module.Graph (mgHomeModuleMap, ModuleNameHomeMap) import qualified Data.Set as Set import qualified Data.List.NonEmpty as NE @@ -163,30 +164,36 @@ findImportedModule hsc_env mod pkg_qual = dflags = hsc_dflags hsc_env fopts = initFinderOpts dflags in do + let home_module_map = mgHomeModuleMap (hsc_mod_graph hsc_env) query <- hscUnitIndexQuery hsc_env - findImportedModuleNoHsc fc fopts (hsc_unit_env hsc_env) query mhome_unit mod pkg_qual + findImportedModuleNoHsc fc fopts (hsc_unit_env hsc_env) query home_module_map mhome_unit mod pkg_qual findImportedModuleNoHsc :: FinderCache -> FinderOpts -> UnitEnv -> UnitIndexQuery + -> ModuleNameHomeMap -> Maybe HomeUnit -> ModuleName -> PkgQual -> IO FindResult -findImportedModuleNoHsc fc fopts ue query mhome_unit mod_name mb_pkg = +findImportedModuleNoHsc fc fopts ue query home_module_map mhome_unit mod_name mb_pkg = case mb_pkg of NoPkgQual -> unqual_import ThisPkg uid | (homeUnitId <$> mhome_unit) == Just uid -> home_import - | Just os <- lookup uid other_fopts -> home_pkg_import (uid, os) + | Just os <- M.lookup uid other_fopts_map -> home_pkg_import (uid, os) | otherwise -> pprPanic "findImportModule" (ppr mod_name $$ ppr mb_pkg $$ ppr (homeUnitId <$> mhome_unit) $$ ppr uid $$ ppr (map fst all_opts)) OtherPkg _ -> pkg_import where + (complete_units, module_name_map) = home_module_map + module_home_units = M.findWithDefault Set.empty mod_name module_name_map + current_unit_id = homeUnitId <$> mhome_unit all_opts = case mhome_unit of - Nothing -> other_fopts - Just home_unit -> (homeUnitId home_unit, fopts) : other_fopts + Nothing -> other_fopts_list + Just home_unit -> (homeUnitId home_unit, fopts) : other_fopts_list + other_fopts_map = M.fromList other_fopts_list home_import = case mhome_unit of Just home_unit -> findHomeModule fc fopts home_unit mod_name @@ -197,7 +204,7 @@ findImportedModuleNoHsc fc fopts ue query mhome_unit mod_name mb_pkg = -- If the module is reexported, then look for it as if it was from the perspective -- of that package which reexports it. | mod_name `Set.member` finder_reexportedModules opts = - findImportedModuleNoHsc fc opts ue query (Just $ DefiniteHomeUnit uid Nothing) mod_name NoPkgQual + findImportedModuleNoHsc fc opts ue query home_module_map (Just $ DefiniteHomeUnit uid Nothing) mod_name NoPkgQual | mod_name `Set.member` finder_hiddenModules opts = return (mkHomeHidden uid) | otherwise = @@ -206,7 +213,7 @@ findImportedModuleNoHsc fc fopts ue query mhome_unit mod_name mb_pkg = -- Do not be smart and change this to `foldr orIfNotFound home_import hs` as -- that is not the same!! home_import is first because we need to look within ourselves -- first before looking at the packages in order. - any_home_import = foldr1 orIfNotFound (home_import: map home_pkg_import other_fopts) + any_home_import = foldr1 orIfNotFound (home_import: map home_pkg_import other_fopts_list) pkg_import = findExposedPackageModule fc fopts units query mod_name mb_pkg @@ -217,9 +224,21 @@ findImportedModuleNoHsc fc fopts ue query mhome_unit mod_name mb_pkg = units = case mhome_unit of Nothing -> ue_units ue Just home_unit -> homeUnitEnv_units $ ue_findHomeUnitEnv (homeUnitId home_unit) ue - hpt_deps :: [UnitId] + hpt_deps :: Set.Set UnitId hpt_deps = homeUnitDepends units - other_fopts = map (\uid -> (uid, initFinderOpts (homeUnitEnv_dflags (ue_findHomeUnitEnv uid ue)))) hpt_deps + dep_providers = Set.intersection module_home_units hpt_deps + known_other_uids = + let providers = maybe dep_providers (\u -> Set.delete u dep_providers) current_unit_id + in Set.toList providers + unknown_units = + let candidates = Set.difference hpt_deps complete_units + excluded = maybe dep_providers (\u -> Set.insert u dep_providers) current_unit_id + in Set.toList (Set.difference candidates excluded) + other_home_uids = known_other_uids ++ unknown_units + other_fopts_list = + [ (uid, initFinderOpts (homeUnitEnv_dflags (ue_findHomeUnitEnv uid ue))) + | uid <- other_home_uids + ] -- | Locate a plugin module requested by the user, for a compiler -- plugin. This consults the same set of exposed packages as ===================================== compiler/GHC/Unit/Module/Graph.hs ===================================== @@ -18,6 +18,8 @@ module GHC.Unit.Module.Graph , mgModSummaries , mgModSummaries' , mgLookupModule + , ModuleNameHomeMap + , mgHomeModuleMap , showModMsg , moduleGraphNodeModule , moduleGraphNodeModSum @@ -153,23 +155,31 @@ instance Outputable ModNodeKeyWithUid where -- check that the module and its hs-boot agree. -- -- The graph is not necessarily stored in topologically-sorted order. Use +type ModuleNameHomeMap = (Set UnitId, Map.Map ModuleName (Set UnitId)) + -- 'GHC.topSortModuleGraph' and 'GHC.Data.Graph.Directed.flattenSCC' to achieve this. data ModuleGraph = ModuleGraph { mg_mss :: [ModuleGraphNode] , mg_graph :: (ReachabilityIndex SummaryNode, NodeKey -> Maybe SummaryNode) -- A cached transitive dependency calculation so that a lot of work is not -- repeated whenever the transitive dependencies need to be calculated (for example, hptInstances) + , mg_home_map :: ModuleNameHomeMap + -- ^ For each module name, which home-unit UnitIds define it together with the set of units for which the listing is complete. } -- | Map a function 'f' over all the 'ModSummaries'. -- To preserve invariants 'f' can't change the isBoot status. mapMG :: (ModSummary -> ModSummary) -> ModuleGraph -> ModuleGraph mapMG f mg@ModuleGraph{..} = mg - { mg_mss = flip fmap mg_mss $ \case - InstantiationNode uid iuid -> InstantiationNode uid iuid - LinkNode uid nks -> LinkNode uid nks - ModuleNode deps ms -> ModuleNode deps (f ms) + { mg_mss = new_mss + , mg_home_map = mkHomeModuleMap new_mss } + where + new_mss = + flip fmap mg_mss $ \case + InstantiationNode uid iuid -> InstantiationNode uid iuid + LinkNode uid nks -> LinkNode uid nks + ModuleNode deps ms -> ModuleNode deps (f ms) unionMG :: ModuleGraph -> ModuleGraph -> ModuleGraph unionMG a b = @@ -177,11 +187,27 @@ unionMG a b = in ModuleGraph { mg_mss = new_mss , mg_graph = mkTransDeps new_mss + , mg_home_map = mkHomeModuleMap new_mss } mkTransDeps :: [ModuleGraphNode] -> (ReachabilityIndex SummaryNode, NodeKey -> Maybe SummaryNode) mkTransDeps = first graphReachability {- module graph is acyclic -} . moduleGraphNodes False +mkHomeModuleMap :: [ModuleGraphNode] -> ModuleNameHomeMap +mkHomeModuleMap nodes = + (complete_units, provider_map) + where + provider_map = + Map.fromListWith Set.union + [ (ms_mod_name ms, Set.singleton (ms_unitid ms)) + | ModuleNode _ ms <- nodes + ] + complete_units = + Set.fromList + [ ms_unitid ms + | ModuleNode _ ms <- nodes + ] + mgModSummaries :: ModuleGraph -> [ModSummary] mgModSummaries mg = [ m | ModuleNode _ m <- mgModSummaries' mg ] @@ -200,8 +226,11 @@ mgLookupModule ModuleGraph{..} m = listToMaybe $ mapMaybe go mg_mss = Just ms go _ = Nothing +mgHomeModuleMap :: ModuleGraph -> ModuleNameHomeMap +mgHomeModuleMap = mg_home_map + emptyMG :: ModuleGraph -emptyMG = ModuleGraph [] (graphReachability emptyGraph, const Nothing) +emptyMG = ModuleGraph [] (graphReachability emptyGraph, const Nothing) (Set.empty, Map.empty) isTemplateHaskellOrQQNonBoot :: ModSummary -> Bool isTemplateHaskellOrQQNonBoot ms = @@ -213,9 +242,12 @@ isTemplateHaskellOrQQNonBoot ms = -- not an element of the ModuleGraph. extendMG :: ModuleGraph -> [NodeKey] -> ModSummary -> ModuleGraph extendMG ModuleGraph{..} deps ms = ModuleGraph - { mg_mss = ModuleNode deps ms : mg_mss - , mg_graph = mkTransDeps (ModuleNode deps ms : mg_mss) + { mg_mss = new_mss + , mg_graph = mkTransDeps new_mss + , mg_home_map = mkHomeModuleMap new_mss } + where + new_mss = ModuleNode deps ms : mg_mss extendMGInst :: ModuleGraph -> UnitId -> InstantiatedUnit -> ModuleGraph extendMGInst mg uid depUnitId = mg ===================================== compiler/GHC/Unit/State.hs ===================================== @@ -469,7 +469,7 @@ data UnitState = UnitState { -- -Wunused-packages warning. explicitUnits :: [(Unit, Maybe PackageArg)], - homeUnitDepends :: [UnitId], + homeUnitDepends :: Set UnitId, -- | This is a full map from 'ModuleName' to all modules which may possibly -- be providing it. These providers may be hidden (but we'll still want @@ -504,7 +504,7 @@ emptyUnitState = UnitState { unwireMap = emptyUniqMap, preloadUnits = [], explicitUnits = [], - homeUnitDepends = [], + homeUnitDepends = Set.empty, moduleNameProvidersMap = emptyUniqMap, pluginModuleNameProvidersMap = emptyUniqMap, requirementContext = emptyUniqMap, @@ -1573,7 +1573,7 @@ mkUnitState logger cfg index = do let !state = UnitState { preloadUnits = dep_preload , explicitUnits = explicit_pkgs - , homeUnitDepends = Set.toList home_unit_deps + , homeUnitDepends = home_unit_deps , unitInfoMap = pkg_db , preloadClosure = emptyUniqSet , moduleNameProvidersMap ===================================== ghc/Main.hs ===================================== @@ -893,7 +893,7 @@ checkUnitCycles :: DynFlags -> UnitEnvGraph HomeUnitEnv -> Ghc () checkUnitCycles dflags graph = processSCCs sccs where mkNode :: (UnitId, HomeUnitEnv) -> Node UnitId UnitId - mkNode (uid, hue) = DigraphNode uid uid (homeUnitDepends (homeUnitEnv_units hue)) + mkNode (uid, hue) = DigraphNode uid uid (Set.toList (homeUnitDepends (homeUnitEnv_units hue))) nodes = map mkNode (unitEnv_elts graph) sccs = stronglyConnCompFromEdgedVerticesOrd nodes ===================================== testsuite/tests/ghc-api/downsweep/OldModLocation.hs ===================================== @@ -47,13 +47,13 @@ main = do liftIO $ do - _emss <- downsweep hsc_env [] [] False + _emss <- downsweep hsc_env [] Nothing [] False flushFinderCaches (hsc_FC hsc_env) (hsc_unit_env hsc_env) createDirectoryIfMissing False "mydir" renameFile "B.hs" "mydir/B.hs" - (_, nodes) <- downsweep hsc_env [] [] False + (_, nodes) <- downsweep hsc_env [] Nothing [] False -- If 'checkSummaryTimestamp' were to call 'addHomeModuleToFinder' with -- (ms_location old_summary) like summariseFile used to instead of ===================================== testsuite/tests/ghc-api/downsweep/PartialDownsweep.hs ===================================== @@ -168,7 +168,7 @@ go label mods cnd = setTargets [tgt] hsc_env <- getSession - (_, nodes) <- liftIO $ downsweep hsc_env [] [] False + (_, nodes) <- liftIO $ downsweep hsc_env [] Nothing [] False it label $ cnd (mapMaybe moduleGraphNodeModSum nodes) View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e02bbc621ee3fd14a7c57729fb775e3... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e02bbc621ee3fd14a7c57729fb775e3... You're receiving this email because of your account on gitlab.haskell.org.