
Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC Commits: 9f4b5aab by Alan Zimmerman at 2025-06-21T13:20:54+01:00 Some cleanup - - - - - 4 changed files: - compiler/GHC/Parser/Lexer.x - compiler/GHC/Parser/PreProcess.hs - compiler/GHC/Parser/PreProcess/State.hs - utils/check-cpp/PreProcess.hs Changes: ===================================== compiler/GHC/Parser/Lexer.x ===================================== @@ -59,7 +59,6 @@ module GHC.Parser.Lexer ( Token(..), lexer, lexerDbg, ParserOpts(..), mkParserOpts, PState (..), initParserState, initPragState, - PSavedAlrState(..), getAlrState, setAlrState, startSkipping, stopSkipping, P(..), ParseResult(POk, PFailed), allocateComments, allocatePriorComments, allocateFinalComments, @@ -273,7 +272,6 @@ $tab { warnTab } -- set. "{-" / { isNormalComment } { nested_comment } --- "/*" / { ifExtension GhcCppBit } { cpp_comment } -- Single-line comments are a bit tricky. Haskell 98 says that two or -- more dashes followed by a symbol should be parsed as a varsym, so we @@ -1351,6 +1349,7 @@ hopefully_open_brace :: Action p hopefully_open_brace span buf len buf2 = do relaxed <- getBit RelaxedLayoutBit ctx <- getContext + -- See Note [GHC_CPP saved offset] offset <- getOffset let isOK = relaxed || @@ -1592,15 +1591,6 @@ nested_doc_comment span buf _len _buf2 = {-# SCC "nested_doc_comment" #-} withLe dropTrailingDec "-}" = "" dropTrailingDec (x:xs) = x:dropTrailingDec xs -cpp_comment :: Action p -cpp_comment span buf len _buf2 = {-# SCC "cpp_comment" #-} do - l <- getLastLocIncludingComments - let endComment input (L _ comment) = commentEnd lexToken input (Nothing, ITblockComment comment l) buf span - input <- getInput - -- Include decorator in comment - let start_decorator = reverse $ lexemeToString buf len - cpp_comment_logic endComment start_decorator input span - {-# INLINE nested_comment_logic #-} -- | Includes the trailing '-}' decorators -- drop the last two elements with the callback if you don't want them to be included @@ -1635,31 +1625,6 @@ nested_comment_logic endComment commentAcc input span = go commentAcc (1::Int) i Just (_,_) -> go ('\n':commentAcc) n input Just (c,input) -> go (c:commentAcc) n input -{-# INLINE cpp_comment_logic #-} --- | Includes the trailing '*/' decorators --- drop the last two elements with the callback if you don't want them to be included -cpp_comment_logic - :: (AlexInput -> Located String -> P p (PsLocated Token)) -- ^ Continuation that gets the rest of the input and the lexed comment - -> String -- ^ starting value for accumulator (reversed) - When we want to include a decorator '/*' in the comment - -> AlexInput - -> PsSpan - -> P p (PsLocated Token) -cpp_comment_logic endComment commentAcc input span = go commentAcc (1::Int) input - where - go commentAcc 0 input@(AI end_loc _) = do - let comment = reverse commentAcc - cspan = mkSrcSpanPs $ mkPsSpan (psSpanStart span) end_loc - lcomment = L cspan comment - endComment input lcomment - go commentAcc n input = case alexGetChar' input of - Nothing -> errBrace input (psRealSpan span) - Just ('*',input) -> case alexGetChar' input of - Nothing -> errBrace input (psRealSpan span) - Just ('/',input) -> go ('/':'*':commentAcc) (n-1) input -- '/' - Just (_,_) -> go ('*':commentAcc) n input - Just (c,input) -> go (c:commentAcc) n input - - ghcCppSet :: P p Bool ghcCppSet = do exts <- getExts @@ -1775,6 +1740,7 @@ linePrag span buf len buf2 = do usePosPrags <- getBit UsePosPragsBit if usePosPrags then begin line_prag2 span buf len buf2 + -- TODO:AZ: should we make this test if GhcCpp is active, and maybe do the old -- else let !src = lexemeToFastString buf len -- in return (L span (ITline_prag (SourceText src))) else nested_comment span buf len buf2 @@ -2166,6 +2132,7 @@ do_bol span _str _len _buf2 = do -- See Note [Nested comment line pragmas] b <- getBit InNestedCommentBit if b then return (L span ITcomment_line_prag) else do + -- See Note [GHC_CPP saved offset] resetOffset (pos, gen_semic) <- getOffside case pos of @@ -2216,6 +2183,7 @@ maybe_layout t = do -- If the alternative layout rule is enabled then new_layout_context :: Bool -> Bool -> Token -> Action p new_layout_context strict gen_semic tok span _buf len _buf2 = do _ <- popLexState + -- See Note [GHC_CPP saved offset] current_col <- getOffset let offset = current_col - len ctx <- getContext @@ -2670,6 +2638,7 @@ data PState a = PState { pp :: !a, -- If a CPP directive occurs in the layout context, we need to -- store the prior column so any alr processing can continue. + -- See Note [GHC_CPP saved offset] pp_last_col :: !(Maybe Int) } -- last_loc and last_len are used when generating error messages, @@ -2684,32 +2653,6 @@ data PState a = PState { -- of the action, it is the *current* token. Do I understand -- correctly? -data PSavedAlrState = PSavedAlrState { - -- s_warnings :: Messages PsMessage, - -- s_errors :: Messages PsMessage, - s_lex_state :: [Int], - s_context :: [LayoutContext], - s_alr_pending_implicit_tokens :: [PsLocated Token], - s_alr_next_token :: Maybe (PsLocated Token), - s_alr_last_loc :: PsSpan, - s_alr_context :: [ALRContext], - s_alr_expecting_ocurly :: Maybe ALRLayout, - s_alr_justClosedExplicitLetBlock :: Bool, - s_last_col :: Int - } - - --- -- | Use for emulating (limited) CPP preprocessing in GHC. --- -- TODO: move this into PreProcess, and make a param on PState --- data PpState = PpState { --- pp_defines :: !(Map String [String]), --- pp_continuation :: ![Located Token], --- -- pp_context :: ![PpContext], --- pp_context :: ![Token], -- What preprocessor directive we are currently processing --- pp_accepting :: !Bool --- } --- deriving (Show) - data PpContext = PpContextIf [Located Token] deriving (Show) @@ -2825,7 +2768,7 @@ getLastBufCur = P $ \s@(PState { last_buf_cur = last_buf_cur }) -> POk s last_bu getLastLen :: P p Int getLastLen = P $ \s@(PState { last_len = last_len }) -> POk s last_len --- see Note [TBD] +-- See Note [GHC_CPP saved offset] getOffset :: P p Int getOffset = P $ \s@(PState { pp_last_col = last_col, loc = l}) -> @@ -2835,74 +2778,55 @@ getOffset = P $ \s@(PState { pp_last_col = last_col, -- (fromMaybe (srcLocCol (psRealLoc l)) last_col) in POk s { pp_last_col = Nothing} offset +-- See Note [GHC_CPP saved offset] resetOffset :: P p () resetOffset = P $ \s -> POk s { pp_last_col = Nothing} () +{- Note [GHC_CPP saved offset] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The layout processing machinery examines the offset of the previous +line when doing its calculations. + +When GHC_CPP is used, a set of CPP directives may ignore some number +of preceding lines, each of which has a different offset. + +We deal with this as follows + +- When we start skipping lines due to CPP we store the offset of the + line before the CPP directive +- We explicitly ask for the offset using `getOffset` when doing layout + calculations +- If there is a stored offset, we use that instead of the prior line + offset + +-} + startSkipping :: P p () startSkipping = do pushLexState skipping -- pushLexState (trace ("startSkipping:" ++ show skipping) skipping) -stopSkipping :: P p Int +stopSkipping :: P p () stopSkipping = do - -- popLexState - ret <- popLexState + _ <- popLexState -- We just processed a CPP directive, which included a trailing newline. -- To properly sync up, we now need to ensure that `do_bol` processing occurs. - -- But this call does not emit a token. - -- Maybe it should be an argument to lexToken instead? - -- Alternatively, push the input location to the previous char. - AI ps buf <- getInput - last_buf_cur <- getLastBufCur - last_loc <- getLastLoc + -- But this call does not emit a token, so we instead + -- change the input location to the previous char, the newline + AI _ps buf <- getInput last_tk <- getLastTk case last_tk of Strict.Just (L l _) -> do let ps' = PsLoc (realSrcSpanEnd (psRealSpan l)) (bufSpanEnd (psBufSpan l)) let cur' = (cur buf) - 1 - -- let cur' = trace ("stopSkipping:(cur',ps'):" ++ show (cur'',ps')) cur'' setInput (AI ps' (buf { cur = cur'})) _ -> return () - -- return $ trace ("stopSkipping: (ps, cur buf, last_loc, last_buf_cur, last_tk):" ++ show (ps, cur buf, last_loc, last_buf_cur, last_tk)) ret - return ret -- old <- popLexState -- return (trace ("stopSkipping:" ++ show old) old) -getAlrState :: P p PSavedAlrState -getAlrState = P $ \s@(PState {loc=l}) -> POk s - PSavedAlrState { - -- s_warnings = warnings s, - -- s_errors = errors s, - -- s_lex_state = lex_state s, - s_lex_state = lex_state s, - s_context = context s, - s_alr_pending_implicit_tokens = alr_pending_implicit_tokens s, - s_alr_next_token = alr_next_token s, - s_alr_last_loc = alr_last_loc s, - s_alr_context = alr_context s, - s_alr_expecting_ocurly = alr_expecting_ocurly s, - s_alr_justClosedExplicitLetBlock = alr_justClosedExplicitLetBlock s, - s_last_col = srcLocCol (psRealLoc l) - } - -setAlrState :: PSavedAlrState -> P p () -setAlrState ss = P $ \s -> POk s { - -- errors = s_errors ss, - -- warnings = s_warnings ss, - lex_state = s_lex_state ss, - context = s_context ss, - alr_pending_implicit_tokens = s_alr_pending_implicit_tokens ss, - alr_next_token = s_alr_next_token ss, - alr_last_loc = s_alr_last_loc ss, - alr_context = s_alr_context ss, - alr_expecting_ocurly = s_alr_expecting_ocurly ss, - alr_justClosedExplicitLetBlock = s_alr_justClosedExplicitLetBlock ss, - pp_last_col = Just (s_last_col ss) - } () - - {-# INLINE alexGetChar' #-} -- This version does not squash unicode characters, it is used when @@ -3199,6 +3123,7 @@ disableHaddock opts = upd_bitmap (xunset HaddockBit) where upd_bitmap f = opts { pExtsBitmap = f (pExtsBitmap opts) } +-- TODO:AZ check which of these are actually needed, enableGhcCpp :: ParserOpts -> ParserOpts enableGhcCpp = enableExtBit GhcCppBit @@ -3881,8 +3806,6 @@ warn_unknown_prag prags span buf len buf2 = do %************************************************************************ -} --- TODO:AZ: we should have only mkParensEpToks. Delete mkParensEpAnn, mkParensLocs - -- |Given a 'RealSrcSpan' that surrounds a 'HsPar' or 'HsParTy', generate -- 'EpToken' values for the opening and closing bordering on the start -- and end of the span ===================================== compiler/GHC/Parser/PreProcess.hs ===================================== @@ -14,8 +14,8 @@ module GHC.Parser.PreProcess ( ) where import Data.List (intercalate, sortBy) -import Data.Maybe (fromMaybe, listToMaybe) import Data.Map qualified as Map +import Data.Maybe (fromMaybe, listToMaybe) import Debug.Trace (trace) import GHC.Data.FastString import GHC.Data.Strict qualified as Strict @@ -23,6 +23,7 @@ import GHC.Data.StringBuffer import GHC.Driver.DynFlags (DynFlags, xopt) import GHC.LanguageExtensions qualified as LangExt import GHC.Parser.Errors.Ppr () +import GHC.Parser.Errors.Types (PsMessage (PsErrGhcCpp)) import GHC.Parser.Lexer (P (..), PState (..), ParseResult (..), Token (..)) import GHC.Parser.Lexer qualified as Lexer import GHC.Parser.PreProcess.Macro @@ -34,7 +35,6 @@ import GHC.Types.SrcLoc import GHC.Utils.Error import GHC.Utils.Outputable (text) import GHC.Utils.Panic.Plain (panic) -import GHC.Parser.Errors.Types (PsMessage(PsErrGhcCpp)) -- --------------------------------------------------------------------- @@ -42,9 +42,10 @@ dumpGhcCpp :: DynFlags -> PState PpState -> SDoc dumpGhcCpp dflags pst = output where ghc_cpp_enabled = xopt LangExt.GhcCpp dflags - output = if ghc_cpp_enabled - then text $ sepa ++ defines ++ sepa ++ final ++ sepa - else text "GHC_CPP not enabled" + output = + if ghc_cpp_enabled + then text $ sepa ++ defines ++ sepa ++ final ++ sepa + else text "GHC_CPP not enabled" -- Note: pst is the state /before/ the parser runs, so we can use it to lex. (pst_final, bare_toks) = lexAll pst comments = reverse (Lexer.comment_q pst_final) @@ -76,6 +77,7 @@ renderCombinedToks toks = showCppTokenStream toks -- --------------------------------------------------------------------- -- addSourceToTokens copied here to unbreak an import loop. -- It should probably move somewhere else +-- TODO: We should be able to do away with this once #26095 is done {- | Given a source location and a StringBuffer corresponding to this location, return a rich token stream with the source associated to the @@ -105,7 +107,8 @@ addSourceToTokens loc0 buf0 (t@(L sp _) : ts) = -- --------------------------------------------------------------------- --- Tweaked from showRichTokenStream +-- Tweaked from showRichTokenStream, to add markers per line if it is +-- currently active or not showCppTokenStream :: [(Located Token, String)] -> String showCppTokenStream ts0 = go startLoc ts0 "" where @@ -196,7 +199,7 @@ ppLexer queueComments cont = ppLexer queueComments cont in case tk of - -- case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of + -- case (trace ("M.ppLexer:tk=" ++ show (unLoc tk)) tk) of L _ ITeof -> do mInp <- popIncludeLoc case mInp of @@ -219,13 +222,11 @@ ppLexer queueComments cont = case mdump of Just dump -> -- We have a dump of the state, put it into an ignored token + -- AZ: TODO: is this actually useful? contIgnoreTok (L l (ITcpp continuation (appendFS s (fsLit dump)) sp)) Nothing -> contIgnoreTok tk else contInner tk L _ (ITcppIgnored _ _) -> contIgnoreTok tk - L _ (ITline_prag _) -> do - setInLinePragma True - contIgnoreTok tk _ -> do state <- getCppState inLinePragma <- getInLinePragma @@ -253,7 +254,7 @@ processCppToks fs = do -- Combine any prior continuation tokens cs <- popContinuation let loc = combineLocs fs (fromMaybe fs (listToMaybe cs)) - processCpp loc (concat $ reverse $ map get (fs:cs)) + processCpp loc (concat $ reverse $ map get (fs : cs)) processCpp :: SrcSpan -> String -> PP (Maybe String) processCpp loc s = do @@ -262,13 +263,11 @@ processCpp loc s = do then return (Just "\ndumped state\n") else do case directive of - Left err -> Lexer.addError $ mkPlainErrorMsgEnvelope loc $ PsErrGhcCpp (text err) + Left err -> Lexer.addError $ mkPlainErrorMsgEnvelope loc $ PsErrGhcCpp (text err) Right (CppInclude filename) -> do ppInclude filename - Right (CppDefine name args def) -> do - ppDefine (MacroName name args) def - Right (CppUndef name) -> do - ppUndef name + Right (CppDefine name args def) -> ppDefine (MacroName name args) def + Right (CppUndef name) -> ppUndef name Right (CppIf cond) -> do val <- cppCond loc cond ar <- pushAccepting val ===================================== compiler/GHC/Parser/PreProcess/State.hs ===================================== @@ -71,7 +71,6 @@ initPpState = , pp_continuation = [] , pp_defines = Map.empty , pp_scope = (PpScope True PpNoGroup) :| [] - , pp_alr_state = Nothing , pp_in_line_pragma = False } @@ -81,7 +80,6 @@ data PpState = PpState , pp_continuation :: ![Located Lexer.Token] , pp_defines :: !MacroDefines , pp_scope :: !(NonEmpty PpScope) - , pp_alr_state :: Maybe Lexer.PSavedAlrState , pp_in_line_pragma :: !Bool } ===================================== utils/check-cpp/PreProcess.hs ===================================== @@ -329,15 +329,8 @@ processCpp loc s = do acceptStateChange :: AcceptingResult -> PP () acceptStateChange ArNoChange = return () acceptStateChange ArNowIgnoring = do - -- alr <- Lexer.getAlrState - -- s <- getPpState - -- let s = trace ("acceptStateChange:ArNowIgnoring") s' - -- setPpState (s { pp_alr_state = Just alr}) Lexer.startSkipping acceptStateChange ArNowAccepting = do - -- s <- getPpState - -- let s = trace ("acceptStateChange:ArNowAccepting") s' - -- mapM_ Lexer.setAlrState (pp_alr_state s) _ <- Lexer.stopSkipping return () View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9f4b5aabeeeabc3134e5464f8ecb707e... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9f4b5aabeeeabc3134e5464f8ecb707e... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Alan Zimmerman (@alanz)