[Git][ghc/ghc][wip/az/ghc-cpp] 2 commits: Do not provide TIdentifierLParen paren twice

Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC Commits: 7f35ad18 by Alan Zimmerman at 2025-04-21T16:24:44+01:00 Do not provide TIdentifierLParen paren twice - - - - - b5ae074e by Alan Zimmerman at 2025-04-21T17:07:28+01:00 Handle whitespace between identifier and '(' for directive only - - - - - 11 changed files: - compiler/GHC/Parser/PreProcess.hs - compiler/GHC/Parser/PreProcess/Lexer.x - compiler/GHC/Parser/PreProcess/Macro.hs - compiler/GHC/Parser/PreProcess/ParsePP.hs - compiler/GHC/Parser/PreProcess/ParserM.hs - testsuite/tests/ghc-cpp/all.T - utils/check-cpp/Lexer.x - utils/check-cpp/Macro.hs - utils/check-cpp/ParsePP.hs - utils/check-cpp/ParserM.hs - utils/check-cpp/PreProcess.hs Changes: ===================================== compiler/GHC/Parser/PreProcess.hs ===================================== @@ -254,7 +254,7 @@ processCpp ss = do Right (CppDefine name args def) -> do ppDefine (MacroName name args) def Right (CppIf cond) -> do - val <- cppIf cond + val <- cppCond cond ar <- pushAccepting val acceptStateChange ar Right (CppIfdef name) -> do @@ -270,7 +270,7 @@ processCpp ss = do ar <- setAccepting (not accepting) acceptStateChange ar Right (CppElIf cond) -> do - val <- cppIf cond + val <- cppCond cond ar <- setAccepting val acceptStateChange ar Right CppEndif -> do ===================================== compiler/GHC/Parser/PreProcess/Lexer.x ===================================== @@ -2,11 +2,12 @@ module GHC.Parser.PreProcess.Lexer (lex_tok, lexCppTokenStream ) where import GHC.Parser.PreProcess.ParserM ( - St, init_pos, + St(..), init_pos, ParserM (..), Action, mkTv, Token(..), start_code, setStartCode, show_pos, position, - AlexInput(..), alexGetByte) + AlexInput(..), alexGetByte, + alexInputPrevChar) import qualified GHC.Parser.PreProcess.ParserM as ParserM (input) import Control.Monad import GHC.Prelude @@ -92,17 +93,20 @@ words :- <0> "xor" { mkTv TXor } <0> "xor_eq" { mkTv TXorEq } ---------------------------------------- - <0> [a-zA-Z_][a-zA-Z0-9_]*\( { mkTv TIdentifierLParen } - <0> [a-zA-Z_][a-zA-Z0-9_]* { mkTv TIdentifier } - <0> \-? [0-9][0-9]* { mkTv TInteger } - <0> \" [^\"]* \" { mkTv (TString . tail . init) } - <0> () { begin other } + <0> [a-zA-Z_][a-zA-Z0-9_]*\( / { inDirective } { mkTv TIdentifierLParen } + <0> [a-zA-Z_][a-zA-Z0-9_]* { mkTv TIdentifier } + <0> \-? [0-9][0-9]* { mkTv TInteger } + <0> \" [^\"]* \" { mkTv (TString . tail . init) } + <0> () { begin other } <other> .+ { \i -> do {setStartCode 0; mkTv TOther i} } { +inDirective :: AlexAccPred Bool +inDirective flag _ _ _ = flag + begin :: Int -> Action begin sc _str = do setStartCode sc @@ -110,7 +114,7 @@ begin sc _str = get_tok :: ParserM Token get_tok = ParserM $ \i st -> - case alexScan i (start_code st) of + case alexScanUser (scanning_directive st) i (start_code st) of AlexEOF -> Right (i, st, TEOF "") AlexError _ -> Left ("Lexical error at " ++ show_pos (position i)) AlexSkip i' _ -> case get_tok of ===================================== compiler/GHC/Parser/PreProcess/Macro.hs ===================================== @@ -1,6 +1,6 @@ module GHC.Parser.PreProcess.Macro ( -- process, - cppIf, + cppCond, -- get rid of warnings for tests m1, m2, @@ -46,8 +46,8 @@ import GHC.Prelude -- --------------------------------------------------------------------- -- We evaluate to an Int, which we convert to a bool -cppIf :: String -> PP Bool -cppIf str = do +cppCond :: String -> PP Bool +cppCond str = do s <- getPpState let expanded = expand (pp_defines s) str @@ -62,7 +62,7 @@ expand :: MacroDefines -> String -> String expand s str = expanded where -- TODO: repeat until re-expand or fixpoint - toks = case cppLex str of + toks = case cppLex False str of Left err -> error $ "expand:" ++ show (err, str) Right tks -> tks expanded = combineToks $ map t_str $ expandToks s toks @@ -81,7 +81,7 @@ doExpandToks ed _ [] = (ed, []) doExpandToks ed s (TIdentifierLParen n: ts) = -- TIdentifierLParen has no meaning here (only in a #define), so -- restore it to its constituent tokens - doExpandToks ed s (TIdentifier n:TOpenParen "(":ts) + doExpandToks ed s (TIdentifier (init n):TOpenParen "(":ts) doExpandToks _ s (TIdentifier "defined" : ts) = (True, rest) -- See Note: [defined unary operator] below where @@ -268,13 +268,13 @@ isOther _ = True -- --------------------------------------------------------------------- m1 :: Either String [Token] -m1 = cppLex "`" +m1 = cppLex False "`" m2 :: Either String [Token] -m2 = cppLex "hello(5)" +m2 = cppLex False "hello(5)" m3 :: Either String [Token] -m3 = cppLex "#define FOO(m1,m2,m) ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) == 1 && (m2) == 7 && (m) <= 0)" +m3 = cppLex True "#define FOO(m1,m2,m) ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) == 1 && (m2) == 7 && (m) <= 0)" -- Right [THash {t_str = "#"} -- ,TDefine {t_str = "define"} @@ -290,12 +290,12 @@ m3 = cppLex "#define FOO(m1,m2,m) ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) = -- ] m4 :: Either String [Token] -m4 = cppLex "#if (m < 1)" +m4 = cppLex True "#if (m < 1)" m5 :: Either String (Maybe [[Token]], [Token]) m5 = do -- toks <- cppLex "(43,foo(a)) some other stuff" - toks <- cppLex "( ff(bar(),baz), 4 )" + toks <- cppLex False "( ff(bar(),baz), 4 )" return $ getExpandArgs toks tt :: Either String ([[Char]], [Char]) ===================================== compiler/GHC/Parser/PreProcess/ParsePP.hs ===================================== @@ -13,7 +13,7 @@ module GHC.Parser.PreProcess.ParsePP ( import Data.List (intercalate) import GHC.Parser.Errors.Ppr () import GHC.Parser.PreProcess.Lexer -import GHC.Parser.PreProcess.ParserM (Token (..), init_state) +import GHC.Parser.PreProcess.ParserM (Token (..), init_state, St(..)) import GHC.Parser.PreProcess.State import GHC.Prelude @@ -24,7 +24,7 @@ import GHC.Prelude -- | Parse a CPP directive, using tokens from the CPP lexer parseDirective :: String -> Either String CppDirective parseDirective s = - case cppLex s of + case cppLex True s of Left e -> Left e Right toks -> case toks of @@ -48,7 +48,7 @@ combineToks ss = intercalate " " ss cppDefine :: [Token] -> Either String CppDirective cppDefine [] = Left "error:empty #define directive" -cppDefine (TIdentifierLParen n : ts) = Right $ CppDefine n args def +cppDefine (TIdentifierLParen n : ts) = Right $ CppDefine (init n) args def where (args, def) = getArgs ts cppDefine (TIdentifier n : ts) = Right $ CppDefine n Nothing ts @@ -102,8 +102,9 @@ parseDefineArgs acc ts = Left $ "malformed macro args, expecting identifier foll -- --------------------------------------------------------------------- -cppLex :: String -> Either String [Token] -cppLex s = case lexCppTokenStream s init_state of +-- TODO: give this a better name +cppLex :: Bool -> String -> Either String [Token] +cppLex sd s = case lexCppTokenStream s (init_state {scanning_directive = sd}) of Left err -> Left err Right (_inp, _st, toks) -> Right toks @@ -141,4 +142,5 @@ t2 = doATest "# if ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) == 1 && (m2) == t3 :: Either String CppDirective t3 = parseDirective "# if FOO == 4" -t4 = cppLex "#define foo(X) X" +t4 :: Either String [Token] +t4 = cppLex True "#define foo(X) X" ===================================== compiler/GHC/Parser/PreProcess/ParserM.hs ===================================== @@ -8,10 +8,9 @@ module GHC.Parser.PreProcess.ParserM ( AlexInput (..), run_parser, -- Parser state - St, + St(..), init_state, StartCode, - start_code, setStartCode, -- Tokens Token (..), @@ -75,6 +74,7 @@ run_parser (ParserM f) = data St = St { start_code :: !StartCode , brace_depth :: !Int + , scanning_directive :: !Bool } deriving (Show) type StartCode = Int @@ -84,6 +84,7 @@ init_state = St { start_code = 0 , brace_depth = 0 + , scanning_directive = False } -- Tokens ===================================== testsuite/tests/ghc-cpp/all.T ===================================== @@ -9,8 +9,13 @@ def normalise_haskell_full_version( str ): def normalise_haskell_pl1( str ): return re.sub(r'__GLASGOW_HASKELL_PATCHLEVEL1__.*\n', '__GLASGOW_HASKELL_PATCHLEVEL1__ XXX', str) +# The MIN_VERSION_GLASGOW_HASKELL macro gets updated on every configure. +# Replace the RHS with a constant +def normalise_min_version_haskell( str ): + return re.sub(r'MIN_VERSION_GLASGOW_HASKELL.*\n', 'MIN_VERSION_GLASGOW_HASKELL XXX', str) + test('GhcCpp01', # normal, - [normalise_errmsg_fun(normalise_haskell_full_version, normalise_haskell_pl1)], + [normalise_errmsg_fun(normalise_haskell_full_version, normalise_haskell_pl1,normalise_min_version_haskell)], compile, ['-ddump-ghc-cpp -dkeep-comments']) ===================================== utils/check-cpp/Lexer.x ===================================== @@ -2,14 +2,16 @@ module Lexer (lex_tok, lexCppTokenStream ) where import ParserM ( - St, init_pos, + St(..), init_pos, ParserM (..), Action, mkTv, Token(..), start_code, setStartCode, show_pos, position, - AlexInput(..), alexGetByte) + AlexInput(..), alexGetByte, + alexInputPrevChar) -- import qualified ParserM as ParserM (input) import Control.Monad + -- The lexer is based on -- https://timsong-cpp.github.io/cppwp/n4140/lex.pptoken } @@ -90,17 +92,20 @@ words :- <0> "xor" { mkTv TXor } <0> "xor_eq" { mkTv TXorEq } ---------------------------------------- - <0> [a-zA-Z_][a-zA-Z0-9_]*\( { mkTv TIdentifierLParen } - <0> [a-zA-Z_][a-zA-Z0-9_]* { mkTv TIdentifier } - <0> \-? [0-9][0-9]* { mkTv TInteger } - <0> \" [^\"]* \" { mkTv (TString . tail . init) } - <0> () { begin other } + <0> [a-zA-Z_][a-zA-Z0-9_]*\( / { inDirective } { mkTv TIdentifierLParen } + <0> [a-zA-Z_][a-zA-Z0-9_]* { mkTv TIdentifier } + <0> \-? [0-9][0-9]* { mkTv TInteger } + <0> \" [^\"]* \" { mkTv (TString . tail . init) } + <0> () { begin other } <other> .+ { \i -> do {setStartCode 0; mkTv TOther i} } { +inDirective :: AlexAccPred Bool +inDirective flag _ _ _ = flag + begin :: Int -> Action begin sc _str = do setStartCode sc @@ -108,7 +113,7 @@ begin sc _str = get_tok :: ParserM Token get_tok = ParserM $ \i st -> - case alexScan i (start_code st) of + case alexScanUser (scanning_directive st) i (start_code st) of AlexEOF -> Right (i, st, TEOF "") AlexError _ -> Left ("Lexical error at " ++ show_pos (position i)) AlexSkip i' _ -> case get_tok of ===================================== utils/check-cpp/Macro.hs ===================================== @@ -1,6 +1,6 @@ module Macro ( -- process, - cppIf, + cppCond, -- get rid of warnings for tests -- m0, m1, @@ -45,8 +45,8 @@ import State -- --------------------------------------------------------------------- -- We evaluate to an Int, which we convert to a bool -cppIf :: String -> PP Bool -cppIf str = do +cppCond :: String -> PP Bool +cppCond str = do s <- getPpState let expanded = expand (pp_defines s) str @@ -61,7 +61,7 @@ expand :: MacroDefines -> String -> String expand s str = expanded where -- TODO: repeat until re-expand or fixpoint - toks = case cppLex str of + toks = case cppLex False str of Left err -> error $ "expand:" ++ show (err, str) Right tks -> tks expanded = combineToks $ map t_str $ expandToks s toks @@ -80,7 +80,7 @@ doExpandToks ed _ [] = (ed, []) doExpandToks ed s (TIdentifierLParen n: ts) = -- TIdentifierLParen has no meaning here (only in a #define), so -- restore it to its constituent tokens - doExpandToks ed s (TIdentifier n:TOpenParen "(":ts) + doExpandToks ed s (TIdentifier (init n):TOpenParen "(":ts) doExpandToks _ s (TIdentifier "defined" : ts) = (True, rest) -- See Note: [defined unary operator] below where @@ -267,13 +267,13 @@ isOther _ = True -- --------------------------------------------------------------------- m1 :: Either String [Token] -m1 = cppLex "`" +m1 = cppLex False "`" m2 :: Either String [Token] -m2 = cppLex "hello(5)" +m2 = cppLex False "hello(5)" m3 :: Either String [Token] -m3 = cppLex "#define FOO(m1,m2,m) ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) == 1 && (m2) == 7 && (m) <= 0)" +m3 = cppLex True "#define FOO(m1,m2,m) ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) == 1 && (m2) == 7 && (m) <= 0)" -- Right [THash {t_str = "#"} -- ,TDefine {t_str = "define"} @@ -289,12 +289,12 @@ m3 = cppLex "#define FOO(m1,m2,m) ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) = -- ] m4 :: Either String [Token] -m4 = cppLex "#if (m < 1)" +m4 = cppLex True "#if (m < 1)" m5 :: Either String (Maybe [[Token]], [Token]) m5 = do -- toks <- cppLex "(43,foo(a)) some other stuff" - toks <- cppLex "( ff(bar(),baz), 4 )" + toks <- cppLex False "( ff(bar(),baz), 4 )" return $ getExpandArgs toks tt :: Either String ([[Char]], [Char]) ===================================== utils/check-cpp/ParsePP.hs ===================================== @@ -12,7 +12,7 @@ module ParsePP ( import Data.List import GHC.Parser.Errors.Ppr () import Lexer -import ParserM (Token (..), init_state) +import ParserM (Token (..), init_state, St(..)) import State -- import Debug.Trace @@ -24,7 +24,7 @@ import State -- | Parse a CPP directive, using tokens from the CPP lexer parseDirective :: String -> Either String CppDirective parseDirective s = - case cppLex s of + case cppLex True s of Left e -> Left e Right toks -> case toks of @@ -48,7 +48,7 @@ combineToks ss = intercalate " " ss cppDefine :: [Token] -> Either String CppDirective cppDefine [] = Left "error:empty #define directive" -cppDefine (TIdentifierLParen n : ts) = Right $ CppDefine n args def +cppDefine (TIdentifierLParen n : ts) = Right $ CppDefine (init n) args def where (args, def) = getArgs ts cppDefine (TIdentifier n : ts) = Right $ CppDefine n Nothing ts @@ -102,8 +102,9 @@ parseDefineArgs acc ts = Left $ "malformed macro args, expecting identifier foll -- --------------------------------------------------------------------- -cppLex :: String -> Either String [Token] -cppLex s = case lexCppTokenStream s init_state of +-- TODO: give this a better name +cppLex :: Bool -> String -> Either String [Token] +cppLex sd s = case lexCppTokenStream s (init_state {scanning_directive = sd}) of Left err -> Left err Right (_inp, _st, toks) -> Right toks @@ -141,4 +142,4 @@ t2 = doATest "# if ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) == 1 && (m2) == t3 :: Either String CppDirective t3 = parseDirective "# if FOO == 4" -t4 = cppLex "#define foo(X) X" +t4 = cppLex True "#define foo(X) X" ===================================== utils/check-cpp/ParserM.hs ===================================== @@ -8,7 +8,7 @@ module ParserM ( AlexInput (..), run_parser, -- Parser state - St, + St(..), init_state, StartCode, start_code, @@ -75,6 +75,7 @@ run_parser (ParserM f) = data St = St { start_code :: !StartCode , brace_depth :: !Int + , scanning_directive :: !Bool } deriving (Show) type StartCode = Int @@ -84,6 +85,7 @@ init_state = St { start_code = 0 , brace_depth = 0 + , scanning_directive = False } -- Tokens ===================================== utils/check-cpp/PreProcess.hs ===================================== @@ -281,7 +281,7 @@ processCpp ss = do Right (CppDefine name args def) -> do ppDefine (MacroName name args) def Right (CppIf cond) -> do - val <- cppIf cond + val <- cppCond cond ar <- pushAccepting val acceptStateChange ar Right (CppIfdef name) -> do @@ -297,7 +297,7 @@ processCpp ss = do ar <- setAccepting (not accepting) acceptStateChange ar Right (CppElIf cond) -> do - val <- cppIf cond + val <- cppCond cond ar <- setAccepting val acceptStateChange ar Right CppEndif -> do View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/937a9c4d2a142d0c7a4f29ce8515d34... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/937a9c4d2a142d0c7a4f29ce8515d34... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Alan Zimmerman (@alanz)