Alan Zimmerman pushed to branch wip/az/ghc-cpp at Glasgow Haskell Compiler / GHC
Commits:
-
937a9c4d
by Alan Zimmerman at 2025-04-21T13:20:39+01:00
13 changed files:
- compiler/GHC/Parser/PreProcess/Lexer.x
- compiler/GHC/Parser/PreProcess/Macro.hs
- compiler/GHC/Parser/PreProcess/ParsePP.hs
- compiler/GHC/Parser/PreProcess/Parser.y
- compiler/GHC/Parser/PreProcess/ParserM.hs
- utils/check-cpp/Eval.hs
- utils/check-cpp/Lexer.x
- utils/check-cpp/Macro.hs
- utils/check-cpp/Main.hs
- utils/check-cpp/ParsePP.hs
- utils/check-cpp/Parser.y
- utils/check-cpp/ParserM.hs
- utils/check-cpp/State.hs
Changes:
| ... | ... | @@ -92,10 +92,11 @@ words :- |
| 92 | 92 | <0> "xor" { mkTv TXor }
|
| 93 | 93 | <0> "xor_eq" { mkTv TXorEq }
|
| 94 | 94 | ----------------------------------------
|
| 95 | - <0> [a-zA-Z_][a-zA-Z0-9_]* { mkTv TIdentifier }
|
|
| 96 | - <0> \-? [0-9][0-9]* { mkTv TInteger }
|
|
| 97 | - <0> \" [^\"]* \" { mkTv (TString . tail . init) }
|
|
| 98 | - <0> () { begin other }
|
|
| 95 | + <0> [a-zA-Z_][a-zA-Z0-9_]*\( { mkTv TIdentifierLParen }
|
|
| 96 | + <0> [a-zA-Z_][a-zA-Z0-9_]* { mkTv TIdentifier }
|
|
| 97 | + <0> \-? [0-9][0-9]* { mkTv TInteger }
|
|
| 98 | + <0> \" [^\"]* \" { mkTv (TString . tail . init) }
|
|
| 99 | + <0> () { begin other }
|
|
| 99 | 100 | |
| 100 | 101 | <other> .+ { \i -> do {setStartCode 0;
|
| 101 | 102 | mkTv TOther i} }
|
| ... | ... | @@ -71,14 +71,18 @@ expandToks :: MacroDefines -> [Token] -> [Token] |
| 71 | 71 | expandToks s ts =
|
| 72 | 72 | let
|
| 73 | 73 | (expansionDone, r) = doExpandToks False s ts
|
| 74 | - in
|
|
| 74 | + in
|
|
| 75 | 75 | if expansionDone
|
| 76 | 76 | then expandToks s r
|
| 77 | 77 | else r
|
| 78 | 78 | |
| 79 | 79 | doExpandToks :: Bool -> MacroDefines -> [Token] -> (Bool, [Token])
|
| 80 | 80 | doExpandToks ed _ [] = (ed, [])
|
| 81 | -doExpandToks _ s (TIdentifier "defined" : ts) = (True, rest)
|
|
| 81 | +doExpandToks ed s (TIdentifierLParen n: ts) =
|
|
| 82 | + -- TIdentifierLParen has no meaning here (only in a #define), so
|
|
| 83 | + -- restore it to its constituent tokens
|
|
| 84 | + doExpandToks ed s (TIdentifier n:TOpenParen "(":ts)
|
|
| 85 | +doExpandToks _ s (TIdentifier "defined" : ts) = (True, rest)
|
|
| 82 | 86 | -- See Note: [defined unary operator] below
|
| 83 | 87 | where
|
| 84 | 88 | rest = case getExpandArgs ts of
|
| ... | ... | @@ -7,6 +7,7 @@ module GHC.Parser.PreProcess.ParsePP ( |
| 7 | 7 | t1,
|
| 8 | 8 | t2,
|
| 9 | 9 | t3,
|
| 10 | + t4,
|
|
| 10 | 11 | ) where
|
| 11 | 12 | |
| 12 | 13 | import Data.List (intercalate)
|
| ... | ... | @@ -47,9 +48,10 @@ combineToks ss = intercalate " " ss |
| 47 | 48 | |
| 48 | 49 | cppDefine :: [Token] -> Either String CppDirective
|
| 49 | 50 | cppDefine [] = Left "error:empty #define directive"
|
| 50 | -cppDefine (TIdentifier n : ts) = Right $ CppDefine n args def
|
|
| 51 | +cppDefine (TIdentifierLParen n : ts) = Right $ CppDefine n args def
|
|
| 51 | 52 | where
|
| 52 | 53 | (args, def) = getArgs ts
|
| 54 | +cppDefine (TIdentifier n : ts) = Right $ CppDefine n Nothing ts
|
|
| 53 | 55 | cppDefine (t : _) = Left $ "#define: expecting an identifier, got :" ++ show t
|
| 54 | 56 | |
| 55 | 57 | cppInclude :: [String] -> CppDirective
|
| ... | ... | @@ -79,14 +81,14 @@ cppDumpState _ts = CppDumpState |
| 79 | 81 | -- ---------------------------------------------------------------------
|
| 80 | 82 | |
| 81 | 83 | -- Crack out the arguments to a #define. This is of the form of
|
| 82 | --- comma-separated identifiers between parens
|
|
| 84 | +-- comma-separated identifiers between parens, where we have already
|
|
| 85 | +-- seen the opening paren.
|
|
| 83 | 86 | getArgs :: [Token] -> (Maybe [String], [Token])
|
| 84 | 87 | getArgs [] = (Nothing, [])
|
| 85 | -getArgs (TOpenParen _ : ts) =
|
|
| 88 | +getArgs ts =
|
|
| 86 | 89 | case parseDefineArgs [] ts of
|
| 87 | 90 | Left err -> error err
|
| 88 | 91 | Right (args, rest) -> (Just (reverse args), rest)
|
| 89 | -getArgs ts = (Nothing, ts)
|
|
| 90 | 92 | |
| 91 | 93 | parseDefineArgs ::
|
| 92 | 94 | [String] ->
|
| ... | ... | @@ -138,3 +140,5 @@ t2 = doATest "# if ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) == 1 && (m2) == |
| 138 | 140 | |
| 139 | 141 | t3 :: Either String CppDirective
|
| 140 | 142 | t3 = parseDirective "# if FOO == 4"
|
| 143 | + |
|
| 144 | +t4 = cppLex "#define foo(X) X" |
| ... | ... | @@ -92,6 +92,7 @@ import GHC.Prelude |
| 92 | 92 | 'xor_eq' { TXorEq {} }
|
| 93 | 93 | |
| 94 | 94 | identifier { TIdentifier {} }
|
| 95 | + identifierLP { TIdentifierLParen {} }
|
|
| 95 | 96 | integer { TInteger {} }
|
| 96 | 97 | string { TString {} }
|
| 97 | 98 | other { TOther {} }
|
| ... | ... | @@ -91,6 +91,7 @@ init_state = |
| 91 | 91 | data Token
|
| 92 | 92 | = TEOF {t_str :: String}
|
| 93 | 93 | | TIdentifier {t_str :: String}
|
| 94 | + | TIdentifierLParen {t_str :: String}
|
|
| 94 | 95 | | TInteger {t_str :: String}
|
| 95 | 96 | | -- preprocessing-op-or-punc
|
| 96 | 97 | -- https://timsong-cpp.github.io/cppwp/n4140/lex.operators#nt:preprocessing-op-or-punc
|
| ... | ... | @@ -8,7 +8,7 @@ eval :: Expr -> Int |
| 8 | 8 | eval (Parens e) = eval e
|
| 9 | 9 | eval (Not e) = fromBool $ not (toBool $ eval e)
|
| 10 | 10 | -- eval (Var v) = error $ "need to look up :" ++ v
|
| 11 | -eval (Var v) = 0 -- Spec says remaining identifiers are replaces with zero
|
|
| 11 | +eval (Var _) = 0 -- Spec says remaining identifiers are replaces with zero
|
|
| 12 | 12 | eval (IntVal i) = i
|
| 13 | 13 | eval (Plus e1 e2) = (eval e1) + (eval e2)
|
| 14 | 14 | eval (Minus e1 e2) = (eval e1) - (eval e2)
|
| ... | ... | @@ -90,10 +90,11 @@ words :- |
| 90 | 90 | <0> "xor" { mkTv TXor }
|
| 91 | 91 | <0> "xor_eq" { mkTv TXorEq }
|
| 92 | 92 | ----------------------------------------
|
| 93 | - <0> [a-zA-Z_][a-zA-Z0-9_]* { mkTv TIdentifier }
|
|
| 94 | - <0> \-? [0-9][0-9]* { mkTv TInteger }
|
|
| 95 | - <0> \" [^\"]* \" { mkTv (TString . tail . init) }
|
|
| 96 | - <0> () { begin other }
|
|
| 93 | + <0> [a-zA-Z_][a-zA-Z0-9_]*\( { mkTv TIdentifierLParen }
|
|
| 94 | + <0> [a-zA-Z_][a-zA-Z0-9_]* { mkTv TIdentifier }
|
|
| 95 | + <0> \-? [0-9][0-9]* { mkTv TInteger }
|
|
| 96 | + <0> \" [^\"]* \" { mkTv (TString . tail . init) }
|
|
| 97 | + <0> () { begin other }
|
|
| 97 | 98 | |
| 98 | 99 | <other> .+ { \i -> do {setStartCode 0;
|
| 99 | 100 | mkTv TOther i} }
|
| ... | ... | @@ -70,13 +70,17 @@ expandToks :: MacroDefines -> [Token] -> [Token] |
| 70 | 70 | expandToks s ts =
|
| 71 | 71 | let
|
| 72 | 72 | (expansionDone, r) = doExpandToks False s ts
|
| 73 | - in
|
|
| 73 | + in
|
|
| 74 | 74 | if expansionDone
|
| 75 | 75 | then expandToks s r
|
| 76 | 76 | else r
|
| 77 | 77 | |
| 78 | 78 | doExpandToks :: Bool -> MacroDefines -> [Token] -> (Bool, [Token])
|
| 79 | 79 | doExpandToks ed _ [] = (ed, [])
|
| 80 | +doExpandToks ed s (TIdentifierLParen n: ts) =
|
|
| 81 | + -- TIdentifierLParen has no meaning here (only in a #define), so
|
|
| 82 | + -- restore it to its constituent tokens
|
|
| 83 | + doExpandToks ed s (TIdentifier n:TOpenParen "(":ts)
|
|
| 80 | 84 | doExpandToks _ s (TIdentifier "defined" : ts) = (True, rest)
|
| 81 | 85 | -- See Note: [defined unary operator] below
|
| 82 | 86 | where
|
| ... | ... | @@ -280,6 +280,7 @@ t2 = do |
| 280 | 280 | , "#else"
|
| 281 | 281 | , "x = 5"
|
| 282 | 282 | , "#endif"
|
| 283 | + , ""
|
|
| 283 | 284 | ]
|
| 284 | 285 | |
| 285 | 286 | -- x = 5
|
| ... | ... | @@ -348,6 +349,7 @@ t4 = do |
| 348 | 349 | , "#else"
|
| 349 | 350 | , "x = \"no version\""
|
| 350 | 351 | , "#endif"
|
| 352 | + , ""
|
|
| 351 | 353 | ]
|
| 352 | 354 | |
| 353 | 355 | -- x = "got version"
|
| ... | ... | @@ -399,6 +401,7 @@ t10 = do |
| 399 | 401 | , "#else"
|
| 400 | 402 | , "x = 2"
|
| 401 | 403 | , "#endif"
|
| 404 | + , ""
|
|
| 402 | 405 | ]
|
| 403 | 406 | |
| 404 | 407 | -- x = 1
|
| ... | ... | @@ -424,6 +427,7 @@ t11 = do |
| 424 | 427 | , "#else"
|
| 425 | 428 | , "x = 5"
|
| 426 | 429 | , "#endif"
|
| 430 | + , ""
|
|
| 427 | 431 | ]
|
| 428 | 432 | |
| 429 | 433 | -- x = 1
|
| ... | ... | @@ -438,6 +442,7 @@ t12 = do |
| 438 | 442 | , "#else"
|
| 439 | 443 | , "x = 5"
|
| 440 | 444 | , "#endif"
|
| 445 | + , ""
|
|
| 441 | 446 | ]
|
| 442 | 447 | |
| 443 | 448 | -- x = 1
|
| ... | ... | @@ -450,6 +455,7 @@ t13 = do |
| 450 | 455 | , "#else"
|
| 451 | 456 | , "x = 5"
|
| 452 | 457 | , "#endif"
|
| 458 | + , ""
|
|
| 453 | 459 | ]
|
| 454 | 460 | |
| 455 | 461 | -- x = 1
|
| ... | ... | @@ -473,6 +479,7 @@ t14 = do |
| 473 | 479 | , "#else"
|
| 474 | 480 | , "z = 5"
|
| 475 | 481 | , "#endif"
|
| 482 | + , ""
|
|
| 476 | 483 | ]
|
| 477 | 484 | |
| 478 | 485 | -- x = 1
|
| ... | ... | @@ -496,6 +503,7 @@ t16 = do |
| 496 | 503 | , "#else"
|
| 497 | 504 | , "x = 5"
|
| 498 | 505 | , "#endif"
|
| 506 | + , ""
|
|
| 499 | 507 | ]
|
| 500 | 508 | |
| 501 | 509 | -- x = 1
|
| ... | ... | @@ -509,6 +517,7 @@ t17 = do |
| 509 | 517 | , "#else"
|
| 510 | 518 | , "x = 5"
|
| 511 | 519 | , "#endif"
|
| 520 | + , ""
|
|
| 512 | 521 | ]
|
| 513 | 522 | |
| 514 | 523 | -- x = 1
|
| ... | ... | @@ -525,6 +534,7 @@ t18 = do |
| 525 | 534 | , "#else"
|
| 526 | 535 | , "x = 5"
|
| 527 | 536 | , "#endif"
|
| 537 | + , ""
|
|
| 528 | 538 | ]
|
| 529 | 539 | |
| 530 | 540 | t19 :: IO ()
|
| ... | ... | @@ -593,6 +603,7 @@ t22 = do |
| 593 | 603 | , "also ignored"
|
| 594 | 604 | , "#endif"
|
| 595 | 605 | , "#endif"
|
| 606 | + , ""
|
|
| 596 | 607 | ]
|
| 597 | 608 | |
| 598 | 609 | t23 :: IO ()
|
| ... | ... | @@ -606,6 +617,7 @@ t23 = do |
| 606 | 617 | , "#else"
|
| 607 | 618 | , "x = 2"
|
| 608 | 619 | , "#endif"
|
| 620 | + , ""
|
|
| 609 | 621 | ]
|
| 610 | 622 | |
| 611 | 623 | t24 :: IO ()
|
| ... | ... | @@ -619,6 +631,7 @@ t24 = do |
| 619 | 631 | , "#else"
|
| 620 | 632 | , "x = 2"
|
| 621 | 633 | , "#endif"
|
| 634 | + , ""
|
|
| 622 | 635 | ]
|
| 623 | 636 | |
| 624 | 637 | t25 :: IO ()
|
| ... | ... | @@ -632,6 +645,7 @@ t25 = do |
| 632 | 645 | , "#else"
|
| 633 | 646 | , "x = 2"
|
| 634 | 647 | , "#endif"
|
| 648 | + , ""
|
|
| 635 | 649 | ]
|
| 636 | 650 | |
| 637 | 651 | t26 :: IO ()
|
| ... | ... | @@ -662,6 +676,7 @@ t27 = do |
| 662 | 676 | , "#ifdef DEBUG"
|
| 663 | 677 | , " hiding (rev)"
|
| 664 | 678 | , "#endif"
|
| 679 | + , ""
|
|
| 665 | 680 | ]
|
| 666 | 681 | |
| 667 | 682 | t28 :: IO ()
|
| ... | ... | @@ -48,9 +48,10 @@ combineToks ss = intercalate " " ss |
| 48 | 48 | |
| 49 | 49 | cppDefine :: [Token] -> Either String CppDirective
|
| 50 | 50 | cppDefine [] = Left "error:empty #define directive"
|
| 51 | -cppDefine (TIdentifier n : ts) = Right $ CppDefine n args def
|
|
| 51 | +cppDefine (TIdentifierLParen n : ts) = Right $ CppDefine n args def
|
|
| 52 | 52 | where
|
| 53 | 53 | (args, def) = getArgs ts
|
| 54 | +cppDefine (TIdentifier n : ts) = Right $ CppDefine n Nothing ts
|
|
| 54 | 55 | cppDefine (t : _) = Left $ "#define: expecting an identifier, got :" ++ show t
|
| 55 | 56 | |
| 56 | 57 | cppInclude :: [String] -> CppDirective
|
| ... | ... | @@ -80,14 +81,14 @@ cppDumpState _ts = CppDumpState |
| 80 | 81 | -- ---------------------------------------------------------------------
|
| 81 | 82 | |
| 82 | 83 | -- Crack out the arguments to a #define. This is of the form of
|
| 83 | --- comma-separated identifiers between parens
|
|
| 84 | +-- comma-separated identifiers between parens, where we have already
|
|
| 85 | +-- seen the opening paren.
|
|
| 84 | 86 | getArgs :: [Token] -> (Maybe [String], [Token])
|
| 85 | 87 | getArgs [] = (Nothing, [])
|
| 86 | -getArgs (TOpenParen _ : ts) =
|
|
| 88 | +getArgs ts =
|
|
| 87 | 89 | case parseDefineArgs [] ts of
|
| 88 | 90 | Left err -> error err
|
| 89 | 91 | Right (args, rest) -> (Just (reverse args), rest)
|
| 90 | -getArgs ts = (Nothing, ts)
|
|
| 91 | 92 | |
| 92 | 93 | parseDefineArgs ::
|
| 93 | 94 | [String] ->
|
| ... | ... | @@ -139,3 +140,5 @@ t2 = doATest "# if ((m1) < 1 || (m1) == 1 && (m2) < 7 || (m1) == 1 && (m2) == |
| 139 | 140 | |
| 140 | 141 | t3 :: Either String CppDirective
|
| 141 | 142 | t3 = parseDirective "# if FOO == 4"
|
| 143 | + |
|
| 144 | +t4 = cppLex "#define foo(X) X" |
| ... | ... | @@ -91,6 +91,7 @@ import qualified GHC.Internal.Data.Tuple as Happy_Prelude |
| 91 | 91 | 'xor_eq' { TXorEq {} }
|
| 92 | 92 | |
| 93 | 93 | identifier { TIdentifier {} }
|
| 94 | + identifierLP { TIdentifierLParen {} }
|
|
| 94 | 95 | integer { TInteger {} }
|
| 95 | 96 | string { TString {} }
|
| 96 | 97 | other { TOther {} }
|
| ... | ... | @@ -91,6 +91,7 @@ init_state = |
| 91 | 91 | data Token
|
| 92 | 92 | = TEOF {t_str :: String}
|
| 93 | 93 | | TIdentifier {t_str :: String}
|
| 94 | + | TIdentifierLParen {t_str :: String}
|
|
| 94 | 95 | | TInteger {t_str :: String}
|
| 95 | 96 | | -- preprocessing-op-or-punc
|
| 96 | 97 | -- https://timsong-cpp.github.io/cppwp/n4140/lex.operators#nt:preprocessing-op-or-punc
|
| ... | ... | @@ -191,7 +191,7 @@ setAccepting on = do |
| 191 | 191 | let possible_accepting = parent_on && on
|
| 192 | 192 | let (new_group_state, accepting) =
|
| 193 | 193 | case (group_state, possible_accepting) of
|
| 194 | - (PpNoGroup, v) -> error "setAccepting for state PpNoGroup"
|
|
| 194 | + (PpNoGroup, _) -> error "setAccepting for state PpNoGroup"
|
|
| 195 | 195 | (PpInGroupStillInactive, True) -> (PpInGroupHasBeenActive, True)
|
| 196 | 196 | (PpInGroupStillInactive, False) -> (PpInGroupStillInactive, False)
|
| 197 | 197 | (PpInGroupHasBeenActive, _) -> (PpInGroupHasBeenActive, False)
|
| ... | ... | @@ -317,7 +317,7 @@ addDefine name def = do |
| 317 | 317 | |
| 318 | 318 | addDefine' :: PpState -> MacroName -> MacroDef -> PpState
|
| 319 | 319 | addDefine' s name def =
|
| 320 | - s{pp_defines = insertMacroDef name def (pp_defines s)}
|
|
| 320 | + s{ pp_defines = insertMacroDef name def (pp_defines s)}
|
|
| 321 | 321 | |
| 322 | 322 | ppDefine :: MacroName -> MacroDef -> PP ()
|
| 323 | 323 | ppDefine name val = addDefine name val
|