Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
-
6467d61e
by Brandon Chinn at 2025-04-29T18:36:03-04:00
6 changed files:
- compiler/GHC/Parser/Errors/Types.hs
- compiler/GHC/Parser/String.hs
- + testsuite/tests/parser/should_run/T25937.hs
- + testsuite/tests/parser/should_run/T25937.stdout
- testsuite/tests/parser/should_run/all.T
- + testsuite/tests/parser/should_run/parser_unit_tests.hs
Changes:
... | ... | @@ -611,6 +611,7 @@ data LexErr |
611 | 611 | | LexUnterminatedComment -- ^ Unterminated `{-'
|
612 | 612 | | LexUnterminatedOptions -- ^ Unterminated OPTIONS pragma
|
613 | 613 | | LexUnterminatedQQ -- ^ Unterminated quasiquotation
|
614 | + deriving (Show,Eq,Ord)
|
|
614 | 615 | |
615 | 616 | -- | Errors from the Cmm parser
|
616 | 617 | data CmmParserError
|
... | ... | @@ -36,6 +36,7 @@ import GHC.Utils.Panic (panic) |
36 | 36 | |
37 | 37 | type BufPos = Int
|
38 | 38 | data StringLexError = StringLexError LexErr BufPos
|
39 | + deriving (Show, Eq)
|
|
39 | 40 | |
40 | 41 | lexString :: Int -> StringBuffer -> Either StringLexError String
|
41 | 42 | lexString = lexStringWith processChars processChars
|
... | ... | @@ -122,20 +123,49 @@ bufferLocatedChars initialBuf len = go initialBuf |
122 | 123 | -- -----------------------------------------------------------------------------
|
123 | 124 | -- Lexing phases
|
124 | 125 | |
126 | +-- | Collapse all string gaps in the given input.
|
|
127 | +--
|
|
128 | +-- Iterates through the input in `go` until we encounter a backslash. The
|
|
129 | +-- @stringchar Alex regex only allows backslashes in two places: escape codes
|
|
130 | +-- and string gaps.
|
|
131 | +--
|
|
132 | +-- * If the next character is a space, it has to be the start of a string gap
|
|
133 | +-- AND it must end, since the @gap Alex regex will only match if it ends.
|
|
134 | +-- Collapse the gap and continue the main iteration loop.
|
|
135 | +--
|
|
136 | +-- * Otherwise, this is an escape code. If it's an escape code, there are
|
|
137 | +-- ONLY three possibilities (see the @escape Alex regex):
|
|
138 | +-- 1. The escape code is "\\"
|
|
139 | +-- 2. The escape code is "\^\"
|
|
140 | +-- 3. The escape code does not have a backslash, other than the initial
|
|
141 | +-- backslash
|
|
142 | +--
|
|
143 | +-- In the first two possibilities, just skip them and continue the main
|
|
144 | +-- iteration loop ("skip" as in "keep in the list as-is"). In the last one,
|
|
145 | +-- we can just skip the backslash, then continue the main iteration loop.
|
|
146 | +-- the rest of the escape code will be skipped as normal characters in the
|
|
147 | +-- string; no need to fully parse a proper escape code.
|
|
125 | 148 | collapseGaps :: HasChar c => [c] -> [c]
|
126 | 149 | collapseGaps = go
|
127 | 150 | where
|
128 | 151 | go = \case
|
129 | - c1@(Char '\\') : c2@(Char c) : cs
|
|
130 | - -- #25784: string gaps are semantically equivalent to "\&"
|
|
152 | + -- Match the start of a string gap + drop gap
|
|
153 | + -- #25784: string gaps are semantically equivalent to "\&"
|
|
154 | + c1@(Char '\\') : Char c : cs
|
|
131 | 155 | | is_space c -> c1 : setChar '&' c1 : go (dropGap cs)
|
132 | - | otherwise -> c1 : c2 : go cs
|
|
156 | + -- Match all possible escape characters that include a backslash
|
|
157 | + c1@(Char '\\') : c2@(Char '\\') : cs
|
|
158 | + -> c1 : c2 : go cs
|
|
159 | + c1@(Char '\\') : c2@(Char '^') : c3@(Char '\\') : cs
|
|
160 | + -> c1 : c2 : c3 : go cs
|
|
161 | + -- Otherwise, just keep looping
|
|
133 | 162 | c : cs -> c : go cs
|
134 | 163 | [] -> []
|
135 | 164 | |
136 | 165 | dropGap = \case
|
137 | 166 | Char '\\' : cs -> cs
|
138 | 167 | _ : cs -> dropGap cs
|
168 | + -- Unreachable since gaps must end; see docstring
|
|
139 | 169 | [] -> panic "gap unexpectedly ended"
|
140 | 170 | |
141 | 171 | resolveEscapes :: HasChar c => [c] -> Either (c, LexErr) [c]
|
1 | +main :: IO ()
|
|
2 | +main = print "\^\ " |
1 | +"\FS " |
1 | +test('parser_unit_tests',
|
|
2 | + normal,
|
|
3 | + compile_and_run,
|
|
4 | + ['-package ghc'])
|
|
5 | + |
|
1 | 6 | test('readRun001', normal, compile_and_run, [''])
|
2 | 7 | test('readRun002', normal, compile_and_run, [''])
|
3 | 8 | test('readRun003', normal, compile_and_run, [''])
|
... | ... | @@ -21,6 +26,7 @@ test('RecordDotSyntax3', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compil |
21 | 26 | test('RecordDotSyntax4', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compile_and_run, ['RecordDotSyntax4', ''])
|
22 | 27 | test('RecordDotSyntax5', normal, compile_and_run, [''])
|
23 | 28 | test('ListTuplePunsConstraints', extra_files(['ListTuplePunsConstraints.hs']), ghci_script, ['ListTuplePunsConstraints.script'])
|
29 | +test('T25937', normal, compile_and_run, [''])
|
|
24 | 30 | |
25 | 31 | # Multiline strings
|
26 | 32 | test('MultilineStrings', normal, compile_and_run, [''])
|
1 | +import GHC.Data.StringBuffer (stringToStringBuffer)
|
|
2 | +import qualified GHC.Data.StringBuffer as StringBuffer (StringBuffer (..))
|
|
3 | +import GHC.Parser.String (lexString, lexMultilineString)
|
|
4 | + |
|
5 | +import qualified Control.Exception as E
|
|
6 | +import Control.Monad (forM_, unless)
|
|
7 | + |
|
8 | +main :: IO ()
|
|
9 | +main = do
|
|
10 | + forM_ tests $ \(testName, test) -> do
|
|
11 | + result <- E.try test
|
|
12 | + case result of
|
|
13 | + Right () -> pure ()
|
|
14 | + Left (e :: E.SomeException)
|
|
15 | + | Just e' <- E.asyncExceptionFromException e -> do
|
|
16 | + E.throwIO (e' :: E.AsyncException)
|
|
17 | + | otherwise -> do
|
|
18 | + putStrLn $ ">>> FAIL: " ++ testName
|
|
19 | + putStrLn $ E.displayException e
|
|
20 | + |
|
21 | +{----- Test infrastructure -----}
|
|
22 | + |
|
23 | +data TestFailure = TestFailure String
|
|
24 | + deriving (Show)
|
|
25 | + |
|
26 | +instance E.Exception TestFailure where
|
|
27 | + displayException (TestFailure msg) = "Test failure:\n" ++ msg
|
|
28 | + |
|
29 | +testFailure :: String -> IO a
|
|
30 | +testFailure = E.throwIO . TestFailure
|
|
31 | + |
|
32 | +shouldBe :: (Eq a, Show a) => a -> a -> IO ()
|
|
33 | +shouldBe actual expected =
|
|
34 | + unless (actual == expected) $
|
|
35 | + testFailure $
|
|
36 | + "Got: " ++ show actual ++ "\n" ++
|
|
37 | + "Expected: " ++ show expected
|
|
38 | + |
|
39 | +type TestCase = (String, IO ())
|
|
40 | + |
|
41 | +testCase :: String -> IO () -> TestCase
|
|
42 | +testCase = (,)
|
|
43 | + |
|
44 | +{----- Tests -----}
|
|
45 | + |
|
46 | +tests :: [TestCase]
|
|
47 | +tests = concat
|
|
48 | + [ stringTests
|
|
49 | + ]
|
|
50 | + |
|
51 | +-- | Unit tests for GHC.Parser.String
|
|
52 | +stringTests :: [TestCase]
|
|
53 | +stringTests = concat
|
|
54 | + [ escapedBackslashTests
|
|
55 | + ]
|
|
56 | + where
|
|
57 | + withBuf f s = let buf = stringToStringBuffer s in f (StringBuffer.len buf) buf
|
|
58 | + |
|
59 | + -- Test all situations where backslashes can appear in escape characters (#25937)
|
|
60 | + escapedBackslashTests =
|
|
61 | + [ testCase label $ do
|
|
62 | + withBuf lexStr input `shouldBe` Right output
|
|
63 | + | (lexLabel, lexStr) <- [("strings", lexString), ("multiline strings", lexMultilineString)]
|
|
64 | + , (label, input, output) <-
|
|
65 | + [ ( "escaped backslashes in " ++ lexLabel ++ " not mistaken for string gaps"
|
|
66 | + , [' ', '\\', '\\', ' ', '\\', '\\', ' ']
|
|
67 | + , " \\ \\ "
|
|
68 | + )
|
|
69 | + , ( "escaped \\FS in " ++ lexLabel ++ " not mistaken for beginning of string gap"
|
|
70 | + , ['\\', '^', '\\']
|
|
71 | + , "\FS"
|
|
72 | + )
|
|
73 | + , ( "escaped \\FS in " ++ lexLabel ++ " not mistaken for unterminated string gap"
|
|
74 | + , ['\\', '^', '\\', ' ']
|
|
75 | + , "\FS "
|
|
76 | + )
|
|
77 | + , ( "escaped \\FS in " ++ lexLabel ++ " does not collapse mistaken string gap"
|
|
78 | + , ['\\', '^', '\\', ' ', '\\', 'n']
|
|
79 | + , "\FS \n"
|
|
80 | + )
|
|
81 | + ]
|
|
82 | + ] |