Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
- 
6467d61e
by Brandon Chinn at 2025-04-29T18:36:03-04:00
6 changed files:
- compiler/GHC/Parser/Errors/Types.hs
- compiler/GHC/Parser/String.hs
- + testsuite/tests/parser/should_run/T25937.hs
- + testsuite/tests/parser/should_run/T25937.stdout
- testsuite/tests/parser/should_run/all.T
- + testsuite/tests/parser/should_run/parser_unit_tests.hs
Changes:
| ... | ... | @@ -611,6 +611,7 @@ data LexErr | 
| 611 | 611 |     | LexUnterminatedComment -- ^ Unterminated `{-'
 | 
| 612 | 612 |     | LexUnterminatedOptions -- ^ Unterminated OPTIONS pragma
 | 
| 613 | 613 |     | LexUnterminatedQQ      -- ^ Unterminated quasiquotation
 | 
| 614 | +   deriving (Show,Eq,Ord)
 | |
| 614 | 615 | |
| 615 | 616 |  -- | Errors from the Cmm parser
 | 
| 616 | 617 |  data CmmParserError
 | 
| ... | ... | @@ -36,6 +36,7 @@ import GHC.Utils.Panic (panic) | 
| 36 | 36 | |
| 37 | 37 |  type BufPos = Int
 | 
| 38 | 38 |  data StringLexError = StringLexError LexErr BufPos
 | 
| 39 | +  deriving (Show, Eq)
 | |
| 39 | 40 | |
| 40 | 41 |  lexString :: Int -> StringBuffer -> Either StringLexError String
 | 
| 41 | 42 |  lexString = lexStringWith processChars processChars
 | 
| ... | ... | @@ -122,20 +123,49 @@ bufferLocatedChars initialBuf len = go initialBuf | 
| 122 | 123 |  -- -----------------------------------------------------------------------------
 | 
| 123 | 124 |  -- Lexing phases
 | 
| 124 | 125 | |
| 126 | +-- | Collapse all string gaps in the given input.
 | |
| 127 | +--
 | |
| 128 | +-- Iterates through the input in `go` until we encounter a backslash. The
 | |
| 129 | +-- @stringchar Alex regex only allows backslashes in two places: escape codes
 | |
| 130 | +-- and string gaps.
 | |
| 131 | +--
 | |
| 132 | +--   * If the next character is a space, it has to be the start of a string gap
 | |
| 133 | +--     AND it must end, since the @gap Alex regex will only match if it ends.
 | |
| 134 | +--     Collapse the gap and continue the main iteration loop.
 | |
| 135 | +--
 | |
| 136 | +--   * Otherwise, this is an escape code. If it's an escape code, there are
 | |
| 137 | +--     ONLY three possibilities (see the @escape Alex regex):
 | |
| 138 | +--       1. The escape code is "\\"
 | |
| 139 | +--       2. The escape code is "\^\"
 | |
| 140 | +--       3. The escape code does not have a backslash, other than the initial
 | |
| 141 | +--          backslash
 | |
| 142 | +--
 | |
| 143 | +--     In the first two possibilities, just skip them and continue the main
 | |
| 144 | +--     iteration loop ("skip" as in "keep in the list as-is"). In the last one,
 | |
| 145 | +--     we can just skip the backslash, then continue the main iteration loop.
 | |
| 146 | +--     the rest of the escape code will be skipped as normal characters in the
 | |
| 147 | +--     string; no need to fully parse a proper escape code.
 | |
| 125 | 148 |  collapseGaps :: HasChar c => [c] -> [c]
 | 
| 126 | 149 |  collapseGaps = go
 | 
| 127 | 150 |    where
 | 
| 128 | 151 |      go = \case
 | 
| 129 | -      c1@(Char '\\') : c2@(Char c) : cs
 | |
| 130 | -        -- #25784: string gaps are semantically equivalent to "\&"
 | |
| 152 | +      -- Match the start of a string gap + drop gap
 | |
| 153 | +      -- #25784: string gaps are semantically equivalent to "\&"
 | |
| 154 | +      c1@(Char '\\') : Char c : cs
 | |
| 131 | 155 |          | is_space c -> c1 : setChar '&' c1 : go (dropGap cs)
 | 
| 132 | -        | otherwise  -> c1 : c2 : go cs
 | |
| 156 | +      -- Match all possible escape characters that include a backslash
 | |
| 157 | +      c1@(Char '\\') : c2@(Char '\\') : cs
 | |
| 158 | +        -> c1 : c2 : go cs
 | |
| 159 | +      c1@(Char '\\') : c2@(Char '^') : c3@(Char '\\') : cs
 | |
| 160 | +        -> c1 : c2 : c3 : go cs
 | |
| 161 | +      -- Otherwise, just keep looping
 | |
| 133 | 162 |        c : cs -> c : go cs
 | 
| 134 | 163 |        [] -> []
 | 
| 135 | 164 | |
| 136 | 165 |      dropGap = \case
 | 
| 137 | 166 |        Char '\\' : cs -> cs
 | 
| 138 | 167 |        _ : cs -> dropGap cs
 | 
| 168 | +      -- Unreachable since gaps must end; see docstring
 | |
| 139 | 169 |        [] -> panic "gap unexpectedly ended"
 | 
| 140 | 170 | |
| 141 | 171 |  resolveEscapes :: HasChar c => [c] -> Either (c, LexErr) [c]
 | 
| 1 | +main :: IO ()
 | |
| 2 | +main = print "\^\ " | 
| 1 | +"\FS " | 
| 1 | +test('parser_unit_tests',
 | |
| 2 | +     normal,
 | |
| 3 | +     compile_and_run,
 | |
| 4 | +     ['-package ghc'])
 | |
| 5 | + | |
| 1 | 6 |  test('readRun001', normal, compile_and_run, [''])
 | 
| 2 | 7 |  test('readRun002', normal, compile_and_run, [''])
 | 
| 3 | 8 |  test('readRun003', normal, compile_and_run, [''])
 | 
| ... | ... | @@ -21,6 +26,7 @@ test('RecordDotSyntax3', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compil | 
| 21 | 26 |  test('RecordDotSyntax4', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compile_and_run, ['RecordDotSyntax4', ''])
 | 
| 22 | 27 |  test('RecordDotSyntax5', normal, compile_and_run, [''])
 | 
| 23 | 28 |  test('ListTuplePunsConstraints', extra_files(['ListTuplePunsConstraints.hs']), ghci_script, ['ListTuplePunsConstraints.script'])
 | 
| 29 | +test('T25937', normal, compile_and_run, [''])
 | |
| 24 | 30 | |
| 25 | 31 |  # Multiline strings
 | 
| 26 | 32 |  test('MultilineStrings', normal, compile_and_run, [''])
 | 
| 1 | +import GHC.Data.StringBuffer (stringToStringBuffer)
 | |
| 2 | +import qualified GHC.Data.StringBuffer as StringBuffer (StringBuffer (..))
 | |
| 3 | +import GHC.Parser.String (lexString, lexMultilineString)
 | |
| 4 | + | |
| 5 | +import qualified Control.Exception as E
 | |
| 6 | +import Control.Monad (forM_, unless)
 | |
| 7 | + | |
| 8 | +main :: IO ()
 | |
| 9 | +main = do
 | |
| 10 | +  forM_ tests $ \(testName, test) -> do
 | |
| 11 | +    result <- E.try test
 | |
| 12 | +    case result of
 | |
| 13 | +      Right () -> pure ()
 | |
| 14 | +      Left (e :: E.SomeException)
 | |
| 15 | +        | Just e' <- E.asyncExceptionFromException e -> do
 | |
| 16 | +            E.throwIO (e' :: E.AsyncException)
 | |
| 17 | +        | otherwise -> do
 | |
| 18 | +            putStrLn $ ">>> FAIL: " ++ testName
 | |
| 19 | +            putStrLn $ E.displayException e
 | |
| 20 | + | |
| 21 | +{----- Test infrastructure -----}
 | |
| 22 | + | |
| 23 | +data TestFailure = TestFailure String
 | |
| 24 | +  deriving (Show)
 | |
| 25 | + | |
| 26 | +instance E.Exception TestFailure where
 | |
| 27 | +  displayException (TestFailure msg) = "Test failure:\n" ++ msg
 | |
| 28 | + | |
| 29 | +testFailure :: String -> IO a
 | |
| 30 | +testFailure = E.throwIO . TestFailure
 | |
| 31 | + | |
| 32 | +shouldBe :: (Eq a, Show a) => a -> a -> IO ()
 | |
| 33 | +shouldBe actual expected =
 | |
| 34 | +  unless (actual == expected) $
 | |
| 35 | +    testFailure $
 | |
| 36 | +      "Got:      " ++ show actual ++ "\n" ++
 | |
| 37 | +      "Expected: " ++ show expected
 | |
| 38 | + | |
| 39 | +type TestCase = (String, IO ())
 | |
| 40 | + | |
| 41 | +testCase :: String -> IO () -> TestCase
 | |
| 42 | +testCase = (,)
 | |
| 43 | + | |
| 44 | +{----- Tests -----}
 | |
| 45 | + | |
| 46 | +tests :: [TestCase]
 | |
| 47 | +tests = concat
 | |
| 48 | +  [ stringTests
 | |
| 49 | +  ]
 | |
| 50 | + | |
| 51 | +-- | Unit tests for GHC.Parser.String
 | |
| 52 | +stringTests :: [TestCase]
 | |
| 53 | +stringTests = concat
 | |
| 54 | +  [ escapedBackslashTests
 | |
| 55 | +  ]
 | |
| 56 | +  where
 | |
| 57 | +    withBuf f s = let buf = stringToStringBuffer s in f (StringBuffer.len buf) buf
 | |
| 58 | + | |
| 59 | +    -- Test all situations where backslashes can appear in escape characters (#25937)
 | |
| 60 | +    escapedBackslashTests =
 | |
| 61 | +      [ testCase label $ do
 | |
| 62 | +          withBuf lexStr input `shouldBe` Right output
 | |
| 63 | +      | (lexLabel, lexStr) <- [("strings", lexString), ("multiline strings", lexMultilineString)]
 | |
| 64 | +      , (label, input, output) <-
 | |
| 65 | +          [ ( "escaped backslashes in " ++ lexLabel ++ " not mistaken for string gaps"
 | |
| 66 | +            , [' ', '\\', '\\', ' ', '\\', '\\', ' ']
 | |
| 67 | +            , " \\ \\ "
 | |
| 68 | +            )
 | |
| 69 | +          , ( "escaped \\FS in " ++ lexLabel ++ " not mistaken for beginning of string gap"
 | |
| 70 | +            , ['\\', '^', '\\']
 | |
| 71 | +            , "\FS"
 | |
| 72 | +            )
 | |
| 73 | +          , ( "escaped \\FS in " ++ lexLabel ++ " not mistaken for unterminated string gap"
 | |
| 74 | +            , ['\\', '^', '\\', ' ']
 | |
| 75 | +            , "\FS "
 | |
| 76 | +            )
 | |
| 77 | +          , ( "escaped \\FS in " ++ lexLabel ++ " does not collapse mistaken string gap"
 | |
| 78 | +            , ['\\', '^', '\\', ' ', '\\', 'n']
 | |
| 79 | +            , "\FS \n"
 | |
| 80 | +            )
 | |
| 81 | +          ]
 | |
| 82 | +      ] |