
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC Commits: 6467d61e by Brandon Chinn at 2025-04-29T18:36:03-04:00 Fix lexing "\^\" (#25937) This broke in the refactor in !13128, where the old code parsed escape codes and collapsed string gaps at the same time, but the new code collapsed gaps first, then resolved escape codes. The new code used a naive heuristic to skip escaped backslashes, but didn't account for "\^\". - - - - - 6 changed files: - compiler/GHC/Parser/Errors/Types.hs - compiler/GHC/Parser/String.hs - + testsuite/tests/parser/should_run/T25937.hs - + testsuite/tests/parser/should_run/T25937.stdout - testsuite/tests/parser/should_run/all.T - + testsuite/tests/parser/should_run/parser_unit_tests.hs Changes: ===================================== compiler/GHC/Parser/Errors/Types.hs ===================================== @@ -611,6 +611,7 @@ data LexErr | LexUnterminatedComment -- ^ Unterminated `{-' | LexUnterminatedOptions -- ^ Unterminated OPTIONS pragma | LexUnterminatedQQ -- ^ Unterminated quasiquotation + deriving (Show,Eq,Ord) -- | Errors from the Cmm parser data CmmParserError ===================================== compiler/GHC/Parser/String.hs ===================================== @@ -36,6 +36,7 @@ import GHC.Utils.Panic (panic) type BufPos = Int data StringLexError = StringLexError LexErr BufPos + deriving (Show, Eq) lexString :: Int -> StringBuffer -> Either StringLexError String lexString = lexStringWith processChars processChars @@ -122,20 +123,49 @@ bufferLocatedChars initialBuf len = go initialBuf -- ----------------------------------------------------------------------------- -- Lexing phases +-- | Collapse all string gaps in the given input. +-- +-- Iterates through the input in `go` until we encounter a backslash. The +-- @stringchar Alex regex only allows backslashes in two places: escape codes +-- and string gaps. +-- +-- * If the next character is a space, it has to be the start of a string gap +-- AND it must end, since the @gap Alex regex will only match if it ends. +-- Collapse the gap and continue the main iteration loop. +-- +-- * Otherwise, this is an escape code. If it's an escape code, there are +-- ONLY three possibilities (see the @escape Alex regex): +-- 1. The escape code is "\\" +-- 2. The escape code is "\^\" +-- 3. The escape code does not have a backslash, other than the initial +-- backslash +-- +-- In the first two possibilities, just skip them and continue the main +-- iteration loop ("skip" as in "keep in the list as-is"). In the last one, +-- we can just skip the backslash, then continue the main iteration loop. +-- the rest of the escape code will be skipped as normal characters in the +-- string; no need to fully parse a proper escape code. collapseGaps :: HasChar c => [c] -> [c] collapseGaps = go where go = \case - c1@(Char '\\') : c2@(Char c) : cs - -- #25784: string gaps are semantically equivalent to "\&" + -- Match the start of a string gap + drop gap + -- #25784: string gaps are semantically equivalent to "\&" + c1@(Char '\\') : Char c : cs | is_space c -> c1 : setChar '&' c1 : go (dropGap cs) - | otherwise -> c1 : c2 : go cs + -- Match all possible escape characters that include a backslash + c1@(Char '\\') : c2@(Char '\\') : cs + -> c1 : c2 : go cs + c1@(Char '\\') : c2@(Char '^') : c3@(Char '\\') : cs + -> c1 : c2 : c3 : go cs + -- Otherwise, just keep looping c : cs -> c : go cs [] -> [] dropGap = \case Char '\\' : cs -> cs _ : cs -> dropGap cs + -- Unreachable since gaps must end; see docstring [] -> panic "gap unexpectedly ended" resolveEscapes :: HasChar c => [c] -> Either (c, LexErr) [c] ===================================== testsuite/tests/parser/should_run/T25937.hs ===================================== @@ -0,0 +1,2 @@ +main :: IO () +main = print "\^\ " ===================================== testsuite/tests/parser/should_run/T25937.stdout ===================================== @@ -0,0 +1 @@ +"\FS " ===================================== testsuite/tests/parser/should_run/all.T ===================================== @@ -1,3 +1,8 @@ +test('parser_unit_tests', + normal, + compile_and_run, + ['-package ghc']) + test('readRun001', normal, compile_and_run, ['']) test('readRun002', normal, compile_and_run, ['']) test('readRun003', normal, compile_and_run, ['']) @@ -21,6 +26,7 @@ test('RecordDotSyntax3', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compil test('RecordDotSyntax4', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compile_and_run, ['RecordDotSyntax4', '']) test('RecordDotSyntax5', normal, compile_and_run, ['']) test('ListTuplePunsConstraints', extra_files(['ListTuplePunsConstraints.hs']), ghci_script, ['ListTuplePunsConstraints.script']) +test('T25937', normal, compile_and_run, ['']) # Multiline strings test('MultilineStrings', normal, compile_and_run, ['']) ===================================== testsuite/tests/parser/should_run/parser_unit_tests.hs ===================================== @@ -0,0 +1,82 @@ +import GHC.Data.StringBuffer (stringToStringBuffer) +import qualified GHC.Data.StringBuffer as StringBuffer (StringBuffer (..)) +import GHC.Parser.String (lexString, lexMultilineString) + +import qualified Control.Exception as E +import Control.Monad (forM_, unless) + +main :: IO () +main = do + forM_ tests $ \(testName, test) -> do + result <- E.try test + case result of + Right () -> pure () + Left (e :: E.SomeException) + | Just e' <- E.asyncExceptionFromException e -> do + E.throwIO (e' :: E.AsyncException) + | otherwise -> do + putStrLn $ ">>> FAIL: " ++ testName + putStrLn $ E.displayException e + +{----- Test infrastructure -----} + +data TestFailure = TestFailure String + deriving (Show) + +instance E.Exception TestFailure where + displayException (TestFailure msg) = "Test failure:\n" ++ msg + +testFailure :: String -> IO a +testFailure = E.throwIO . TestFailure + +shouldBe :: (Eq a, Show a) => a -> a -> IO () +shouldBe actual expected = + unless (actual == expected) $ + testFailure $ + "Got: " ++ show actual ++ "\n" ++ + "Expected: " ++ show expected + +type TestCase = (String, IO ()) + +testCase :: String -> IO () -> TestCase +testCase = (,) + +{----- Tests -----} + +tests :: [TestCase] +tests = concat + [ stringTests + ] + +-- | Unit tests for GHC.Parser.String +stringTests :: [TestCase] +stringTests = concat + [ escapedBackslashTests + ] + where + withBuf f s = let buf = stringToStringBuffer s in f (StringBuffer.len buf) buf + + -- Test all situations where backslashes can appear in escape characters (#25937) + escapedBackslashTests = + [ testCase label $ do + withBuf lexStr input `shouldBe` Right output + | (lexLabel, lexStr) <- [("strings", lexString), ("multiline strings", lexMultilineString)] + , (label, input, output) <- + [ ( "escaped backslashes in " ++ lexLabel ++ " not mistaken for string gaps" + , [' ', '\\', '\\', ' ', '\\', '\\', ' '] + , " \\ \\ " + ) + , ( "escaped \\FS in " ++ lexLabel ++ " not mistaken for beginning of string gap" + , ['\\', '^', '\\'] + , "\FS" + ) + , ( "escaped \\FS in " ++ lexLabel ++ " not mistaken for unterminated string gap" + , ['\\', '^', '\\', ' '] + , "\FS " + ) + , ( "escaped \\FS in " ++ lexLabel ++ " does not collapse mistaken string gap" + , ['\\', '^', '\\', ' ', '\\', 'n'] + , "\FS \n" + ) + ] + ] View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6467d61ee8cdd6b611e035b72cbe356c... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6467d61ee8cdd6b611e035b72cbe356c... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Marge Bot (@marge-bot)