Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC

Commits:

6 changed files:

Changes:

  • compiler/GHC/Parser/Errors/Types.hs
    ... ... @@ -611,6 +611,7 @@ data LexErr
    611 611
        | LexUnterminatedComment -- ^ Unterminated `{-'
    
    612 612
        | LexUnterminatedOptions -- ^ Unterminated OPTIONS pragma
    
    613 613
        | LexUnterminatedQQ      -- ^ Unterminated quasiquotation
    
    614
    +   deriving (Show,Eq,Ord)
    
    614 615
     
    
    615 616
     -- | Errors from the Cmm parser
    
    616 617
     data CmmParserError
    

  • compiler/GHC/Parser/String.hs
    ... ... @@ -36,6 +36,7 @@ import GHC.Utils.Panic (panic)
    36 36
     
    
    37 37
     type BufPos = Int
    
    38 38
     data StringLexError = StringLexError LexErr BufPos
    
    39
    +  deriving (Show, Eq)
    
    39 40
     
    
    40 41
     lexString :: Int -> StringBuffer -> Either StringLexError String
    
    41 42
     lexString = lexStringWith processChars processChars
    
    ... ... @@ -122,20 +123,49 @@ bufferLocatedChars initialBuf len = go initialBuf
    122 123
     -- -----------------------------------------------------------------------------
    
    123 124
     -- Lexing phases
    
    124 125
     
    
    126
    +-- | Collapse all string gaps in the given input.
    
    127
    +--
    
    128
    +-- Iterates through the input in `go` until we encounter a backslash. The
    
    129
    +-- @stringchar Alex regex only allows backslashes in two places: escape codes
    
    130
    +-- and string gaps.
    
    131
    +--
    
    132
    +--   * If the next character is a space, it has to be the start of a string gap
    
    133
    +--     AND it must end, since the @gap Alex regex will only match if it ends.
    
    134
    +--     Collapse the gap and continue the main iteration loop.
    
    135
    +--
    
    136
    +--   * Otherwise, this is an escape code. If it's an escape code, there are
    
    137
    +--     ONLY three possibilities (see the @escape Alex regex):
    
    138
    +--       1. The escape code is "\\"
    
    139
    +--       2. The escape code is "\^\"
    
    140
    +--       3. The escape code does not have a backslash, other than the initial
    
    141
    +--          backslash
    
    142
    +--
    
    143
    +--     In the first two possibilities, just skip them and continue the main
    
    144
    +--     iteration loop ("skip" as in "keep in the list as-is"). In the last one,
    
    145
    +--     we can just skip the backslash, then continue the main iteration loop.
    
    146
    +--     the rest of the escape code will be skipped as normal characters in the
    
    147
    +--     string; no need to fully parse a proper escape code.
    
    125 148
     collapseGaps :: HasChar c => [c] -> [c]
    
    126 149
     collapseGaps = go
    
    127 150
       where
    
    128 151
         go = \case
    
    129
    -      c1@(Char '\\') : c2@(Char c) : cs
    
    130
    -        -- #25784: string gaps are semantically equivalent to "\&"
    
    152
    +      -- Match the start of a string gap + drop gap
    
    153
    +      -- #25784: string gaps are semantically equivalent to "\&"
    
    154
    +      c1@(Char '\\') : Char c : cs
    
    131 155
             | is_space c -> c1 : setChar '&' c1 : go (dropGap cs)
    
    132
    -        | otherwise  -> c1 : c2 : go cs
    
    156
    +      -- Match all possible escape characters that include a backslash
    
    157
    +      c1@(Char '\\') : c2@(Char '\\') : cs
    
    158
    +        -> c1 : c2 : go cs
    
    159
    +      c1@(Char '\\') : c2@(Char '^') : c3@(Char '\\') : cs
    
    160
    +        -> c1 : c2 : c3 : go cs
    
    161
    +      -- Otherwise, just keep looping
    
    133 162
           c : cs -> c : go cs
    
    134 163
           [] -> []
    
    135 164
     
    
    136 165
         dropGap = \case
    
    137 166
           Char '\\' : cs -> cs
    
    138 167
           _ : cs -> dropGap cs
    
    168
    +      -- Unreachable since gaps must end; see docstring
    
    139 169
           [] -> panic "gap unexpectedly ended"
    
    140 170
     
    
    141 171
     resolveEscapes :: HasChar c => [c] -> Either (c, LexErr) [c]
    

  • testsuite/tests/parser/should_run/T25937.hs
    1
    +main :: IO ()
    
    2
    +main = print "\^\ "

  • testsuite/tests/parser/should_run/T25937.stdout
    1
    +"\FS "

  • testsuite/tests/parser/should_run/all.T
    1
    +test('parser_unit_tests',
    
    2
    +     normal,
    
    3
    +     compile_and_run,
    
    4
    +     ['-package ghc'])
    
    5
    +
    
    1 6
     test('readRun001', normal, compile_and_run, [''])
    
    2 7
     test('readRun002', normal, compile_and_run, [''])
    
    3 8
     test('readRun003', normal, compile_and_run, [''])
    
    ... ... @@ -21,6 +26,7 @@ test('RecordDotSyntax3', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compil
    21 26
     test('RecordDotSyntax4', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compile_and_run, ['RecordDotSyntax4', ''])
    
    22 27
     test('RecordDotSyntax5', normal, compile_and_run, [''])
    
    23 28
     test('ListTuplePunsConstraints', extra_files(['ListTuplePunsConstraints.hs']), ghci_script, ['ListTuplePunsConstraints.script'])
    
    29
    +test('T25937', normal, compile_and_run, [''])
    
    24 30
     
    
    25 31
     # Multiline strings
    
    26 32
     test('MultilineStrings', normal, compile_and_run, [''])
    

  • testsuite/tests/parser/should_run/parser_unit_tests.hs
    1
    +import GHC.Data.StringBuffer (stringToStringBuffer)
    
    2
    +import qualified GHC.Data.StringBuffer as StringBuffer (StringBuffer (..))
    
    3
    +import GHC.Parser.String (lexString, lexMultilineString)
    
    4
    +
    
    5
    +import qualified Control.Exception as E
    
    6
    +import Control.Monad (forM_, unless)
    
    7
    +
    
    8
    +main :: IO ()
    
    9
    +main = do
    
    10
    +  forM_ tests $ \(testName, test) -> do
    
    11
    +    result <- E.try test
    
    12
    +    case result of
    
    13
    +      Right () -> pure ()
    
    14
    +      Left (e :: E.SomeException)
    
    15
    +        | Just e' <- E.asyncExceptionFromException e -> do
    
    16
    +            E.throwIO (e' :: E.AsyncException)
    
    17
    +        | otherwise -> do
    
    18
    +            putStrLn $ ">>> FAIL: " ++ testName
    
    19
    +            putStrLn $ E.displayException e
    
    20
    +
    
    21
    +{----- Test infrastructure -----}
    
    22
    +
    
    23
    +data TestFailure = TestFailure String
    
    24
    +  deriving (Show)
    
    25
    +
    
    26
    +instance E.Exception TestFailure where
    
    27
    +  displayException (TestFailure msg) = "Test failure:\n" ++ msg
    
    28
    +
    
    29
    +testFailure :: String -> IO a
    
    30
    +testFailure = E.throwIO . TestFailure
    
    31
    +
    
    32
    +shouldBe :: (Eq a, Show a) => a -> a -> IO ()
    
    33
    +shouldBe actual expected =
    
    34
    +  unless (actual == expected) $
    
    35
    +    testFailure $
    
    36
    +      "Got:      " ++ show actual ++ "\n" ++
    
    37
    +      "Expected: " ++ show expected
    
    38
    +
    
    39
    +type TestCase = (String, IO ())
    
    40
    +
    
    41
    +testCase :: String -> IO () -> TestCase
    
    42
    +testCase = (,)
    
    43
    +
    
    44
    +{----- Tests -----}
    
    45
    +
    
    46
    +tests :: [TestCase]
    
    47
    +tests = concat
    
    48
    +  [ stringTests
    
    49
    +  ]
    
    50
    +
    
    51
    +-- | Unit tests for GHC.Parser.String
    
    52
    +stringTests :: [TestCase]
    
    53
    +stringTests = concat
    
    54
    +  [ escapedBackslashTests
    
    55
    +  ]
    
    56
    +  where
    
    57
    +    withBuf f s = let buf = stringToStringBuffer s in f (StringBuffer.len buf) buf
    
    58
    +
    
    59
    +    -- Test all situations where backslashes can appear in escape characters (#25937)
    
    60
    +    escapedBackslashTests =
    
    61
    +      [ testCase label $ do
    
    62
    +          withBuf lexStr input `shouldBe` Right output
    
    63
    +      | (lexLabel, lexStr) <- [("strings", lexString), ("multiline strings", lexMultilineString)]
    
    64
    +      , (label, input, output) <-
    
    65
    +          [ ( "escaped backslashes in " ++ lexLabel ++ " not mistaken for string gaps"
    
    66
    +            , [' ', '\\', '\\', ' ', '\\', '\\', ' ']
    
    67
    +            , " \\ \\ "
    
    68
    +            )
    
    69
    +          , ( "escaped \\FS in " ++ lexLabel ++ " not mistaken for beginning of string gap"
    
    70
    +            , ['\\', '^', '\\']
    
    71
    +            , "\FS"
    
    72
    +            )
    
    73
    +          , ( "escaped \\FS in " ++ lexLabel ++ " not mistaken for unterminated string gap"
    
    74
    +            , ['\\', '^', '\\', ' ']
    
    75
    +            , "\FS "
    
    76
    +            )
    
    77
    +          , ( "escaped \\FS in " ++ lexLabel ++ " does not collapse mistaken string gap"
    
    78
    +            , ['\\', '^', '\\', ' ', '\\', 'n']
    
    79
    +            , "\FS \n"
    
    80
    +            )
    
    81
    +          ]
    
    82
    +      ]