Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC
Commits:
-
6467d61e
by Brandon Chinn at 2025-04-29T18:36:03-04:00
-
99868a86
by Jens Petersen at 2025-04-29T18:36:44-04:00
-
840d64c3
by Zubin Duggal at 2025-04-29T19:09:31-04:00
-
3787f533
by Ben Gamari at 2025-04-29T19:09:31-04:00
11 changed files:
- compiler/GHC/Parser/Errors/Types.hs
- compiler/GHC/Parser/String.hs
- compiler/GHC/Unit/Types.hs
- compiler/Language/Haskell/Syntax/Module/Name.hs
- hadrian/README.md
- hadrian/hadrian.cabal
- m4/fp_setup_windows_toolchain.m4
- + testsuite/tests/parser/should_run/T25937.hs
- + testsuite/tests/parser/should_run/T25937.stdout
- testsuite/tests/parser/should_run/all.T
- + testsuite/tests/parser/should_run/parser_unit_tests.hs
Changes:
... | ... | @@ -611,6 +611,7 @@ data LexErr |
611 | 611 | | LexUnterminatedComment -- ^ Unterminated `{-'
|
612 | 612 | | LexUnterminatedOptions -- ^ Unterminated OPTIONS pragma
|
613 | 613 | | LexUnterminatedQQ -- ^ Unterminated quasiquotation
|
614 | + deriving (Show,Eq,Ord)
|
|
614 | 615 | |
615 | 616 | -- | Errors from the Cmm parser
|
616 | 617 | data CmmParserError
|
... | ... | @@ -36,6 +36,7 @@ import GHC.Utils.Panic (panic) |
36 | 36 | |
37 | 37 | type BufPos = Int
|
38 | 38 | data StringLexError = StringLexError LexErr BufPos
|
39 | + deriving (Show, Eq)
|
|
39 | 40 | |
40 | 41 | lexString :: Int -> StringBuffer -> Either StringLexError String
|
41 | 42 | lexString = lexStringWith processChars processChars
|
... | ... | @@ -122,20 +123,49 @@ bufferLocatedChars initialBuf len = go initialBuf |
122 | 123 | -- -----------------------------------------------------------------------------
|
123 | 124 | -- Lexing phases
|
124 | 125 | |
126 | +-- | Collapse all string gaps in the given input.
|
|
127 | +--
|
|
128 | +-- Iterates through the input in `go` until we encounter a backslash. The
|
|
129 | +-- @stringchar Alex regex only allows backslashes in two places: escape codes
|
|
130 | +-- and string gaps.
|
|
131 | +--
|
|
132 | +-- * If the next character is a space, it has to be the start of a string gap
|
|
133 | +-- AND it must end, since the @gap Alex regex will only match if it ends.
|
|
134 | +-- Collapse the gap and continue the main iteration loop.
|
|
135 | +--
|
|
136 | +-- * Otherwise, this is an escape code. If it's an escape code, there are
|
|
137 | +-- ONLY three possibilities (see the @escape Alex regex):
|
|
138 | +-- 1. The escape code is "\\"
|
|
139 | +-- 2. The escape code is "\^\"
|
|
140 | +-- 3. The escape code does not have a backslash, other than the initial
|
|
141 | +-- backslash
|
|
142 | +--
|
|
143 | +-- In the first two possibilities, just skip them and continue the main
|
|
144 | +-- iteration loop ("skip" as in "keep in the list as-is"). In the last one,
|
|
145 | +-- we can just skip the backslash, then continue the main iteration loop.
|
|
146 | +-- the rest of the escape code will be skipped as normal characters in the
|
|
147 | +-- string; no need to fully parse a proper escape code.
|
|
125 | 148 | collapseGaps :: HasChar c => [c] -> [c]
|
126 | 149 | collapseGaps = go
|
127 | 150 | where
|
128 | 151 | go = \case
|
129 | - c1@(Char '\\') : c2@(Char c) : cs
|
|
130 | - -- #25784: string gaps are semantically equivalent to "\&"
|
|
152 | + -- Match the start of a string gap + drop gap
|
|
153 | + -- #25784: string gaps are semantically equivalent to "\&"
|
|
154 | + c1@(Char '\\') : Char c : cs
|
|
131 | 155 | | is_space c -> c1 : setChar '&' c1 : go (dropGap cs)
|
132 | - | otherwise -> c1 : c2 : go cs
|
|
156 | + -- Match all possible escape characters that include a backslash
|
|
157 | + c1@(Char '\\') : c2@(Char '\\') : cs
|
|
158 | + -> c1 : c2 : go cs
|
|
159 | + c1@(Char '\\') : c2@(Char '^') : c3@(Char '\\') : cs
|
|
160 | + -> c1 : c2 : c3 : go cs
|
|
161 | + -- Otherwise, just keep looping
|
|
133 | 162 | c : cs -> c : go cs
|
134 | 163 | [] -> []
|
135 | 164 | |
136 | 165 | dropGap = \case
|
137 | 166 | Char '\\' : cs -> cs
|
138 | 167 | _ : cs -> dropGap cs
|
168 | + -- Unreachable since gaps must end; see docstring
|
|
139 | 169 | [] -> panic "gap unexpectedly ended"
|
140 | 170 | |
141 | 171 | resolveEscapes :: HasChar c => [c] -> Either (c, LexErr) [c]
|
1 | -{-# OPTIONS_GHC -Wno-orphans #-} -- instance Data ModuleName
|
|
2 | - |
|
3 | 1 | {-# LANGUAGE FlexibleInstances #-}
|
4 | 2 | {-# LANGUAGE DeriveDataTypeable #-}
|
5 | 3 | {-# LANGUAGE DeriveTraversable #-}
|
... | ... | @@ -117,13 +115,6 @@ data GenModule unit = Module |
117 | 115 | }
|
118 | 116 | deriving (Eq,Ord,Data,Functor)
|
119 | 117 | |
120 | --- TODO: should be moved back into Language.Haskell.Syntax.Module.Name
|
|
121 | -instance Data ModuleName where
|
|
122 | - -- don't traverse?
|
|
123 | - toConstr _ = abstractConstr "ModuleName"
|
|
124 | - gunfold _ _ = error "gunfold"
|
|
125 | - dataTypeOf _ = mkNoRepType "ModuleName"
|
|
126 | - |
|
127 | 118 | -- | A Module is a pair of a 'Unit' and a 'ModuleName'.
|
128 | 119 | type Module = GenModule Unit
|
129 | 120 |
... | ... | @@ -3,6 +3,7 @@ module Language.Haskell.Syntax.Module.Name where |
3 | 3 | import Prelude
|
4 | 4 | |
5 | 5 | import Data.Char (isAlphaNum)
|
6 | +import Data.Data
|
|
6 | 7 | import Control.DeepSeq
|
7 | 8 | import qualified Text.ParserCombinators.ReadP as Parse
|
8 | 9 | import System.FilePath
|
... | ... | @@ -12,6 +13,14 @@ import GHC.Data.FastString |
12 | 13 | -- | A ModuleName is essentially a simple string, e.g. @Data.List@.
|
13 | 14 | newtype ModuleName = ModuleName FastString deriving (Show, Eq)
|
14 | 15 | |
16 | +instance Data ModuleName where
|
|
17 | + -- don't traverse?
|
|
18 | + toConstr x = constr
|
|
19 | + where
|
|
20 | + constr = mkConstr (dataTypeOf x) "{abstract:ModuleName}" [] Prefix
|
|
21 | + gunfold _ _ = error "gunfold"
|
|
22 | + dataTypeOf _ = mkNoRepType "ModuleName"
|
|
23 | + |
|
15 | 24 | instance Ord ModuleName where
|
16 | 25 | nm1 `compare` nm2 = stableModuleNameCmp nm1 nm2
|
17 | 26 |
... | ... | @@ -226,8 +226,8 @@ tested in CI. If you use an untested flavour such as "Quick" then you run the |
226 | 226 | risk that not all tests will pass. In particular you can rely on the `validate`
|
227 | 227 | and `perf` flavours being tested but no others.
|
228 | 228 | |
229 | -`build selftest` runs tests of the build system. The current test coverage
|
|
230 | -is close to zero (see [#197][test-issue]).
|
|
229 | +`build selftest` (no longer enabled by default) runs tests of the build system.
|
|
230 | +The current test coverage is close to zero (see [#197][test-issue]).
|
|
231 | 231 | |
232 | 232 | #### Running linters
|
233 | 233 |
... | ... | @@ -27,7 +27,7 @@ flag threaded |
27 | 27 | -- See also #21913
|
28 | 28 | flag selftest
|
29 | 29 | manual: True
|
30 | - default: True
|
|
30 | + default: False
|
|
31 | 31 | description: Enables the hadrian selftest rules which require
|
32 | 32 | QuickCheck. Disabling it thus saves on a few dependencies
|
33 | 33 | which can be problematic when bootstrapping.
|
... | ... | @@ -17,12 +17,13 @@ AC_DEFUN([FP_INSTALL_WINDOWS_TOOLCHAIN],[ |
17 | 17 | else
|
18 | 18 | action="download"
|
19 | 19 | fi
|
20 | - $PYTHON mk/get-win32-tarballs.py $action $mingw_arch > missing-win32-tarballs
|
|
20 | + $PYTHON mk/get-win32-tarballs.py $action $mingw_arch
|
|
21 | 21 | case $? in
|
22 | 22 | 0)
|
23 | 23 | rm missing-win32-tarballs
|
24 | 24 | ;;
|
25 | 25 | 2)
|
26 | + $PYTHON mk/get-win32-tarballs.py list $mingw_arch > missing-win32-tarballs
|
|
26 | 27 | echo
|
27 | 28 | echo "Error:"
|
28 | 29 | echo "Needed msys2 tarballs are missing. You have a few options to get them,"
|
1 | +main :: IO ()
|
|
2 | +main = print "\^\ " |
1 | +"\FS " |
1 | +test('parser_unit_tests',
|
|
2 | + normal,
|
|
3 | + compile_and_run,
|
|
4 | + ['-package ghc'])
|
|
5 | + |
|
1 | 6 | test('readRun001', normal, compile_and_run, [''])
|
2 | 7 | test('readRun002', normal, compile_and_run, [''])
|
3 | 8 | test('readRun003', normal, compile_and_run, [''])
|
... | ... | @@ -21,6 +26,7 @@ test('RecordDotSyntax3', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compil |
21 | 26 | test('RecordDotSyntax4', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compile_and_run, ['RecordDotSyntax4', ''])
|
22 | 27 | test('RecordDotSyntax5', normal, compile_and_run, [''])
|
23 | 28 | test('ListTuplePunsConstraints', extra_files(['ListTuplePunsConstraints.hs']), ghci_script, ['ListTuplePunsConstraints.script'])
|
29 | +test('T25937', normal, compile_and_run, [''])
|
|
24 | 30 | |
25 | 31 | # Multiline strings
|
26 | 32 | test('MultilineStrings', normal, compile_and_run, [''])
|
1 | +import GHC.Data.StringBuffer (stringToStringBuffer)
|
|
2 | +import qualified GHC.Data.StringBuffer as StringBuffer (StringBuffer (..))
|
|
3 | +import GHC.Parser.String (lexString, lexMultilineString)
|
|
4 | + |
|
5 | +import qualified Control.Exception as E
|
|
6 | +import Control.Monad (forM_, unless)
|
|
7 | + |
|
8 | +main :: IO ()
|
|
9 | +main = do
|
|
10 | + forM_ tests $ \(testName, test) -> do
|
|
11 | + result <- E.try test
|
|
12 | + case result of
|
|
13 | + Right () -> pure ()
|
|
14 | + Left (e :: E.SomeException)
|
|
15 | + | Just e' <- E.asyncExceptionFromException e -> do
|
|
16 | + E.throwIO (e' :: E.AsyncException)
|
|
17 | + | otherwise -> do
|
|
18 | + putStrLn $ ">>> FAIL: " ++ testName
|
|
19 | + putStrLn $ E.displayException e
|
|
20 | + |
|
21 | +{----- Test infrastructure -----}
|
|
22 | + |
|
23 | +data TestFailure = TestFailure String
|
|
24 | + deriving (Show)
|
|
25 | + |
|
26 | +instance E.Exception TestFailure where
|
|
27 | + displayException (TestFailure msg) = "Test failure:\n" ++ msg
|
|
28 | + |
|
29 | +testFailure :: String -> IO a
|
|
30 | +testFailure = E.throwIO . TestFailure
|
|
31 | + |
|
32 | +shouldBe :: (Eq a, Show a) => a -> a -> IO ()
|
|
33 | +shouldBe actual expected =
|
|
34 | + unless (actual == expected) $
|
|
35 | + testFailure $
|
|
36 | + "Got: " ++ show actual ++ "\n" ++
|
|
37 | + "Expected: " ++ show expected
|
|
38 | + |
|
39 | +type TestCase = (String, IO ())
|
|
40 | + |
|
41 | +testCase :: String -> IO () -> TestCase
|
|
42 | +testCase = (,)
|
|
43 | + |
|
44 | +{----- Tests -----}
|
|
45 | + |
|
46 | +tests :: [TestCase]
|
|
47 | +tests = concat
|
|
48 | + [ stringTests
|
|
49 | + ]
|
|
50 | + |
|
51 | +-- | Unit tests for GHC.Parser.String
|
|
52 | +stringTests :: [TestCase]
|
|
53 | +stringTests = concat
|
|
54 | + [ escapedBackslashTests
|
|
55 | + ]
|
|
56 | + where
|
|
57 | + withBuf f s = let buf = stringToStringBuffer s in f (StringBuffer.len buf) buf
|
|
58 | + |
|
59 | + -- Test all situations where backslashes can appear in escape characters (#25937)
|
|
60 | + escapedBackslashTests =
|
|
61 | + [ testCase label $ do
|
|
62 | + withBuf lexStr input `shouldBe` Right output
|
|
63 | + | (lexLabel, lexStr) <- [("strings", lexString), ("multiline strings", lexMultilineString)]
|
|
64 | + , (label, input, output) <-
|
|
65 | + [ ( "escaped backslashes in " ++ lexLabel ++ " not mistaken for string gaps"
|
|
66 | + , [' ', '\\', '\\', ' ', '\\', '\\', ' ']
|
|
67 | + , " \\ \\ "
|
|
68 | + )
|
|
69 | + , ( "escaped \\FS in " ++ lexLabel ++ " not mistaken for beginning of string gap"
|
|
70 | + , ['\\', '^', '\\']
|
|
71 | + , "\FS"
|
|
72 | + )
|
|
73 | + , ( "escaped \\FS in " ++ lexLabel ++ " not mistaken for unterminated string gap"
|
|
74 | + , ['\\', '^', '\\', ' ']
|
|
75 | + , "\FS "
|
|
76 | + )
|
|
77 | + , ( "escaped \\FS in " ++ lexLabel ++ " does not collapse mistaken string gap"
|
|
78 | + , ['\\', '^', '\\', ' ', '\\', 'n']
|
|
79 | + , "\FS \n"
|
|
80 | + )
|
|
81 | + ]
|
|
82 | + ] |