Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC
Commits:
-
6467d61e
by Brandon Chinn at 2025-04-29T18:36:03-04:00
-
99868a86
by Jens Petersen at 2025-04-29T18:36:44-04:00
-
840d64c3
by Zubin Duggal at 2025-04-29T19:09:31-04:00
-
3787f533
by Ben Gamari at 2025-04-29T19:09:31-04:00
11 changed files:
- compiler/GHC/Parser/Errors/Types.hs
- compiler/GHC/Parser/String.hs
- compiler/GHC/Unit/Types.hs
- compiler/Language/Haskell/Syntax/Module/Name.hs
- hadrian/README.md
- hadrian/hadrian.cabal
- m4/fp_setup_windows_toolchain.m4
- + testsuite/tests/parser/should_run/T25937.hs
- + testsuite/tests/parser/should_run/T25937.stdout
- testsuite/tests/parser/should_run/all.T
- + testsuite/tests/parser/should_run/parser_unit_tests.hs
Changes:
| ... | ... | @@ -611,6 +611,7 @@ data LexErr |
| 611 | 611 | | LexUnterminatedComment -- ^ Unterminated `{-'
|
| 612 | 612 | | LexUnterminatedOptions -- ^ Unterminated OPTIONS pragma
|
| 613 | 613 | | LexUnterminatedQQ -- ^ Unterminated quasiquotation
|
| 614 | + deriving (Show,Eq,Ord)
|
|
| 614 | 615 | |
| 615 | 616 | -- | Errors from the Cmm parser
|
| 616 | 617 | data CmmParserError
|
| ... | ... | @@ -36,6 +36,7 @@ import GHC.Utils.Panic (panic) |
| 36 | 36 | |
| 37 | 37 | type BufPos = Int
|
| 38 | 38 | data StringLexError = StringLexError LexErr BufPos
|
| 39 | + deriving (Show, Eq)
|
|
| 39 | 40 | |
| 40 | 41 | lexString :: Int -> StringBuffer -> Either StringLexError String
|
| 41 | 42 | lexString = lexStringWith processChars processChars
|
| ... | ... | @@ -122,20 +123,49 @@ bufferLocatedChars initialBuf len = go initialBuf |
| 122 | 123 | -- -----------------------------------------------------------------------------
|
| 123 | 124 | -- Lexing phases
|
| 124 | 125 | |
| 126 | +-- | Collapse all string gaps in the given input.
|
|
| 127 | +--
|
|
| 128 | +-- Iterates through the input in `go` until we encounter a backslash. The
|
|
| 129 | +-- @stringchar Alex regex only allows backslashes in two places: escape codes
|
|
| 130 | +-- and string gaps.
|
|
| 131 | +--
|
|
| 132 | +-- * If the next character is a space, it has to be the start of a string gap
|
|
| 133 | +-- AND it must end, since the @gap Alex regex will only match if it ends.
|
|
| 134 | +-- Collapse the gap and continue the main iteration loop.
|
|
| 135 | +--
|
|
| 136 | +-- * Otherwise, this is an escape code. If it's an escape code, there are
|
|
| 137 | +-- ONLY three possibilities (see the @escape Alex regex):
|
|
| 138 | +-- 1. The escape code is "\\"
|
|
| 139 | +-- 2. The escape code is "\^\"
|
|
| 140 | +-- 3. The escape code does not have a backslash, other than the initial
|
|
| 141 | +-- backslash
|
|
| 142 | +--
|
|
| 143 | +-- In the first two possibilities, just skip them and continue the main
|
|
| 144 | +-- iteration loop ("skip" as in "keep in the list as-is"). In the last one,
|
|
| 145 | +-- we can just skip the backslash, then continue the main iteration loop.
|
|
| 146 | +-- the rest of the escape code will be skipped as normal characters in the
|
|
| 147 | +-- string; no need to fully parse a proper escape code.
|
|
| 125 | 148 | collapseGaps :: HasChar c => [c] -> [c]
|
| 126 | 149 | collapseGaps = go
|
| 127 | 150 | where
|
| 128 | 151 | go = \case
|
| 129 | - c1@(Char '\\') : c2@(Char c) : cs
|
|
| 130 | - -- #25784: string gaps are semantically equivalent to "\&"
|
|
| 152 | + -- Match the start of a string gap + drop gap
|
|
| 153 | + -- #25784: string gaps are semantically equivalent to "\&"
|
|
| 154 | + c1@(Char '\\') : Char c : cs
|
|
| 131 | 155 | | is_space c -> c1 : setChar '&' c1 : go (dropGap cs)
|
| 132 | - | otherwise -> c1 : c2 : go cs
|
|
| 156 | + -- Match all possible escape characters that include a backslash
|
|
| 157 | + c1@(Char '\\') : c2@(Char '\\') : cs
|
|
| 158 | + -> c1 : c2 : go cs
|
|
| 159 | + c1@(Char '\\') : c2@(Char '^') : c3@(Char '\\') : cs
|
|
| 160 | + -> c1 : c2 : c3 : go cs
|
|
| 161 | + -- Otherwise, just keep looping
|
|
| 133 | 162 | c : cs -> c : go cs
|
| 134 | 163 | [] -> []
|
| 135 | 164 | |
| 136 | 165 | dropGap = \case
|
| 137 | 166 | Char '\\' : cs -> cs
|
| 138 | 167 | _ : cs -> dropGap cs
|
| 168 | + -- Unreachable since gaps must end; see docstring
|
|
| 139 | 169 | [] -> panic "gap unexpectedly ended"
|
| 140 | 170 | |
| 141 | 171 | resolveEscapes :: HasChar c => [c] -> Either (c, LexErr) [c]
|
| 1 | -{-# OPTIONS_GHC -Wno-orphans #-} -- instance Data ModuleName
|
|
| 2 | - |
|
| 3 | 1 | {-# LANGUAGE FlexibleInstances #-}
|
| 4 | 2 | {-# LANGUAGE DeriveDataTypeable #-}
|
| 5 | 3 | {-# LANGUAGE DeriveTraversable #-}
|
| ... | ... | @@ -117,13 +115,6 @@ data GenModule unit = Module |
| 117 | 115 | }
|
| 118 | 116 | deriving (Eq,Ord,Data,Functor)
|
| 119 | 117 | |
| 120 | --- TODO: should be moved back into Language.Haskell.Syntax.Module.Name
|
|
| 121 | -instance Data ModuleName where
|
|
| 122 | - -- don't traverse?
|
|
| 123 | - toConstr _ = abstractConstr "ModuleName"
|
|
| 124 | - gunfold _ _ = error "gunfold"
|
|
| 125 | - dataTypeOf _ = mkNoRepType "ModuleName"
|
|
| 126 | - |
|
| 127 | 118 | -- | A Module is a pair of a 'Unit' and a 'ModuleName'.
|
| 128 | 119 | type Module = GenModule Unit
|
| 129 | 120 |
| ... | ... | @@ -3,6 +3,7 @@ module Language.Haskell.Syntax.Module.Name where |
| 3 | 3 | import Prelude
|
| 4 | 4 | |
| 5 | 5 | import Data.Char (isAlphaNum)
|
| 6 | +import Data.Data
|
|
| 6 | 7 | import Control.DeepSeq
|
| 7 | 8 | import qualified Text.ParserCombinators.ReadP as Parse
|
| 8 | 9 | import System.FilePath
|
| ... | ... | @@ -12,6 +13,14 @@ import GHC.Data.FastString |
| 12 | 13 | -- | A ModuleName is essentially a simple string, e.g. @Data.List@.
|
| 13 | 14 | newtype ModuleName = ModuleName FastString deriving (Show, Eq)
|
| 14 | 15 | |
| 16 | +instance Data ModuleName where
|
|
| 17 | + -- don't traverse?
|
|
| 18 | + toConstr x = constr
|
|
| 19 | + where
|
|
| 20 | + constr = mkConstr (dataTypeOf x) "{abstract:ModuleName}" [] Prefix
|
|
| 21 | + gunfold _ _ = error "gunfold"
|
|
| 22 | + dataTypeOf _ = mkNoRepType "ModuleName"
|
|
| 23 | + |
|
| 15 | 24 | instance Ord ModuleName where
|
| 16 | 25 | nm1 `compare` nm2 = stableModuleNameCmp nm1 nm2
|
| 17 | 26 |
| ... | ... | @@ -226,8 +226,8 @@ tested in CI. If you use an untested flavour such as "Quick" then you run the |
| 226 | 226 | risk that not all tests will pass. In particular you can rely on the `validate`
|
| 227 | 227 | and `perf` flavours being tested but no others.
|
| 228 | 228 | |
| 229 | -`build selftest` runs tests of the build system. The current test coverage
|
|
| 230 | -is close to zero (see [#197][test-issue]).
|
|
| 229 | +`build selftest` (no longer enabled by default) runs tests of the build system.
|
|
| 230 | +The current test coverage is close to zero (see [#197][test-issue]).
|
|
| 231 | 231 | |
| 232 | 232 | #### Running linters
|
| 233 | 233 |
| ... | ... | @@ -27,7 +27,7 @@ flag threaded |
| 27 | 27 | -- See also #21913
|
| 28 | 28 | flag selftest
|
| 29 | 29 | manual: True
|
| 30 | - default: True
|
|
| 30 | + default: False
|
|
| 31 | 31 | description: Enables the hadrian selftest rules which require
|
| 32 | 32 | QuickCheck. Disabling it thus saves on a few dependencies
|
| 33 | 33 | which can be problematic when bootstrapping.
|
| ... | ... | @@ -17,12 +17,13 @@ AC_DEFUN([FP_INSTALL_WINDOWS_TOOLCHAIN],[ |
| 17 | 17 | else
|
| 18 | 18 | action="download"
|
| 19 | 19 | fi
|
| 20 | - $PYTHON mk/get-win32-tarballs.py $action $mingw_arch > missing-win32-tarballs
|
|
| 20 | + $PYTHON mk/get-win32-tarballs.py $action $mingw_arch
|
|
| 21 | 21 | case $? in
|
| 22 | 22 | 0)
|
| 23 | 23 | rm missing-win32-tarballs
|
| 24 | 24 | ;;
|
| 25 | 25 | 2)
|
| 26 | + $PYTHON mk/get-win32-tarballs.py list $mingw_arch > missing-win32-tarballs
|
|
| 26 | 27 | echo
|
| 27 | 28 | echo "Error:"
|
| 28 | 29 | echo "Needed msys2 tarballs are missing. You have a few options to get them,"
|
| 1 | +main :: IO ()
|
|
| 2 | +main = print "\^\ " |
| 1 | +"\FS " |
| 1 | +test('parser_unit_tests',
|
|
| 2 | + normal,
|
|
| 3 | + compile_and_run,
|
|
| 4 | + ['-package ghc'])
|
|
| 5 | + |
|
| 1 | 6 | test('readRun001', normal, compile_and_run, [''])
|
| 2 | 7 | test('readRun002', normal, compile_and_run, [''])
|
| 3 | 8 | test('readRun003', normal, compile_and_run, [''])
|
| ... | ... | @@ -21,6 +26,7 @@ test('RecordDotSyntax3', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compil |
| 21 | 26 | test('RecordDotSyntax4', [extra_files(['RecordDotSyntaxA.hs'])], multimod_compile_and_run, ['RecordDotSyntax4', ''])
|
| 22 | 27 | test('RecordDotSyntax5', normal, compile_and_run, [''])
|
| 23 | 28 | test('ListTuplePunsConstraints', extra_files(['ListTuplePunsConstraints.hs']), ghci_script, ['ListTuplePunsConstraints.script'])
|
| 29 | +test('T25937', normal, compile_and_run, [''])
|
|
| 24 | 30 | |
| 25 | 31 | # Multiline strings
|
| 26 | 32 | test('MultilineStrings', normal, compile_and_run, [''])
|
| 1 | +import GHC.Data.StringBuffer (stringToStringBuffer)
|
|
| 2 | +import qualified GHC.Data.StringBuffer as StringBuffer (StringBuffer (..))
|
|
| 3 | +import GHC.Parser.String (lexString, lexMultilineString)
|
|
| 4 | + |
|
| 5 | +import qualified Control.Exception as E
|
|
| 6 | +import Control.Monad (forM_, unless)
|
|
| 7 | + |
|
| 8 | +main :: IO ()
|
|
| 9 | +main = do
|
|
| 10 | + forM_ tests $ \(testName, test) -> do
|
|
| 11 | + result <- E.try test
|
|
| 12 | + case result of
|
|
| 13 | + Right () -> pure ()
|
|
| 14 | + Left (e :: E.SomeException)
|
|
| 15 | + | Just e' <- E.asyncExceptionFromException e -> do
|
|
| 16 | + E.throwIO (e' :: E.AsyncException)
|
|
| 17 | + | otherwise -> do
|
|
| 18 | + putStrLn $ ">>> FAIL: " ++ testName
|
|
| 19 | + putStrLn $ E.displayException e
|
|
| 20 | + |
|
| 21 | +{----- Test infrastructure -----}
|
|
| 22 | + |
|
| 23 | +data TestFailure = TestFailure String
|
|
| 24 | + deriving (Show)
|
|
| 25 | + |
|
| 26 | +instance E.Exception TestFailure where
|
|
| 27 | + displayException (TestFailure msg) = "Test failure:\n" ++ msg
|
|
| 28 | + |
|
| 29 | +testFailure :: String -> IO a
|
|
| 30 | +testFailure = E.throwIO . TestFailure
|
|
| 31 | + |
|
| 32 | +shouldBe :: (Eq a, Show a) => a -> a -> IO ()
|
|
| 33 | +shouldBe actual expected =
|
|
| 34 | + unless (actual == expected) $
|
|
| 35 | + testFailure $
|
|
| 36 | + "Got: " ++ show actual ++ "\n" ++
|
|
| 37 | + "Expected: " ++ show expected
|
|
| 38 | + |
|
| 39 | +type TestCase = (String, IO ())
|
|
| 40 | + |
|
| 41 | +testCase :: String -> IO () -> TestCase
|
|
| 42 | +testCase = (,)
|
|
| 43 | + |
|
| 44 | +{----- Tests -----}
|
|
| 45 | + |
|
| 46 | +tests :: [TestCase]
|
|
| 47 | +tests = concat
|
|
| 48 | + [ stringTests
|
|
| 49 | + ]
|
|
| 50 | + |
|
| 51 | +-- | Unit tests for GHC.Parser.String
|
|
| 52 | +stringTests :: [TestCase]
|
|
| 53 | +stringTests = concat
|
|
| 54 | + [ escapedBackslashTests
|
|
| 55 | + ]
|
|
| 56 | + where
|
|
| 57 | + withBuf f s = let buf = stringToStringBuffer s in f (StringBuffer.len buf) buf
|
|
| 58 | + |
|
| 59 | + -- Test all situations where backslashes can appear in escape characters (#25937)
|
|
| 60 | + escapedBackslashTests =
|
|
| 61 | + [ testCase label $ do
|
|
| 62 | + withBuf lexStr input `shouldBe` Right output
|
|
| 63 | + | (lexLabel, lexStr) <- [("strings", lexString), ("multiline strings", lexMultilineString)]
|
|
| 64 | + , (label, input, output) <-
|
|
| 65 | + [ ( "escaped backslashes in " ++ lexLabel ++ " not mistaken for string gaps"
|
|
| 66 | + , [' ', '\\', '\\', ' ', '\\', '\\', ' ']
|
|
| 67 | + , " \\ \\ "
|
|
| 68 | + )
|
|
| 69 | + , ( "escaped \\FS in " ++ lexLabel ++ " not mistaken for beginning of string gap"
|
|
| 70 | + , ['\\', '^', '\\']
|
|
| 71 | + , "\FS"
|
|
| 72 | + )
|
|
| 73 | + , ( "escaped \\FS in " ++ lexLabel ++ " not mistaken for unterminated string gap"
|
|
| 74 | + , ['\\', '^', '\\', ' ']
|
|
| 75 | + , "\FS "
|
|
| 76 | + )
|
|
| 77 | + , ( "escaped \\FS in " ++ lexLabel ++ " does not collapse mistaken string gap"
|
|
| 78 | + , ['\\', '^', '\\', ' ', '\\', 'n']
|
|
| 79 | + , "\FS \n"
|
|
| 80 | + )
|
|
| 81 | + ]
|
|
| 82 | + ] |