|
|
1
|
+{-# LANGUAGE LambdaCase #-}
|
|
1
|
2
|
{-# LANGUAGE OverloadedStrings #-}
|
|
2
|
3
|
{-# LANGUAGE ViewPatterns #-}
|
|
3
|
4
|
|
| ... |
... |
@@ -28,6 +29,7 @@ import Control.Applicative |
|
28
|
29
|
import Control.Arrow (first)
|
|
29
|
30
|
import Control.Monad
|
|
30
|
31
|
import Data.Char (chr, isAlpha, isSpace, isUpper)
|
|
|
32
|
+import Data.Functor (($>))
|
|
31
|
33
|
import Data.List (elemIndex, intercalate, intersperse, unfoldr)
|
|
32
|
34
|
import Data.Maybe (fromMaybe, mapMaybe)
|
|
33
|
35
|
import Data.Monoid
|
| ... |
... |
@@ -186,11 +188,29 @@ specialChar = "_/<@\"&'`#[ " |
|
186
|
188
|
-- to ensure that we have already given a chance to more meaningful parsers
|
|
187
|
189
|
-- before capturing their characters.
|
|
188
|
190
|
string' :: Parser (DocH mod a)
|
|
189
|
|
-string' = DocString . unescape . T.unpack <$> takeWhile1_ (`notElem` specialChar)
|
|
|
191
|
+string' =
|
|
|
192
|
+ DocString
|
|
|
193
|
+ <$> ((:) <$> rawOrEscChar "" <*> many (rawOrEscChar "(["))
|
|
|
194
|
+ -- After the first character, stop for @\(@ or @\[@ math starters. (The
|
|
|
195
|
+ -- first character won't start a valid math string because this parser
|
|
|
196
|
+ -- should follow math parsers. But this parser is expected to accept at
|
|
|
197
|
+ -- least one character from all inputs that don't start with special
|
|
|
198
|
+ -- characters, so the first character parser can't have the @"(["@
|
|
|
199
|
+ -- restriction.)
|
|
190
|
200
|
where
|
|
191
|
|
- unescape "" = ""
|
|
192
|
|
- unescape ('\\' : x : xs) = x : unescape xs
|
|
193
|
|
- unescape (x : xs) = x : unescape xs
|
|
|
201
|
+ -- | Parse a single logical character, either raw or escaped. Don't accept
|
|
|
202
|
+ -- escaped characters from the argument string.
|
|
|
203
|
+ rawOrEscChar :: [Char] -> Parser Char
|
|
|
204
|
+ rawOrEscChar restrictedEscapes = try $ Parsec.noneOf specialChar >>= \case
|
|
|
205
|
+ -- Handle backslashes:
|
|
|
206
|
+ -- - Fail on forbidden escape characters.
|
|
|
207
|
+ -- - Non-forbidden characters: simply unescape, e.g. parse "\b" as 'b',
|
|
|
208
|
+ -- - Trailing backslash: treat it as a raw backslash, not an escape
|
|
|
209
|
+ -- sequence. (This is the logic that this parser followed when this
|
|
|
210
|
+ -- comment was written; it is not necessarily intentional but now I
|
|
|
211
|
+ -- don't want to break anything relying on it.)
|
|
|
212
|
+ '\\' -> Parsec.noneOf restrictedEscapes <|> Parsec.eof $> '\\'
|
|
|
213
|
+ c -> pure c
|
|
194
|
214
|
|
|
195
|
215
|
-- | Skips a single special character and treats it as a plain string.
|
|
196
|
216
|
-- This is done to skip over any special characters belonging to other
|