
Ok... I'm doing a lot of testing with monadic parsers. My current test
consists on a Haskell program that parses a C++ source file and:
- Separates individual lines
- Removes comments
- Parses quoted literal strings and characters
- Removes indentation and trailing blanks
- Remembers the original file position of each portion of the result.
The problem is that my test program runs out of stack (not heap) on
both GHC and Hugs when parsing a large-enough file (about 150K for GHC, a
lot less for Hugs). And, the real weird part is that it does so after
printing most of the output. I don't know, I can't claim to understand the
inner workings of GHC/Hugs, but it sounds to me that, in order to have a
stack overflow, nothing should have been printed yet. It's as if, say, in
order to parse the next line, the stack has to contain the complete file up
to the point that's being parsed.
So... any suggestions on what can be wrong? I'm posting a bit of the
main code (maybe I'm lucky and the problem lies there). I don't want to
force everyone in the list to get an attachment, so if anyone is interested
or would like to help, please, just ask and I'll send the whole thing.
--- This part does the actual parsing. I call it the "quoting resolver"
data UnquotedText q = CODE String | QUOTE q
instance Show q => Show (UnquotedText q) where
show (CODE s) = "CODE <" ++ s ++ ">"
show (QUOTE q) = "QUOTE <" ++ show q ++ ">"
type QuotingResolver q = LayoutTextParser [(FilePos,UnquotedText q)]
resolveQuoting :: QuotingResolver q -> (FilePos,String) ->
[(FilePos,UnquotedText q)]
resolveQuoting r s = rq s
where rq s = case applyInputParser r s of
[] -> []
((result,ns):_) -> result ++ rq ns
quotingResolver :: LayoutTextParser (LayoutTextParser (Maybe (UnquotedText
q)),FilePos) -> LayoutTextParser () -> QuotingResolver q
quotingResolver startQuote skipBlanks = do
(_,_,col) <- getFilePos
if col == 1
then skipBlanks
else return ()
filePosBeforeQuote <- getFilePos
(textBeforeQuote,endQuote) <- manynot extractItem (skipBlanks >>
startQuote)
tlist <- case textBeforeQuote of
"" -> return []
_ -> return [(filePosBeforeQuote,CODE textBeforeQuote)]
if isNothing endQuote
then case tlist of
[] -> mzero
_ -> return tlist
else do
Just (endQuoteParser,quotefilePos) <- return endQuote
quote <- endQuoteParser
if isNothing quote
then return tlist
else do
Just textInQuote <- return quote
qlist <- case textInQuote of
(CODE "") -> return []
_ -> return [(quotefilePos,textInQuote)]
return (tlist ++ qlist)
--- This part does the quoting of a C++ file
data CppQuotedText = STRING String | CHAR Char
instance Show CppQuotedText where
show (STRING s) = "STRING \"" ++ s ++ "\""
show (CHAR s) = "CHAR '" ++ [s] ++ "'"
type CppUnquotedText = UnquotedText CppQuotedText
cppSingleLineComment :: LayoutTextParser (Maybe CppUnquotedText)
cppMultiLineComment :: LayoutTextParser (Maybe CppUnquotedText)
cppSingleQuote :: LayoutTextParser (Maybe CppUnquotedText)
cppDoubleQuote :: LayoutTextParser (Maybe CppUnquotedText)
cppSingleLineComment = do { (_ ,mb) <- manynot
extractChar (char '\n'); if isNothing mb then return Nothing else
return (Just (CODE "" )) }
cppMultiLineComment = do { (_ ,mb) <- manynot extractChar (string
"*/"); if isNothing mb then return Nothing else return (Just (CODE "" )) }
cppSingleQuote = do { c <- literalCharacterParserRest; return
(Just (QUOTE (CHAR c))) }
cppDoubleQuote = do { s <- literalStringParserRest; return
(Just (QUOTE (STRING s))) }
cppQuotingStart :: LayoutTextParser (LayoutTextParser (Maybe
CppUnquotedText),FilePos)
cppQuotingStart = do
filePos <- getFilePos
result <- (do { char '\n'; return (return (Just (CODE ""))) } +++
do { string "//"; return cppSingleLineComment } +++
do { string "/*"; return cppMultiLineComment } +++
do { char '\''; return cppSingleQuote } +++
do { char '"' ; return cppDoubleQuote })
return (result,filePos)
cppQuotingResolver :: QuotingResolver CppQuotedText
cppQuotingResolver = quotingResolver cppQuotingStart skipBlanksNoNewLine
--- This is the main program
doTest s fname = (resolveQuoting cppQuotingResolver ((fname,1,1), s))
printLines [] = return ()
printLines (x:xs) = do print x; printLines xs
main = do
args <- getArgs
putStr "Parser v0.0!\n"
fname <- return (if args == [] then "E:/Ronin/RE3D/3DRenderer.cpp"
else (head args))
s <- readFile fname
printLines (doTest s fname)
---
In my example file, the output begins (in Hugs) with:
---
Main> main
Parser v0.0!
(("E:/Ronin/RE3D/3DRenderer.cpp",6,1),CODE <#include>)
(("E:/Ronin/RE3D/3DRenderer.cpp",6,10),QUOTE