... |
... |
@@ -59,7 +59,6 @@ module GHC.Parser.Lexer ( |
59
|
59
|
Token(..), lexer, lexerDbg,
|
60
|
60
|
ParserOpts(..), mkParserOpts,
|
61
|
61
|
PState (..), initParserState, initPragState,
|
62
|
|
- PSavedAlrState(..), getAlrState, setAlrState,
|
63
|
62
|
startSkipping, stopSkipping,
|
64
|
63
|
P(..), ParseResult(POk, PFailed),
|
65
|
64
|
allocateComments, allocatePriorComments, allocateFinalComments,
|
... |
... |
@@ -273,7 +272,6 @@ $tab { warnTab } |
273
|
272
|
-- set.
|
274
|
273
|
|
275
|
274
|
"{-" / { isNormalComment } { nested_comment }
|
276
|
|
--- "/*" / { ifExtension GhcCppBit } { cpp_comment }
|
277
|
275
|
|
278
|
276
|
-- Single-line comments are a bit tricky. Haskell 98 says that two or
|
279
|
277
|
-- more dashes followed by a symbol should be parsed as a varsym, so we
|
... |
... |
@@ -1351,6 +1349,7 @@ hopefully_open_brace :: Action p |
1351
|
1349
|
hopefully_open_brace span buf len buf2
|
1352
|
1350
|
= do relaxed <- getBit RelaxedLayoutBit
|
1353
|
1351
|
ctx <- getContext
|
|
1352
|
+ -- See Note [GHC_CPP saved offset]
|
1354
|
1353
|
offset <- getOffset
|
1355
|
1354
|
let
|
1356
|
1355
|
isOK = relaxed ||
|
... |
... |
@@ -1592,15 +1591,6 @@ nested_doc_comment span buf _len _buf2 = {-# SCC "nested_doc_comment" #-} withLe |
1592
|
1591
|
dropTrailingDec "-}" = ""
|
1593
|
1592
|
dropTrailingDec (x:xs) = x:dropTrailingDec xs
|
1594
|
1593
|
|
1595
|
|
-cpp_comment :: Action p
|
1596
|
|
-cpp_comment span buf len _buf2 = {-# SCC "cpp_comment" #-} do
|
1597
|
|
- l <- getLastLocIncludingComments
|
1598
|
|
- let endComment input (L _ comment) = commentEnd lexToken input (Nothing, ITblockComment comment l) buf span
|
1599
|
|
- input <- getInput
|
1600
|
|
- -- Include decorator in comment
|
1601
|
|
- let start_decorator = reverse $ lexemeToString buf len
|
1602
|
|
- cpp_comment_logic endComment start_decorator input span
|
1603
|
|
-
|
1604
|
1594
|
{-# INLINE nested_comment_logic #-}
|
1605
|
1595
|
-- | Includes the trailing '-}' decorators
|
1606
|
1596
|
-- drop the last two elements with the callback if you don't want them to be included
|
... |
... |
@@ -1635,31 +1625,6 @@ nested_comment_logic endComment commentAcc input span = go commentAcc (1::Int) i |
1635
|
1625
|
Just (_,_) -> go ('\n':commentAcc) n input
|
1636
|
1626
|
Just (c,input) -> go (c:commentAcc) n input
|
1637
|
1627
|
|
1638
|
|
-{-# INLINE cpp_comment_logic #-}
|
1639
|
|
--- | Includes the trailing '*/' decorators
|
1640
|
|
--- drop the last two elements with the callback if you don't want them to be included
|
1641
|
|
-cpp_comment_logic
|
1642
|
|
- :: (AlexInput -> Located String -> P p (PsLocated Token)) -- ^ Continuation that gets the rest of the input and the lexed comment
|
1643
|
|
- -> String -- ^ starting value for accumulator (reversed) - When we want to include a decorator '/*' in the comment
|
1644
|
|
- -> AlexInput
|
1645
|
|
- -> PsSpan
|
1646
|
|
- -> P p (PsLocated Token)
|
1647
|
|
-cpp_comment_logic endComment commentAcc input span = go commentAcc (1::Int) input
|
1648
|
|
- where
|
1649
|
|
- go commentAcc 0 input@(AI end_loc _) = do
|
1650
|
|
- let comment = reverse commentAcc
|
1651
|
|
- cspan = mkSrcSpanPs $ mkPsSpan (psSpanStart span) end_loc
|
1652
|
|
- lcomment = L cspan comment
|
1653
|
|
- endComment input lcomment
|
1654
|
|
- go commentAcc n input = case alexGetChar' input of
|
1655
|
|
- Nothing -> errBrace input (psRealSpan span)
|
1656
|
|
- Just ('*',input) -> case alexGetChar' input of
|
1657
|
|
- Nothing -> errBrace input (psRealSpan span)
|
1658
|
|
- Just ('/',input) -> go ('/':'*':commentAcc) (n-1) input -- '/'
|
1659
|
|
- Just (_,_) -> go ('*':commentAcc) n input
|
1660
|
|
- Just (c,input) -> go (c:commentAcc) n input
|
1661
|
|
-
|
1662
|
|
-
|
1663
|
1628
|
ghcCppSet :: P p Bool
|
1664
|
1629
|
ghcCppSet = do
|
1665
|
1630
|
exts <- getExts
|
... |
... |
@@ -1775,6 +1740,7 @@ linePrag span buf len buf2 = do |
1775
|
1740
|
usePosPrags <- getBit UsePosPragsBit
|
1776
|
1741
|
if usePosPrags
|
1777
|
1742
|
then begin line_prag2 span buf len buf2
|
|
1743
|
+ -- TODO:AZ: should we make this test if GhcCpp is active, and maybe do the old
|
1778
|
1744
|
-- else let !src = lexemeToFastString buf len
|
1779
|
1745
|
-- in return (L span (ITline_prag (SourceText src)))
|
1780
|
1746
|
else nested_comment span buf len buf2
|
... |
... |
@@ -2166,6 +2132,7 @@ do_bol span _str _len _buf2 = do |
2166
|
2132
|
-- See Note [Nested comment line pragmas]
|
2167
|
2133
|
b <- getBit InNestedCommentBit
|
2168
|
2134
|
if b then return (L span ITcomment_line_prag) else do
|
|
2135
|
+ -- See Note [GHC_CPP saved offset]
|
2169
|
2136
|
resetOffset
|
2170
|
2137
|
(pos, gen_semic) <- getOffside
|
2171
|
2138
|
case pos of
|
... |
... |
@@ -2216,6 +2183,7 @@ maybe_layout t = do -- If the alternative layout rule is enabled then |
2216
|
2183
|
new_layout_context :: Bool -> Bool -> Token -> Action p
|
2217
|
2184
|
new_layout_context strict gen_semic tok span _buf len _buf2 = do
|
2218
|
2185
|
_ <- popLexState
|
|
2186
|
+ -- See Note [GHC_CPP saved offset]
|
2219
|
2187
|
current_col <- getOffset
|
2220
|
2188
|
let offset = current_col - len
|
2221
|
2189
|
ctx <- getContext
|
... |
... |
@@ -2670,6 +2638,7 @@ data PState a = PState { |
2670
|
2638
|
pp :: !a,
|
2671
|
2639
|
-- If a CPP directive occurs in the layout context, we need to
|
2672
|
2640
|
-- store the prior column so any alr processing can continue.
|
|
2641
|
+ -- See Note [GHC_CPP saved offset]
|
2673
|
2642
|
pp_last_col :: !(Maybe Int)
|
2674
|
2643
|
}
|
2675
|
2644
|
-- last_loc and last_len are used when generating error messages,
|
... |
... |
@@ -2684,32 +2653,6 @@ data PState a = PState { |
2684
|
2653
|
-- of the action, it is the *current* token. Do I understand
|
2685
|
2654
|
-- correctly?
|
2686
|
2655
|
|
2687
|
|
-data PSavedAlrState = PSavedAlrState {
|
2688
|
|
- -- s_warnings :: Messages PsMessage,
|
2689
|
|
- -- s_errors :: Messages PsMessage,
|
2690
|
|
- s_lex_state :: [Int],
|
2691
|
|
- s_context :: [LayoutContext],
|
2692
|
|
- s_alr_pending_implicit_tokens :: [PsLocated Token],
|
2693
|
|
- s_alr_next_token :: Maybe (PsLocated Token),
|
2694
|
|
- s_alr_last_loc :: PsSpan,
|
2695
|
|
- s_alr_context :: [ALRContext],
|
2696
|
|
- s_alr_expecting_ocurly :: Maybe ALRLayout,
|
2697
|
|
- s_alr_justClosedExplicitLetBlock :: Bool,
|
2698
|
|
- s_last_col :: Int
|
2699
|
|
- }
|
2700
|
|
-
|
2701
|
|
-
|
2702
|
|
--- -- | Use for emulating (limited) CPP preprocessing in GHC.
|
2703
|
|
--- -- TODO: move this into PreProcess, and make a param on PState
|
2704
|
|
--- data PpState = PpState {
|
2705
|
|
--- pp_defines :: !(Map String [String]),
|
2706
|
|
--- pp_continuation :: ![Located Token],
|
2707
|
|
--- -- pp_context :: ![PpContext],
|
2708
|
|
--- pp_context :: ![Token], -- What preprocessor directive we are currently processing
|
2709
|
|
--- pp_accepting :: !Bool
|
2710
|
|
--- }
|
2711
|
|
--- deriving (Show)
|
2712
|
|
-
|
2713
|
2656
|
data PpContext = PpContextIf [Located Token]
|
2714
|
2657
|
deriving (Show)
|
2715
|
2658
|
|
... |
... |
@@ -2825,7 +2768,7 @@ getLastBufCur = P $ \s@(PState { last_buf_cur = last_buf_cur }) -> POk s last_bu |
2825
|
2768
|
getLastLen :: P p Int
|
2826
|
2769
|
getLastLen = P $ \s@(PState { last_len = last_len }) -> POk s last_len
|
2827
|
2770
|
|
2828
|
|
--- see Note [TBD]
|
|
2771
|
+-- See Note [GHC_CPP saved offset]
|
2829
|
2772
|
getOffset :: P p Int
|
2830
|
2773
|
getOffset = P $ \s@(PState { pp_last_col = last_col,
|
2831
|
2774
|
loc = l}) ->
|
... |
... |
@@ -2835,74 +2778,55 @@ getOffset = P $ \s@(PState { pp_last_col = last_col, |
2835
|
2778
|
-- (fromMaybe (srcLocCol (psRealLoc l)) last_col)
|
2836
|
2779
|
in POk s { pp_last_col = Nothing} offset
|
2837
|
2780
|
|
|
2781
|
+-- See Note [GHC_CPP saved offset]
|
2838
|
2782
|
resetOffset :: P p ()
|
2839
|
2783
|
resetOffset = P $ \s -> POk s { pp_last_col = Nothing} ()
|
2840
|
2784
|
|
|
2785
|
+{- Note [GHC_CPP saved offset]
|
|
2786
|
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
2787
|
+
|
|
2788
|
+The layout processing machinery examines the offset of the previous
|
|
2789
|
+line when doing its calculations.
|
|
2790
|
+
|
|
2791
|
+When GHC_CPP is used, a set of CPP directives may ignore some number
|
|
2792
|
+of preceding lines, each of which has a different offset.
|
|
2793
|
+
|
|
2794
|
+We deal with this as follows
|
|
2795
|
+
|
|
2796
|
+- When we start skipping lines due to CPP we store the offset of the
|
|
2797
|
+ line before the CPP directive
|
|
2798
|
+- We explicitly ask for the offset using `getOffset` when doing layout
|
|
2799
|
+ calculations
|
|
2800
|
+- If there is a stored offset, we use that instead of the prior line
|
|
2801
|
+ offset
|
|
2802
|
+
|
|
2803
|
+-}
|
|
2804
|
+
|
2841
|
2805
|
startSkipping :: P p ()
|
2842
|
2806
|
startSkipping = do
|
2843
|
2807
|
pushLexState skipping
|
2844
|
2808
|
-- pushLexState (trace ("startSkipping:" ++ show skipping) skipping)
|
2845
|
2809
|
|
2846
|
|
-stopSkipping :: P p Int
|
|
2810
|
+stopSkipping :: P p ()
|
2847
|
2811
|
stopSkipping = do
|
2848
|
|
- -- popLexState
|
2849
|
|
- ret <- popLexState
|
|
2812
|
+ _ <- popLexState
|
2850
|
2813
|
-- We just processed a CPP directive, which included a trailing newline.
|
2851
|
2814
|
-- To properly sync up, we now need to ensure that `do_bol` processing occurs.
|
2852
|
|
- -- But this call does not emit a token.
|
2853
|
|
- -- Maybe it should be an argument to lexToken instead?
|
2854
|
|
- -- Alternatively, push the input location to the previous char.
|
2855
|
|
- AI ps buf <- getInput
|
2856
|
|
- last_buf_cur <- getLastBufCur
|
2857
|
|
- last_loc <- getLastLoc
|
|
2815
|
+ -- But this call does not emit a token, so we instead
|
|
2816
|
+ -- change the input location to the previous char, the newline
|
|
2817
|
+ AI _ps buf <- getInput
|
2858
|
2818
|
last_tk <- getLastTk
|
2859
|
2819
|
case last_tk of
|
2860
|
2820
|
Strict.Just (L l _) -> do
|
2861
|
2821
|
let ps' = PsLoc (realSrcSpanEnd (psRealSpan l)) (bufSpanEnd (psBufSpan l))
|
2862
|
2822
|
let cur' = (cur buf) - 1
|
2863
|
|
- -- let cur' = trace ("stopSkipping:(cur',ps'):" ++ show (cur'',ps')) cur''
|
2864
|
2823
|
setInput (AI ps' (buf { cur = cur'}))
|
2865
|
2824
|
_ -> return ()
|
2866
|
|
- -- return $ trace ("stopSkipping: (ps, cur buf, last_loc, last_buf_cur, last_tk):" ++ show (ps, cur buf, last_loc, last_buf_cur, last_tk)) ret
|
2867
|
|
- return ret
|
2868
|
2825
|
|
2869
|
2826
|
-- old <- popLexState
|
2870
|
2827
|
-- return (trace ("stopSkipping:" ++ show old) old)
|
2871
|
2828
|
|
2872
|
2829
|
|
2873
|
|
-getAlrState :: P p PSavedAlrState
|
2874
|
|
-getAlrState = P $ \s@(PState {loc=l}) -> POk s
|
2875
|
|
- PSavedAlrState {
|
2876
|
|
- -- s_warnings = warnings s,
|
2877
|
|
- -- s_errors = errors s,
|
2878
|
|
- -- s_lex_state = lex_state s,
|
2879
|
|
- s_lex_state = lex_state s,
|
2880
|
|
- s_context = context s,
|
2881
|
|
- s_alr_pending_implicit_tokens = alr_pending_implicit_tokens s,
|
2882
|
|
- s_alr_next_token = alr_next_token s,
|
2883
|
|
- s_alr_last_loc = alr_last_loc s,
|
2884
|
|
- s_alr_context = alr_context s,
|
2885
|
|
- s_alr_expecting_ocurly = alr_expecting_ocurly s,
|
2886
|
|
- s_alr_justClosedExplicitLetBlock = alr_justClosedExplicitLetBlock s,
|
2887
|
|
- s_last_col = srcLocCol (psRealLoc l)
|
2888
|
|
- }
|
2889
|
|
-
|
2890
|
|
-setAlrState :: PSavedAlrState -> P p ()
|
2891
|
|
-setAlrState ss = P $ \s -> POk s {
|
2892
|
|
- -- errors = s_errors ss,
|
2893
|
|
- -- warnings = s_warnings ss,
|
2894
|
|
- lex_state = s_lex_state ss,
|
2895
|
|
- context = s_context ss,
|
2896
|
|
- alr_pending_implicit_tokens = s_alr_pending_implicit_tokens ss,
|
2897
|
|
- alr_next_token = s_alr_next_token ss,
|
2898
|
|
- alr_last_loc = s_alr_last_loc ss,
|
2899
|
|
- alr_context = s_alr_context ss,
|
2900
|
|
- alr_expecting_ocurly = s_alr_expecting_ocurly ss,
|
2901
|
|
- alr_justClosedExplicitLetBlock = s_alr_justClosedExplicitLetBlock ss,
|
2902
|
|
- pp_last_col = Just (s_last_col ss)
|
2903
|
|
- } ()
|
2904
|
|
-
|
2905
|
|
-
|
2906
|
2830
|
|
2907
|
2831
|
{-# INLINE alexGetChar' #-}
|
2908
|
2832
|
-- This version does not squash unicode characters, it is used when
|
... |
... |
@@ -3199,6 +3123,7 @@ disableHaddock opts = upd_bitmap (xunset HaddockBit) |
3199
|
3123
|
where
|
3200
|
3124
|
upd_bitmap f = opts { pExtsBitmap = f (pExtsBitmap opts) }
|
3201
|
3125
|
|
|
3126
|
+-- TODO:AZ check which of these are actually needed,
|
3202
|
3127
|
enableGhcCpp :: ParserOpts -> ParserOpts
|
3203
|
3128
|
enableGhcCpp = enableExtBit GhcCppBit
|
3204
|
3129
|
|
... |
... |
@@ -3881,8 +3806,6 @@ warn_unknown_prag prags span buf len buf2 = do |
3881
|
3806
|
%************************************************************************
|
3882
|
3807
|
-}
|
3883
|
3808
|
|
3884
|
|
--- TODO:AZ: we should have only mkParensEpToks. Delete mkParensEpAnn, mkParensLocs
|
3885
|
|
-
|
3886
|
3809
|
-- |Given a 'RealSrcSpan' that surrounds a 'HsPar' or 'HsParTy', generate
|
3887
|
3810
|
-- 'EpToken' values for the opening and closing bordering on the start
|
3888
|
3811
|
-- and end of the span
|