-
Notifications
You must be signed in to change notification settings - Fork 0
/
MonoidLexer.hs
115 lines (100 loc) · 2.79 KB
/
MonoidLexer.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
-- | Demonstration of static analysis with a Monoid-equipped lexer.
module Scratch.MonoidLexer where
import qualified Data.Set as Set
import qualified Data.Text as Text
import Scratch.Prelude
import Test.Hspec
data Lexer a = Lexer
{ keywords :: Set Keyword,
-- | Parse a single token.
getToken :: Set Keyword -> Text -> Maybe (Text, a)
}
newtype Keyword
= Keyword Text
deriving (Eq, Ord, Show)
-- | Calls 'getToken' repeatedly to build a list of @a@ tokens.
runLexer ::
Lexer a ->
-- | Input to lex
Text ->
Maybe [a]
runLexer l@Lexer {keywords, getToken} input
| Text.null input = Just []
| otherwise = do
(remainingInput, tok) <- getToken keywords input
fmap (tok :) (runLexer l remainingInput)
emptyLexer :: Lexer a
emptyLexer =
Lexer
{ keywords = mempty,
getToken = \_ _ -> Nothing
}
-- | Left-biased.
combineLexers :: forall a. Lexer a -> Lexer a -> Lexer a
combineLexers (Lexer k1 getToken1) (Lexer k2 getToken2) =
Lexer
{ keywords = k1 <> k2,
getToken = getTok
}
where
getTok :: Set Keyword -> Text -> Maybe (Text, a)
getTok finalKeywords txt =
case getToken1 finalKeywords txt of
Nothing ->
getToken2 finalKeywords txt
res ->
res
instance Semigroup (Lexer a) where
(<>) = combineLexers
instance Monoid (Lexer a) where
mempty = emptyLexer
keywordToken :: Text -> a -> Lexer a
keywordToken tokText tok =
Lexer
{ keywords = Set.singleton (Keyword tokText),
getToken = \_ input -> do
(candidateTok, remaining) <- nextToken input
if candidateTok == tokText
then Just (remaining, tok)
else Nothing
}
-- | Helper function.
--
-- Separates the input on the first space character.
--
-- Returns a tuple whose first @Text@ is what came before the space,
-- and whose second @Text@ is the remaining input after it
-- with leading spaces stripped.
nextToken :: Text -> Maybe (Text, Text)
nextToken input = do
let (tok, remaining) = Text.span (/= ' ') input
if Text.null tok
then Nothing
else Just (tok, Text.dropWhile (== ' ') remaining)
-- * Example use
data Token
= Let
| Equal
| Var Text
deriving (Eq, Ord, Show)
variableToken :: Lexer Token
variableToken =
Lexer
{ keywords = mempty,
getToken = \finalKeywords input -> do
(candidateTok, remaining) <- nextToken input
if Set.member (Keyword candidateTok) finalKeywords
then Nothing
else Just (remaining, Var candidateTok)
}
exampleLexer :: Lexer Token
exampleLexer =
variableToken
<> keywordToken "let" Let
<> keywordToken "=" Equal
spec :: Spec
spec =
describe "monoid lexer" $ do
it "doesn't parse 'let' keyword as a variable" $ do
runLexer exampleLexer "let a = b"
`shouldBe` Just [Let, Var "a", Equal, Var "b"]