Functional Programming guest lecture by Tim Sheard Parsing in Haskell Defining Parsing Combinators.
-
Upload
terence-osborne -
Category
Documents
-
view
221 -
download
1
Transcript of Functional Programming guest lecture by Tim Sheard Parsing in Haskell Defining Parsing Combinators.
Functional Programming guest lecture by Tim Sheard
Parsing in Haskell
Defining Parsing Combinators
Find these slides at:
• www.cs.pdx.edu/~sheard/course/guest/ParsingInHaskell.ppt
The example code can be found at:
• www.cs.pdx.edu/~sheard/course/guest/ParsingInHaskell.hs
Parsing
• Parsing is imposing tree structure on linear text (usually in strings or files)
• Plan of this lecture
– Introduce the Parsec library
– Write some simple parsers
– Test them
– Define a simple version of the parsers to see how they work.
• Parsec is a much more sophisticated library
Include the followingmodule ParsingInHaskell where
import qualified Control.Applicative as A
import Control.Monad (MonadPlus (..))

import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Token
import Text.ParserCombinators.Parsec.Language
Parsec• Type:
– data Parser a = …
• Function– parse :: Parser b -> String -> [a] -> Either ParseError b
-- | Run parser @p@ over @input@ (with an empty source name) and print the
-- outcome: the parsed value on success, or the parse error prefixed with
-- @"parse error at "@ on failure.
run :: Show a => Parser a -> String -> IO ()
run p input =
  case parse p "" input of
    Left err -> do { putStr "parse error at "; print err }
    Right x  -> print x
Operationschar :: Char -> CharParser a Char
string :: String -> CharParser a String
satisfy :: (Char -> Bool) ->
CharParser a Char
(<|>) :: Parser c -> Parser c -> Parser c
test1
test1 = do { string "A"
; char ' '
; string "big"
; char ' '
; string "cat"
}
test2
test2 = do { a <- string "A"
; char ' '
; b <- string "big"
; char ' '
; c <- string "cat"
; return(a,b,c)
}
test3
-- | Parse the literal word @s@ as a lexeme of the @haskell@ token parser.
--
-- The literal is wrapped in 'try': 'string' may consume a prefix of the input
-- before failing, and Parsec's '<|>' only tries its right operand when the
-- left one failed without consuming input. Without 'try', a choice such as
-- @word "him" <|> word "her"@ could never reach the second alternative.
word s =
  lexeme haskell (try (string s))
test3 = do { a <- word "A"
; b <- word "big"
; c <- word "cat"
; return(a,b,c)
}
A Simple Grammar for English Example taken from Floyd & Beigel.
<Sentence> -> <Subject> <Predicate>
<Subject> -> <Pronoun1> | <Pronoun2>
<Pronoun1> -> I | we | you | he | she | it | they
<Noun Phrase> -> <Simple Noun Phrase> | <Article> <Noun Phrase>
<Article> -> a | an | the
<Predicate> -> <Verb> | <Verb> <Object>
<Simple Noun Phrase> -> <Noun> | <Adjective> <Simple Noun Phrase>
<Object> -> <Pronoun2> | <Noun Phrase>
<Pronoun2> -> me | us | you | him | her | it | them
<Noun> -> . . .
<Verb> -> . . .
-- As a parsec grammar
--
-- One definition per line; the right-hand sides are unchanged.
-- NOTE(review): Parsec's <|> only tries its right operand when the left one
-- failed without consuming input, so alternatives that share a prefix
-- (e.g. "him" / "her") depend on `word` not consuming input on failure.

-- | A sentence: a subject, then a verb, then a predicate.
sentence = do { subject; verb; predicate }

pronoun1 = word "I" <|> word "we" <|> word "you" <|> word "he"
       <|> word "she" <|> word "it" <|> word "they"

pronoun2 = word "me" <|> word "us" <|> word "you" <|> word "him"
       <|> word "her" <|> word "it" <|> word "them"

subject = pronoun1 <|> pronoun2

article = word "a" <|> word "the"

predicate = do { article; (noun <|> simpleNounPhrase) }

adjective = word "red" <|> word "pretty"

noun = word "cat" <|> word "ball"

-- | Zero or more adjectives; always yields the empty string.
simpleNounPhrase = do { adjective; simpleNounPhrase } <|> return ""

object = pronoun2 <|> nounPhrase

nounPhrase = simpleNounPhrase <|> do { article; noun }

verb = word "ate" <|> word "hit"
test4 = run sentence "I hit the pretty red cat"
Some simple combinators• many :: Parser c -> Parser [c]
• sepBy :: Parser c -> Parser d -> Parser [c]
• option :: a -> Parser a -> Parser a
• chainl1 :: GenParser a -> GenParser (a->a->a) -> GenParser a
• (chainl1 p op x) parses one or more occurrences of p, separated by op Returns a value obtained by a left associative application of all functions returned by op to the values returned by p.
-- Making Parse Trees

-- | A variable, identified by its name.
data Variable = Var String
  deriving (Show, Eq)

-- | Parse tree of the small expression language; each constructor is
-- followed by an example of the concrete syntax it represents.
data Expression
  = Constant Integer               -- 5
  | Contents Variable              -- x
  | Minus Expression Expression    -- x - 6
  | Greater Expression Expression  -- 6 > z
  | Times Expression Expression    -- x * y
  deriving (Show, Eq)
Variablesparens x = between (char '(') (char ')') x
pVar = lexeme haskell
(do { c <- lower
; cs <- many (satisfy isAlphaNum)
; return(Var (c:cs))
})
Simple TermssimpleExp :: Parser Expression
simpleExp =
(do { n <- integer haskell; return(Constant n)}) <|>
(do { n <- pVar; return(Contents n)}) <|>
(parens relation)
Complex termsfactor = chainl1 simpleExp
(lexeme haskell (char '*')>> return Times)
summand = chainl1 factor
(lexeme haskell (char '-')>> return Minus)
relation = chainl1 summand
(lexeme haskell (char '>') >> return Greater)
-- The name test4 is already taken by the English-grammar example, and no
-- parser called pExp is defined in these slides; `relation` is the top-level
-- expression parser (lowest-precedence operator), so run that instead.
test5 = run relation "x - 2 > 5"
Defining our own Type of a Parserdata Parser a =
Parser (String -> [(a,String)])
• A function inside a data definition.
• The output is a list of successful parses.
• This type can be made into a monad
– A monad provides the sequencing operator in Haskell.
• It can also be made into a MonadPlus: a monad with a zero (mzero) and a plus (mplus, which here is list ++).
-- Defining the Monad
-- Technical details, can be ignored when using combinators
--
-- Since GHC 7.10, Functor and Applicative are superclasses of Monad, and
-- Alternative is a superclass of MonadPlus, so all five instances are given.
-- Alternative is referenced through the qualified import `A` so that its
-- `<|>` and `many` do not clash with other definitions of those names.

-- Apply a function to the value of every successful parse.
instance Functor Parser where
  fmap f p =
    Parser (\inp -> [(f v, out) | (v, out) <- applyP p inp])

-- `pure` succeeds with a value and consumes nothing; `<*>` runs the function
-- parser and then the argument parser on whatever input is left.
instance Applicative Parser where
  pure v = Parser (\inp -> [(v, inp)])
  pf <*> pv =
    Parser (\inp -> [ (f v, out')
                    | (f, out)  <- applyP pf inp
                    , (v, out') <- applyP pv out ])

-- Sequencing: run p, then feed each result and its remaining input to f.
instance Monad Parser where
  return = pure
  p >>= f =
    Parser (\inp -> concat
                      [ applyP (f v) out
                      | (v, out) <- applyP p inp ])

instance A.Alternative Parser where
  empty = mzero
  (<|>) = mplus

-- mzero always fails; mplus collects the parses of both alternatives.
instance MonadPlus Parser where
  mzero = Parser (\inp -> [])
  mplus (Parser p) (Parser q)
    = Parser (\inp -> p inp ++ q inp)
•where applyP undoes the constructor•applyP (Parser f) x = f x
Note the comprehension syntax
Typical Parser• Because the parser is a monad we can use
the Do syntax .
do { x1 <- p1
; x2 <- p2
; ...
; xn <- pn
; f x1 x2 ... xn
}
Running the Parser
• Running Parsers
papply :: Parser a -> String -> [(a,String)]
papply p = applyP (do {junk; p})
• junk skips over white space and comments. We'll see how to define it later
Simple PrimitivesapplyP :: Parser a -> String -> [(a,String)]
applyP (Parser p) = p
-- | Consume one character: fails (no parses) on empty input, otherwise
-- yields the first character together with the rest of the input.
item :: Parser Char
item = Parser (\inp -> case inp of
                         ""     -> []
                         (x:xs) -> [(x, xs)])
sat :: (Char -> Bool) -> Parser Char
sat p = do {x <- item;
if p x then return x else mzero}
? papply item "abc"
[('a',"bc")]
Examples
? papply item "abc"
[('a',"bc")]
? papply (sat isDigit) "123"
[('1',"23")]
? papply (sat isDigit) "abc"
[]
Useful Parsers char :: Char -> Parser Charchar x = sat (x ==)
digit :: Parser Int
digit = do { x <- sat isDigit
; return (ord x - ord '0') }
lower :: Parser Char
lower = sat isLower
upper :: Parser Char
upper = sat isUpper
Exampleschar x = sat (x ==)
? papply (char 'z') "abc"[]
? papply (char 'a') "abc"[('a',"bc")]
? papply digit "123"[(1,"23")]
? papply upper "ABC"[('A',"BC")]
? papply lower "ABC"[]
More Useful Parsers–letter :: Parser Char–letter = sat isAlpha
• Can even use recursion– string :: String -> Parser String– string "" = return ""– string (x:xs) = – do {char x; string xs; return (x:xs) }
• Helps define even more useful parsers– identifier :: Parser String– identifier = do {x <- lower– ; xs <- many alphanum– ; return (x:xs)}
• What do you think many does?
Examples? papply (string "tim") "tim is red"
[("tim"," is red")]
? papply identifier "tim is blue"
[("tim"," is blue")]
? papply identifier "x5W3 = 12"
[("x5W3"," = 12")]
Choice -- 1 parser or another
• Note that the ++ operator (from MonadPlus) gives non-deterministic choice.
– instance MonadPlus Parser where– (Parser p) ++ (Parser q) – = Parser(\inp -> p inp ++ q inp)
• Sometimes we’d like to prefer one choice over another, and take the second only if the first fails
• We don’t need an explicit sequencing operator because the monad sequencing plays that role.
-- Efficiency

-- | Wrap parser @p@ so that its result list is produced as a cons cell
-- straight away: the head pair and the tail are both taken lazily from the
-- result of running @p@.
--
-- Because the first cell is built unconditionally, inspecting its contents
-- when @p@ produced no parses hits @head@ of an empty list; apply it only to
-- parsers that cannot fail.
force :: Parser a -> Parser a
force p =
  Parser (\inp ->
    let x = applyP p inp
    in (fst (head x), snd (head x))
         : tail x)
-- Deterministic Choice

-- | Deterministic choice: run both alternatives via 'mplus' and keep at most
-- the first parse, so @q@'s results are used only when @p@ yields none.
(+++) :: Parser a -> Parser a -> Parser a
p +++ q =
  Parser (\inp ->
    case applyP (p `mplus` q) inp of
      []    -> []
      (x:_) -> [x])
Example
–? papply (string "x" +++ string "b") "abc"
–[]
–? papply (string "x" +++ string "b") "bcd"
–[("b","cd")]
-- Sequences (more recursion)

-- | Zero or more occurrences of @p@.
many :: Parser a -> Parser [a]
many p = force (many1 p +++ return [])

-- | One or more occurrences of @p@.
many1 :: Parser a -> Parser [a]
many1 p = do { x <- p; xs <- many p; return (x:xs) }

-- | Zero or more occurrences of @p@ separated by @sep@; the separators'
-- results are discarded.
sepby :: Parser a -> Parser b -> Parser [a]
p `sepby` sep = (p `sepby1` sep) +++ return []

-- | One or more occurrences of @p@ separated by @sep@.
sepby1 :: Parser a -> Parser b -> Parser [a]
p `sepby1` sep = do { x <- p; xs <- many (do { sep; p }); return (x:xs) }
Example? papply (many (char 'z')) "zzz234"
[("zzz","234")]
? papply (sepby (char 'z') spaceP) "z z z 34"
[("zzz"," 34")]
Sequences separated by operators
chainl :: Parser a -> Parser (a -> a -> a) -> a -> Parser a
chainl p op v = (p `chainl1` op) +++ return v
-- | One or more occurrences of @p@ separated by @op@, combined
-- left-associatively with the functions that @op@ returns.
chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a
p `chainl1` op = do { x <- p; rest x }
  where
    -- x is the value accumulated so far; try to extend it with one more
    -- "op p" pair, otherwise stop and return it.
    rest x =
      do { f <- op; y <- p; rest (f x y) } +++ return x
? papply (chainl int (return (+)) 0) "1 3 4 abc"
[(8,"abc")]
-- Tokens and Lexical Issues

-- | One or more white-space characters.
spaceP :: Parser ()
spaceP = do { many1 (sat isSpace); return () }

-- | A line comment: "--" followed by everything up to (not including) the
-- next newline.
comment :: Parser ()
comment = do { string "--"; many (sat p); return () }
  where p x = x /= '\n'

-- | Any amount of white space and comments.
junk :: Parser ()
junk = do { many (spaceP +++ comment); return () }
• A Token is any parser followed by optional white space or a comment
-- | Run @p@, then skip any trailing junk, returning @p@'s result.
token :: Parser a -> Parser a
token p = do { v <- p; junk; return v }
Using Tokenssymb :: String -> Parser String
symb xs = token (string xs)
-- | Parse an identifier token, failing if it is one of the words in @ks@.
-- Failure uses 'mzero', the failing parser of the MonadPlus instance.
ident :: [String] -> Parser String
ident ks =
  do { x <- token identifier
     ; if not (elem x ks)
         then return x else mzero }
nat :: Parser Int
nat = token natural
natural :: Parser Int
natural = digit `chainl1` return (\m n -> 10*m + n)
Example? papply (token (char 'z')) "z 123"[('z',"123")]
? papply (symb "tim") "tim is cold"[("tim","is cold")]
? papply natural "123 abc"[(123," abc")]
? papply (many identifier) "x d3 23"[(["x"]," d3 23")]
? papply (many (token identifier)) "x d3 23"[(["x", "d3"],"23")]
More Parsersint :: Parser Int
int = token integer
-- | An optionally negated natural number: a leading '-' negates the value
-- parsed by 'natural'; otherwise fall back to 'nat'.
integer :: Parser Int
integer = (do { char '-'
              ; n <- natural
              ; return (-n) })
          +++ nat
-- Example: Parsing Expressions

-- | Parse tree for arithmetic expressions. Show is derived so that parse
-- results can be printed at the prompt.
data Term = Add Term Term
          | Sub Term Term
          | Mult Term Term
          | Div Term Term
          | Const Int
          deriving (Show, Eq)
addop:: Parser(Term -> Term -> Term)
addop = do {symb "+"; return Add} +++
do {symb "-"; return Sub}
mulop:: Parser(Term -> Term -> Term)
mulop = do {symb "*"; return Mult} +++
do {symb "/"; return Div}
-- Constructing a Parse tree
--
-- Types of the operator parsers used below (their signatures are declared
-- next to their definitions, so they are only repeated here as a reminder):
--   addop :: Parser (Term -> Term -> Term)
--   mulop :: Parser (Term -> Term -> Term)

-- | An expression: terms combined left-associatively by additive operators.
expr :: Parser Term
expr = term `chainl1` addop

-- | A term: factors combined left-associatively by multiplicative operators.
term = factor `chainl1` mulop

-- | A factor: a single digit constant, or a parenthesised expression.
factor = (do { n <- token digit; return (Const n) })
     +++ (do { symb "("; n <- expr; symb ")"; return n })
? papply expr "5 abc"[(Const 5,"abc")]
? papply expr "4 + 5 - 2"[(Sub (Add (Const 4) (Const 5))(Const 2),[])]
Array Based Parserstype Subword = (Int,Int)
newtype P a = P (Array Int Char -> Subword -> [a])unP (P z) = z
emptyP :: P ()emptyP = P f where f z (i,j) = [() | i == j]
notchar :: Char -> P Charnotchar s = P f where f z (i,j) = [z!j | i+1 == j, z!j /= s]
charP :: Char -> P CharcharP c = P f where f z (i,j) = [c | i+1 == j, z!j == c]
anychar :: P Charanychar = P f where f z (i,j) = [z!j | i+1 == j]
anystring :: P(Int,Int)anystring = P f where f z (i,j) = [(i,j) | i <= j]
symbol :: String -> P (Int,Int)symbol s = P f where f z (i,j) = if j-i == length s then [(i,j)| and [z!(i+k) == s!!(k-1) | k <-[1..(j-i)]]] else []
Combinatorsinfixr 6 |||
(|||) :: P b -> P b -> P b
(|||) (P r) (P q) = P f
where f z (i,j) = r z (i,j) ++ q z (i,j)
infix 8 <<<
(<<<) :: (b -> c) -> P b -> P c
(<<<) f (P q) = P h
where h z (i,j) = map f (q z (i,j))
infixl 7 ~~~
(~~~) :: P(b -> c) -> P b -> P c
(~~~) (P r) (P q) = P f
where f z (i,j) =
[f y | k <- [i..j], f <- r z (i,k), y <- q z (k,j)]
run :: String -> P b -> [b]
run s (P ax) = ax (s2a s) (0,length s)
s2a s = (array bounds (zip [1..] s))
where bounds = (1,length s)
-- Functor and Applicative are superclasses of Monad since GHC 7.10, so they
-- are defined alongside the Monad instance.

-- Map a function over every result found for a subword.
instance Functor P where
  fmap f (P q) = P (\z ij -> map f (q z ij))

-- `pure` matches only the empty subword (i == j). `<*>` tries every split
-- point k of the subword (i,j), parsing the function on (i,k) and its
-- argument on (k,j).
instance Applicative P where
  pure x =
    P (\z (i,j) -> if i == j then [x] else [])
  P r <*> P q =
    P (\z (i,j) -> [g y | k <- [i..j], g <- r z (i,k), y <- q z (k,j)])

-- Bind: for every split point k, run the first parser on (i,k) and the
-- parser chosen by its result on (k,j).
instance Monad P where
  return = pure
  (>>=) (P f) g = P h
    where h z (i,j) =
            concat [ unP (g a) z (k,j)
                   | k <- [i..j], a <- f z (i,k) ]
Examples
p1 = do { symbol "tim"; c <- anychar
; symbol "tom"; return c}
ex4 = run "tim5tom" p1
ex5 = run "timtom" p1
Main> ex4
"5"
Main> ex5
""
Exercise in class
• Write a parser for regular expressions