Chapter 10. Code Case Study: Parsing a Binary Data Format
Grayscale Files
(==>) 函数会创建一个闭包,如“(+5)”就是一个闭包。
functor 可以使得我们的代码更tidy,更expressive 。
functor 可以避免去复制代码。
functor 有助于避免代码冗长。
PMG 图像有两种格式,p2(ASCII编码) 和p5(多为二进制编码) 。
文件头是这样的:一个string("p2" 或 "p5") + 一个whitespace + 三个数字(分别表示宽、高,和最大灰度值) ,并且中间用whitespace 分隔。
文件头后是image data,可能是二进制数据(raw file),也可能是ASCII 数字(plain file),并以单个空格分开。
raw file 在单个文件中可以包含多个图像,每一个都有自已的文件头。plain file 只包含单个图像。
Parsing a Raw PGM File
我们将使用ByteString 来存储graymap 数据,因为PGM 的文件头是文本,而body 是二进制数据,而ByteString 既可用于text,也可用于binary。
这个例子中,lazy 或strict 的ByteString 都适用,任选一种都可以,这里用lazy 的。
通常,Haskell 的Show 实会产生一个String 表示,而且可以用read 从这个String 重建那个对象。
parseP5 会返回一个解析出的Greymap 和剩余ByteString,供将来解析(因为一个raw file 可以有多个图像)
module PNM () where
import qualified Data.ByteString.Lazy.Char8 as L8
import qualified Data.ByteString.Lazy as L
import Data.Char (isSpace)
data Greymap = Greymap {
greyWidth :: Int
, greyHeight :: Int
, greyMax :: Int
, greyData :: L.ByteString
} deriving (Eq)
阻上编译器自动继承Show 实例,因为图像数据非常巨大。
instance Show Greymap where -- 亮点,自已实现Show 的实例,不打印过大的字段
show (Greymap w h m _) = "Greymap " ++ show w ++ "x" ++ show h ++
" " ++ show m
matchHeader :: L.ByteString -> L.ByteString -> Maybe L.ByteString
matchHeader prefix str
| prefix `L8.isPrefixOf` str
= Just (L8.dropWhile isSpace (L.drop (L.length prefix) str))
| otherwise
= Nothing
getNat :: L.ByteString -> Maybe (Int, L.ByteString)
getNat s = case L8.readInt s of
Nothing -> Nothing
Just (num,rest)
| num <= 0 -> Nothing
| otherwise -> Just (fromIntegral num, rest)
getBytes :: Int -> L.ByteString -> Maybe (L.ByteString, L.ByteString)
getBytes n str = let count = fromIntegral n
both@(prefix,_) = L.splitAt count str
in if L.length prefix < count
then Nothing
else Just both
parseP5 :: L.ByteString -> Maybe (Greymap, L.ByteString)
parseP5 s =
case matchHeader (L8.pack "P5") s of
Nothing -> Nothing
Just s1 ->
case getNat s1 of
Nothing -> Nothing
Just (width, s2) ->
case getNat (L8.dropWhile isSpace s2) of
Nothing -> Nothing
Just (height, s3) ->
case getNat (L8.dropWhile isSpace s3) of
Nothing -> Nothing
Just (maxGrey, s4)
| maxGrey > 255 -> Nothing
| otherwise ->
case getBytes 1 s4 of
Nothing -> Nothing
Just (_, s5) ->
case getBytes (width * height) s5 of
Nothing -> Nothing
Just (bitmap, s6) ->
Just (Greymap width height maxGrey bitmap, s6)
parseAllP5 :: L.ByteString -> [Greymap]
parseAllP5 s = case parseP5_take2 s of
Nothing -> []
Just (g, s') -> g : parseAllP5 s'
Getting Rid of Boilerplate Code
如果(>>?) 函数左边得到的不是Nothing 就将它作为参数传给右边的函数,否则就什么也不做。使用这个操作符可以将函数链起来。
(>>?) :: Maybe a -> (a -> Maybe b) -> Maybe b
Nothing >>? _ = Nothing
Just v >>? f = f v
解析函数第二次尝试
parseP5_take2 :: L.ByteString -> Maybe (Greymap, L.ByteString)
parseP5_take2 s =
matchHeader (L8.pack "P5") s >>?
/s -> skipSpace ((), s) >>?
(getNat . snd) >>?
skipSpace >>?
/(width, s) -> getNat s >>?
skipSpace >>?
/(height, s) -> getNat s >>?
/(maxGrey, s) -> getBytes 1 s >>?
(getBytes (width * height) . snd) >>?
/(bitmap, s) -> Just (Greymap width height maxGrey bitmap, s)
skipSpace :: (a, L.ByteString) -> Maybe (a, L.ByteString)
skipSpace (a, s) = Just (a, L8.dropWhile isSpace s)
Implicit State
data ParseState = ParseState {
string :: L.ByteString
, offset :: Int64 -- imported from Data.Int
} deriving (Show)
simpleParse :: ParseState -> (a, ParseState)
simpleParse = undefined
这个解析器能够报告错误
betterParse :: ParseState -> Either String (a, ParseState)
betterParse = undefined
为了不把内部实现暴露给用户所以用newtype包装一下
newtype Parse a = Parse {
runParse :: ParseState -> Either String (a, ParseState)
}
newtype 只是编译时封装一个函数,所以没有运行时overhead 。当我们想要使用那个函数,就用runParser 这个访问器。
不暴露Parse 的值构造器可以确保没有人会意外的创建一个解析器,也不能通过模式匹配检查其内部。
The Identity Parser
identity :: a -> Parse a
identity a = Parse (/s -> Right (a, s))
parse :: Parse a -> L.ByteString -> Either String a
parse parser initState
= case runParse parser (ParseState initState 0) of
Left err -> Left err
Right (result, _) -> Right result
Record Syntax, Updates, and Pattern Matching
记录语法比访问器更有用,我们可以用它拷贝或部份改变已存在的值
更新记录字段的值的方法
modifyOffset :: ParseState -> Int64 -> ParseState
modifyOffset initState newOffset =
initState { offset = newOffset }
{-
ghci> let before = ParseState (L8.pack "foo") 0
ghci> let after = modifyOffset before 3
ghci> before -- 亮点,查看前一个结果
ParseState {string = Chunk "foo" Empty, offset = 0}
ghci> after -- 亮点,查看后一个结果
ParseState {string = Chunk "foo" Empty, offset = 3}
-}
A More Interesting Parser
解析一个字节
----------------------------------------------------------------------------------------------
-- contents <- L.readFile "c:/TOKYO.PGM"
-- print $ parse parseByte contents -- >Right 80
-- print $ chr 80 -- >'P' -- 回想一下,PGM 文件的头两字节是"p5"
----------------------------------------------------------------------------------------------
parseByte :: Parse Word8
parseByte =
getState ==> /initState ->
case L.uncons (string initState) of
Nothing ->
bail "no more input"
Just (byte,remainder) ->
putState newState ==> /_ ->
identity byte
where newState = initState { string = remainder,
offset = newOffset }
newOffset = offset initState + 1
getState :: Parse ParseState
getState = Parse (/s -> Right (s, s))
putState :: ParseState -> Parse ()
putState s = Parse (/_ -> Right ((), s))
bail :: String -> Parse a
bail err = Parse $ /s -> Left $
"byte offset " ++ show (offset s) ++ ": " ++ err
(==>) :: Parse a -> (a -> Parse b) -> Parse b
firstParser ==> secondParser = Parse chainedParser
where chainedParser initState =
case runParse firstParser initState of
Left errMessage ->
Left errMessage
Right (firstResult, newState) ->
runParse (secondParser firstResult) newState
L8.uncons 从ByteString 中取一个元素
{-
ghci> L8.uncons (L8.pack "foo")
Just ('f',Chunk "oo" Empty)
ghci> L8.uncons L8.empty
Nothing
-}
Obtaining and Modifying the Parse State
{-
bail :: String -> Parse a
bail err = Parse $ /s -> Left $
"byte offset " ++ show (offset s) ++ ": " ++ err
-}
Chaining Parsers Together
{-
(==>) :: Parse a -> (a -> Parse b) -> Parse b
firstParser ==> secondParser = Parse chainedParser
where chainedParser initState =
case runParse firstParser initState of
Left errMessage ->
Left errMessage
Right (firstResult, newState) ->
runParse (secondParser firstResult) newState
-}
(==>) 函数会创建一个闭包,如“(+5)”就是一个闭包。
Introducing Functors
fmap 是泛型map ,针对不同的类型实现不同的fmap 。
{-
map (1+) [1,2,3] -- >[2,3,4]
map (+2) [1,2,3] -- >[3,4,5]
-}
map-like activity 在其他例子中也很有用。例如考虑一个二叉树:
data Tree a = Node (Tree a) (Tree a)
| Leaf a
deriving (Show)
如果我们希望将一颗strings 树映射成一颗strings length 树(既原来每个结点string 用它的长度替换),我们可以写这样的函数:
treeLengths (Leaf s) = Leaf (length s)
treeLengths (Node l r) = Node (treeLengths l) (treeLengths r)
改写一下,使其更具通用性
treeMap :: (a -> b) -> Tree a -> Tree b
treeMap f (Leaf a) = Leaf (f a)
treeMap f (Node l r) = Node (treeMap f l) (treeMap f r)
参数是作用于结点的函数和一颗树。
两个函数输出是一样的
{-
let tree = Node (Leaf "foo") (Node (Leaf "x") (Leaf "quux"))
print $ treeLengths tree -- >Node (Leaf 3) (Node (Leaf 1) (Leaf 4))
print $ treeMap length tree -- >Node (Leaf 3) (Node (Leaf 1) (Leaf 4))
print $ treeMap (odd . length) tree -- >Node (Leaf True) (Node (Leaf True) (Leaf False)) -- 亮点,复合函数的妙用
-}
Haskell 提供了一个名为Functor 的著名typeclass ,完成类似treeMap 的功能。
Functor 这个typeclass 提供了一个fmap 函数(位于GHC.Base 库)
{-
class Functor f where
fmap :: (a -> b) -> f a -> f b
-}
树的fmap 实现
instance Functor Tree where
fmap = treeMap
这里一定义就暴露给外部了,不需要导出。
-- fmap length (Node (Leaf "Livingstone") (Leaf "I presume")) -- >Node (Leaf 11) (Leaf 9)
列表的fmap 实现(位于标准库)
{-
instance Functor [] where
fmap = map
-}
Mabe 的fmap 实现(位于标准库)
{-
instance Functor Maybe where
fmap _ Nothing = Nothing
fmap f (Just x) = Just (f x)
-}
{-
fmap (+1) [1,2,3] -- >[2,3,4]
map (+1) [1,2,3] -- >[2,3,4]
-- fmap 相比map 有什么特别的地方?
-}
Functor 的实例只能是那些恰好只有一个类型参数的类型,Either a b 或 (a, b) 有两个类型参数所以不能用来实例化Functor ,Bool 或Int 也不行,因为它们没有类型参数。
data Foo a = Foo a
instance Functor Foo where
fmap f (Foo a) = Foo (f a)
出错:类型参数不能存在某种约束,例如这里的a 明确指定为Eq 的实例
{-
data Eq a => Bar a = Bar a
instance Functor Bar where
fmap f (Bar a) = Bar (f a)
-}
Constraints on Type Definitions Are Bad
准则:不要在类型的参数上加限制,如果有需要就在函数的参数上加。
为一个类型定义加以限制永远不是一个好主意。它会强制你在所有操作那个类型的函数上加入同样的限制。
假设我们需要一个栈,我们需要查看其元素是否符合特定的ordering 。
数据结构:栈
-- a 被限制为有序集的元素
data (Ord a) => OrdStack a = Bottom
| Item a (OrdStack a)
deriving (Show)
我们写了一个函数,此函数检查这个栈是否为递增的(上面的元素总比下面的元素大):
isIncreasing :: (Ord a) => OrdStack a -> Bool
isIncreasing (Item a rest@(Item b _))
| a < b = isIncreasing rest
| otherwise = False
isIncreasing _ = True
push 函数并不需要a 的类型限制,但是如果去掉这个限制就会导至编译错误
push :: (Ord a) => a -> OrdStack a -> OrdStack a
push a s = Item a s
这个例子中应保持isIncreasing 函数的参数a 为ord 这个限制(失去它就不能用“<” 运算符),其它限制全部去掉。
准则:不要在类型的参数上加限制,如果有需要就在函数的参数上加。
多数Haskell 容器类型遵循这一准则。
Infix Use of fmap
“<$>” 操作符是fmap 的别名。
将fmap 作为操作符来用(为了少写括号?),而map 函数几乎从来不这么用的
{-
ghci> (1+) `fmap` [1,2,3] ++ [4,5,6]
[2,3,4,4,5,6]
-}
{-
ghci> fmap (1+) ([1,2,3] ++ [4,5,6])
[2,3,4,5,6,7]
-}
如果你真的想将fmap 函数用作操作符,Control.Applicative 模块提供了一个“<$>” 操作符,“<$>” 操作符是fmap 的别名。
Flexible Instances
你可能希望写一个Functor 的实例,这个实例以Either Int b 为实例参数(注意了,其类型参数只有b 一个)
-- {-# LANGUAGE FlexibleInstances #-} -- 必须加入这个编译选项
{-
instance Functor (Either Int) where
fmap _ (Left n) = Left n
fmap f (Right r) = Right (f r)
-}
fmap (== "cheeseburger") (Left 1 :: Either Int String) -- >Left 1
fmap (== "cheeseburger") (Right "fries" :: Either Int String) -- >Right False
注意了,因为已存在类似定义会与Control.Monad.Instances 发生overlap 冲突。加入{-# LANGUAGE OverlappingInstances #-} 编译选项可以使得当发生多匹配时使得编译器选择最特殊的那一个。
-- instance Functor (Either a) -- Defined in Control.Monad.Instances
Thinking More About Functors
准则一:将fmap id 应用于一个值,就必须能够返回这个值的identical
fmap id (Node (Leaf "a") (Leaf "b")) -- >Node (Leaf "a") (Leaf "b") -- 亮点,id 函数
准则二:functors 必须能够复合
(fmap even . fmap length) (Just "twelve") -- >Just True
fmap (even . length) (Just "twelve") -- >Just True
fmap odd (Just 1) -- >Just True
fmap odd Nothing -- >Nothing
Writing a Functor Instance for Parse
instance Functor Parse where
fmap f parser = parser ==> /result ->
identity (f result)
-- <$> 是famp
{-
ghci> parse parseByte L.empty
Left "byte offset 0: no more input"
ghci> parse (id <$> parseByte) L.empty
Left "byte offset 0: no more input"
-}
{-
ghci> let input = L8.pack "foo"
ghci> L.head input
102
ghci> parse parseByte input
Right 102
ghci> parse (id <$> parseByte) input
Right 102
-}
{-
ghci> parse ((chr . fromIntegral) <$> parseByte) input
Right 'f'
ghci> parse (chr <$> fromIntegral <$> parseByte) input
Right 'f'
-}
Using Functors for Parsing
functors 可以使得我们的代码更tidy,更expressive 。
回想一下前面的parseByte ,现在我们想要解析ASCII 而不只是Word8 值。
虽然我们可以写parseChar ,但它的结构和parseByte 是很相似的。现在我们可以利用functor 来避免去复制代码。
使用chr 函数将Int 转成Char
-- chr :: Int -> Char -- Defined in GHC.Base
--import Data.Char (chr)
w2c :: Word8 -> Char
w2c = chr . fromIntegral
-- import Control.Applicative
peekByte :: Parse (Maybe Word8)
peekByte = (fmap fst . L.uncons . string) <$> getState
-- peekByte 返回Nothing 如果已是input string 的end,否则返回下一个字符,但是没有consuming 它。
peekChar :: Parse (Maybe Char)
peekChar = fmap w2c <$> peekByte
-- 类似于takeWhile 函数
parseWhile :: (Word8 -> Bool) -> Parse [Word8]
parseWhile p = (fmap p <$> peekByte) ==> /mp ->
if mp == Just True
then parseByte ==> /b ->
(b:) <$> parseWhile p
else identity []
-- 这是不使用functor 的版本,较冗长
parseWhileVerbose p =
peekByte ==> /mc ->
case mc of
Nothing -> identity []
Just c | p c ->
parseByte ==> /b ->
parseWhileVerbose p ==> /bs ->
identity (b:bs)
| otherwise ->
identity []
functor 有助于避免代码冗长
Rewriting Our PGM Parser
parseRawPGM =
parseWhileWith w2c notWhite ==> /header -> skipSpaces ==>&
assert (header == "P5") "invalid raw header" ==>&
parseNat ==> /width -> skipSpaces ==>&
parseNat ==> /height -> skipSpaces ==>&
parseNat ==> /maxGrey ->
parseByte ==>&
parseBytes (width * height) ==> /bitmap ->
identity (Greymap width height maxGrey bitmap)
where notWhite = (`notElem` " /r/n/t")
parseWhileWith :: (Word8 -> a) -> (a -> Bool) -> Parse [a]
parseWhileWith f p = fmap f <$> parseWhile (p . f)
parseNat :: Parse Int
parseNat = parseWhileWith w2c isDigit ==> /digits ->
if null digits
then bail "no more input"
else let n = read digits
in if n < 0
then bail "integer overflow"
else identity n
(==>&) :: Parse a -> Parse b -> Parse b
p ==>& f = p ==> /_ -> f
skipSpaces :: Parse ()
skipSpaces = parseWhileWith w2c isSpace ==>& identity ()
assert :: Bool -> String -> Parse ()
assert True _ = identity ()
assert False err = bail err
parseBytes :: Int -> Parse L.ByteString
parseBytes n =
getState ==> /st ->
let n' = fromIntegral n
(h, t) = L.splitAt n' (string st)
st' = st { offset = offset st + L.length h, string = t }
in putState st' ==>&
assert (L.length h == n') "end of input" ==>&
identity h
Future Directions
在第14 章,我们会看到使用monads 可以大大简化本章的代码。
解析PGM文件
{-# LANGUAGE FlexibleInstances #-}
module PNM where
import qualified Data.ByteString.Lazy.Char8 as L8
import qualified Data.ByteString.Lazy as L
import Data.Char (isSpace, isDigit, chr)
import Data.Int
import Data.Word
import Control.Applicative
data Greymap = Greymap {
greyWidth :: Int
, greyHeight :: Int
, greyMax :: Int
, greyData :: L.ByteString
} deriving (Eq)
instance Show Greymap where
show (Greymap w h m _) = "Greymap " ++ show w ++ "x" ++ show h ++
" " ++ show m
data ParseState = ParseState {
string :: L.ByteString
, offset :: Int64
} deriving (Show)
newtype Parse a = Parse {
runParse :: ParseState -> Either String (a, ParseState)
}
-- runParse 这个函数接受一个 Parse a ,返回一个函数ParseState -> Either String (a, ParseState)
-- Parse构造器接受一个函数ParseState -> Either String (a, ParseState),返回一个Parse a
identity :: a -> Parse a
identity a = Parse (/s -> Right (a, s))
-- 解析一个字节
-- print $ parse parseByte contents -- >Right 80
-- print $ chr 80 -- >'P' -- 回想一下,PGM 文件的头两字节是"p5"
-- runParse parser (ParseState contents 0)
-- print $ runParse parseByte (ParseState contents 0) -- >Right (80,ParseState {string = Chunk "5/n122/n116/n255/nl[KSl/210/177/159/159~ltll .. /144/ENQ]]vU" Empty, offset = 1})
-- runParse 接受一个parseByte(类型是Parse a,其中a 是Word8),返回一个函数ParseState -> Either String (Word8, ParseState)
-- 解析PGM 文件
parse :: Parse a -> L.ByteString -> Either String a
parse parser initState
= case runParse parser (ParseState initState 0) of
Left err -> Left err
Right (result, _) -> Right result
-- runParse parseRawPGM (ParseState contents 0)
-- parse parseRawPGM contents -- >Right Greymap 122x116 255
-- parseRawPGM :: Parse Greymap
-- runParse :: Parse a -> ParseState -> Either String (a, ParseState)
-- runParse parseRawPGM (ParseState contents 0) -- >Right (Greymap 122x116 255,ParseState {string = Empty, offset = 14167})
modifyOffset :: ParseState -> Int64 -> ParseState
modifyOffset initState newOffset =
initState { offset = newOffset }
parseByte :: Parse Word8
parseByte =
getState ==> /initState ->
case L.uncons (string initState) of
Nothing ->
bail "no more input"
Just (byte,remainder) ->
putState newState ==> /_ ->
identity byte
where newState = initState { string = remainder,
offset = newOffset }
newOffset = offset initState + 1
getState :: Parse ParseState
getState = Parse (/s -> Right (s, s))
putState :: ParseState -> Parse ()
putState s = Parse (/_ -> Right ((), s))
bail :: String -> Parse a
bail err = Parse $ /s -> Left $
"byte offset " ++ show (offset s) ++ ": " ++ err
(==>) :: Parse a -> (a -> Parse b) -> Parse b
firstParser ==> secondParser = Parse chainedParser
where chainedParser initState =
case runParse firstParser initState of
Left errMessage ->
Left errMessage
Right (firstResult, newState) ->
runParse (secondParser firstResult) newState
instance Functor Parse where
fmap f parser = parser ==> /result ->
identity (f result)
w2c :: Word8 -> Char
w2c = chr . fromIntegral
peekByte :: Parse (Maybe Word8)
peekByte = (fmap fst . L.uncons . string) <$> getState
peekChar :: Parse (Maybe Char)
peekChar = fmap w2c <$> peekByte
parseWhile :: (Word8 -> Bool) -> Parse [Word8]
parseWhile p = (fmap p <$> peekByte) ==> /mp ->
if mp == Just True
then parseByte ==> /b ->
(b:) <$> parseWhile p
else identity []
parseRawPGM =
parseWhileWith w2c notWhite ==> /header -> skipSpaces ==>&
assert (header == "P5") "invalid raw header" ==>&
parseNat ==> /width -> skipSpaces ==>&
parseNat ==> /height -> skipSpaces ==>&
parseNat ==> /maxGrey ->
parseByte ==>&
parseBytes (width * height) ==> /bitmap ->
identity (Greymap width height maxGrey bitmap)
where notWhite = (`notElem` " /r/n/t")
parseWhileWith :: (Word8 -> a) -> (a -> Bool) -> Parse [a]
parseWhileWith f p = fmap f <$> parseWhile (p . f)
parseNat :: Parse Int
parseNat = parseWhileWith w2c isDigit ==> /digits ->
if null digits
then bail "no more input"
else let n = read digits
in if n < 0
then bail "integer overflow"
else identity n
(==>&) :: Parse a -> Parse b -> Parse b
p ==>& f = p ==> /_ -> f
skipSpaces :: Parse ()
skipSpaces = parseWhileWith w2c isSpace ==>& identity ()
assert :: Bool -> String -> Parse ()
assert True _ = identity ()
assert False err = bail err
parseBytes :: Int -> Parse L.ByteString
parseBytes n =
getState ==> /st ->
let n' = fromIntegral n
(h, t) = L.splitAt n' (string st)
st' = st { offset = offset st + L.length h, string = t }
in putState st' ==>&
assert (L.length h == n') "end of input" ==>&
identity h
--------------------------------------------------------------------
解析一个字节
{-# LANGUAGE FlexibleInstances #-}
module PNM where
import qualified Data.ByteString.Lazy as L
import Data.Int
import Data.Word
data ParseState = ParseState {
string :: L.ByteString
, offset :: Int64
} deriving (Show)
newtype Parse a = Parse {
runParse :: ParseState -> Either String (a, ParseState)
}
-- runParse 接受一个解析器,返回解析器里面存储的函数。
-- runParse 这个函数接受一个 Parse a ,返回一个函数ParseState -> Either String (a, ParseState)
-- Parse构造器接受一个函数ParseState -> Either String (a, ParseState),返回一个Parse a
identity :: a -> Parse a
identity a = Parse (/s -> Right (a, s))
解析器里的函数是要你填的,可以使用各种不同实现。
修改记录的方法,把变量当构造器用
--foo = /initState ->initState { string = L.singleton 97 ,offset = 5 }
--print $ ParseState { string = L.singleton 80 ,offset = 2 }
--print $ foo ParseState { string = L.singleton 80 ,offset = 2 }
-- 解析一个字节
-- print $ L.uncons contents -- >Just (80,Chunk "5/n122/n116/n255/nl[KSl/210/177/159/159~ltll../144/ENQ]]vU" Empty) -- 这个例子的关键代码就是这一句
{-
-- contents <- L.readFile "c:/TOKYO.PGM"
--1 print $ parse parseByte contents -- >Right 80
--2 parse parser initState
--3 parse parseByte contents
--4 parseByte
firstParser ==> secondParser
getState ==> /initState -> ..
Parse { runParse :: (/ParseState -> Right (ParseState, ParseState)) } ==> /initState -> ..
==>
Parse chainedParser -- chainedParser 的返回值是Word8
Parse { runParse :: ParseState -> Either String (Word8, ParseState) }
case runParse firstParser initState of
case runParse ( Parse { runParse :: (/ParseState -> Right (ParseState, ParseState)) } ) initState of
case (/ParseState -> Right (ParseState, ParseState)) initState of
parse parseByte contents
case runParse parseByte (ParseState contents 0) of -- 这里要求parseByte 返回一个Parse Word8 。
case runParse (firstParser ==> secondParser) (ParseState contents 0) of -- (firstParser ==> secondParser) 的返回值是Parse chainedParser
case chainedParser (ParseState contents 0) of -- chainedParser 的返回值是Word8 -- runParse 接受一个解析器,返回解析器里面存储的函数。
case runParse firstParser initState of
case runParse getState (ParseState contents 0) of
case (/s -> Right (s, s)) (ParseState contents 0) of -- s 的类型是ParseState
Right ((ParseState contents 0), (ParseState contents 0)) ->
runParse (secondParser (ParseState contents 0)) (ParseState contents 0) -- 这里要求secondParser 返回一个解析器
secondParser
case L.uncons (string (ParseState contents 0)) of
case L.uncons contents of
Just (80,restcontents) ->
putState newState ==> secondParser
( Parse (/_ -> Right ((), newState)) ) ==> secondParser
newState
ParseState { string = restcontents ,offset initState + 1 } -- offset 取出记录的一个字段
( Parse (/_ -> Right ((), newState)) ) ==> secondParser
==>
Parse chainedParser
chainedParser
ase runParse ( Parse (/_ -> Right ((), newState)) ) (ParseState contents 0)) of
ase (/_ -> Right ((), newState)) (ParseState contents 0)) of
Right ((), newState)) ->
runParse (secondParser ()) newState
runParse ( (/_ ->identity byte) () ) newState
runParse ( (/_ ->identity 80) () ) newState
runParse ( (identity 80) newState
runParse (Parse (/s -> Right (80, s))) newState
(/s -> Right (80, s)) newState
Right (80, newState)
Right (80, ParseState { string = restcontents ,offset initState + 1 })
-}
parse :: Parse a -> L.ByteString -> Either String a
parse parser initState
= case runParse parser (ParseState initState 0) of
Left err -> Left err
Right (result, _) -> Right result
parseByte :: Parse Word8
parseByte =
getState ==> /initState ->
case L.uncons (string initState) of
Nothing ->
bail "no more input"
Just (byte,remainder) ->
putState newState ==> /_ ->
identity byte -- Parse (/s -> Right (Word8, s))
where newState = initState { string = remainder, -- 亮点,把变量当构造器用
offset = newOffset }
newOffset = offset initState + 1
getState :: Parse ParseState
getState = Parse (/s -> Right (s, s)) -- 显然s 的类型是ParseState
putState :: ParseState -> Parse ()
putState s = Parse (/_ -> Right ((), s))
bail :: String -> Parse a
bail err = Parse $ /s -> Left $
"byte offset " ++ show (offset s) ++ ": " ++ err
-- “== >” 函数先运行第一个解析器生成一个结果,然后以前面得到的结果作为参数运行第二个函数生成一个新的解析器(解析函数里面是存有最终计算结果的)
(==>) :: Parse a -> (a -> Parse b) -> Parse b
firstParser ==> secondParser = Parse chainedParser
where chainedParser initState =
case runParse firstParser initState of
Left errMessage ->
Left errMessage
Right (firstResult, newState) ->
runParse (secondParser firstResult) newState