Real World Haskell - Chapter 10. Code Case Study: Parsing a Binary Data Format

本文介绍如何使用Haskell解析PGM图像文件格式。通过构建状态解析器，并利用Functor概念简化代码，实现对二进制图像数据的有效解析。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

Chapter 10. Code Case Study: Parsing a Binary Data Format

Grayscale Files

(==>) 函数会创建一个闭包，如“(+5)”就是一个闭包。

functor 可以使得我们的代码更tidy，更expressive 。

functor 可以避免去复制代码。

functor 有助于避免代码冗长。

PMG 图像有两种格式，p2(ASCII编码) 和p5(多为二进制编码) 。

文件头是这样的：一个string("p2" 或 "p5") + 一个whitespace + 三个数字(分别表示宽、高，和最大灰度值) ，并且中间用whitespace 分隔。

文件头后是image data，可能是二进制数据(raw file)，也可能是ASCII 数字(plain file)，并以单个空格分开。

raw file 在单个文件中可以包含多个图像，每一个都有自已的文件头。plain file 只包含单个图像。

Parsing a Raw PGM File

我们将使用ByteString 来存储graymap 数据，因为PGM 的文件头是文本，而body 是二进制数据，而ByteString 既可用于text，也可用于binary。

这个例子中，lazy 或strict 的ByteString 都适用，任选一种都可以，这里用lazy 的。

通常，Haskell 的Show 实会产生一个String 表示，而且可以用read 从这个String 重建那个对象。

parseP5 会返回一个解析出的Greymap 和剩余ByteString，供将来解析(因为一个raw file 可以有多个图像)

module PNM () where

import qualified Data.ByteString.Lazy.Char8 as L8

import qualified Data.ByteString.Lazy as L

import Data.Char (isSpace)

data Greymap = Greymap {

greyWidth :: Int

, greyHeight :: Int

, greyMax :: Int

, greyData :: L.ByteString

} deriving (Eq)

阻上编译器自动继承Show 实例，因为图像数据非常巨大。

instance Show Greymap where -- 亮点，自已实现Show 的实例，不打印过大的字段

show (Greymap w h m _) = "Greymap " ++ show w ++ "x" ++ show h ++

" " ++ show m

matchHeader :: L.ByteString -> L.ByteString -> Maybe L.ByteString

matchHeader prefix str

| prefix `L8.isPrefixOf` str

= Just (L8.dropWhile isSpace (L.drop (L.length prefix) str))

| otherwise

= Nothing

getNat :: L.ByteString -> Maybe (Int, L.ByteString)

getNat s = case L8.readInt s of

Nothing -> Nothing

Just (num,rest)

| num <= 0 -> Nothing

| otherwise -> Just (fromIntegral num, rest)

getBytes :: Int -> L.ByteString -> Maybe (L.ByteString, L.ByteString)

getBytes n str = let count = fromIntegral n

both@(prefix,_) = L.splitAt count str

in if L.length prefix < count

then Nothing

else Just both

parseP5 :: L.ByteString -> Maybe (Greymap, L.ByteString)

parseP5 s =

case matchHeader (L8.pack "P5") s of

Nothing -> Nothing

Just s1 ->

case getNat s1 of

Nothing -> Nothing

Just (width, s2) ->

case getNat (L8.dropWhile isSpace s2) of

Nothing -> Nothing

Just (height, s3) ->

case getNat (L8.dropWhile isSpace s3) of

Nothing -> Nothing

Just (maxGrey, s4)

| maxGrey > 255 -> Nothing

| otherwise ->

case getBytes 1 s4 of

Nothing -> Nothing

Just (_, s5) ->

case getBytes (width * height) s5 of

Nothing -> Nothing

Just (bitmap, s6) ->

Just (Greymap width height maxGrey bitmap, s6)

parseAllP5 :: L.ByteString -> [Greymap]

parseAllP5 s = case parseP5_take2 s of

Nothing -> []

Just (g, s') -> g : parseAllP5 s'

Getting Rid of Boilerplate Code

如果(>>?) 函数左边得到的不是Nothing 就将它作为参数传给右边的函数，否则就什么也不做。使用这个操作符可以将函数链起来。

(>>?) :: Maybe a -> (a -> Maybe b) -> Maybe b

Nothing >>? _ = Nothing

Just v >>? f = f v

解析函数第二次尝试

parseP5_take2 :: L.ByteString -> Maybe (Greymap, L.ByteString)

parseP5_take2 s =

matchHeader (L8.pack "P5") s >>?

/s -> skipSpace ((), s) >>?

(getNat . snd) >>?

skipSpace >>?

/(width, s) -> getNat s >>?

skipSpace >>?

/(height, s) -> getNat s >>?

/(maxGrey, s) -> getBytes 1 s >>?

(getBytes (width * height) . snd) >>?

/(bitmap, s) -> Just (Greymap width height maxGrey bitmap, s)

skipSpace :: (a, L.ByteString) -> Maybe (a, L.ByteString)

skipSpace (a, s) = Just (a, L8.dropWhile isSpace s)

Implicit State

data ParseState = ParseState {

string :: L.ByteString

, offset :: Int64 -- imported from Data.Int

} deriving (Show)

simpleParse :: ParseState -> (a, ParseState)

simpleParse = undefined

这个解析器能够报告错误

betterParse :: ParseState -> Either String (a, ParseState)

betterParse = undefined

为了不把内部实现暴露给用户所以用newtype包装一下

newtype Parse a = Parse {

runParse :: ParseState -> Either String (a, ParseState)

}

newtype 只是编译时封装一个函数，所以没有运行时overhead 。当我们想要使用那个函数，就用runParser 这个访问器。

不暴露Parse 的值构造器可以确保没有人会意外的创建一个解析器，也不能通过模式匹配检查其内部。

The Identity Parser

identity :: a -> Parse a

identity a = Parse (/s -> Right (a, s))

parse :: Parse a -> L.ByteString -> Either String a

parse parser initState

= case runParse parser (ParseState initState 0) of

Left err -> Left err

Right (result, _) -> Right result

Record Syntax, Updates, and Pattern Matching

记录语法比访问器更有用，我们可以用它拷贝或部份改变已存在的值

更新记录字段的值的方法

modifyOffset :: ParseState -> Int64 -> ParseState

modifyOffset initState newOffset =

initState { offset = newOffset }

ghci> let before = ParseState (L8.pack "foo") 0

ghci> let after = modifyOffset before 3

ghci> before -- 亮点，查看前一个结果

ParseState {string = Chunk "foo" Empty, offset = 0}

ghci> after -- 亮点，查看后一个结果

ParseState {string = Chunk "foo" Empty, offset = 3}

A More Interesting Parser

解析一个字节

----------------------------------------------------------------------------------------------

-- contents <- L.readFile "c:/TOKYO.PGM"

-- print $ parse parseByte contents -- >Right 80

-- print $ chr 80 -- >'P' -- 回想一下，PGM 文件的头两字节是"p5"

----------------------------------------------------------------------------------------------

parseByte :: Parse Word8

parseByte =

getState ==> /initState ->

case L.uncons (string initState) of

Nothing ->

bail "no more input"

Just (byte,remainder) ->

putState newState ==> /_ ->

identity byte

where newState = initState { string = remainder,

offset = newOffset }

newOffset = offset initState + 1

getState :: Parse ParseState

getState = Parse (/s -> Right (s, s))

putState :: ParseState -> Parse ()

putState s = Parse (/_ -> Right ((), s))

bail :: String -> Parse a

bail err = Parse $ /s -> Left $

"byte offset " ++ show (offset s) ++ ": " ++ err

(==>) :: Parse a -> (a -> Parse b) -> Parse b

firstParser ==> secondParser = Parse chainedParser

where chainedParser initState =

case runParse firstParser initState of

Left errMessage ->

Left errMessage

Right (firstResult, newState) ->

runParse (secondParser firstResult) newState

L8.uncons 从ByteString 中取一个元素

ghci> L8.uncons (L8.pack "foo")

Just ('f',Chunk "oo" Empty)

ghci> L8.uncons L8.empty

Nothing

Obtaining and Modifying the Parse State

bail :: String -> Parse a

bail err = Parse $ /s -> Left $

"byte offset " ++ show (offset s) ++ ": " ++ err

Chaining Parsers Together

(==>) :: Parse a -> (a -> Parse b) -> Parse b

firstParser ==> secondParser = Parse chainedParser

where chainedParser initState =

case runParse firstParser initState of

Left errMessage ->

Left errMessage

Right (firstResult, newState) ->

runParse (secondParser firstResult) newState

(==>) 函数会创建一个闭包，如“(+5)”就是一个闭包。

Introducing Functors

fmap 是泛型map ，针对不同的类型实现不同的fmap 。

map (1+) [1,2,3] -- >[2,3,4]

map (+2) [1,2,3] -- >[3,4,5]

map-like activity 在其他例子中也很有用。例如考虑一个二叉树：

data Tree a = Node (Tree a) (Tree a)

| Leaf a

deriving (Show)

如果我们希望将一颗strings 树映射成一颗strings length 树(既原来每个结点string 用它的长度替换)，我们可以写这样的函数：

treeLengths (Leaf s) = Leaf (length s)

treeLengths (Node l r) = Node (treeLengths l) (treeLengths r)

改写一下，使其更具通用性

treeMap :: (a -> b) -> Tree a -> Tree b

treeMap f (Leaf a) = Leaf (f a)

treeMap f (Node l r) = Node (treeMap f l) (treeMap f r)

参数是作用于结点的函数和一颗树。

两个函数输出是一样的

let tree = Node (Leaf "foo") (Node (Leaf "x") (Leaf "quux"))

print $ treeLengths tree -- >Node (Leaf 3) (Node (Leaf 1) (Leaf 4))

print $ treeMap length tree -- >Node (Leaf 3) (Node (Leaf 1) (Leaf 4))

print $ treeMap (odd . length) tree -- >Node (Leaf True) (Node (Leaf True) (Leaf False)) -- 亮点，复合函数的妙用

Haskell 提供了一个名为Functor 的著名typeclass ，完成类似treeMap 的功能。

Functor 这个typeclass 提供了一个fmap 函数(位于GHC.Base 库)

class Functor f where

fmap :: (a -> b) -> f a -> f b

树的fmap 实现

instance Functor Tree where

fmap = treeMap

这里一定义就暴露给外部了，不需要导出。

-- fmap length (Node (Leaf "Livingstone") (Leaf "I presume")) -- >Node (Leaf 11) (Leaf 9)

列表的fmap 实现(位于标准库)

instance Functor [] where

fmap = map

Mabe 的fmap 实现(位于标准库)

instance Functor Maybe where

fmap _ Nothing = Nothing

fmap f (Just x) = Just (f x)

fmap (+1) [1,2,3] -- >[2,3,4]

map (+1) [1,2,3] -- >[2,3,4]

-- fmap 相比map 有什么特别的地方？

Functor 的实例只能是那些恰好只有一个类型参数的类型，Either a b 或 (a, b) 有两个类型参数所以不能用来实例化Functor ，Bool 或Int 也不行，因为它们没有类型参数。

data Foo a = Foo a

instance Functor Foo where

fmap f (Foo a) = Foo (f a)

出错：类型参数不能存在某种约束，例如这里的a 明确指定为Eq 的实例

data Eq a => Bar a = Bar a

instance Functor Bar where

fmap f (Bar a) = Bar (f a)

Constraints on Type Definitions Are Bad

准则：不要在类型的参数上加限制，如果有需要就在函数的参数上加。

为一个类型定义加以限制永远不是一个好主意。它会强制你在所有操作那个类型的函数上加入同样的限制。

假设我们需要一个栈，我们需要查看其元素是否符合特定的ordering 。

数据结构：栈

-- a 被限制为有序集的元素

data (Ord a) => OrdStack a = Bottom

| Item a (OrdStack a)

deriving (Show)

我们写了一个函数，此函数检查这个栈是否为递增的(上面的元素总比下面的元素大)：

isIncreasing :: (Ord a) => OrdStack a -> Bool

isIncreasing (Item a rest@(Item b _))

| a < b = isIncreasing rest

| otherwise = False

isIncreasing _ = True

push 函数并不需要a 的类型限制，但是如果去掉这个限制就会导至编译错误

push :: (Ord a) => a -> OrdStack a -> OrdStack a

push a s = Item a s

这个例子中应保持isIncreasing 函数的参数a 为ord 这个限制(失去它就不能用“<” 运算符)，其它限制全部去掉。

准则：不要在类型的参数上加限制，如果有需要就在函数的参数上加。

多数Haskell 容器类型遵循这一准则。

Infix Use of fmap

“<$>” 操作符是fmap 的别名。

将fmap 作为操作符来用(为了少写括号？)，而map 函数几乎从来不这么用的

ghci> (1+) `fmap` [1,2,3] ++ [4,5,6]

[2,3,4,4,5,6]

ghci> fmap (1+) ([1,2,3] ++ [4,5,6])

[2,3,4,5,6,7]

如果你真的想将fmap 函数用作操作符，Control.Applicative 模块提供了一个“<$>” 操作符，“<$>” 操作符是fmap 的别名。

Flexible Instances

你可能希望写一个Functor 的实例，这个实例以Either Int b 为实例参数(注意了，其类型参数只有b 一个)

-- {-# LANGUAGE FlexibleInstances #-} -- 必须加入这个编译选项

instance Functor (Either Int) where

fmap _ (Left n) = Left n

fmap f (Right r) = Right (f r)

fmap (== "cheeseburger") (Left 1 :: Either Int String) -- >Left 1

fmap (== "cheeseburger") (Right "fries" :: Either Int String) -- >Right False

注意了，因为已存在类似定义会与Control.Monad.Instances 发生overlap 冲突。加入{-# LANGUAGE OverlappingInstances #-} 编译选项可以使得当发生多匹配时使得编译器选择最特殊的那一个。

-- instance Functor (Either a) -- Defined in Control.Monad.Instances

Thinking More About Functors

准则一：将fmap id 应用于一个值，就必须能够返回这个值的identical

fmap id (Node (Leaf "a") (Leaf "b")) -- >Node (Leaf "a") (Leaf "b") -- 亮点，id 函数

准则二：functors 必须能够复合

(fmap even . fmap length) (Just "twelve") -- >Just True

fmap (even . length) (Just "twelve") -- >Just True

fmap odd (Just 1) -- >Just True

fmap odd Nothing -- >Nothing

Writing a Functor Instance for Parse

instance Functor Parse where

fmap f parser = parser ==> /result ->

identity (f result)

-- <$> 是famp

ghci> parse parseByte L.empty

Left "byte offset 0: no more input"

ghci> parse (id <$> parseByte) L.empty

Left "byte offset 0: no more input"

ghci> let input = L8.pack "foo"

ghci> L.head input

102

ghci> parse parseByte input

Right 102

ghci> parse (id <$> parseByte) input

Right 102

ghci> parse ((chr . fromIntegral) <$> parseByte) input

Right 'f'

ghci> parse (chr <$> fromIntegral <$> parseByte) input

Right 'f'

Using Functors for Parsing

functors 可以使得我们的代码更tidy，更expressive 。

回想一下前面的parseByte ，现在我们想要解析ASCII 而不只是Word8 值。

虽然我们可以写parseChar ，但它的结构和parseByte 是很相似的。现在我们可以利用functor 来避免去复制代码。

使用chr 函数将Int 转成Char

-- chr :: Int -> Char -- Defined in GHC.Base

--import Data.Char (chr)

w2c :: Word8 -> Char

w2c = chr . fromIntegral

-- import Control.Applicative

peekByte :: Parse (Maybe Word8)

peekByte = (fmap fst . L.uncons . string) <$> getState

-- peekByte 返回Nothing 如果已是input string 的end，否则返回下一个字符，但是没有consuming 它。

peekChar :: Parse (Maybe Char)

peekChar = fmap w2c <$> peekByte

-- 类似于takeWhile 函数

parseWhile :: (Word8 -> Bool) -> Parse [Word8]

parseWhile p = (fmap p <$> peekByte) ==> /mp ->

if mp == Just True

then parseByte ==> /b ->

(b:) <$> parseWhile p

else identity []

-- 这是不使用functor 的版本，较冗长

parseWhileVerbose p =

peekByte ==> /mc ->

case mc of

Nothing -> identity []

Just c | p c ->

parseByte ==> /b ->

parseWhileVerbose p ==> /bs ->

identity (b:bs)

| otherwise ->

identity []

functor 有助于避免代码冗长

Rewriting Our PGM Parser

parseRawPGM =

parseWhileWith w2c notWhite ==> /header -> skipSpaces ==>&

assert (header == "P5") "invalid raw header" ==>&

parseNat ==> /width -> skipSpaces ==>&

parseNat ==> /height -> skipSpaces ==>&

parseNat ==> /maxGrey ->

parseByte ==>&

parseBytes (width * height) ==> /bitmap ->

identity (Greymap width height maxGrey bitmap)

where notWhite = (`notElem` " /r/n/t")

parseWhileWith :: (Word8 -> a) -> (a -> Bool) -> Parse [a]

parseWhileWith f p = fmap f <$> parseWhile (p . f)

parseNat :: Parse Int

parseNat = parseWhileWith w2c isDigit ==> /digits ->

if null digits

then bail "no more input"

else let n = read digits

in if n < 0

then bail "integer overflow"

else identity n

(==>&) :: Parse a -> Parse b -> Parse b

p ==>& f = p ==> /_ -> f

skipSpaces :: Parse ()

skipSpaces = parseWhileWith w2c isSpace ==>& identity ()

assert :: Bool -> String -> Parse ()

assert True _ = identity ()

assert False err = bail err

parseBytes :: Int -> Parse L.ByteString

parseBytes n =

getState ==> /st ->

let n' = fromIntegral n

(h, t) = L.splitAt n' (string st)

st' = st { offset = offset st + L.length h, string = t }

in putState st' ==>&

assert (L.length h == n') "end of input" ==>&

identity h

Future Directions

在第14 章，我们会看到使用monads 可以大大简化本章的代码。

解析PGM文件

{-# LANGUAGE FlexibleInstances #-}

module PNM where

import qualified Data.ByteString.Lazy.Char8 as L8

import qualified Data.ByteString.Lazy as L

import Data.Char (isSpace, isDigit, chr)

import Data.Int

import Data.Word

import Control.Applicative

data Greymap = Greymap {

greyWidth :: Int

, greyHeight :: Int

, greyMax :: Int

, greyData :: L.ByteString

} deriving (Eq)

instance Show Greymap where

show (Greymap w h m _) = "Greymap " ++ show w ++ "x" ++ show h ++

" " ++ show m

data ParseState = ParseState {

string :: L.ByteString

, offset :: Int64

} deriving (Show)

newtype Parse a = Parse {

runParse :: ParseState -> Either String (a, ParseState)

}

-- runParse 这个函数接受一个 Parse a ，返回一个函数ParseState -> Either String (a, ParseState)

-- Parse构造器接受一个函数ParseState -> Either String (a, ParseState)，返回一个Parse a

identity :: a -> Parse a

identity a = Parse (/s -> Right (a, s))

-- 解析一个字节

-- print $ parse parseByte contents -- >Right 80

-- print $ chr 80 -- >'P' -- 回想一下，PGM 文件的头两字节是"p5"

-- runParse parser (ParseState contents 0)

-- print $ runParse parseByte (ParseState contents 0) -- >Right (80,ParseState {string = Chunk "5/n122/n116/n255/nl[KSl/210/177/159/159~ltll .. /144/ENQ]]vU" Empty, offset = 1})

-- runParse 接受一个parseByte(类型是Parse a，其中a 是Word8)，返回一个函数ParseState -> Either String (Word8, ParseState)

-- 解析PGM 文件

parse :: Parse a -> L.ByteString -> Either String a

parse parser initState

= case runParse parser (ParseState initState 0) of

Left err -> Left err

Right (result, _) -> Right result

-- runParse parseRawPGM (ParseState contents 0)

-- parse parseRawPGM contents -- >Right Greymap 122x116 255

-- parseRawPGM :: Parse Greymap

-- runParse :: Parse a -> ParseState -> Either String (a, ParseState)

-- runParse parseRawPGM (ParseState contents 0) -- >Right (Greymap 122x116 255,ParseState {string = Empty, offset = 14167})

modifyOffset :: ParseState -> Int64 -> ParseState

modifyOffset initState newOffset =

initState { offset = newOffset }

parseByte :: Parse Word8

parseByte =

getState ==> /initState ->

case L.uncons (string initState) of

Nothing ->

bail "no more input"

Just (byte,remainder) ->

putState newState ==> /_ ->

identity byte

where newState = initState { string = remainder,

offset = newOffset }

newOffset = offset initState + 1

getState :: Parse ParseState

getState = Parse (/s -> Right (s, s))

putState :: ParseState -> Parse ()

putState s = Parse (/_ -> Right ((), s))

bail :: String -> Parse a

bail err = Parse $ /s -> Left $

"byte offset " ++ show (offset s) ++ ": " ++ err

(==>) :: Parse a -> (a -> Parse b) -> Parse b

firstParser ==> secondParser = Parse chainedParser

where chainedParser initState =

case runParse firstParser initState of

Left errMessage ->

Left errMessage

Right (firstResult, newState) ->

runParse (secondParser firstResult) newState

instance Functor Parse where

fmap f parser = parser ==> /result ->

identity (f result)

w2c :: Word8 -> Char

w2c = chr . fromIntegral

peekByte :: Parse (Maybe Word8)

peekByte = (fmap fst . L.uncons . string) <$> getState

peekChar :: Parse (Maybe Char)

peekChar = fmap w2c <$> peekByte

parseWhile :: (Word8 -> Bool) -> Parse [Word8]

parseWhile p = (fmap p <$> peekByte) ==> /mp ->

if mp == Just True

then parseByte ==> /b ->

(b:) <$> parseWhile p

else identity []

parseRawPGM =

parseWhileWith w2c notWhite ==> /header -> skipSpaces ==>&

assert (header == "P5") "invalid raw header" ==>&

parseNat ==> /width -> skipSpaces ==>&

parseNat ==> /height -> skipSpaces ==>&

parseNat ==> /maxGrey ->

parseByte ==>&

parseBytes (width * height) ==> /bitmap ->

identity (Greymap width height maxGrey bitmap)

where notWhite = (`notElem` " /r/n/t")

parseWhileWith :: (Word8 -> a) -> (a -> Bool) -> Parse [a]

parseWhileWith f p = fmap f <$> parseWhile (p . f)

parseNat :: Parse Int

parseNat = parseWhileWith w2c isDigit ==> /digits ->

if null digits

then bail "no more input"

else let n = read digits

in if n < 0

then bail "integer overflow"

else identity n

(==>&) :: Parse a -> Parse b -> Parse b

p ==>& f = p ==> /_ -> f

skipSpaces :: Parse ()

skipSpaces = parseWhileWith w2c isSpace ==>& identity ()

assert :: Bool -> String -> Parse ()

assert True _ = identity ()

assert False err = bail err

parseBytes :: Int -> Parse L.ByteString

parseBytes n =

getState ==> /st ->

let n' = fromIntegral n

(h, t) = L.splitAt n' (string st)

st' = st { offset = offset st + L.length h, string = t }

in putState st' ==>&

assert (L.length h == n') "end of input" ==>&

identity h

--------------------------------------------------------------------

解析一个字节

{-# LANGUAGE FlexibleInstances #-}

module PNM where

import qualified Data.ByteString.Lazy as L

import Data.Int

import Data.Word

data ParseState = ParseState {

string :: L.ByteString

, offset :: Int64

} deriving (Show)

newtype Parse a = Parse {

runParse :: ParseState -> Either String (a, ParseState)

}

-- runParse 接受一个解析器，返回解析器里面存储的函数。

-- runParse 这个函数接受一个 Parse a ，返回一个函数ParseState -> Either String (a, ParseState)

-- Parse构造器接受一个函数ParseState -> Either String (a, ParseState)，返回一个Parse a

identity :: a -> Parse a

identity a = Parse (/s -> Right (a, s))

解析器里的函数是要你填的，可以使用各种不同实现。

修改记录的方法，把变量当构造器用

--foo = /initState ->initState { string = L.singleton 97 ,offset = 5 }

--print $ ParseState { string = L.singleton 80 ,offset = 2 }

--print $ foo ParseState { string = L.singleton 80 ,offset = 2 }

-- 解析一个字节

-- print $ L.uncons contents -- >Just (80,Chunk "5/n122/n116/n255/nl[KSl/210/177/159/159~ltll../144/ENQ]]vU" Empty) -- 这个例子的关键代码就是这一句

-- contents <- L.readFile "c:/TOKYO.PGM"

--1 print $ parse parseByte contents -- >Right 80

--2 parse parser initState

--3 parse parseByte contents

--4 parseByte

firstParser ==> secondParser

getState ==> /initState -> ..

Parse { runParse :: (/ParseState -> Right (ParseState, ParseState)) } ==> /initState -> ..

==>

Parse chainedParser -- chainedParser 的返回值是Word8

Parse { runParse :: ParseState -> Either String (Word8, ParseState) }

case runParse firstParser initState of

case runParse ( Parse { runParse :: (/ParseState -> Right (ParseState, ParseState)) } ) initState of

case (/ParseState -> Right (ParseState, ParseState)) initState of

parse parseByte contents

case runParse parseByte (ParseState contents 0) of -- 这里要求parseByte 返回一个Parse Word8 。

case runParse (firstParser ==> secondParser) (ParseState contents 0) of -- (firstParser ==> secondParser) 的返回值是Parse chainedParser

case chainedParser (ParseState contents 0) of -- chainedParser 的返回值是Word8 -- runParse 接受一个解析器，返回解析器里面存储的函数。

case runParse firstParser initState of

case runParse getState (ParseState contents 0) of

case (/s -> Right (s, s)) (ParseState contents 0) of -- s 的类型是ParseState

Right ((ParseState contents 0), (ParseState contents 0)) ->

runParse (secondParser (ParseState contents 0)) (ParseState contents 0) -- 这里要求secondParser 返回一个解析器

secondParser

case L.uncons (string (ParseState contents 0)) of

case L.uncons contents of

Just (80,restcontents) ->

putState newState ==> secondParser

( Parse (/_ -> Right ((), newState)) ) ==> secondParser

newState

ParseState { string = restcontents ,offset initState + 1 } -- offset 取出记录的一个字段

( Parse (/_ -> Right ((), newState)) ) ==> secondParser

==>

Parse chainedParser

chainedParser

ase runParse ( Parse (/_ -> Right ((), newState)) ) (ParseState contents 0)) of

ase (/_ -> Right ((), newState)) (ParseState contents 0)) of

Right ((), newState)) ->

runParse (secondParser ()) newState

runParse ( (/_ ->identity byte) () ) newState

runParse ( (/_ ->identity 80) () ) newState

runParse ( (identity 80) newState

runParse (Parse (/s -> Right (80, s))) newState

(/s -> Right (80, s)) newState

Right (80, newState)

Right (80, ParseState { string = restcontents ,offset initState + 1 })

parse :: Parse a -> L.ByteString -> Either String a

parse parser initState

= case runParse parser (ParseState initState 0) of

Left err -> Left err

Right (result, _) -> Right result

parseByte :: Parse Word8

parseByte =

getState ==> /initState ->

case L.uncons (string initState) of

Nothing ->

bail "no more input"

Just (byte,remainder) ->

putState newState ==> /_ ->

identity byte -- Parse (/s -> Right (Word8, s))

where newState = initState { string = remainder, -- 亮点，把变量当构造器用

offset = newOffset }

newOffset = offset initState + 1

getState :: Parse ParseState

getState = Parse (/s -> Right (s, s)) -- 显然s 的类型是ParseState

putState :: ParseState -> Parse ()

putState s = Parse (/_ -> Right ((), s))

bail :: String -> Parse a

bail err = Parse $ /s -> Left $

"byte offset " ++ show (offset s) ++ ": " ++ err

-- “== >” 函数先运行第一个解析器生成一个结果，然后以前面得到的结果作为参数运行第二个函数生成一个新的解析器(解析函数里面是存有最终计算结果的)

(==>) :: Parse a -> (a -> Parse b) -> Parse b

firstParser ==> secondParser = Parse chainedParser

where chainedParser initState =

case runParse firstParser initState of

Left errMessage ->

Left errMessage

Right (firstResult, newState) ->

runParse (secondParser firstResult) newState