在上节讨论里我们介绍了数据行流式操作的设想,主要目的是把后台数据库的数据载入前端内存再拆分为强类型的数据行,这样我们可以对每行数据进行使用和处理。形象点描述就是对内存里的一个数据流(data-stream)进行逐行操作。我们在上节用foreach模拟了一个流控来示范数据行的操作处理。在这节我们讨论一下用scalaz-stream-fs2作为数据流管理工具来实现FunDA的数据行流动管理功能。fs2的Stream是一种自然的拉取型(pull-model)数据流。而fs2的Pipe类型则像是管道的阀门(valve),我们可以在Pipe里截获流动中的数据行。我们看看下面的fs2 Stream例子:
/** Builds a pass-through pipe that prints every row as "<prompt>> <row>" and re-emits it unchanged. */
def log[ROW](prompt: String): Pipe[Task,ROW,ROW] = rows =>
  rows.evalMap { row =>
    Task.delay {
      println(s"$prompt> $row")
      row
    }
  }
//> log: [ROW](prompt: String)fs2.Pipe[fs2.Task,ROW,ROW]
// Demo: send the range through the logging pipe and run the stream for its effects.
Stream.range(1,5)
  .through(log(""))
  .run
  .unsafeRun                                      //> > 1
//| > 2
//| > 3
//| > 4
按照FunDA设计要求:从后台数据库中读取数据、载入内存然后逐行进行处理,那么我们可以用这个Pipe类型来实现数据的逐行处理,包括控制数据流动以及任意插入一些自定义数据元素。下面我们就试试通过定义Pipe类型的不同功能来实现行数据处理:
/** Pipe that forwards rows downstream until a row equal to 3 arrives.
  * The row equal to 3 is not emitted; the pull simply ends there.
  */
def stopOn3[ROW]: Pipe[Task,ROW,ROW] = source => {
  // Receive one row at a time and decide whether to keep pulling.
  def loop: Handle[Task,ROW] => Pull[Task,ROW,Unit] =
    _.receive1Option {
      case None                        => Pull.done                          // input exhausted
      case Some((row, _)) if 3 == row  => Pull.done                          // stop without emitting
      case Some((row, rest))           => Pull.output1(row) >> loop(rest)    // emit, then continue
    }
  source.pull(loop)
}                                                 //> stopOn3: [ROW]=> fs2.Pipe[fs2.Task,ROW,ROW]
// Demo: log rows on both sides of stopOn3 to see where the flow stops.
Stream(4,2,9,3,8,1)
  .through(log("before")).through(stopOn3).through(log("after"))
  .run.unsafeRun                                  //> before> 4
//| after> 4
//| before> 2
//| after> 2
//| before> 9
//| after> 9
//| before> 3
// Data-processing pipeline: a stream of ROW values evaluated in Task.
type FDAPipeLine[ROW] = Stream[Task,ROW]
// Work node: a pipe that takes ROW values in and sends ROW values out.
type FDAWorkNode[ROW] = Pipe[Task,ROW,ROW]
// Pipeline valve: the handle from which rows inside the pipeline are obtained.
type FDAValve[ROW] = Handle[Task,ROW]
// Pipe joint (connector): a pull that may output ROW values and results in Unit.
type FDAPipeJoint[ROW] = Pull[Task,ROW,Unit]
/** Library-provided: stops the data flow by ending the current pull. */
def fda_haltFlow: Pull[Nothing,Nothing,Nothing] =
  Pull.done                                       //> fda_haltFlow: => fs2.Pull[Nothing,Nothing,Nothing]
/** Library-provided: sends a single ROW downstream.
  *
  * @param row the row to output
  */
def fda_sendRow[ROW](row: ROW): Pull[Nothing,ROW,Unit] =
  Pull.output1(row)                               //> fda_sendRow: [ROW](row: ROW)fs2.Pull[Nothing,ROW,Unit]
/** Library-provided: processes the current row by running the user-supplied function `wf`.
  *
  * Each received row is handed to `wf`; the pull it returns is sequenced before
  * the next row is received. When the input is exhausted the flow is halted.
  *
  * @param wf user function turning one row into a pipe joint
  * @return a work node that applies `wf` row by row
  */
def fda_doWork[ROW](wf: ROW => FDAPipeJoint[ROW]): FDAWorkNode[ROW] = {
  def loop: FDAValve[ROW] => FDAPipeJoint[ROW] =
    _.receive1Option {
      case None               => fda_haltFlow                // no more rows
      case Some((row, valve)) => wf(row) >> loop(valve)      // run user work, then continue
    }
  source => source.pull(loop)
}                                                 //> fda_doWork: [ROW](wf: ROW => demo.ws.FDAPipe.FDAPipeJoint[ROW])demo.ws.FDAPipe.FDAWorkNode[ROW]
/** Sample user-supplied row function: halts the flow on a row equal to 3,
  * otherwise sends the row downstream.
  */
def breakOn3[ROW]: ROW => FDAPipeJoint[ROW] = row =>
  if (3 != row) fda_sendRow(row)
  else fda_haltFlow                               //> breakOn3: [ROW]=> ROW => demo.ws.FDAPipe.FDAPipeJoint[ROW]
// Test run: the same data as before, now driven through fda_doWork with breakOn3.
Stream(4,2,9,3,8,1)
  .through(log("before")).through(fda_doWork(breakOn3)).through(log("after"))
  .run.unsafeRun                                  //> before> 4
//| after> 4
//| before> 2
//| after> 2
//| before> 9
//| after> 9
//| before> 3
import fs2._
object FDAPipe {
/** Builds a pass-through pipe that prints every row as "<prompt>> <row>" and re-emits it unchanged. */
def log[ROW](prompt: String): Pipe[Task,ROW,ROW] = rows =>
  rows.evalMap { row =>
    Task.delay {
      println(s"$prompt> $row")
      row
    }
  }
// Demo: send the range through the logging pipe and run the stream for its effects.
Stream.range(1,5)
  .through(log(""))
  .run
  .unsafeRun
/** Pipe that forwards rows downstream until a row equal to 3 arrives.
  * The row equal to 3 is not emitted; the pull simply ends there.
  */
def stopOn3[ROW]: Pipe[Task,ROW,ROW] = source => {
  // Receive one row at a time and decide whether to keep pulling.
  def loop: Handle[Task,ROW] => Pull[Task,ROW,Unit] =
    _.receive1Option {
      case None                        => Pull.done                          // input exhausted
      case Some((row, _)) if 3 == row  => Pull.done                          // stop without emitting
      case Some((row, rest))           => Pull.output1(row) >> loop(rest)    // emit, then continue
    }
  source.pull(loop)
}
// Demo: log rows on both sides of stopOn3 to see where the flow stops.
Stream(4,2,9,3,8,1)
  .through(log("before")).through(stopOn3).through(log("after"))
  .run.unsafeRun
// Data-processing pipeline: a stream of ROW values evaluated in Task.
type FDAPipeLine[ROW] = Stream[Task,ROW]
// Work node: a pipe that takes ROW values in and sends ROW values out.
type FDAWorkNode[ROW] = Pipe[Task,ROW,ROW]
// Pipeline valve: the handle from which rows inside the pipeline are obtained.
type FDAValve[ROW] = Handle[Task,ROW]
// Pipe joint (connector): a pull that may output ROW values and results in Unit.
type FDAPipeJoint[ROW] = Pull[Task,ROW,Unit]
/** Library-provided: stops the data flow by ending the current pull. */
def fda_haltFlow: Pull[Nothing,Nothing,Nothing] =
  Pull.done
/** Library-provided: sends a single ROW downstream.
  *
  * @param row the row to output
  */
def fda_sendRow[ROW](row: ROW): Pull[Nothing,ROW,Unit] =
  Pull.output1(row)
/** Library-provided: processes the current row by running the user-supplied function `wf`.
  *
  * Each received row is handed to `wf`; the pull it returns is sequenced before
  * the next row is received. When the input is exhausted the flow is halted.
  *
  * @param wf user function turning one row into a pipe joint
  * @return a work node that applies `wf` row by row
  */
def fda_doWork[ROW](wf: ROW => FDAPipeJoint[ROW]): FDAWorkNode[ROW] = {
  def loop: FDAValve[ROW] => FDAPipeJoint[ROW] =
    _.receive1Option {
      case None               => fda_haltFlow                // no more rows
      case Some((row, valve)) => wf(row) >> loop(valve)      // run user work, then continue
    }
  source => source.pull(loop)
}
/** User-supplied row function: halts the flow on a row equal to 3,
  * otherwise sends the row downstream.
  */
def breakOn3[ROW]: ROW => FDAPipeJoint[ROW] = row =>
  if (3 != row) fda_sendRow(row)
  else fda_haltFlow
// Test run: the same data as before, now driven through fda_doWork with breakOn3.
Stream(4,2,9,3,8,1)
  .through(log("before")).through(fda_doWork(breakOn3)).through(log("after"))
  .run.unsafeRun