项目功能
**遍历我所定义的规则,来处理实时读进来的文本,利用condition判断当前文本块适用哪条规则,可以用来作代码高亮处理**
模块划分
- 处理程序模块
- 过滤器模块
- 处理规则模块
- 语法分析器模块
- 补充模块
处理程序模块
功能:
1.提供固定的HTML标记的输出(start & end)
2.为标记输出的start和end提供友好的访问接口
class Handler:
    """Dispatch named markup events to methods looked up as prefix + name.

    Subclasses provide methods such as ``start_paragraph``, ``end_paragraph``
    or ``sub_emphasis``; names without a matching method are ignored.
    """

    def callback(self, prefix, name, *args):
        """Call the method named ``prefix + name`` with ``*args`` if it exists.

        Returns the method's result, or None when this object has no
        callable attribute of that name.
        """
        method = getattr(self, prefix + name, None)
        if callable(method):
            return method(*args)
        return None

    def start(self, name):
        """Invoke the ``start_<name>`` method, if any."""
        self.callback('start_', name)

    def end(self, name):
        """Invoke the ``end_<name>`` method, if any."""
        self.callback('end_', name)

    def sub(self, name):
        """Return a replacement function for ``re.sub`` bound to ``sub_<name>``.

        The returned function takes a match object and returns the string
        produced by ``sub_<name>``. When that call yields None (no such
        method, or the method returned None) the matched text is returned
        unchanged, so an unhandled filter never deletes input.
        """
        def substitution(match):
            result = self.callback('sub_', name, match)
            if result is None:
                # Fall back to the original text instead of handing None to
                # re.sub, which would drop the match from the output.
                return match.group(0)
            return result
        return substitution
class HTMLRenderer(Handler):
    """Handler that prints HTML markup for each document element.

    The ``start_*`` / ``end_*`` methods print the opening and closing tags,
    the ``sub_*`` methods return replacement markup for inline filters, and
    ``feed`` prints the block text itself.
    """

    def start_document(self):
        # The opening <body> tag must be closed with '>' to be valid HTML.
        print('<html><head><title>...</title></head><body>')

    def end_document(self):
        print('</body></html>')

    def start_paragraph(self):
        print('<p>')

    def end_paragraph(self):
        print('</p>')

    def start_heading(self):
        print('<h2>')

    def end_heading(self):
        print('</h2>')

    def start_list(self):
        print('<ul>')

    def end_list(self):
        print('</ul>')

    def start_listitem(self):
        print('<li>')

    def end_listitem(self):
        print('</li>')

    def start_title(self):
        print('<h1>')

    def end_title(self):
        print('</h1>')

    def sub_emphasis(self, match):
        """Wrap the first captured group in an <em> element."""
        return '<em>%s</em>' % match.group(1)

    def sub_url(self, match):
        """Turn the first captured group into a link to itself."""
        return '<a href="%s">%s</a>' % (match.group(1), match.group(1))

    def sub_mail(self, match):
        """Turn the first captured group into a mailto: link."""
        return '<a href="mailto:%s">%s</a>' % (match.group(1), match.group(1))

    def feed(self, data):
        """Print the block text as-is."""
        print(data)
if __name__ == '__main__':
    # Placeholder body: evaluates callable() on itself and discards the result.
    callable(callable)
过滤器模块
功能:
由正则表达式组成的过滤器
1.强调过滤器(用*号标出的)
2.url过滤器
3.email过滤器
# Emphasis: text enclosed in a pair of asterisks (non-greedy); the text is group 1.
self.addFilter(r'\*(.+?)\*', 'emphasis')
# URL: the literal "http://" followed by letters, digits, dots and slashes.
self.addFilter(r'(http://[\.a-z0-9A-Z/]+)', 'url')
# E-mail: letters/dots, an "@", then letters/dots ending in at least one letter.
self.addFilter(r'([\.a-zA-Z]+@[\.a-zA-Z]+[a-zA-Z]+)','mail')
处理规则模块
功能:
主要有两个方法,condition & action
condition:用来判断读入的字符串是否符合我们的规则
action:用来执行,调用处理程序模块,输出前标签,内容,后标签
class Rule:
    """Base class for block rules.

    The element name is read from ``self.type``; this class does not set
    that attribute itself.
    """

    def action(self, block, handler):
        """Emit the start marker, the block text and the end marker.

        Returns True to signal that no further rule should handle the block.
        """
        element = self.type
        handler.start(element)
        handler.feed(block)
        handler.end(element)
        return True
class HeadingRule(Rule):
    """Rule for headings: one line, at most 70 characters, not ending in ':'."""

    type = 'heading'

    def condition(self, block):
        """Return True when the block looks like a heading."""
        if '\n' in block:
            return False
        if len(block) > 70:
            return False
        return block[-1] != ':'
class TitleRule(HeadingRule):
    """Rule for the title: only the first block checked can qualify."""

    type = 'title'
    # True until condition() has been called once on this instance.
    first = True

    def condition(self, block):
        """Apply the heading test to the first block seen; reject all later ones."""
        if self.first:
            # The flag is cleared whether or not the heading test succeeds.
            self.first = False
            return HeadingRule.condition(self, block)
        return False
class ListItemRule(Rule):
    """Rule for list items: blocks whose first character is a hyphen."""

    type = 'listitem'

    def condition(self, block):
        """Return True when the block starts with '-'."""
        leading = block[0]
        return leading == '-'

    def action(self, block, handler):
        """Emit the item with its leading hyphen and surrounding whitespace removed."""
        item_text = block[1:].strip()
        # Same start/feed/end sequence as the base rule, on the trimmed text.
        return Rule.action(self, item_text, handler)
class ListRule(ListItemRule):
    """Rule that opens and closes the list surrounding consecutive list items."""

    type = 'list'
    # True while the most recent blocks were list items.
    inside = False

    def condition(self, block):
        """Accept every block so that action() can track list boundaries."""
        return True

    def action(self, block, handler):
        """Emit the list start or end marker when entering or leaving a run of items.

        Always returns False so that later rules still process the block.
        """
        is_item = ListItemRule.condition(self, block)
        if is_item and not self.inside:
            handler.start(self.type)
            self.inside = True
        elif self.inside and not is_item:
            handler.end(self.type)
            self.inside = False
        return False
class ParagraphRule(Rule):
    """Catch-all rule: every block is accepted as a paragraph."""

    type = 'paragraph'

    def condition(self, block):
        """Accept any block."""
        return True
语法分析器模块
功能:
协调读入的文本和其他模块的关系,提供了两个存放规则和过滤器的列表,使得规则和过滤器可以动态改变。
import sys, re
from Handler import *
from uilts import *
from rules import *
class Parser:
    """Read blocks of text, apply the filters, then run the rules on each block.

    Rules and filters are kept in lists so they can be added at run time.
    """

    def __init__(self, handler):
        self.handler = handler
        self.rules = []
        self.filters = []

    def addRule(self, rule):
        """Append a rule; rules are tried in the order they were added."""
        self.rules.append(rule)

    def addFilter(self, pattern, name):
        """Append a filter that rewrites matches of ``pattern`` via ``handler.sub(name)``."""
        self.filters.append(
            lambda block, handler: re.sub(pattern, handler.sub(name), block)
        )

    def parse(self, file):
        """Process every block of ``file`` between the document start and end markers."""
        self.handler.start('document')
        for block in blocks(file):
            for apply_filter in self.filters:
                block = apply_filter(block, self.handler)
            for rule in self.rules:
                # Stop at the first matching rule whose action returns a true value.
                if rule.condition(block) and rule.action(block, self.handler):
                    break
        self.handler.end('document')
class BasicTextParser(Parser):
    """Parser preloaded with the list, title, heading and paragraph rules
    and the emphasis, URL and e-mail filters."""

    def __init__(self, handler):
        Parser.__init__(self, handler)
        # Order matters: rules are tried in the order they are registered.
        for rule_class in (ListRule, ListItemRule, TitleRule,
                           HeadingRule, ParagraphRule):
            self.addRule(rule_class())
        inline_filters = (
            (r'\*(.+?)\*', 'emphasis'),
            (r'(http://[\.a-z0-9A-Z/]+)', 'url'),
            (r'([\.a-zA-Z]+@[\.a-zA-Z]+[a-zA-Z]+)', 'mail'),
        )
        for pattern, name in inline_filters:
            self.addFilter(pattern, name)
# Script entry: convert the text on standard input to HTML, printed by the renderer.
# Renderer whose start_/end_/sub_ methods produce the HTML markup.
handler = HTMLRenderer()
# Parser preloaded with the default rules and filters.
parser = BasicTextParser(handler)
# Runs unconditionally (there is no __main__ guard here).
parser.parse(sys.stdin)
补充模块
def line(file):
    """Yield every item of ``file`` in order, followed by one extra '\\n'."""
    for text in file:
        yield text
    # Trailing sentinel: a final newline after the last real line.
    yield '\n'
def blocks(file):
    """Yield each run of consecutive non-blank lines as a single stripped string.

    ``file`` is any iterable of lines. Lines that are empty or whitespace-only
    separate blocks and are never part of one. The lines of a block are
    concatenated and the result is stripped of leading/trailing whitespace.
    """
    block = []
    for line in file:
        if line.strip():
            block.append(line)
        elif block:
            yield ''.join(block).strip()
            block = []
    # Flush the last block: without this, input that does not end with a
    # blank line would silently lose its final block.
    if block:
        yield ''.join(block).strip()