def body(x):
    """Wrap *x* in a minimal UTF-8 HTML document."""
    return '<html><head><meta charset="UTF-8"></head><body>%s</body></html>' % x


def h1(x):
    """Level-1 heading."""
    return f'<h1>{x}</h1>'


def h2(x):
    """Level-2 heading."""
    return f'<h2>{x}</h2>'


def mark(x):
    """Highlighted text."""
    return f'<mark>{x}</mark>'


def pre(x):
    """Preformatted text."""
    return '<pre>%s</pre>' % x


def font(x, s=5, c='red'):
    """Text in a <font> tag with size *s* and color *c*."""
    return '<font size="%d" color="%s">%s</font>' % (s, c, x)


def href(x, url):
    """Hyperlink with label *x* that opens *url* in a new window."""
    return f'<a href="{url}" target="_blank">{x}</a>'


# Horizontal rule (separator).
hr = '\n<hr>\n'


def table(x):
    """Bordered table around the already-rendered rows *x*."""
    return '<table border="1">%s</table>' % x


def tr(x):
    """Table row around the already-rendered cells *x*."""
    return '<tr>%s</tr>' % x


def th(x):
    """Header cell with a blue background."""
    return '<th bgcolor="#66B3FF">%s</th>' % x


def td(x):
    """Plain data cell."""
    return f'<td>{x}</td>'


def colspan(x, i):
    """Data cell spanning *i* columns."""
    return '<td colspan="%d">%s</td>' % (i, x)


def rowspan(x, i):
    """Data cell spanning *i* rows."""
    return '<td rowspan="%d">%s</td>' % (i, x)


def ls2td(ls):
    """Render the items of *ls* as one table row of data cells."""
    return tr(''.join(map(td, ls)))


def ls2th(ls):
    """Render the items of *ls* as one table row of header cells."""
    return tr(''.join(map(th, ls)))
嵌套列表 -> 表格
极简版
def make_tb(ls_of_ls):
    """Render a nested list as an HTML table.

    Each inner list of *ls_of_ls* becomes one row and each of its items one
    data cell. Returns the complete ``<table>`` markup as a string.
    """
    return table(''.join(ls2td(row) for row in ls_of_ls))
# Demo: a two-row, two-column table.
lss = [
    ['笑菊花', '深扣菊花舔指笑,菊花一闪误终身'],
    ['菊花红', '接天莲叶无穷碧,硬日菊花别样红'],
]
print(make_tb(lss))
笑菊花
深扣菊花舔指笑,菊花一闪误终身
菊花红
接天莲叶无穷碧,硬日菊花别样红
+列名
def make_tb(ls_of_ls, fields):
    """Render a nested list as an HTML table with a header row.

    Args:
        ls_of_ls: iterable of rows; each row is an iterable of cell values.
        fields: column names, rendered as header cells in the first row.

    Returns:
        The complete ``<table>`` markup as a string.
    """
    _tb = ''.join(ls2td(row) for row in ls_of_ls)
    _th = ls2th(fields)  # header row
    return table(_th + _tb)
# Demo: the same table with column names.
lss = [
    ['笑菊花', '深扣菊花舔指笑,菊花一闪误终身'],
    ['菊花红', '接天莲叶无穷碧,硬日菊花别样红'],
]
print(make_tb(lss, ['title', 'text']))
title
text
笑菊花
深扣菊花舔指笑,菊花一闪误终身
菊花红
接天莲叶无穷碧,硬日菊花别样红
字典 -> 跨行表格
列数=2
def make_tb(dt_of_ls, fields):
    """Render a dict of lists as a two-column table with row-spanning keys.

    Args:
        dt_of_ls: mapping of key -> list of values. Each value gets its own
            row; the key is emitted once, in the first of those rows, as a
            cell spanning ``len(values)`` rows.
        fields: column names for the header row.

    Returns:
        The complete ``<table>`` markup as a string. A key whose list is
        empty produces no rows, so it does not appear in the table.
    """
    _th = ls2th(fields)  # header row
    _tb = ''.join(
        # Only the first row of a group carries the key cell.
        tr((rowspan(k, len(v)) if i == 0 else '') + td(item))
        for k, v in dt_of_ls.items()
        for i, item in enumerate(v)
    )
    return table(_th + _tb)
# Demo: '华为' maps to an empty list and is therefore absent from the output.
dts = {'苹果': ['苹果醋'], '华为': [], '小米': ['小米粥', '小米蛋', '小米肠']}
print(make_tb(dts, ['title', 'article']))
title
article
苹果
苹果醋
小米
小米粥
小米蛋
小米肠
列数>2
def make_tb(dt_of_lss, fields):
    """Render a dict of row-lists as a multi-column table with row-spanning keys.

    Args:
        dt_of_lss: mapping of key -> list of rows, where each row is an
            iterable of cell values. The key is emitted once, in the first
            row of its group, as a cell spanning ``len(rows)`` rows.
        fields: column names for the header row.

    Returns:
        The complete ``<table>`` markup as a string. A key whose list is
        empty produces no rows, so it does not appear in the table.
    """
    _th = ls2th(fields)  # header row
    _tb = ''.join(
        # Only the first row of a group carries the key cell.
        tr((rowspan(k, len(v)) if i == 0 else '') + ''.join(td(j) for j in row))
        for k, v in dt_of_lss.items()
        for i, row in enumerate(v)
    )
    return table(_th + _tb)
# Demo: text -> list of (phrase, word) rows; the empty entry is skipped.
dtss = {
    '用小米机买小米粥和苹果汁': [('小米机', '小米'), ('小米粥', '小米'), ('苹果汁', '苹果')],
    '买华为送大米': [],
    '买大米送苹果醋': [('苹果醋', '苹果')],
}
print(make_tb(dtss, ['text', 'phrase', 'word']))
text
phrase
word
用小米机买小米粥和苹果汁
小米机
小米
小米粥
小米
苹果汁
苹果
买大米送苹果醋
苹果醋
苹果
邻接表(树形结构) -> 跨行表
| 名称 | 说明 | 例子 |
| --- | --- | --- |
| 线性结构 | 有序数据元素的集合 | 栈、队列、一维数组 |
| 非线性结构 | 其逻辑特征是一个结点元素可能有多个直接前驱和多个直接后继 | 树、图、多维数组 |
| 树形结构 | 数据元素之间存在着【一对多】的树形关系 | 二叉树 |
| 邻接表 | 存储方法跟树的孩子链表示法相类似,是一种顺序分配和链式分配相结合的存储结构 | |
def make_tree(adjacency_list):
    """Build a tree, its depth levels and per-node row counts.

    Args:
        adjacency_list: iterable of ``(node, parent_node)`` pairs. A
            ``parent_node`` of ``None`` attaches the node to the synthetic
            ``'root'`` node.

    Returns:
        A ``(tree, depth, rows)`` tuple:

        * ``tree``  - parent -> set of child nodes (always has ``'root'``).
        * ``depth`` - level -> set of nodes, where a node's level is
          ``len(node) // 2`` and ``'root'`` sits at level 0.
        * ``rows``  - node -> number of table rows it spans: 1 for a node
          without children, otherwise the sum over its children.

    Note:
        Row counts are filled in from the deepest level upwards, so every
        child must be at a deeper level (have a longer code) than its
        parent; otherwise the lookup of the child's count raises KeyError.
    """
    tree = {'root': set()}
    depth: dict[int, set[str]] = {0: {'root'}}
    for node, parent_node in adjacency_list:
        # Tree: nodes without a parent hang off the synthetic root.
        if parent_node is None:
            parent_node = 'root'
        tree.setdefault(parent_node, set()).add(node)
        # Depth: two characters of the node code per level.
        depth.setdefault(len(node) // 2, set()).add(node)

    # Row counts, deepest level first so children are known before parents.
    rows: dict[str, int] = {}
    for lvl in sorted(depth, reverse=True):
        for node in depth[lvl]:
            if node in tree:
                rows[node] = sum(rows[n] for n in tree[node])
            else:
                rows[node] = 1
    return tree, depth, rows
def make_tb(tree, depth, rows):
    """Render a tree as an HTML table in which parents span their children's rows.

    Args:
        tree: parent -> collection of child nodes; must be reachable from
            ``'root'``.
        depth: level -> nodes; only its sorted keys are used, as the header.
        rows: node -> rowspan to emit for that node.

    Returns:
        The ``<table>`` markup as a string. Also prints the inputs and the
        traversal order to stdout.
    """

    def order(node: str) -> list:
        """Return *node* followed by its descendants, pre-order, children sorted."""
        ls = [node]
        for child_node in sorted(tree.get(node, [])):
            ls.extend(order(child_node))
        return ls

    # Header: one column per depth level.
    columns = sorted(depth.keys())
    columns_th = ls2th(columns)
    # Body: cells accumulate in the current row until a leaf closes it.
    tr_ls = ['']
    order_ls = order('root')
    for n in order_ls:
        tr_ls[-1] += rowspan(n, rows[n])
        # Close the row on a node without children. Testing rows[n] == 1
        # instead would also match an inner node whose only child is a leaf
        # and split that chain across several rows.
        if not tree.get(n):
            tr_ls[-1] = tr(tr_ls[-1])
            tr_ls.append('')
    # Debug dump kept from the original demo.
    print(tree, depth, rows, order_ls, sep='\n')
    return table(columns_th + '\n' + '\n'.join(t for t in tr_ls if t))


if __name__ == '__main__':
    _adjacency_list = (
        ('44', None),
        ('4403', '44'),
        ('440305', '4403'),
        ('440306', '4403'),
        ('4406', '44'),
        ('440604', '4406'),
        ('440605', '4406'),
        ('440606', '4406'),
        ('440607', '4406'),
        ('440608', '4406'),
        ('45', None),
        ('4501', '45'),
        ('450102', '4501'),
        ('450103', '4501'),
    )
    print(make_tb(*make_tree(_adjacency_list)))
NLP版(NER)
from jieba import tokenize
def replace_word(clause, word, head, tail):
    """Return *clause* with the span ``[head:tail]`` replaced by highlighted *word*."""
    return clause[:head] + mark(word) + clause[tail:]


def make_tb(dt_of_lss, fields):
    """Render a dict of row-lists as a table with row-spanning keys.

    Args:
        dt_of_lss: mapping of key -> list of rows (iterables of cell values).
            The key is emitted once per group as a cell spanning all of the
            group's rows; keys with an empty list produce no rows.
        fields: column names for the header row.

    Returns:
        The complete ``<table>`` markup as a string.
    """
    _th = ls2th(fields)  # header row
    _tb = ''.join(
        tr((rowspan(k, len(v)) if i == 0 else '') + ''.join(td(j) for j in row))
        for k, v in dt_of_lss.items()
        for i, row in enumerate(v)
    )
    return table(_th + _tb)


def ner(texts, entities):
    """Find entity words in each text and highlight them in their clause.

    Args:
        texts: iterable of strings; each is split into clauses on ``','``.
        entities: container of words to recognise.

    Returns:
        dict mapping each text to a list of ``[highlighted_clause, word]``
        pairs, one per token of the text that is in *entities* (an empty
        list when none match).
    """
    dt_of_lss = dict()
    for text in texts:
        dt_of_lss[text] = []
        for clause in text.split(','):  # split into clauses
            # tokenize yields each token with its start/end offsets
            for word, head, tail in tokenize(clause):
                if word in entities:  # NER hit
                    dt_of_lss[text].append(
                        [replace_word(clause, word, head, tail), word])
    return dt_of_lss
# Demo: recognise two entity words in three texts and render the hits.
dtss = ner(['买小米机,送了袋小米和苹果', '诺基亚', '买华为送苹果'], {'小米', '苹果'})
print(make_tb(dtss, ['text', 'phrase', 'word']))
def body(x):
    """Wrap *x* in a minimal UTF-8 HTML document."""
    return '<html><head><meta charset="UTF-8"></head><body>%s</body></html>' % x


def h1(x):
    """Level-1 heading."""
    return f'<h1>{x}</h1>'


def h2(x):
    """Level-2 heading."""
    return f'<h2>{x}</h2>'


def mark(x):
    """Highlighted text."""
    return f'<mark>{x}</mark>'


def pre(x):
    """Preformatted text."""
    return '<pre>%s</pre>' % x


def font(x, s=5, c='red'):
    """Text in a <font> tag with size *s* and color *c*."""
    return '<font size="%d" color="%s">%s</font>' % (s, c, x)


def href(x, url):
    """Hyperlink with label *x* that opens *url* in a new window."""
    return f'<a href="{url}" target="_blank">{x}</a>'


# Horizontal rule (separator).
hr = '\n<hr>\n'


def table(x):
    """Bordered table around the already-rendered rows *x*."""
    return '<table border="1">%s</table>' % x


def tr(x):
    """Table row around the already-rendered cells *x*."""
    return '<tr>%s</tr>' % x


def th(x):
    """Header cell with a blue background."""
    return '<th bgcolor="#66B3FF">%s</th>' % x


def td(x):
    """Plain data cell."""
    return f'<td>{x}</td>'


def colspan(x, i):
    """Data cell spanning *i* columns."""
    return '<td colspan="%d">%s</td>' % (i, x)


def rowspan(x, i):
    """Data cell spanning *i* rows."""
    return '<td rowspan="%d">%s</td>' % (i, x)


def ls2td(ls):
    """Render the items of *ls* as one table row of data cells."""
    return tr(''.join(map(td, ls)))
def ls2th(ls):
    """Render the items of *ls* as one table row of header cells."""
    return tr(''.join(th(x) for x in ls))


def make_tb(ls_of_ls):
    """Render a nested list as an HTML table.

    Each inner list of *ls_of_ls* becomes one row and each of its items one
    data cell. Returns the complete ``<table>`` markup as a string.
    """
    return table(''.join(tr(''.join(td(i) for i in ls)) for ls in ls_of_ls))
# Demo for the header-less table, then a separator line.
lss = [
    ['笑菊花', '深扣菊花舔指笑,菊花一闪误终身'],
    ['菊花红', '接天莲叶无穷碧,硬日菊花别样红'],
]
print(make_tb(lss))
print('-' * 99)


def make_tb(ls_of_ls, fields):
    """Render a nested list as an HTML table with a header row.

    Args:
        ls_of_ls: iterable of rows; each row is an iterable of cell values.
        fields: column names, rendered as header cells in the first row.

    Returns:
        The complete ``<table>`` markup as a string.
    """
    _tb = ''.join(tr(''.join(td(i) for i in ls)) for ls in ls_of_ls)
    _th = tr(''.join(th(i) for i in fields))  # header row
    return table(_th + _tb)
# Demo for the table with column names, then a separator line.
lss = [
    ['笑菊花', '深扣菊花舔指笑,菊花一闪误终身'],
    ['菊花红', '接天莲叶无穷碧,硬日菊花别样红'],
]
print(make_tb(lss, ['title', 'text']))
print('-' * 99)


def make_tb(dt_of_ls, fields):
    """Render a dict of lists as a two-column table with row-spanning keys.

    Args:
        dt_of_ls: mapping of key -> list of values. Each value gets its own
            row; the key is emitted once, in the first of those rows, as a
            cell spanning ``len(values)`` rows.
        fields: column names for the header row.

    Returns:
        The complete ``<table>`` markup as a string. A key whose list is
        empty produces no rows, so it does not appear in the table.
    """
    _th = tr(''.join(th(i) for i in fields))  # header row
    _tb = ''.join(
        # Only the first row of a group carries the key cell.
        tr((rowspan(k, len(v)) if i == 0 else '') + td(item))
        for k, v in dt_of_ls.items()
        for i, item in enumerate(v)
    )
    return table(_th + _tb)
# Demo for the two-column rowspan table, then a separator line.
dts = {'苹果': ['苹果醋'], '华为': [], '小米': ['小米粥', '小米蛋', '小米肠']}
print(make_tb(dts, ['title', 'article']))
print('-' * 99)


def make_tb(dt_of_lss, fields):
    """Render a dict of row-lists as a multi-column table with row-spanning keys.

    Args:
        dt_of_lss: mapping of key -> list of rows, where each row is an
            iterable of cell values. The key is emitted once, in the first
            row of its group, as a cell spanning ``len(rows)`` rows.
        fields: column names for the header row.

    Returns:
        The complete ``<table>`` markup as a string. A key whose list is
        empty produces no rows, so it does not appear in the table.
    """
    _th = tr(''.join(th(i) for i in fields))  # header row
    _tb = ''.join(
        # Only the first row of a group carries the key cell.
        tr((rowspan(k, len(v)) if i == 0 else '') + ''.join(td(j) for j in row))
        for k, v in dt_of_lss.items()
        for i, row in enumerate(v)
    )
    return table(_th + _tb)
# Demo for the multi-column rowspan table, then a separator line.
dtss = {
    '用小米机买小米粥和苹果汁': [('小米机', '小米'), ('小米粥', '小米'), ('苹果汁', '苹果')],
    '买华为送大米': [],
    '买大米送苹果醋': [('苹果醋', '苹果')],
}
print(make_tb(dtss, ['text', 'phrase', 'word']))
print('-' * 99)
from jieba import tokenize
def replace_word(clause, word, head, tail):
    """Return *clause* with the span ``[head:tail]`` replaced by highlighted *word*."""
    return clause[:head] + mark(word) + clause[tail:]


def make_tb(dt_of_lss, fields):
    """Render a dict of row-lists as a table with row-spanning keys.

    Args:
        dt_of_lss: mapping of key -> list of rows (iterables of cell values).
            The key is emitted once per group as a cell spanning all of the
            group's rows; keys with an empty list produce no rows.
        fields: column names for the header row.

    Returns:
        The complete ``<table>`` markup as a string.
    """
    _th = tr(''.join(th(i) for i in fields))  # header row
    _tb = ''.join(
        tr((rowspan(k, len(v)) if i == 0 else '') + ''.join(td(j) for j in row))
        for k, v in dt_of_lss.items()
        for i, row in enumerate(v)
    )
    return table(_th + _tb)


def ner(texts, entities):
    """Find entity words in each text and highlight them in their clause.

    Args:
        texts: iterable of strings; each is split into clauses on ``','``.
        entities: container of words to recognise.

    Returns:
        dict mapping each text to a list of ``[highlighted_clause, word]``
        pairs, one per token of the text that is in *entities* (an empty
        list when none match).
    """
    dt_of_lss = dict()
    for text in texts:
        dt_of_lss[text] = []
        for clause in text.split(','):  # split into clauses
            # tokenize yields each token with its start/end offsets
            for word, head, tail in tokenize(clause):
                if word in entities:  # NER hit
                    dt_of_lss[text].append(
                        [replace_word(clause, word, head, tail), word])
    return dt_of_lss
# Demo: recognise two entity words in three texts and render the hits.
dtss = ner(['买小米机,送了袋小米和苹果', '诺基亚', '买华为送苹果'], {'小米', '苹果'})
print(make_tb(dtss, ['text', 'phrase', 'word']))