思路(流程图):
代码(基于正规式和有限自动机一):
正规式和有限自动机一:https://blog.youkuaiyun.com/Willy__QI/article/details/102981220
from queue import Queue
from GrammarToNFA import GrammarToNFA
def set_Closure(node_list, statusNum, char='E'):
    """Return the NFA nodes reachable from ``node_list`` by following ``char``.

    With the default ``char='E'`` this computes the epsilon closure: the
    input nodes themselves, followed by every node that can be reached by
    repeatedly stepping to a successor whose ``pathChar`` is ``'E'``.

    For any other ``char`` it computes the one-step move set: the successors
    of the input nodes whose ``pathChar`` equals ``char``.  The input nodes
    themselves are not included in that case.

    Args:
        node_list: Nodes to start from.  Each node must expose ``stateNum``,
            ``nextNodes`` and ``pathChar``.  The list is never modified.
        statusNum: Number of NFA states.  Kept only so existing callers keep
            working; visited states are tracked in a set, so this value is
            not needed to size a lookup table.
        char: Label to follow; ``'E'`` stands for epsilon.

    Returns:
        A new list of nodes in discovery order.  A successor is added at
        most once.
    """
    if char != 'E':
        # One-step move: collect matching direct successors only.
        reached = []
        for pre_node in node_list:
            for suf_node in pre_node.nextNodes:
                if suf_node.pathChar == char and suf_node not in reached:
                    reached.append(suf_node)
        return reached

    # Work on a copy so the caller's list is not extended behind its back.
    closure = list(node_list)
    visited = set()  # state numbers that have already been expanded
    pending = Queue()
    for node in node_list:
        if node.stateNum in visited:
            continue
        # Breadth-first walk over epsilon successors starting at this node.
        pending.put(node)
        while not pending.empty():
            pre_node = pending.get()
            if pre_node.stateNum in visited:
                continue
            visited.add(pre_node.stateNum)
            for suf_node in pre_node.nextNodes:
                if suf_node.pathChar != char:
                    continue
                if suf_node not in closure:
                    closure.append(suf_node)
                if suf_node.stateNum not in visited:
                    pending.put(suf_node)
    return closure
def exist(dfaNode_divide, status_set):
    """Check whether the DFA partition already holds the given state set.

    A stored group matches when it has as many nodes as ``status_set`` has
    entries and every node's ``stateNum`` occurs in ``status_set``.

    Args:
        dfaNode_divide: Mapping from a DFA state id to a list of NFA nodes
            (each exposing ``stateNum``).
        status_set: Collection of NFA state numbers to look for.

    Returns:
        ``[True, key]`` with the id of the first matching group, or
        ``[False]`` when no group matches.
    """
    wanted_len = len(status_set)
    # Build the lookup set once instead of scanning the list for every node.
    wanted = set(status_set)
    for key, nodes in dfaNode_divide.items():
        if len(nodes) == wanted_len and all(n.stateNum in wanted for n in nodes):
            return [True, key]
    return [False]
def in_finalstate_set(tail_state, dfaNode_set_char):
    """Tell whether a group of NFA nodes contains the accepting state.

    Used to decide whether a DFA state belongs to the final-state group,
    in preparation for DFA minimisation.

    Args:
        tail_state: State number of the NFA's accepting state.
        dfaNode_set_char: Iterable of nodes, each exposing ``stateNum``.

    Returns:
        ``True`` if any node's ``stateNum`` equals ``tail_state``,
        otherwise ``False``.
    """
    # Stops at the first match; no intermediate list is needed.
    return any(node.stateNum == tail_state for node in dfaNode_set_char)
def NfaToDfa(li):  # li = [NFAStack.peek(), gnm.gsn.statusNum + 1, char_list]
    """Convert an NFA into a DFA with the subset construction.

    Args:
        li: Three-element sequence.  ``li[0]`` is the NFA object exposing
            ``headNode`` and ``tailNode``; ``li[1]`` is passed to
            ``set_Closure`` as the state count; ``li[2]`` is the iterable of
            input characters.

    Returns:
        ``[dfaNode_divide, M, final_state, non_final_state]`` where
        ``dfaNode_divide`` maps each DFA state id to its list of NFA nodes,
        ``M`` is the transition matrix (one row per DFA state in creation
        order, one column per character, ``-1`` meaning no transition), and
        the last two items list the ids of accepting and non-accepting DFA
        states.
    """
    nfa = li[0]
    accept_num = nfa.tailNode.stateNum
    next_id = 0  # incremented whenever a new DFA state is discovered
    final_state = []  # final-state group
    non_final_state = []  # non-final-state group
    M = []  # state transition matrix

    # The start state is the epsilon closure of the NFA head node.
    initial = set_Closure([nfa.headNode], li[1])
    if in_finalstate_set(accept_num, initial):
        final_state.append(next_id)
    else:
        non_final_state.append(next_id)
    dfaNode_divide = {next_id: initial}

    work = Queue()
    work.put(initial)
    while not work.empty():
        current = work.get()
        row = []  # one row of the transition matrix
        for symbol in li[2]:
            # Move on the symbol first, then take the epsilon closure.
            moved = set_Closure(current, li[1], symbol)
            target = set_Closure(moved, li[1])
            if not target:
                row.append(-1)  # no state reachable on this symbol
                continue

            numbers = [node.stateNum for node in target]
            found = exist(dfaNode_divide, numbers)  # [exist_flag, key]
            if found[0]:
                row.append(found[1])
                continue

            # Unseen subset: register it and queue it for expansion.
            next_id += 1
            dfaNode_divide[next_id] = target
            row.append(next_id)
            if accept_num in numbers:
                final_state.append(next_id)
            else:
                non_final_state.append(next_id)
            work.put(target)
        M.append(row)
    return [dfaNode_divide, M, final_state, non_final_state]
def main():
    """Read a regular expression, build its DFA and print every DFA state.

    Each DFA state is printed as the NFA state numbers it contains, wrapped
    in braces and followed by a blank line.
    """
    regExp = input("请输入正则式:")
    nfa_info = GrammarToNFA().ToNFA(regExp)
    subsets = NfaToDfa(nfa_info)[0]
    for members in subsets.values():
        # Every state number is followed by one space, as in "{0 1 3 }".
        numbers = "".join(str(node.stateNum) + " " for node in members)
        print("{" + numbers + "}\n")


if __name__ == '__main__':
    main()
测试:
使用集合中最小的状态代表整个集合,即: