哈夫曼树的构建和编码
1. 哈夫曼树编码的 Python 代码实现
'''
huffman编码
'''
import copy
import heapq
class Node:
    """One node of a binary (Huffman) tree.

    Attributes:
        name: label stored on the node.
        weight: weight stored on the node.
        left: left child, ``None`` until assigned.
        right: right child, ``None`` until assigned.
        father: parent node, ``None`` until assigned.
    """

    def __init__(self, name, weight):
        """Create an unlinked node carrying ``name`` and ``weight``."""
        self.name = name
        self.weight = weight
        self.left = None
        self.right = None
        self.father = None

    def is_left_child(self):
        """Return True if this node is the ``left`` child of its father.

        A node without a father is nobody's child, so False is returned
        rather than failing with AttributeError on ``None.left``.
        """
        father = self.father
        # Identity, not equality: we want "is this very object the left child".
        return father is not None and father.left is self
def create_prim_nodes(data_set, labels):
    """Build one ``Node`` per (label, weight) pair.

    Args:
        data_set: weights, one per label, in the same order as ``labels``.
        labels: names for the nodes.

    Returns:
        A list of ``Node(label, weight)`` objects in input order.

    Raises:
        ValueError: if ``data_set`` and ``labels`` differ in length.
    """
    if len(data_set) != len(labels):
        # ValueError is a subclass of Exception, so callers that catch
        # Exception keep working; the message text is unchanged.
        raise ValueError('数据和标签不匹配!')
    return [Node(label, weight) for label, weight in zip(labels, data_set)]
def create_HF_tree(nodes):
    """Link ``nodes`` into a Huffman tree and return its root.

    The two lowest-weight nodes are repeatedly merged under a new
    ``Node(None, left.weight + right.weight)`` until one node remains.
    The first node taken becomes the left child, the second the right.
    The ``nodes`` sequence itself is not modified, but the node objects
    get their ``father`` attribute set.

    Args:
        nodes: nodes exposing ``weight`` and ``father`` attributes.

    Returns:
        The root node; its ``father`` is reset to ``None``.

    Raises:
        IndexError: if ``nodes`` is empty.
    """
    # Heap entries are (weight, insertion order, node).  The order field
    # breaks weight ties exactly like a stable sort of "remaining nodes +
    # newly appended node" would, and keeps nodes from being compared.
    heap = [(node.weight, order, node) for order, node in enumerate(nodes)]
    if not heap:
        raise IndexError('cannot build a Huffman tree from an empty node list')
    heapq.heapify(heap)
    next_order = len(heap)

    while len(heap) > 1:
        new_left = heapq.heappop(heap)[2]
        new_right = heapq.heappop(heap)[2]
        new_node = Node(None, new_left.weight + new_right.weight)
        new_node.left = new_left
        new_node.right = new_right
        new_left.father = new_right.father = new_node
        heapq.heappush(heap, (new_node.weight, next_order, new_node))
        next_order += 1

    root = heap[0][2]
    # A lone input node may carry a stale father link; the root has none.
    root.father = None
    return root
def get_huffman_code(nodes):
    """Return a ``{name: code}`` mapping for every node in ``nodes``.

    For each node the ``father`` links are followed upwards until a node
    without a father is reached.  Each step contributes '0' when the
    current node is its father's left child and '1' otherwise; the bits
    are gathered bottom-up and reversed so the code reads root-to-node.

    Args:
        nodes: iterable of objects exposing ``name``, ``father`` and
            ``is_left_child()``.

    Returns:
        A dict mapping each node's ``name`` to its bit string.  A node
        with no father maps to the empty string.  If two nodes share a
        name, the later one overwrites the earlier one.
    """
    codes = {}
    for leaf in nodes:
        bits = []
        current = leaf
        while current.father is not None:
            bits.append('0' if current.is_left_child() else '1')
            current = current.father
        # Bits were collected leaf-to-root; the code is read root-to-leaf.
        bits.reverse()
        codes[leaf.name] = ''.join(bits)
    return codes
if __name__ == '__main__':
    # Demo: six words and their weights, encoded and printed one per line.
    labels = ['我', '喜欢', '观看', '巴西', '足球', '世界杯']
    data_set = [15, 8, 6, 5, 3, 1]

    nodes = create_prim_nodes(data_set, labels)
    # Building the tree sets the father links that the encoder walks;
    # the returned root is not used further in this demo.
    root = create_HF_tree(nodes)
    codes = get_huffman_code(nodes)

    for key, code in codes.items():
        print(key, ': ', code)
2. 哈夫曼树编码模型的图解说明

- 哈夫曼编码的实现原理是:按词语在文本中出现的次数(权重)构建一棵二叉树,出现次数越高的词语离根节点越近;然后从根节点自上而下,按"左 0 右 1"(也可以约定为"左 1 右 0",上面的代码采用左 0 右 1)依次拼接,得到每个叶子节点(词语)的编码。
- 哈夫曼编码的意义在于:词语在文本中出现的次数越高,其编码越短,从而节约存储空间。
- 文章中出现的所有词语都是叶子节点。非叶子节点(图中黄色节点)的权重是其两个子节点(即被合并的低频节点)的权重之和。
- 在每一轮循环中,先按权重排序,取出权重最小的两个节点(可能是最低频的叶子节点,也可能是之前合并得到的权重总和节点);将二者的权重相加生成一个新节点并放回,再重新排序,继续循环,直到只剩下一个根节点。