--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Walk every (key, value) pair of `contents`, echo it, then show the tokens
# that `vocab.topk_tokens` picks (3 requested) from the whitespace-split first
# field of the value, sorted and frozen into a tuple.
for k, v in contents.items():
    print(k, '------------', v)
    tokens = v[0].split()
    top_tokens = vocab.topk_tokens(tokens, 3)
    frequent_token = tuple(sorted(top_tokens))
    print(frequent_token)
0 ------------ ('LDAP Built with OpenLDAP LDAP / SDK', '/:=@')
('Built', 'LDAP', 'OpenLDAP')
1 ------------ ('LDAP SSL support unavailable', ':')
('LDAP', 'SSL', 'support')
2 ------------ ('suEXEC mechanism enabled lili wrapper /usr/sbin/suexec', '()/:')
('lili', 'mechanism', 'wrapper')
3 ------------ ('Digest generating secret for digest authentication ...', '.:')
('Digest', 'digest', 'for')
4 ------------ ('Digest done', ':')
('Digest', 'done')
5 ------------ ('LDAP Built with OpenLDAP LMAP SDK', ':')
('Built', 'LDAP', 'OpenLDAP')
6 ------------ ('LDAP SSL support available', ':')
('LDAP', 'SSL', 'support')
7 ------------ ('Factory error creating channel.jni jni channel.jni jni', '().:')
('channel.jni', 'error', 'jni')
8 ------------ ("config.update Can't create avalibe hannel.jni jni", "'().:")
('avalibe', 'create', 'jni')
frequent_token : ('LDAP', 'OpenLDAP', 'with')
log_format : /:=@
1,6
0,5
# Group bookkeeping keyed by the frequent-token tuple.
if frequent_token in hierichical_clusters:
    # Tuple already seen: only the running count is bumped.
    # NOTE(review): `k` is not added to the "cluster" mapping on this path -
    # confirm whether that is handled elsewhere or is intentional.
    group = hierichical_clusters[frequent_token]
    group["size"] = group["size"] + 1
else:
    # First sighting: start a group of size 1 whose "cluster" maps the
    # current log_format to a list holding k.
    hierichical_clusters[frequent_token] = {"size": 1, "cluster": {log_format: [k]}}
When the frequent-token tuple already exists, only its size is incremented by 1.
222222222222222222222------------hierichical_clusters------------2222222222222222222222222222
('Built', 'LDAP', 'with')
{('Built', 'LDAP', 'with'): {'size': 1, 'cluster': {'/:=@': [0]}}}
333333333333333333333---------erichical_clusters-------------------33333333333333333333333333333
('Built', 'LDAP', 'with')
{('Built', 'LDAP', 'with'): {'size': 2, 'cluster': {'/:=@': [0]}}, ('LDAP', 'SSL', 'support'): {'size': 1, 'cluster': {':': [1]}}, ('lili', 'mechanism', 'suEXEC'): {'size': 1, 'cluster': {'()/:': [2]}}, ('Digest', 'digest', 'generating'): {'size': 1, 'cluster': {'.:': [3]}}, ('Digest', 'done'): {'size': 1, 'cluster': {':': [4]}}}
222222222222222222222------------hierichical_clusters------------2222222222222222222222222222
('LDAP', 'SSL', 'support')
{('Built', 'LDAP', 'with'): {'size': 1, 'cluster': {'/:=@': [0]}}, ('LDAP', 'SSL', 'support'): {'size': 1, 'cluster': {':': [1]}}}
333333333333333333333---------erichical_clusters-------------------33333333333333333333333333333
('LDAP', 'SSL', 'support')
{('Built', 'LDAP', 'with'): {'size': 2, 'cluster': {'/:=@': [0]}}, ('LDAP', 'SSL', 'support'): {'size': 2, 'cluster': {':': [1]}}, ('lili', 'mechanism', 'suEXEC'): {'size': 1, 'cluster': {'()/:': [2]}}, ('Digest', 'digest', 'generating'): {'size': 1, 'cluster': {'.:': [3]}}, ('Digest', 'done'): {'size': 1, 'cluster': {':': [4]}}}