# Build a two-level grouping of log entries: the outer key is the sorted tuple
# of an entry's top-3 tokens (as chosen by `vocab.topk_tokens`), the inner key
# is the entry's log format, and the leaves are lists of entry ids.
# NOTE(review): assumes each value in `contents` is a (log_text, log_format)
# pair -- confirm against the code that builds `contents`.
for k, v in contents.items():
    # Sorting makes the key independent of the order topk_tokens returns.
    frequent_token = tuple(sorted(vocab.topk_tokens(v[0].split(), 3)))
    log_format = v[1]
    if frequent_token not in hierichical_clusters:
        hierichical_clusters[frequent_token] = {"size": 1, "cluster": {log_format: [k]}}
    else:
        group = hierichical_clusters[frequent_token]
        group["size"] += 1
        # Record the entry id as well as counting it; otherwise "size" drifts
        # away from the ids actually stored and every entry after the first
        # one in a group is lost.
        group["cluster"].setdefault(log_format, []).append(k)
------------------------------------------------------------------------------
111111111111111111111---hierichical_clusters--1111111111111111111111111
0 ('LDAP Built with OpenLDAP LDAP / SDK', '/:=@')
('LDAP', 'SDK', 'with')
/:=@
{('LDAP', 'SDK', 'with'): {'size': 1, 'cluster': {'/:=@': [0]}}}
111