Python字典处理技巧

最新推荐文章于 2020-11-24 07:12:13 发布

原创最新推荐文章于 2020-11-24 07:12:13 发布 · 789 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#python

Python 专栏收录该内容

22 篇文章

订阅专栏

《Python for Data Analysis》

# NOTE(review): the directory is spelled 'cho2' here; confirm it matches the
# actual location of the data file (it may have been intended as 'ch02').
path = 'cho2/usagov_bitly_data2012-03-16-1331923249.txt'

import json

# Every line of the file is parsed as one standalone JSON document.
# The context manager closes the file as soon as all lines have been read,
# rather than leaving the handle open until garbage collection. The encoding
# is given explicitly so the result does not depend on the platform default.
with open(path, encoding='utf-8') as data_file:
    records = [json.loads(line) for line in data_file]

# Keep the 'tz' value of every record that actually has a 'tz' key.
time_zones = [rec['tz'] for rec in records if 'tz' in rec]

dict (基础用法)

def get_counts(sequence):
    """Tally how many times each item occurs in ``sequence``.

    Returns a plain ``dict`` mapping every distinct item to its number of
    occurrences, with keys in first-seen order. An empty ``sequence`` yields
    an empty dict.
    """
    tally = {}
    for item in sequence:
        # dict.get supplies 0 the first time an item is encountered.
        tally[item] = tally.get(item, 0) + 1
    return tally

def top_counts(count_dict, n = 10):
    """Return the ``n`` largest ``(count, key)`` pairs of ``count_dict``.

    ``count_dict`` maps keys to counts. The result is a list of
    ``(count, key)`` tuples sorted in ascending order, so the most frequent
    entry comes last. Ties on count are ordered by comparing the keys.

    If ``n`` is zero or negative an empty list is returned. If ``n`` exceeds
    the number of entries, all entries are returned.
    """
    # Guard explicitly: a bare ``pairs[-n:]`` with n == 0 is ``pairs[0:]``,
    # which would return every pair instead of none.
    if n <= 0:
        return []
    value_key_pairs = sorted((count, tz) for tz, count in count_dict.items())
    return value_key_pairs[-n:]

使用Python标准库

from collections import defaultdict

def get_counts2(sequence):
    """Tally how many times each item occurs in ``sequence``.

    Returns a ``defaultdict(int)`` mapping every distinct item to its number
    of occurrences. Looking up a key that was never seen yields 0.
    """
    # int() produces 0, so an unseen key starts from zero automatically.
    tally = defaultdict(int)
    for item in sequence:
        tally[item] = tally[item] + 1
    return tally

from collections import Counter 

# Counter tallies the occurrences of every value in time_zones in one call.
counts = Counter(time_zones)
# most_common(10) gives the ten (value, count) pairs with the highest counts,
# most frequent first. As a bare expression, the result is only displayed in
# an interactive session.
counts.most_common(10)

使用pandas

from pandas import DataFrame, Series

import pandas as pd
import numpy as np

# Build a DataFrame from the parsed records.
frame = DataFrame(records)
# Show the first ten entries of the 'tz' column.
frame['tz'].head(10)

# value_counts() tallies how often each distinct 'tz' value occurs.
tz_counts = frame['tz'].value_counts()
# Show the first ten entries of the tally.
tz_counts.head(10)