import pandas as p
1.SQL题就是利用窗口函数然后套娃 加一个查询
2.python题
# 搜狐数据
raw_data = {'platform': ['pcnews', 'pcnews', 'pcnews', 'pcnews', 'news', 'news', 'news', 'news', 'video', 'video', 'video', 'video'],
'order_num': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'],
'comment': [523, 52, 25, 616, 43, 234, 523, 62, 62, 73, 37, 35],
'uv': [1045, 957, 1099, 1400, 1592, 1006, 987, 849, 973, 1005, 1099, 1523]}
# Q1: 读入数据,到一个dataframe命名为content,并且将其中的列名作为dataframe的column index
import pandas as pd
content=pd.DataFrame(raw_data)
# Q2: 筛选所有order_num = '2nd' 且platform = 'news'
content[(content['order_num']=='2nd') & (content['platform']=='news')]
# Q3: 筛选content的第2-4行和第2列之后的数据
content.iloc[1:4,2:]
# Q4: 对content进行分组计算,按照platform 和order_num 对comment 求和 并按照求和结果进行降序排列
content['comment'].groupby([content['platform'],content['order_num']]).sum()
# Q5: 新生成一列变量为 rank,评价标准为,当platform='news'且 comment >= 70时 rank = 'good', 当platform='pcnews'且 comment >= 500时 rank = 'medium', 其他 rank = 'worse'
def function(platform,comment):
if 'new' in platform and comment>=70:
rank='good'
elif 'pcnews' in platform and comment>=500:
rank='medium'
else :
rank='worse'
return rank
content['rank']=content.apply(lambda x: function(x.platform,x.comment),axis=1)
print(content)