# Data source: link
# coding: utf-8
import pandas as pd
# Load the survey responses; the CSV is decoded as Latin-1.
data = pd.read_csv("thanksgiving.csv", encoding="Latin-1")
# Notebook-style peeks at the first row and at the column labels.
data.head(1)
data.columns

# Tally the answers to the "celebrate" question and keep the "Yes" rows.
celebrates = data["Do you celebrate Thanksgiving?"]
DYCT = celebrates.value_counts()
Filter_DYCT = data[celebrates == "Yes"]

# Tally the main dishes, then pull the gravy answers of the "Tofurkey" rows.
main_dish = data["What is typically the main dish at your Thanksgiving dinner?"]
MDTD = main_dish.value_counts()
is_Tofurkey = main_dish == "Tofurkey"
gravy = data.loc[is_Tofurkey, "Do you typically have gravy?"]

# Missing-value masks for the three pie columns.
apple_isnull = data["Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Apple"].isnull()
Pumpkin_isnull = data["Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pumpkin"].isnull()
Pecan_isnull = data["Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pecan"].isnull()
# Despite its name, ate_pies is True where ALL three pie columns are missing.
ate_pies = apple_isnull & Pumpkin_isnull & Pecan_isnull
data[ate_pies]
ate_pies.value_counts()
def convert_age(x):
    """Convert an age label to the integer at the start of the label.

    The text before the first space is taken, every "+" in it is removed,
    and the remainder is parsed as an integer, e.g. "18 - 29" -> 18 and
    "60+" -> 60.

    Args:
        x: Age label string, or a missing value (None / NaN).

    Returns:
        The leading number as an int, or None when x is missing.

    Raises:
        ValueError: If the leading token is not an integer once "+" is removed.
    """
    if pd.isnull(x):
        return None
    leading_token = x.split(" ")[0].replace("+", "")
    return int(leading_token)
# Parse every "Age" label with convert_age and store the result as "int_age".
age_labels = data["Age"]
data["int_age"] = age_labels.apply(convert_age)
# Notebook-style display of summary statistics for the parsed ages.
data["int_age"].describe()
def convert_income(x):
    """Convert an income label to the integer at the start of the label.

    The text before the first space is taken; "$" and "," are stripped from
    it and the remainder is parsed as an integer, e.g.
    "$25,000 to $49,999" -> 25000.

    Args:
        x: Income label string, or a missing value (None / NaN).

    Returns:
        The leading amount as an int, or None when x is missing or when the
        label's first word is "Prefer".

    Raises:
        ValueError: If the leading token is not an integer once "$" and ","
            are removed.
    """
    if pd.isnull(x):
        return None
    leading_token = x.split(" ")[0]
    # Labels starting with "Prefer" carry no amount to parse.
    if leading_token == "Prefer":
        return None
    return int(leading_token.replace("$", "").replace(",", ""))
# Parse every household-income label into an integer with convert_income.
int_income = data["How much total combined money did all members of your HOUSEHOLD earn last year?"].apply(convert_income)
# Notebook-style displays: counts per parsed income (ordered by value), then summary statistics.
int_income.value_counts().sort_index()
int_income.describe()

# Travel-distance answers split by parsed income below / above 150000.
# Both groups tabulate the same column so they can be compared directly;
# rows whose parsed income equals 150000 fall into neither group.
data["How far will you travel for Thanksgiving?"][int_income < 150000].value_counts()
data["How far will you travel for Thanksgiving?"][int_income > 150000].value_counts()
# Author's finding: in both groups, regardless of income, people who do not
# travel for Thanksgiving are the largest group.

# Cross-tabulate the two "friends" questions, aggregating "int_age" with
# pivot_table's default aggregation (the mean).
data.pivot_table(index="Have you ever tried to meet up with hometown friends on Thanksgiving night?", columns='Have you ever attended a "Friendsgiving?"', values="int_age")
# Author's finding: people who attend gatherings with friends skew younger.