import pandas as pd
# --- Load, inspect and export a table ---------------------------------------
# The loaders run first so that `df` exists before anything inspects it.
# Each read below rebinds `df`, so only the last one that succeeds is kept;
# in practice keep just the call that matches your file format.
df = pd.read_csv('data/df.csv')
df = pd.read_excel('data/df.xlsx')
# read_csv splits on commas by default; a tab-separated file needs sep='\t'.
df = pd.read_csv('data/df.tsv', sep='\t')
# NOTE(review): the delimiter of df.txt is not visible from here -- pass
# sep=... if the file is not comma-separated.
df = pd.read_csv('data/df.txt')

# Select a single column.
df['age']
# Largest value in that column.
df['age'].max()
# Same reduction through a named Series (bound here so this section is
# runnable on its own).
ages = df['age']
ages.max()
# Descriptive statistics via DataFrame.describe().
df.describe()

# First rows: default count, then an explicit count of 10.
df.head()
df.head(10)
# Per-column dtypes.
df.dtypes
# Write the frame to sheet 'df1' of df.xlsx without the index column.
df.to_excel('df.xlsx', sheet_name='df1', index=False)
# Technical summary printed by DataFrame.info().
df.info()
# --- Selecting subsets of a table --------------------------------------------
# Single column, bound to a name.
ages = df['age']
# Type of a single-column selection (was `df.['age']`, a SyntaxError).
type(df['age'])
df['age'].shape
# Several columns at once: pass a list of labels.
df1 = df[['age', 'sex']]
# Row filter with a boolean mask.
above_35 = df[df['age'] > 35]
# NOTE(review): `titanic` is not defined in the visible part of this file;
# confirm it is bound elsewhere or whether `df` was intended.
# Membership filter with isin(...).
class_23 = titanic[titanic["Pclass"].isin([2, 3])]
# The same filter spelled as two comparisons joined with `|`.
class_23 = titanic[(titanic["Pclass"] == 2) | (titanic["Pclass"] == 3)]
# Keep only rows whose 'age' is not missing.
age_notna = df[df['age'].notna()]
# .loc: boolean row mask plus a column label.
a_names = df.loc[df['age'] > 35, 'name']
# .iloc: purely positional slicing (rows 9..24, columns 2..4).
df.iloc[9:25, 2:5]
# Positional assignment: rows 0..2 of the column at position 3.
df.iloc[0:3, 3] = 'anonymous'
import pandas as pd
import matplotlib.pyplot as plt
# --- Plotting ----------------------------------------------------------------
# Default plot of the whole frame, then of a single column.
df.plot()
df["s"].plot()
# Scatter plot of column 'w' against column 's', half-transparent markers.
df.plot.scatter(x="s", y="w", alpha=0.5)
# Public (non-underscore) attribute names available on the df.plot accessor.
[
    attr
    for attr in dir(df.plot)
    if not attr.startswith("_")
]
df.plot.box()
# Area plot with subplots=True; the return value is bound to `axs`.
axs = df.plot.area(figsize=(12, 4), subplots=True)
# Create the figure/axes explicitly (rebinding `axs`), draw onto it, then
# label the y axis and save the figure to disk.
fig, axs = plt.subplots(figsize=(12, 4))
df.plot.area(ax=axs)
axs.set_ylabel("NO$_2$ concentration")
fig.savefig("no2_concentrations.png")
# --- Deriving and renaming columns -------------------------------------------
# New column from an existing one times `x`.
# NOTE(review): `x` is not defined in the visible part of this file; it is
# presumably a scalar factor -- confirm and bind it before this line.
df['n'] = df['t'] * x
# New column from the element-wise ratio of two existing columns
# (this overwrites the 'n' assigned just above).
df['n'] = df['t'] / df['w']
# Rename selected columns via an old -> new mapping. The second entry used to
# read 'RENAME2': 'RENAME2', which maps a label onto itself and renames
# nothing; it now follows the same old -> new pattern as the first entry.
df_renamed = df.rename(columns={'name1': 'RENAME1', 'name2': 'RENAME2'})
# Apply a function to every column label: lower-case them all.
df_renamed = df_renamed.rename(columns=str.lower)
# --- Summary statistics ------------------------------------------------------
# Single-column and multi-column reductions.
df['age'].mean()
df[['age', 'fare']].median()
df[['age', 'fare']].describe()
# Different sets of aggregations per column.
df.agg({
    'age': ['min', 'max', 'median', 'skew'],
    'fare': ['min', 'max', 'median', 'mean'],
})

# Grouped means: select columns first, or select after grouping.
df[['sex', 'age']].groupby('sex').mean()
df.groupby('sex')['age'].mean()
# Whole-frame grouped mean. numeric_only=True keeps text columns (the frame
# has a 'name' column) out of the mean; without it pandas 2.x raises.
df.groupby('sex').mean(numeric_only=True)
# Group by two keys ('pclas' was a typo for the 'pclass' column used below).
df.groupby(['sex', 'pclass'])['fare'].mean()

# Number of rows per category, two ways.
df['pclass'].value_counts()
df.groupby('pclass')['pclass'].count()

# Sorting by one column, then by several columns in descending order.
df.sort_values(by='age')
df.sort_values(by=['pclass', 'age'], ascending=False)
# --- Reshaping tables --------------------------------------------------------
# Keep only the rows whose 'parameter' equals "no2".
no2 = df[df["parameter"] == "no2"]
# Sort by index, then take the first two rows of every 'location' group.
no2_subset = (
    no2.sort_index()
    .groupby(["location"])
    .head(2)
)
# Long -> wide: one column per location, filled from 'value'.
no2_subset.pivot(columns="location", values="value")
# Wide table with aggregation: mean 'value' per location x parameter.
df.pivot_table(
    values="value",
    index="location",
    columns="parameter",
    aggfunc="mean",
)
# Same table, with margins=True.
df.pivot_table(
    values="value",
    index="location",
    columns="parameter",
    aggfunc="mean",
    margins=True,
)
# --- Combining tables --------------------------------------------------------
# NOTE(review): `df2` is not defined in the visible part of this file.
# Each statement rebinds `df`; they are alternatives, not a pipeline.

# Concatenate along axis 0.
df = pd.concat([df1, df2], axis=0)
# Concatenate along axis 1.
df = pd.concat([df1, df2], axis=1)
# Concatenate with keys 'P' and 'N' attached to the two inputs.
df = pd.concat([df1, df2], keys=["P", "N"])
# Left join on a key column that has the same name in both tables.
df = pd.merge(
    df1,
    df2,
    how="left",
    on="ID",
)
# Left join where the key column is named differently on each side.
df = pd.merge(
    df1,
    df2,
    how="left",
    left_on="p",
    right_on="d",
)
# --- Time series -------------------------------------------------------------
# Convert an existing column to datetimes ...
df["datetime"] = pd.to_datetime(df["datetime"])
# ... or parse it while reading (the result of this call is not stored).
pd.read_csv("../data/df.csv", parse_dates=["datetime"])
# Earliest and latest timestamp, and the span between them.
df["datetime"].min(), df["datetime"].max()
df["datetime"].max() - df["datetime"].min()
# Datetime components are reachable through the .dt accessor.
df["month"] = df["datetime"].dt.month

# NOTE(review): `air_quality` is not defined in the visible part of this file.
# Mean 'value' per (weekday, location).
air_quality.groupby(
    [air_quality["datetime"].dt.weekday, "location"]
)["value"].mean()
# Mean 'value' per hour of day, drawn as a bar chart on the existing `axs`.
air_quality.groupby(air_quality["datetime"].dt.hour)["value"].mean().plot(
    kind="bar",
    rot=0,
    ax=axs,
)
plt.xlabel("Hour of the day")
plt.ylabel("$NO_2 (µg/m^3)$")

# Pivot with 'datetime' as the index.
no_2 = air_quality.pivot(
    index="datetime",
    columns="location",
    values="value",
)
no_2.index.year, no_2.index.weekday
# Slice the index with date strings and plot the selection.
no_2["2019-05-20":"2019-05-21"].plot()
# Resample to a monthly frequency and take the maximum of each bin.
# NOTE(review): recent pandas releases deprecate the "M" alias in favour of
# "ME"; left as-is so the snippet keeps working on older versions.
monthly_max = no_2.resample("M").max()
monthly_max.index.freq
# Daily means, plotted with line-and-marker style.
no_2.resample("D").mean().plot(style="-o", figsize=(10, 5))
# --- Working with text columns -----------------------------------------------
# NOTE(review): these snippets mix "name" and "Name" as column labels; confirm
# which spelling the loaded table actually uses.

# Lower-case every value of the column.
df["name"].str.lower()
# Split each value on commas (yields a list per row).
df["Name"].str.split(",")
# First piece of the comma split, stored in a new column.
df["Surname"] = (
    df["Name"]
    .str.split(",")
    .str.get(0)
)
# Rows whose "Name" contains the substring "Countess".
df[df["Name"].str.contains("Countess")]
# Index label of the longest value ...
df["name"].str.len().idxmax()
# ... and the value stored at that label.
df.loc[df["Name"].str.len().idxmax(), "Name"]
# Map full words to one-letter codes in a new column.
df["Sex_short"] = df["Sex"].replace(
    {
        "male": "M",
        "female": "F",
    }
)