# Jupyter Notebook Exercises
%matplotlib inline
import random
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Apply seaborn's "talk" plotting context to the figures drawn below.
sns.set_context("talk")
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
from pandas.core import datetools
# Read 'anscombe.csv' (expected in the working directory) into a DataFrame;
# the remaining cells refer to it by the name `anascombe`.
anascombe = pd.read_csv('anscombe.csv')
# Preview the first five rows as the cell output.
anascombe.head(n=5)
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
| | dataset | x | y |
---|---|---|---|
0 | I | 10.0 | 8.04 |
1 | I | 8.0 | 6.95 |
2 | I | 13.0 | 7.58 |
3 | I | 9.0 | 8.81 |
4 | I | 11.0 | 8.33 |
Part 1
# Mean of x and y within each dataset.
anascombe.groupby('dataset').mean()
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
dataset | x | y |
---|---|---|
I | 9.0 | 7.500909 |
II | 9.0 | 7.500909 |
III | 9.0 | 7.500000 |
IV | 9.0 | 7.500909 |
# Standard deviation of x and y within each dataset.
anascombe.groupby('dataset').std()
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
dataset | x | y |
---|---|---|
I | 3.316625 | 2.031568 |
II | 3.316625 | 2.031657 |
III | 3.316625 | 2.030424 |
IV | 3.316625 | 2.030579 |
# Correlation matrix between x and y within each dataset.
anascombe.groupby('dataset').corr()
.dataframe thead tr:only-child th {
text-align: right;
}
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
dataset | | x | y |
---|---|---|---|
I | x | 1.000000 | 0.816421 |
| | y | 0.816421 | 1.000000 |
II | x | 1.000000 | 0.816237 |
| | y | 0.816237 | 1.000000 |
III | x | 1.000000 | 0.816287 |
| | y | 0.816287 | 1.000000 |
IV | x | 1.000000 | 0.816521 |
| | y | 0.816521 | 1.000000 |
# Fit an ordinary least squares line (y ~ x) to each dataset separately and
# print the fitted parameters (intercept and slope).
#
# Rows are selected by the value of the 'dataset' column instead of fixed
# 11-row positional slices, so the fit does not depend on the file's row order
# or on every dataset having exactly 11 rows. groupby yields the groups in
# sorted key order; the printed label stays the 0-based position of the group.
for i, (_, subset) in enumerate(anascombe.groupby('dataset')):
    model = smf.ols('y ~ x', data=subset).fit()
    print('----' + str(i) + '----')
    print(model.params)
----0----
Intercept 3.000091
x 0.500091
dtype: float64
----1----
Intercept 3.000909
x 0.500000
dtype: float64
----2----
Intercept 3.002455
x 0.499727
dtype: float64
----3----
Intercept 3.001727
x 0.499909
dtype: float64
Part 2
# Build a grid with one row of axes per value of the 'dataset' column.
fig = sns.FacetGrid(data=anascombe, row='dataset')
# Draw a y-versus-x scatter plot on each facet.
fig.map(plt.scatter, 'x', 'y')
<seaborn.axisgrid.FacetGrid at 0x1f1ba7b3d30>