题目:
代码:
import random
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statistics as sta
import scipy.stats.stats as stats
# Load Anscombe's quartet as one long-format table; the code below reads its
# "dataset", "x" and "y" columns.
anscombe = sns.load_dataset("anscombe")
print(anscombe)


def _values_for(label, column):
    """Return *column* as an array for the rows whose dataset equals *label*.

    Rows are picked by their dataset label rather than by position.  The
    positional slices used previously ([:10], [11:21], [22:32], [33:43])
    started 11 rows apart but only took 10 rows each, so the last observation
    of every dataset was silently left out of all statistics.
    """
    return anscombe.loc[anscombe["dataset"] == label, column].values


# --- x values per dataset -------------------------------------------------
Xarray_I = _values_for("I", "x")
Xarray_II = _values_for("II", "x")
Xarray_III = _values_for("III", "x")
Xarray_IV = _values_for("IV", "x")
print("Xarray in I:", Xarray_I)
print("Xarray in II:", Xarray_II)
print("Xarray in III:", Xarray_III)
print("Xarray in IV:", Xarray_IV)

# Arithmetic mean of x in each dataset.
Xmean_I = np.mean(Xarray_I)
print("mean of x in I: ", Xmean_I)
Xmean_II = np.mean(Xarray_II)
print("mean of x in II: ", Xmean_II)
Xmean_III = np.mean(Xarray_III)
print("mean of x in III: ", Xmean_III)
Xmean_IV = np.mean(Xarray_IV)
print("mean of x in IV: ", Xmean_IV)

# Sample variance of x (statistics.variance divides by n - 1).
Xvariance_I = sta.variance(Xarray_I)
print("variance of x in I: ", Xvariance_I)
Xvariance_II = sta.variance(Xarray_II)
print("variance of x in II: ", Xvariance_II)
Xvariance_III = sta.variance(Xarray_III)
print("variance of x in III: ", Xvariance_III)
Xvariance_IV = sta.variance(Xarray_IV)
print("variance of x in IV: ", Xvariance_IV)
print(' ')

# --- y values per dataset -------------------------------------------------
# Selected with the same label filter as x so each x/y pair stays aligned.
Yarray_I = _values_for("I", "y")
Yarray_II = _values_for("II", "y")
Yarray_III = _values_for("III", "y")
Yarray_IV = _values_for("IV", "y")
print("Yarray in I:", Yarray_I)
print("Yarray in II:", Yarray_II)
print("Yarray in III:", Yarray_III)
print("Yarray in IV:", Yarray_IV)

# Arithmetic mean of y in each dataset.
Ymean_I = np.mean(Yarray_I)
print("mean of y in I: ", Ymean_I)
Ymean_II = np.mean(Yarray_II)
print("mean of y in II: ", Ymean_II)
Ymean_III = np.mean(Yarray_III)
print("mean of y in III: ", Ymean_III)
Ymean_IV = np.mean(Yarray_IV)
print("mean of y in IV: ", Ymean_IV)

# Sample variance of y (statistics.variance divides by n - 1).
Yvariance_I = sta.variance(Yarray_I)
print("variance of y in I: ", Yvariance_I)
Yvariance_II = sta.variance(Yarray_II)
print("variance of y in II: ", Yvariance_II)
Yvariance_III = sta.variance(Yarray_III)
print("variance of y in III: ", Yvariance_III)
Yvariance_IV = sta.variance(Yarray_IV)
print("variance of y in IV: ", Yvariance_IV)
print('')
# Pearson correlation between x and y within each dataset.  pearsonr returns
# a (coefficient, p-value) pair; only the coefficient (index 0) is kept.
# The function is reached through the public scipy.stats namespace (`sp` is
# scipy) instead of the `stats` alias, which the file binds to the private
# scipy.stats.stats module that SciPy has deprecated.
cof_I = sp.stats.pearsonr(Xarray_I, Yarray_I)[0]
cof_II = sp.stats.pearsonr(Xarray_II, Yarray_II)[0]
cof_III = sp.stats.pearsonr(Xarray_III, Yarray_III)[0]
cof_IV = sp.stats.pearsonr(Xarray_IV, Yarray_IV)[0]
print("correlation coefficient of I: ", cof_I)
print("correlation coefficient of II: ", cof_II)
print("correlation coefficient of III: ", cof_III)
print("correlation coefficient of IV: ", cof_IV)
print(" ")
def _fit_and_report(title, xs, ys):
    """Fit ys against xs by ordinary least squares and print the fitted line.

    The regressors are xs with a constant term added via sm.add_constant, and
    the line is printed as "<title>: y = params[0] + params[1] * x".

    Returns the design matrix, the OLS model, the fit result and the fitted
    parameters, in that order.
    """
    design = sm.add_constant(xs)
    model = sm.OLS(ys, design)
    result = model.fit()
    coefs = result.params
    print(f"{title}: y =", coefs[0], "+", coefs[1], "* x")
    return design, model, result, coefs


# One linear fit per dataset; each intermediate object stays available under
# its per-dataset module-level name.
X_I, model_I, result_I, params_I = _fit_and_report(
    "DatasetI", Xarray_I, Yarray_I
)
X_II, model_II, result_II, params_II = _fit_and_report(
    "DatasetII", Xarray_II, Yarray_II
)
X_III, model_III, result_III, params_III = _fit_and_report(
    "DatasetIII", Xarray_III, Yarray_III
)
X_IV, model_IV, result_IV, params_IV = _fit_and_report(
    "DatasetIV", Xarray_IV, Yarray_IV
)
# Scatter plot of y against x, one panel (and one colour) per dataset.
sns.set(style='whitegrid')
# `height` is the current name of the per-facet size argument: seaborn 0.9
# renamed the former `size` keyword, and newer releases reject `size` with a
# TypeError.
g = sns.FacetGrid(anscombe, col="dataset", hue="dataset", height=3)
g.map(plt.scatter, 'x', 'y')
plt.show()
结果展示: