论文终于要写完了,太不容易了。(预计23号完成最终版本,60页以上)
代码分享:
import matplotlib.pyplot as plt
import numpy as np
# Data: yearly cat and dog populations (the y-axis below labels the unit as 10,000)
years = np.array([2019, 2020, 2021, 2022, 2023])
cats = np.array([4412, 4862, 5806, 6536, 6980])
dogs = np.array([5503, 5222, 5429, 5119, 5175])

# Year-over-year growth rate in percent: (current - previous) / previous * 100.
# np.diff yields len(years) - 1 values, one for each year after the first.
cat_growth_rate = np.diff(cats) / cats[:-1] * 100
dog_growth_rate = np.diff(dogs) / dogs[:-1] * 100

# Bar width and x positions for the grouped bar chart
bar_width = 0.4
x = np.arange(len(years))

# Create the figure that holds both stacked charts
plt.figure(figsize=(12, 10))

# Chart 1: grouped bars (cat and dog populations)
plt.subplot(2, 1, 1)
plt.bar(x - bar_width / 2, cats, width=bar_width, color='skyblue', label='Cats')
plt.bar(x + bar_width / 2, dogs, width=bar_width, color='lightcoral', label='Dogs')
plt.title("Population of Cats and Dogs (2019-2023)", fontsize=14)
plt.xlabel("Year", fontsize=12)
plt.ylabel("Population (10,000)", fontsize=12)
plt.xticks(x, years)  # label each bar group with its year
plt.legend()

# Chart 2: line chart (growth rates); the first year has no previous value
plt.subplot(2, 1, 2)
growth_years = years[1:]
plt.plot(growth_years, cat_growth_rate, 'b-o', label='Cat Growth Rate')
plt.plot(growth_years, dog_growth_rate, 'r-o', label='Dog Growth Rate')
plt.axhline(0, color='gray', linestyle='--')  # reference line at zero growth
plt.title("Growth Rates of Cats and Dogs (2019-2023)", fontsize=14)
plt.xlabel("Year", fontsize=12)
plt.ylabel("Growth Rate (%)", fontsize=12)
# Pin the ticks to the plotted years: the x data is numeric, so the automatic
# tick locator may otherwise place ticks at fractional years such as 2020.5.
plt.xticks(growth_years)
plt.legend()

# Adjust the layout so titles and labels do not overlap, then display
plt.tight_layout()
plt.show()
第一问的第二小问:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge, Lasso
from sklearn.preprocessing import StandardScaler
from scipy.stats import pearsonr
# Load the updated data workbook
file_path = 'D:\\Desktop\\数据 (2).xlsx'
# The context manager closes the workbook handle once the sheet is parsed
with pd.ExcelFile(file_path) as data:
    df = data.parse("中国")

# Extract the target variables and the explanatory variables
y_cats = df["猫(万)"].values
y_dogs = df["狗(万)"].values
X = df[["宠物市场规模 (亿美元)", "宠物食品开支 (亿美元)", "兽医服务开支(亿美元)",
        "中国宠物家庭渗透率", "总生育率(女性人均生育数)", "城镇人口",
        "食品生产指数(2014-2016 = 100)", "人均 GDP(现价美元)"]]

# Data cleaning: treat +/-inf as missing and keep only fully populated rows.
# The same boolean row mask is applied to the targets so that every feature
# row stays paired with its own target value. Slicing the targets with
# [:len(X_cleaned)] would misalign them whenever a dropped row is not the last.
X_finite = X.replace([np.inf, -np.inf], np.nan)
complete_rows = X_finite.notna().all(axis=1).to_numpy()
X_cleaned = X_finite[complete_rows]
y_cats_cleaned = y_cats[complete_rows]
y_dogs_cleaned = y_dogs[complete_rows]

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_cleaned)

# Correlation analysis: Pearson coefficient of each feature against each target
correlations_cats = {col: pearsonr(X_cleaned[col], y_cats_cleaned)[0] for col in X_cleaned.columns}
correlations_dogs = {col: pearsonr(X_cleaned[col], y_dogs_cleaned)[0] for col in X_cleaned.columns}
# Correlation visualization
def plot_correlation(correlations, title):
    """Draw a bar chart of per-feature Pearson correlation coefficients.

    Args:
        correlations: Mapping from feature name to correlation value. The
            keys become the x-axis labels and the values the bar heights.
        title: Title shown above the chart.

    Creates a new figure and displays it with ``plt.show()``.
    """
    # NOTE(review): labels containing non-ASCII text need a matplotlib font
    # that provides those glyphs -- confirm the font configuration.
    feature_names = list(correlations.keys())
    coefficient_values = list(correlations.values())

    plt.figure(figsize=(10, 6))
    plt.bar(feature_names, coefficient_values, color='skyblue')
    plt.title(title, fontsize=16)
    plt.ylabel("Pearson Correlation", fontsize=12)
    plt.xticks(rotation=45, fontsize=10)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()
# Show the correlation bar charts for both targets
plot_correlation(correlations_cats, "Correlation between Features and Cat Numbers")
plot_correlation(correlations_dogs, "Correlation between Features and Dog Numbers")

# Ridge and LASSO regression on the standardized features.
# fit() returns the estimator itself, so construction and fitting are chained.
ridge_cats = Ridge(alpha=1.0).fit(X_scaled, y_cats_cleaned)
lasso_cats = Lasso(alpha=0.1, max_iter=10000).fit(X_scaled, y_cats_cleaned)
ridge_dogs = Ridge(alpha=1.0).fit(X_scaled, y_dogs_cleaned)
lasso_dogs = Lasso(alpha=0.1, max_iter=10000).fit(X_scaled, y_dogs_cleaned)

# Fitted coefficients, one per feature column, for each model and target
ridge_coefficients_cats = ridge_cats.coef_
lasso_coefficients_cats = lasso_cats.coef_
ridge_coefficients_dogs = ridge_dogs.coef_
lasso_coefficients_dogs = lasso_dogs.coef_
# Regression coefficient visualization
def plot_regression_coefficients(ridge_coefficients, lasso_coefficients, features, title):
    """Draw side-by-side bars comparing Ridge and LASSO coefficients.

    Args:
        ridge_coefficients: Ridge coefficient per feature, in ``features`` order.
        lasso_coefficients: LASSO coefficient per feature, in ``features`` order.
        features: Feature names used as the x-axis tick labels.
        title: Title shown above the chart.

    Creates a new figure and displays it with ``plt.show()``.
    """
    # NOTE(review): labels containing non-ASCII text need a matplotlib font
    # that provides those glyphs -- confirm the font configuration.
    bar_width = 0.4
    half_width = bar_width / 2
    # One slot per feature; the two bars of a pair sit either side of its centre
    x_indexes = np.arange(len(features))

    plt.figure(figsize=(12, 6))
    plt.bar(x_indexes - half_width, ridge_coefficients, width=bar_width, label="Ridge Coefficients", color='orange')
    plt.bar(x_indexes + half_width, lasso_coefficients, width=bar_width, label="LASSO Coefficients", color='green')
    plt.xticks(x_indexes, features, rotation=45, fontsize=10)
    plt.title(title, fontsize=16)
    plt.ylabel("Coefficient Value", fontsize=12)
    plt.legend(fontsize=12)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()
# Show the Ridge vs. LASSO coefficient charts for both targets
feature_names = X_cleaned.columns
plot_regression_coefficients(
    ridge_coefficients_cats,
    lasso_coefficients_cats,
    feature_names,
    "Regression Coefficients for Cat Numbers",
)
plot_regression_coefficients(
    ridge_coefficients_dogs,
    lasso_coefficients_dogs,
    feature_names,
    "Regression Coefficients for Dog Numbers",
)

# Summary tables: one row per feature, one column per analysis method
correlation_df_cats = pd.DataFrame(
    {
        "Pearson Correlation": list(correlations_cats.values()),
        "Ridge Coefficients": ridge_coefficients_cats,
        "LASSO Coefficients": lasso_coefficients_cats,
    },
    index=list(correlations_cats),
)
correlation_df_dogs = pd.DataFrame(
    {
        "Pearson Correlation": list(correlations_dogs.values()),
        "Ridge Coefficients": ridge_coefficients_dogs,
        "LASSO Coefficients": lasso_coefficients_dogs,
    },
    index=list(correlations_dogs),
)

# Save the results to CSV files (written with the utf-8-sig encoding)
correlation_df_cats.to_csv('Cats_Analysis_Results.csv', encoding='utf-8-sig')
correlation_df_dogs.to_csv('Dogs_Analysis_Results.csv', encoding='utf-8-sig')

# Print the results, cats first and then dogs
for heading, summary_table in (
    ("主控因素分析结果(猫):", correlation_df_cats),
    ("\n主控因素分析结果(狗):", correlation_df_dogs),
):
    print(heading)
    print(summary_table)
以上仅为部分内容。更详细的思路、各题目的思路、代码、讲解视频、成品论文及其他相关内容,可以点击下方名片获取: