【# 1.1 加载数据
import pandas as pd
df = pd.read_excel("NFT_by.xlsx")
# 1.2 概率转稀缺性指标
attributes = ['Earring','Background','Fur','Clothes','Mouth','Eyes','Hat']
for col in attributes:
df[f"{col}_Scarcity"] = 1 - df[col] # 稀缺性 = 1 - 出现概率
# 1.3 异常值处理(Winsorize)
from scipy.stats.mstats import winsorize
df['Price'] = winsorize(df['Price'], limits=[0.01, 0.01]) # 截断1%极端值
# 1.4 数据标准化
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df[attributes] = scaler.fit_transform(df[attributes])
import statsmodels.api as sm
import numpy as np
# Stage 1: Hedonic回归
X = df[[f"{col}_Scarcity" for col in attributes]] # 7个稀缺性特征
X = sm.add_constant(X) # 添加截距项
y = np.log(df['Price']) # 对数价格
model_stage1 = sm.OLS(y, X).fit()
print(model_stage1.summary()) # 输出系数表格
# 修正代码
import numpy as np
# 假设commonality是numpy数组
fill_values = np.nanmedian(commonality) # 使用全局函数计算中位数
# 填充NaN(若commonality为Pandas Series则用.fillna(fill_values))
clean_commonality = np.where(np.isnan(commonality), fill_values, commonality)
# 处理Inf(统一转为NaN后填充)
clean_commonality = np.where(np.isinf(clean_commonality), fill_values, clean_commonality)
# Stage 2: 共性回归
# 计算属性共性(出现概率均值)
commonality = df[attributes].mean(axis=0).values.reshape(-1,1)
# 提取Stage1系数
betas = model_stage1.params[1:].values.reshape(-1,1) # 排除截距项
# 建立共性回归模型
model_stage2 = sm.OLS(betas, np.log(commonality)).fit()
print(model_stage2.summary()) # 输出α1系数】
发现错误,请给出完整代码
MissingDataError Traceback (most recent call last)
Cell In[6], line 20
17 betas = model_stage1.params[1:].values.reshape(-1,1) # 排除截距项
19 # 建立共性回归模型
---> 20 model_stage2 = sm.OLS(betas, np.log(commonality)).fit()
21 print(model_stage2.summary())
File C:\Anaconda\envs\december\lib\site-packages\statsmodels\regression\linear_model.py:890, in OLS.__init__(self, endog, exog, missing, hasconst, **kwargs)
887 msg = ("Weights are not supported in OLS and will be ignored"
888 "An exception will be raised in the next version.")
889 warnings.warn(msg, ValueWarning)
--> 890 super(OLS, self).__init__(endog, exog, missing=missing,
891 hasconst=hasconst, **kwargs)
892 if "weights" in self._init_keys:
893 self._init_keys.remove("weights")
File C:\Anaconda\envs\december\lib\site-packages\statsmodels\regression\linear_model.py:717, in WLS.__init__(self, endog, exog, weights, missing, hasconst, **kwargs)
715 else:
716 weights = weights.squeeze()
--> 717 super(WLS, self).__init__(endog, exog, missing=missing,
718 weights=weights, hasconst=hasconst, **kwargs)
719 nobs = self.exog.shape[0]
720 weights = self.weights
File C:\Anaconda\envs\december\lib\site-packages\statsmodels\regression\linear_model.py:191, in RegressionModel.__init__(self, endog, exog, **kwargs)
190 def __init__(self, endog, exog, **kwargs):
--> 191 super(RegressionModel, self).__init__(endog, exog, **kwargs)
192 self._data_attr.extend(['pinv_wexog', 'wendog', 'wexog', 'weights'])
File C:\Anaconda\envs\december\lib\site-packages\statsmodels\base\model.py:267, in LikelihoodModel.__init__(self, endog, exog, **kwargs)
266 def __init__(self, endog, exog=None, **kwargs):
--> 267 super().__init__(endog, exog, **kwargs)
268 self.initialize()
File C:\Anaconda\envs\december\lib\site-packages\statsmodels\base\model.py:92, in Model.__init__(self, endog, exog, **kwargs)
90 missing = kwargs.pop('missing', 'none')
91 hasconst = kwargs.pop('hasconst', None)
---> 92 self.data = self._handle_data(endog, exog, missing, hasconst,
93 **kwargs)
94 self.k_constant = self.data.k_constant
95 self.exog = self.data.exog
File C:\Anaconda\envs\december\lib\site-packages\statsmodels\base\model.py:132, in Model._handle_data(self, endog, exog, missing, hasconst, **kwargs)
131 def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
--> 132 data = handle_data(endog, exog, missing, hasconst, **kwargs)
133 # kwargs arrays could have changed, easier to just attach here
134 for key in kwargs:
File C:\Anaconda\envs\december\lib\site-packages\statsmodels\base\data.py:673, in handle_data(endog, exog, missing, hasconst, **kwargs)
670 exog = np.asarray(exog)
672 klass = handle_data_class_factory(endog, exog)
--> 673 return klass(endog, exog=exog, missing=missing, hasconst=hasconst,
674 **kwargs)
File C:\Anaconda\envs\december\lib\site-packages\statsmodels\base\data.py:86, in ModelData.__init__(self, endog, exog, missing, hasconst, **kwargs)
84 self.const_idx = None
85 self.k_constant = 0
---> 86 self._handle_constant(hasconst)
87 self._check_integrity()
88 self._cache = {}
File C:\Anaconda\envs\december\lib\site-packages\statsmodels\base\data.py:132, in ModelData._handle_constant(self, hasconst)
130 exog_max = np.max(self.exog, axis=0)
131 if not np.isfinite(exog_max).all():
--> 132 raise MissingDataError('exog contains inf or nans')
133 exog_min = np.min(self.exog, axis=0)
134 const_idx = np.where(exog_max == exog_min)[0].squeeze()
MissingDataError: exog contains inf or nans