【Python】数据截断np.clip() 与 Series.clip()

本文介绍如何使用 NumPy 的 clip 函数和 Pandas Series 的 clip 方法,将数组或序列中的数值限制在指定范围内。这种数据截断是数据分析和科学计算中常用的预处理手段。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

# numpy
# 将数组a中的元素限制在区间[a_min, a_max]内:小于a_min的元素被置为a_min,大于a_max的元素被置为a_max;a_min、a_max由调用者指定。
numpy.clip(a, a_min, a_max, out=None)

# pandas
# 将Series中的值限制在[lower, upper]范围内:小于lower的值被置为lower,大于upper的值被置为upper;lower或upper为None时,对应一侧不做截断。
Series.clip(lower=None, upper=None, axis=None, inplace=False)

 

【# 1.1 加载数据 import pandas as pd df = pd.read_excel("NFT_by.xlsx") # 1.2 概率转稀缺性指标 attributes = ['Earring','Background','Fur','Clothes','Mouth','Eyes','Hat'] for col in attributes: df[f"{col}_Scarcity"] = 1 - df[col] # 稀缺性 = 1 - 出现概率 # 1.3 异常值处理(Winsorize) from scipy.stats.mstats import winsorize df['Price'] = winsorize(df['Price'], limits=[0.01, 0.01]) # 截断1%极端值 # 1.4 数据标准化 from sklearn.preprocessing import StandardScaler scaler = StandardScaler() df[attributes] = scaler.fit_transform(df[attributes]) import statsmodels.api as sm import numpy as np # Stage 1: Hedonic回归 X = df[[f"{col}_Scarcity" for col in attributes]] # 7个稀缺性特征 X = sm.add_constant(X) # 添加截距项 y = np.log(df['Price']) # 对数价格 model_stage1 = sm.OLS(y, X).fit() print(model_stage1.summary()) # 输出系数表格 # 修正代码 import numpy as np # 假设commonality是numpy数组 fill_values = np.nanmedian(commonality) # 使用全局函数计算中位数 # 填充NaN(若commonality为Pandas Series则用.fillna(fill_values)) clean_commonality = np.where(np.isnan(commonality), fill_values, commonality) # 处理Inf(统一转为NaN后填充) clean_commonality = np.where(np.isinf(clean_commonality), fill_values, clean_commonality) # Stage 2: 共性回归 # 计算属性共性(出现概率均值) commonality = df[attributes].mean(axis=0).values.reshape(-1,1) # 提取Stage1系数 betas = model_stage1.params[1:].values.reshape(-1,1) # 排除截距项 # 建立共性回归模型 model_stage2 = sm.OLS(betas, np.log(commonality)).fit() print(model_stage2.summary()) # 输出α1系数】 发现错误,请给出完整代码 MissingDataError Traceback (most recent call last) Cell In[6], line 20 17 betas = model_stage1.params[1:].values.reshape(-1,1) # 排除截距项 19 # 建立共性回归模型 ---> 20 model_stage2 = sm.OLS(betas, np.log(commonality)).fit() 21 print(model_stage2.summary()) File C:\Anaconda\envs\december\lib\site-packages\statsmodels\regression\linear_model.py:890, in OLS.__init__(self, endog, exog, missing, hasconst, **kwargs) 887 msg = ("Weights are not supported in OLS and will be ignored" 888 "An exception will be raised in the next version.") 889 warnings.warn(msg, ValueWarning) --> 890 super(OLS, 
self).__init__(endog, exog, missing=missing, 891 hasconst=hasconst, **kwargs) 892 if "weights" in self._init_keys: 893 self._init_keys.remove("weights") File C:\Anaconda\envs\december\lib\site-packages\statsmodels\regression\linear_model.py:717, in WLS.__init__(self, endog, exog, weights, missing, hasconst, **kwargs) 715 else: 716 weights = weights.squeeze() --> 717 super(WLS, self).__init__(endog, exog, missing=missing, 718 weights=weights, hasconst=hasconst, **kwargs) 719 nobs = self.exog.shape[0] 720 weights = self.weights File C:\Anaconda\envs\december\lib\site-packages\statsmodels\regression\linear_model.py:191, in RegressionModel.__init__(self, endog, exog, **kwargs) 190 def __init__(self, endog, exog, **kwargs): --> 191 super(RegressionModel, self).__init__(endog, exog, **kwargs) 192 self._data_attr.extend(['pinv_wexog', 'wendog', 'wexog', 'weights']) File C:\Anaconda\envs\december\lib\site-packages\statsmodels\base\model.py:267, in LikelihoodModel.__init__(self, endog, exog, **kwargs) 266 def __init__(self, endog, exog=None, **kwargs): --> 267 super().__init__(endog, exog, **kwargs) 268 self.initialize() File C:\Anaconda\envs\december\lib\site-packages\statsmodels\base\model.py:92, in Model.__init__(self, endog, exog, **kwargs) 90 missing = kwargs.pop('missing', 'none') 91 hasconst = kwargs.pop('hasconst', None) ---> 92 self.data = self._handle_data(endog, exog, missing, hasconst, 93 **kwargs) 94 self.k_constant = self.data.k_constant 95 self.exog = self.data.exog File C:\Anaconda\envs\december\lib\site-packages\statsmodels\base\model.py:132, in Model._handle_data(self, endog, exog, missing, hasconst, **kwargs) 131 def _handle_data(self, endog, exog, missing, hasconst, **kwargs): --> 132 data = handle_data(endog, exog, missing, hasconst, **kwargs) 133 # kwargs arrays could have changed, easier to just attach here 134 for key in kwargs: File C:\Anaconda\envs\december\lib\site-packages\statsmodels\base\data.py:673, in handle_data(endog, exog, missing, 
hasconst, **kwargs) 670 exog = np.asarray(exog) 672 klass = handle_data_class_factory(endog, exog) --> 673 return klass(endog, exog=exog, missing=missing, hasconst=hasconst, 674 **kwargs) File C:\Anaconda\envs\december\lib\site-packages\statsmodels\base\data.py:86, in ModelData.__init__(self, endog, exog, missing, hasconst, **kwargs) 84 self.const_idx = None 85 self.k_constant = 0 ---> 86 self._handle_constant(hasconst) 87 self._check_integrity() 88 self._cache = {} File C:\Anaconda\envs\december\lib\site-packages\statsmodels\base\data.py:132, in ModelData._handle_constant(self, hasconst) 130 exog_max = np.max(self.exog, axis=0) 131 if not np.isfinite(exog_max).all(): --> 132 raise MissingDataError('exog contains inf or nans') 133 exog_min = np.min(self.exog, axis=0) 134 const_idx = np.where(exog_max == exog_min)[0].squeeze() MissingDataError: exog contains inf or nans
05-13
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值