---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[17], line 89
87 # 使用SMOTE处理不均衡
88 smote = SMOTE(random_state=42)
---> 89 X_res, y_res = smote.fit_resample(X_train, y_train_encoded)
90 print("\n应用SMOTE后的类别分布:", pd.Series(y_res).value_counts())
92 # 6. 特征编码(分类特征转换为数值)
File D:\Anaconda\Lib\site-packages\imblearn\base.py:202, in BaseSampler.fit_resample(self, X, y, **params)
181 def fit_resample(self, X, y, **params):
182 """Resample the dataset.
183
184 Parameters
(...)
200 The corresponding label of `X_resampled`.
201 """
--> 202 return super().fit_resample(X, y, **params)
File D:\Anaconda\Lib\site-packages\sklearn\base.py:1473, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
1466 estimator._validate_params()
1468 with config_context(
1469 skip_parameter_validation=(
1470 prefer_skip_nested_validation or global_skip_validation
1471 )
1472 ):
-> 1473 return fit_method(estimator, *args, **kwargs)
File D:\Anaconda\Lib\site-packages\imblearn\base.py:99, in SamplerMixin.fit_resample(self, X, y, **params)
97 check_classification_targets(y)
98 arrays_transformer = ArraysTransformer(X, y)
---> 99 X, y, binarize_y = self._check_X_y(X, y)
101 self.sampling_strategy_ = check_sampling_strategy(
102 self.sampling_strategy, y, self._sampling_type
103 )
105 output = self._fit_resample(X, y, **params)
File D:\Anaconda\Lib\site-packages\imblearn\base.py:157, in BaseSampler._check_X_y(self, X, y, accept_sparse)
155 accept_sparse = ["csr", "csc"]
156 y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
--> 157 X, y = validate_data(self, X=X, y=y, reset=True, accept_sparse=accept_sparse)
158 return X, y, binarize_y
File D:\Anaconda\Lib\site-packages\imblearn\utils\_sklearn_compat.py:426, in validate_data(_estimator, X, y, reset, validate_separately, skip_check_array, **kwargs)
424 else:
425 force_all_finite = True
--> 426 return _estimator._validate_data(
427 X=X,
428 y=y,
429 reset=reset,
430 validate_separately=validate_separately,
431 force_all_finite=force_all_finite,
432 **kwargs,
433 )
File D:\Anaconda\Lib\site-packages\sklearn\base.py:650, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)
648 y = check_array(y, input_name="y", **check_y_params)
649 else:
--> 650 X, y = check_X_y(X, y, **check_params)
651 out = X, y
653 if not no_val_X and check_params.get("ensure_2d", True):
File D:\Anaconda\Lib\site-packages\sklearn\utils\validation.py:1273, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
1268 estimator_name = _check_estimator_name(estimator)
1269 raise ValueError(
1270 f"{estimator_name} requires y to be passed, but the target y is None"
1271 )
-> 1273 X = check_array(
1274 X,
1275 accept_sparse=accept_sparse,
1276 accept_large_sparse=accept_large_sparse,
1277 dtype=dtype,
1278 order=order,
1279 copy=copy,
1280 force_all_finite=force_all_finite,
1281 ensure_2d=ensure_2d,
1282 allow_nd=allow_nd,
1283 ensure_min_samples=ensure_min_samples,
1284 ensure_min_features=ensure_min_features,
1285 estimator=estimator,
1286 input_name="X",
1287 )
1289 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator)
1291 check_consistent_length(X, y)
File D:\Anaconda\Lib\site-packages\sklearn\utils\validation.py:1007, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
1005 array = xp.astype(array, dtype, copy=False)
1006 else:
-> 1007 array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)
1008 except ComplexWarning as complex_warning:
1009 raise ValueError(
1010 "Complex data not supported\n{}\n".format(array)
1011 ) from complex_warning
File D:\Anaconda\Lib\site-packages\sklearn\utils\_array_api.py:746, in _asarray_with_order(array, dtype, order, copy, xp, device)
744 array = numpy.array(array, order=order, dtype=dtype)
745 else:
--> 746 array = numpy.asarray(array, order=order, dtype=dtype)
748 # At this point array is a NumPy ndarray. We convert it to an array
749 # container that is consistent with the input's namespace.
750 return xp.asarray(array)
File D:\Anaconda\Lib\site-packages\pandas\core\generic.py:2153, in NDFrame.__array__(self, dtype, copy)
2149 def __array__(
2150 self, dtype: npt.DTypeLike | None = None, copy: bool_t | None = None
2151 ) -> np.ndarray:
2152 values = self._values
-> 2153 arr = np.asarray(values, dtype=dtype)
2154 if (
2155 astype_is_view(values.dtype, arr.dtype)
2156 and using_copy_on_write()
2157 and self._mgr.is_single_block
2158 ):
2159 # Check if both conversions can be done without a copy
2160 if astype_is_view(self.dtypes.iloc[0], values.dtype) and astype_is_view(
2161 values.dtype, arr.dtype
2162 ):
ValueError: could not convert string to float: 'Male'