HTTPError Traceback (most recent call last)
Cell In[7], line 5
2 from sklearn.model_selection import train_test_split
4 # 加载数据集
----> 5 news = fetch_20newsgroups(subset='all')
7 # 拆分训练集和测试集
8 x_train, x_test, y_train, y_test = train_test_split(news.data, news.target, test_size=0.25, random_state=42)
File D:\develop\anaconda\Lib\site-packages\sklearn\utils\_param_validation.py:211, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
205 try:
206 with config_context(
207 skip_parameter_validation=(
208 prefer_skip_nested_validation or global_skip_validation
209 )
210 ):
--> 211 return func(*args, **kwargs)
212 except InvalidParameterError as e:
213 # When the function is just a wrapper around an estimator, we allow
214 # the function to delegate validation to the estimator, but we replace
215 # the name of the estimator by the name of the function in the error
216 # message to avoid confusion.
217 msg = re.sub(
218 r"parameter of \w+ must be",
219 f"parameter of {func.__qualname__} must be",
220 str(e),
221 )
File D:\develop\anaconda\Lib\site-packages\sklearn\datasets\_twenty_newsgroups.py:284, in fetch_20newsgroups(data_home, subset, categories, shuffle, random_state, remove, download_if_missing, return_X_y)
282 if download_if_missing:
283 logger.info("Downloading 20news dataset. This may take a few minutes.")
--> 284 cache = _download_20newsgroups(
285 target_dir=twenty_home, cache_path=cache_path
286 )
287 else:
288 raise OSError("20Newsgroups dataset not found")
File D:\develop\anaconda\Lib\site-packages\sklearn\datasets\_twenty_newsgroups.py:76, in _download_20newsgroups(target_dir, cache_path)
73 os.makedirs(target_dir)
75 logger.info("Downloading dataset from %s (14 MB)", ARCHIVE.url)
---> 76 archive_path = _fetch_remote(ARCHIVE, dirname=target_dir)
78 logger.debug("Decompressing %s", archive_path)
79 tarfile.open(archive_path, "r:gz").extractall(path=target_dir)
File D:\develop\anaconda\Lib\site-packages\sklearn\datasets\_base.py:1388, in _fetch_remote(remote, dirname)
1366 """Helper function to download a remote dataset into path
1367
1368 Fetch a dataset pointed by remote's url, save into path using remote's
(...)
1384 Full path of the created file.
1385 """
1387 file_path = remote.filename if dirname is None else join(dirname, remote.filename)
-> 1388 urlretrieve(remote.url, file_path)
1389 checksum = _sha256(file_path)
1390 if remote.checksum != checksum:
File D:\develop\anaconda\Lib\urllib\request.py:241, in urlretrieve(url, filename, reporthook, data)
224 """
225 Retrieve a URL into a temporary location on disk.
226
(...)
237 data file as well as the resulting HTTPMessage object.
238 """
239 url_type, path = _splittype(url)
--> 241 with contextlib.closing(urlopen(url, data)) as fp:
242 headers = fp.info()
244 # Just return the local path and the "headers" for file://
245 # URLs. No sense in performing a copy unless requested.
File D:\develop\anaconda\Lib\urllib\request.py:216, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
214 else:
215 opener = _opener
--> 216 return opener.open(url, data, timeout)
File D:\develop\anaconda\Lib\urllib\request.py:525, in OpenerDirector.open(self, fullurl, data, timeout)
523 for processor in self.process_response.get(protocol, []):
524 meth = getattr(processor, meth_name)
--> 525 response = meth(req, response)
527 return response
File D:\develop\anaconda\Lib\urllib\request.py:634, in HTTPErrorProcessor.http_response(self, request, response)
631 # According to RFC 2616, "2xx" code indicates that the client's
632 # request was successfully received, understood, and accepted.
633 if not (200 <= code < 300):
--> 634 response = self.parent.error(
635 'http', request, response, code, msg, hdrs)
637 return response
File D:\develop\anaconda\Lib\urllib\request.py:563, in OpenerDirector.error(self, proto, *args)
561 if http_err:
562 args = (dict, 'default', 'http_error_default') + orig_args
--> 563 return self._call_chain(*args)
File D:\develop\anaconda\Lib\urllib\request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
494 for handler in handlers:
495 func = getattr(handler, meth_name)
--> 496 result = func(*args)
497 if result is not None:
498 return result
File D:\develop\anaconda\Lib\urllib\request.py:643, in HTTPDefaultErrorHandler.http_error_default(self, req, fp, code, msg, hdrs)
642 def http_error_default(self, req, fp, code, msg, hdrs):
--> 643 raise HTTPError(req.full_url, code, msg, hdrs, fp)
HTTPError: HTTP Error 403: Forbidden