from sentence_transformers import SentenceTransformer
import pandas as pd
import torch
import os
# Load the embedding model. A Hub id is downloaded on first use, which needs
# network access to huggingface.co. When that host is unreachable (the
# "couldn't connect to 'https://huggingface.co'" OSError), either set
# BGE_MODEL_PATH to a local directory holding a downloaded copy of the model,
# or export HF_ENDPOINT in the shell *before* starting Python so that
# huggingface_hub talks to a reachable mirror.
MODEL_NAME_OR_PATH = os.environ.get('BGE_MODEL_PATH', 'BAAI/bge-large-zh')
model = SentenceTransformer(
    MODEL_NAME_OR_PATH,
    device='cuda' if torch.cuda.is_available() else 'cpu',
)

# Locate and read the CSV on the current user's Desktop. expanduser('~')
# resolves to %USERPROFILE% on Windows and to $HOME elsewhere, so the script
# no longer raises KeyError on non-Windows machines.
CSV_PATH = os.path.join(os.path.expanduser('~'), 'Desktop', 'es_textdoc.csv')
INDEX_NAME = "products"  # index name
# NOTE(review): read_csv decodes as UTF-8 by default; pass encoding=... if the
# file was exported in another encoding (e.g. GBK) -- confirm against the data.
df = pd.read_csv(CSV_PATH)

# Build the text to embed from the title / brand / category columns. Missing
# values become empty strings and every value is cast to str, so a NaN or a
# numeric cell cannot poison the concatenation or reach the tokenizer.
text_columns = df[['title', 'brand', 'category']].fillna('').astype(str)
df['combined_text'] = (
    text_columns['title']
    + " [品牌] " + text_columns['brand']
    + " [分类] " + text_columns['category']
)

# Generate the embeddings (the original note states 1024 dimensions for this
# model). encode() batches internally via batch_size, so no manual slicing is
# needed; list() keeps `embeddings` a list of per-row arrays as before.
batch_size = 32
texts = df['combined_text'].tolist()
embeddings = (
    list(model.encode(texts, batch_size=batch_size, normalize_embeddings=True))
    if texts
    else []  # empty CSV: skip the model call entirely
)
df['vector'] = [e.tolist() for e in embeddings] # 添加向量列

我执行这个代码,报了这个错:
D:\Pythonproject\elasticsearch9\.venv\Scripts\python.exe D:\Pythonproject\elasticsearch9\src\0704_date.py
No sentence-transformers model found with name BAAI/bge-large-zh. Creating a new one with mean pooling.
Traceback (most recent call last):
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\urllib3\connection.py", line 198, in _new_conn
sock = connection.create_connection(
(self._dns_host, self.port),
...<2 lines>...
socket_options=self.socket_options,
)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\urllib3\util\connection.py", line 85, in create_connection
raise err
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\urllib3\util\connection.py", line 73, in create_connection
sock.connect(sa)
~~~~~~~~~~~~^^^^
TimeoutError: timed out
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\urllib3\connectionpool.py", line 787, in urlopen
response = self._make_request(
conn,
...<10 lines>...
**response_kw,
)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\urllib3\connectionpool.py", line 488, in _make_request
raise new_e
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\urllib3\connectionpool.py", line 464, in _make_request
self._validate_conn(conn)
~~~~~~~~~~~~~~~~~~~^^^^^^
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\urllib3\connectionpool.py", line 1093, in _validate_conn
conn.connect()
~~~~~~~~~~~~^^
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\urllib3\connection.py", line 753, in connect
self.sock = sock = self._new_conn()
~~~~~~~~~~~~~~^^
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\urllib3\connection.py", line 207, in _new_conn
raise ConnectTimeoutError(
...<2 lines>...
) from e
urllib3.exceptions.ConnectTimeoutError: (<urllib3.connection.HTTPSConnection object at 0x000002A53441F4D0>, 'Connection to huggingface.co timed out. (connect timeout=10)')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\requests\adapters.py", line 667, in send
resp = conn.urlopen(
method=request.method,
...<9 lines>...
chunked=chunked,
)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\urllib3\connectionpool.py", line 841, in urlopen
retries = retries.increment(
method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\urllib3\util\retry.py", line 519, in increment
raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /BAAI/bge-large-zh/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000002A53441F4D0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\huggingface_hub\file_download.py", line 1533, in _get_metadata_or_catch_error
metadata = get_hf_file_metadata(
url=url, proxies=proxies, timeout=etag_timeout, headers=headers, token=token
)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\huggingface_hub\utils\_validators.py", line 114, in _inner_fn
return fn(*args, **kwargs)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\huggingface_hub\file_download.py", line 1450, in get_hf_file_metadata
r = _request_wrapper(
method="HEAD",
...<5 lines>...
timeout=timeout,
)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\huggingface_hub\file_download.py", line 286, in _request_wrapper
response = _request_wrapper(
method=method,
...<2 lines>...
**params,
)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\huggingface_hub\file_download.py", line 309, in _request_wrapper
response = http_backoff(method=method, url=url, **params, retry_on_exceptions=(), retry_on_status_codes=(429,))
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\huggingface_hub\utils\_http.py", line 310, in http_backoff
response = session.request(method=method, url=url, **kwargs)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\requests\sessions.py", line 589, in request
resp = self.send(prep, **send_kwargs)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\requests\sessions.py", line 703, in send
r = adapter.send(request, **kwargs)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\huggingface_hub\utils\_http.py", line 96, in send
return super().send(request, *args, **kwargs)
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\requests\adapters.py", line 688, in send
raise ConnectTimeout(e, request=request)
requests.exceptions.ConnectTimeout: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /BAAI/bge-large-zh/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000002A53441F4D0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: a46ed614-e7b0-4d32-b35f-f6caf6a7200b)')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\transformers\utils\hub.py", line 470, in cached_files
hf_hub_download(
~~~~~~~~~~~~~~~^
path_or_repo_id,
^^^^^^^^^^^^^^^^
...<10 lines>...
local_files_only=local_files_only,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\huggingface_hub\utils\_validators.py", line 114, in _inner_fn
return fn(*args, **kwargs)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\huggingface_hub\file_download.py", line 1008, in hf_hub_download
return _hf_hub_download_to_cache_dir(
# Destination
...<14 lines>...
force_download=force_download,
)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\huggingface_hub\file_download.py", line 1115, in _hf_hub_download_to_cache_dir
_raise_on_head_call_error(head_call_error, force_download, local_files_only)
~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\huggingface_hub\file_download.py", line 1648, in _raise_on_head_call_error
raise LocalEntryNotFoundError(
...<3 lines>...
) from head_call_error
huggingface_hub.errors.LocalEntryNotFoundError: An error happened while trying to locate the file on the Hub and we cannot find the requested files in the local cache. Please check your connection and try again or make sure your Internet connection is on.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "D:\Pythonproject\elasticsearch9\src\0704_date.py", line 7, in <module>
model = SentenceTransformer('BAAI/bge-large-zh', device='cuda' if torch.cuda.is_available() else 'cpu')
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\sentence_transformers\SentenceTransformer.py", line 339, in __init__
modules = self._load_auto_model(
model_name_or_path,
...<8 lines>...
has_modules=has_modules,
)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\sentence_transformers\SentenceTransformer.py", line 2061, in _load_auto_model
transformer_model = Transformer(
model_name_or_path,
...<4 lines>...
backend=self.backend,
)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\sentence_transformers\models\Transformer.py", line 87, in __init__
config, is_peft_model = self._load_config(model_name_or_path, cache_dir, backend, config_args)
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\sentence_transformers\models\Transformer.py", line 152, in _load_config
return AutoConfig.from_pretrained(model_name_or_path, **config_args, cache_dir=cache_dir), False
~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\transformers\models\auto\configuration_auto.py", line 1197, in from_pretrained
config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\transformers\configuration_utils.py", line 608, in get_config_dict
config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\transformers\configuration_utils.py", line 667, in _get_config_dict
resolved_config_file = cached_file(
pretrained_model_name_or_path,
...<10 lines>...
_commit_hash=commit_hash,
)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\transformers\utils\hub.py", line 312, in cached_file
file = cached_files(path_or_repo_id=path_or_repo_id, filenames=[filename], **kwargs)
File "D:\Pythonproject\elasticsearch9\.venv\Lib\site-packages\transformers\utils\hub.py", line 543, in cached_files
raise OSError(
...<3 lines>...
) from e
OSError: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Check your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.
进程已结束,退出代码为 1