No matching distribution found for clip==1.0

pip install git+https://github.com/openai/CLIP.git

import os

import torch
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from PIL import Image
import clip

# File extensions treated as video frames; anything else in the directory is skipped.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tif", ".tiff", ".webp")

# Load the pretrained CLIP model.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Load the custom (CLIP4Clip fine-tuned) checkpoint.
weights_path = r"/home/jinyou/file/CLIP4Clip-master/ckpts/ckpt_msrvtt_retrieval_looseType/pytorch_model.bin.2"
weights = torch.load(weights_path, map_location=device, weights_only=True)

# Strip the leading "clip." prefix so the keys line up with the plain CLIP
# model. Only a *prefix* is removed; replacing the substring anywhere in the
# key could corrupt unrelated parameter names.
_PREFIX = "clip."
new_weights = {}
for key, value in weights.items():
    new_key = key[len(_PREFIX):] if key.startswith(_PREFIX) else key
    new_weights[new_key] = value

# strict=False tolerates checkpoint keys that have no counterpart in the plain
# CLIP model. Report the mismatch so a bad key mapping is not silently ignored.
load_result = model.load_state_dict(new_weights, strict=False)
print(
    f"Loaded custom weights: {len(load_result.missing_keys)} missing, "
    f"{len(load_result.unexpected_keys)} unexpected keys"
)

# Inference only.
model.eval()


def preprocess_frames(frame_dir, max_frames=32):
    """Load and preprocess the frames of one video.

    Args:
        frame_dir: Directory containing the extracted frame images.
        max_frames: Upper bound on the number of frames used. When the
            directory holds more, frames are sampled uniformly across the
            whole sequence instead of taking only the first ones.

    Returns:
        A tuple ``(frames, frame_files)`` where ``frames`` is the stacked
        tensor of preprocessed frames on ``device`` (one row per frame) and
        ``frame_files`` lists the matching file names in the same order.

    Raises:
        ValueError: If ``frame_dir`` contains no image files.
    """
    frame_files = sorted(
        name
        for name in os.listdir(frame_dir)
        if name.lower().endswith(IMAGE_EXTENSIONS)
    )
    if not frame_files:
        raise ValueError(f"No image frames found in {frame_dir!r}")

    # Sample uniformly over the full list. The list must NOT be truncated
    # before this check, otherwise the branch can never trigger.
    if len(frame_files) > max_frames:
        indices = torch.linspace(0, len(frame_files) - 1, steps=max_frames).long().tolist()
        frame_files = [frame_files[i] for i in indices]

    frames = []
    for frame_file in frame_files:
        frame_path = os.path.join(frame_dir, frame_file)
        # Context manager closes the file handle once the frame is decoded.
        with Image.open(frame_path) as image:
            frames.append(preprocess(image.convert("RGB")))
        print(f"Processing frame: {frame_file}")

    # Stack into one batch and move it to the device in a single transfer.
    return torch.stack(frames).to(device), frame_files


def retrieve_video(text_query, video_frames, frame_files, top_k=5):
    """Score a video against a text query and return the best-matching frames.

    Args:
        text_query: The query sentence.
        video_frames: Batch of preprocessed frames as returned by
            ``preprocess_frames``.
        frame_files: File names aligned with ``video_frames``.
        top_k: Maximum number of frames to return (clamped to the number of
            available frames).

    Returns:
        A list of ``(frame_file, similarity)`` tuples, highest similarity
        first, where ``similarity`` is the cosine similarity between the frame
        and the query.
    """
    # NOTE(review): CLIP's tokenizer and text encoder were trained mainly on
    # English text; non-English queries may score poorly - confirm for your data.
    text = clip.tokenize([text_query]).to(device)

    with torch.no_grad():
        # The plain CLIP model exposes encode_image/encode_text directly; it
        # has no ``.clip`` attribute and its forward() takes a 4-D image batch.
        frame_features = model.encode_image(video_frames).float()
        text_feature = model.encode_text(text).float()

        # L2-normalise so that dot products are cosine similarities.
        frame_features = frame_features / frame_features.norm(dim=-1, keepdim=True)
        text_feature = text_feature / text_feature.norm(dim=-1, keepdim=True)

        # Video-level feature: mean of the frame features, re-normalised.
        video_feature = frame_features.mean(dim=0, keepdim=True)
        video_feature = video_feature / video_feature.norm(dim=-1, keepdim=True)

        logit_scale = model.logit_scale.exp().float()
        similarity = logit_scale * text_feature @ video_feature.T

        # squeeze(-1) keeps a 1-D result even when there is a single frame.
        frame_similarity = (frame_features @ text_feature.T).squeeze(-1)

    print(f"Overall similarity: {similarity.item():.4f}")
    print(f"Frame similarities: {frame_similarity.cpu().numpy()}")

    # topk raises if k exceeds the number of frames, so clamp it.
    k = min(top_k, frame_similarity.numel())
    values, indices = frame_similarity.topk(k)
    return [
        (frame_files[index], value)
        for index, value in zip(indices.tolist(), values.tolist())
    ]


# Example: retrieve the frames of one video that best match a query.
if __name__ == "__main__":
    # Directory holding the extracted video frames (replace with your own).
    video_frames_dir = r"/home/jinyou/file/CLIP4Clip-master/zhen1/"
    text_query = "当你发了一个嗯"  # Replace with your own text query.

    # Preprocess the frames and collect their file names.
    video_frames, frame_files = preprocess_frames(video_frames_dir)

    # retrieve_video returns a list of (frame_file, similarity) pairs.
    results = retrieve_video(text_query, video_frames, frame_files)

    print("Top most relevant frames:")
    for frame_file, value in results:
        print(f"Frame {frame_file}: Similarity = {value:.4f}")

# Note from the original post (cut off in the source): "After modifying the
# code I provided above, give me ..."
03-25
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Bonefire20

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值