Blip2 部署+测试
本文主要介绍如何在本地部署并运行 BLIP2，并使用 BLIP2 为 LoRA 训练数据自动生成标注（打标）。
官网Salesforce/blip2-opt-2.7b | https://huggingface.co/Salesforce/blip2-opt-2.7b
部署环境
conda create -n blip2 python==3.10.14 -y
conda activate blip2
pip3 install torch==2.4.0 torchvision==0.19.0 --index-url https://download.pytorch.org/whl/cu124
pip install -r requirements.txt
transformers
pillow
requests
accelerate>=0.26.0
测试脚本
# vim main.py
# pip install accelerate
import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
# Smoke test: caption a single demo image with BLIP-2.
# device_map="auto" delegates device placement to accelerate.
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", device_map="auto")

# Download the demo image. The timeout prevents an indefinite hang and
# raise_for_status() stops an HTTP error page from being handed to PIL.
img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
response = requests.get(img_url, stream=True, timeout=30)
response.raise_for_status()
raw_image = Image.open(response.raw).convert('RGB')

# Send the inputs to whichever device the model was placed on instead of
# hard-coding "cuda", so the script also runs on CPU-only machines.
inputs = processor(raw_image, return_tensors="pt").to(model.device)
out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True).strip())
打标代码（运行前请将 config 中的 image_dir 由 path/to/your/images 修改为实际的图片目录）
import os
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
# Settings for the captioning run: where the images live and which
# checkpoint to load.
config = dict(
    image_dir="path/to/your/images",  # Replace with your image directory
    model_name="Salesforce/blip2-opt-2.7b",
)
def load_image(image_path):
    """Load the image at ``image_path`` and return it converted to RGB.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The image converted to RGB mode.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing regular file.
    """
    # isfile() rather than exists(): a directory would pass exists() and then
    # fail inside Image.open with an error the caller does not expect.
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image not found at path: {image_path}")
    # convert() returns a new image, so the file handle opened here can be
    # released as soon as the conversion is done.
    with Image.open(image_path) as source:
        return source.convert('RGB')
def process_image(image, model, processor):
    """Generate a caption for ``image`` with the given model and processor.

    Args:
        image: The image to caption (passed straight to ``processor``).
        model: Generation model exposing ``device`` and ``generate(**inputs)``.
        processor: Callable that builds model inputs and provides ``decode``.

    Returns:
        The decoded caption with surrounding whitespace stripped.
    """
    # Follow the model's own device instead of hard-coding "cuda", so this
    # works wherever the model was placed (CPU, a specific GPU, ...).
    inputs = processor(image, return_tensors="pt").to(model.device)
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True).strip()
def save_result(image_name, result, output_dir):
    """Write ``result`` to a .txt file named after the image.

    Args:
        image_name: File name of the image; its last extension is replaced
            with ``.txt``.
        result: Caption text to write.
        output_dir: Directory in which the .txt file is created.
    """
    base_name = os.path.splitext(image_name)[0]
    txt_file_path = os.path.join(output_dir, f"{base_name}.txt")
    # Explicit UTF-8 so the caption files do not depend on the platform's
    # default encoding.
    with open(txt_file_path, 'w', encoding='utf-8') as file:
        file.write(result)
def main():
    """Caption every image in the configured directory.

    Reads ``config["image_dir"]``, and for each .png/.jpg/.jpeg file writes
    the generated caption to a .txt file with the same base name in that same
    directory. Images that cannot be opened are reported and skipped.
    """
    image_dir = config["image_dir"]
    output_dir = image_dir  # Save results in the same directory
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so runs process files in a reproducible order.
    image_names = sorted(
        name
        for name in os.listdir(image_dir)
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    if not image_names:
        # Nothing to caption: skip the expensive model load.
        print(f"No images found in: {image_dir}")
        return

    # Load the processor and model
    processor = Blip2Processor.from_pretrained(config["model_name"])
    model = Blip2ForConditionalGeneration.from_pretrained(config["model_name"], device_map="auto")

    for image_name in image_names:
        image_path = os.path.join(image_dir, image_name)
        try:
            image = load_image(image_path)
        except OSError as e:
            # OSError covers missing files as well as unreadable or corrupt
            # images, so one bad file does not abort the whole batch.
            print(e)
            continue
        result = process_image(image, model, processor)
        save_result(image_name, result, output_dir)
        print(f"Processed: {image_name} -> {result}")
# Run the captioning job only when this file is executed as a script.
if __name__ == "__main__":
    main()