import base64
import json
import requests
from PIL import Image, ImageDraw

# Ollama local API endpoint (default port)
OLLAMA_API_URL = "http://localhost:11434/api/generate"

def image_open(image_path):
    """Print the image dimensions and return its base64-encoded contents."""
    # Open the image file and report its size
    with Image.open(image_path) as img:
        width, height = img.size
        print(f"Image width: {width}px, image height: {height}px")
    # Read the raw bytes and base64-encode them for the Ollama API
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
    return encoded_image

def analyze_image(image_paths, model_name, prompt):
    """
    Analyze image content with an Ollama vision model.

    Args:
        image_paths (list[str]): Paths of the image files to analyze.
        model_name (str): Ollama model name (must support image input).
        prompt (str): Instruction sent to the model.

    Returns:
        dict: The parsed API response, or None on failure.
    """
    encoded_images = [image_open(i) for i in image_paths]
    # Build the API request payload
    payload = {
        "model": model_name,
        "prompt": prompt,
        "images": encoded_images,  # note: some models may expect a different key (e.g. "image")
        "stream": False  # non-streaming response
    }
    try:
        # Send the POST request; vision models can take a while, so allow a generous timeout
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=120)
        response.raise_for_status()  # raise on HTTP errors
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None

def img_plt(image_path, center_point_dict):
    """Mark the returned center point on the image and display it."""
    with Image.open(image_path) as img:
        draw = ImageDraw.Draw(img)
        # Center point coordinates returned by the model, e.g. {"x": 240, "y": 60}
        center_point = (center_point_dict.get('x'), center_point_dict.get('y'))
        # Marker color and size
        point_color = "blue"
        point_radius = 10
        # Draw a small filled circle at the center point
        draw.ellipse((center_point[0] - point_radius, center_point[1] - point_radius,
                      center_point[0] + point_radius, center_point[1] + point_radius),
                     fill=point_color)
        img.show()

# Example usage
if __name__ == "__main__":
    imgs = [r'download (2).jpg']
    prompt = ('Where is the squirrel in the picture? Return its center point as JSON, '
              'e.g. {"x": 240, "y": 60}. My image width: 320px, image height: 180px')
    model_name = "qwen2.5vl:latest"
    result = analyze_image(imgs, model_name, prompt)
    if result:
        print("Analysis result:")
        response = result.get("response")
        # Strip the Markdown code fence (```json ... ```) the model wraps around its answer
        center_point = response.replace('json', '').replace('```', '')
        print(center_point)
        img_plt(imgs[0], json.loads(center_point))
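
The string replacement in the main block assumes the model always wraps its answer in a ```json fence. A slightly more defensive approach is to search the reply for the first JSON object and parse that; the sketch below is my own addition (the helper name extract_center_point and its fallback behaviour are assumptions, not part of the original demo).

import re

def extract_center_point(response_text):
    # Find the first {...} block in the model's reply (assumption: the
    # coordinates come back as a single flat JSON object).
    match = re.search(r'\{.*?\}', response_text, re.DOTALL)
    if match is None:
        return None
    try:
        point = json.loads(match.group(0))
    except json.JSONDecodeError:
        return None
    # Only accept the result if both coordinates are present
    return point if 'x' in point and 'y' in point else None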
For local deployment, a vision LLM still needs task-specific fine-tuning before real-world use, and the accuracy leaves room for improvement. This is just a simple demo, written up as a record of the test.
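
Since everything here runs against a locally deployed Ollama server, it can also help to check that the target model has actually been pulled before sending the generate request. A minimal sketch, assuming the standard Ollama /api/tags endpoint that lists local models (the helper name model_available is my own):

def model_available(model_name, base_url="http://localhost:11434"):
    # Ask the local Ollama server for its list of pulled models (assumes /api/tags).
    try:
        resp = requests.get(f"{base_url}/api/tags", timeout=5)
        resp.raise_for_status()
    except requests.exceptions.RequestException:
        return False
    names = [m.get("name") for m in resp.json().get("models", [])]
    return model_name in names

Calling model_available("qwen2.5vl:latest") before analyze_image gives a clearer error message than a failed generate request.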