import base64
import json
import requests
from PIL import Image, ImageDraw

# Ollama local API endpoint (default port)
OLLAMA_API_URL = "http://localhost:11434/api/generate"

def image_open(image_path):
    """Print the image dimensions and return its base64-encoded contents."""
    # Open the image file and report its size
    with Image.open(image_path) as img:
        width, height = img.size
        print(f"Image width: {width}px, image height: {height}px")
    # Read the raw bytes and base64-encode them for the Ollama API
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
    return encoded_image

def analyze_image(image_paths, model_name, prompt):
    """
    Analyze image content with an Ollama vision model.

    Args:
        image_paths (list[str]): Paths of the image files to analyze.
        model_name (str): Ollama model name (must support image input).
        prompt (str): Instruction sent to the model.

    Returns:
        dict: The parsed API response, or None on failure.
    """
    encoded_images = [image_open(i) for i in image_paths]
    # Build the API request payload
    payload = {
        "model": model_name,
        "prompt": prompt,
        "images": encoded_images,  # note: some models may expect a different key (e.g. "image")
        "stream": False  # non-streaming response
    }
    try:
        # Send the POST request; vision models can take a while, so allow a generous timeout
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=120)
        response.raise_for_status()  # raise on HTTP errors
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None

def img_plt(image_path, center_point_dict):
    """Mark the returned center point on the image and display it."""
    with Image.open(image_path) as img:
        draw = ImageDraw.Draw(img)
        # Center point coordinates returned by the model, e.g. {"x": 240, "y": 60}
        center_point = (center_point_dict.get('x'), center_point_dict.get('y'))
        # Marker color and size
        point_color = "blue"
        point_radius = 10
        # Draw a small filled circle at the center point
        draw.ellipse((center_point[0] - point_radius, center_point[1] - point_radius,
                      center_point[0] + point_radius, center_point[1] + point_radius),
                     fill=point_color)
        img.show()

# Example usage
if __name__ == "__main__":
    imgs = [r'download (2).jpg']
    prompt = ('Where is the squirrel in the picture? Return its center point as JSON, '
              'e.g. {"x": 240, "y": 60}. My image width: 320px, image height: 180px')
    model_name = "qwen2.5vl:latest"
    result = analyze_image(imgs, model_name, prompt)
    if result:
        print("Analysis result:")
        response = result.get("response")
        # Strip the Markdown code fence (```json ... ```) the model wraps around its answer
        center_point = response.replace('json', '').replace('```', '')
        print(center_point)
        img_plt(imgs[0], json.loads(center_point))
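
The string replacement in the main block assumes the model always wraps its answer in a ```json fence. A slightly more defensive approach is to search the reply for the first JSON object and parse that; the sketch below is my own addition (the helper name extract_center_point and its fallback behaviour are assumptions, not part of the original demo).

import re

def extract_center_point(response_text):
    # Find the first {...} block in the model's reply (assumption: the
    # coordinates come back as a single flat JSON object).
    match = re.search(r'\{.*?\}', response_text, re.DOTALL)
    if match is None:
        return None
    try:
        point = json.loads(match.group(0))
    except json.JSONDecodeError:
        return None
    # Only accept the result if both coordinates are present
    return point if 'x' in point and 'y' in point else None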
For local deployment, a vision LLM still needs task-specific fine-tuning before real-world use, and the accuracy leaves room for improvement. This is just a simple demo, written up as a record of the test.
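
Since everything here runs against a locally deployed Ollama server, it can also help to check that the target model has actually been pulled before sending the generate request. A minimal sketch, assuming the standard Ollama /api/tags endpoint that lists local models (the helper name model_available is my own):

def model_available(model_name, base_url="http://localhost:11434"):
    # Ask the local Ollama server for its list of pulled models (assumes /api/tags).
    try:
        resp = requests.get(f"{base_url}/api/tags", timeout=5)
        resp.raise_for_status()
    except requests.exceptions.RequestException:
        return False
    names = [m.get("name") for m in resp.json().get("models", [])]
    return model_name in names

Calling model_available("qwen2.5vl:latest") before analyze_image gives a clearer error message than a failed generate request.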