环境:python3.8
0.安装所需的第三方库
# 1. 可选:创建并激活虚拟环境
python3.8 -m venv venv
source venv/bin/activate # Windows 下用 venv\Scripts\activate
# 2. 升级 pip & wheel
pip install -U pip wheel
# 3. 一键安装全部依赖(含 PyTorch CPU 版)
pip install torch==1.13.1+cpu torchvision==0.14.1+cpu -f https://download.pytorch.org/whl/torch_stable.html && \
pip install flask mysql-connector-python==8.0.33 nacos-sdk-python==0.9.12 requests PyYAML numpy Pillow
1.相似图片搜索功能文件的代码,search_similar_images.py,这里的products表是产品表,high_image字段为json的图片数组,可根据自己的业务逻辑自行调整
import os
from io import BytesIO
from typing import Optional, List, Tuple
import requests
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import numpy as np
import torch
from torchvision import models, transforms
from PIL import Image
import mysql.connector
from mysql.connector import Error
from torchvision.models import ResNet18_Weights
import json
from nacos_wrapper import nacos_get_config
# ======================
# Configuration
# ======================
# Run on the GPU when CUDA is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the model and the preprocessing pipeline
model = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
model = torch.nn.Sequential(*(list(model.children())[:-1])) # drop the final fully connected layer
model.eval()
model.to(device)
preprocess = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# Image size limit
# NOTE(review): None removes Pillow's pixel-count limit altogether; this module also
# decodes downloaded images, so a finite limit may be safer — confirm.
Image.MAX_IMAGE_PIXELS = None # or set a larger value, e.g. 200000000
# Database connection settings: the 'db' entry of python.yaml fetched through nacos_get_config
db_config = nacos_get_config(data_id='python.yaml', fmt='yaml')['db']
def extract_feature(image):
    """Extract the feature vector of a single image.

    The image is converted to RGB, passed through ``preprocess`` and the
    module-level ``model`` with gradient tracking disabled, and the squeezed
    output is returned as a NumPy array (described by the original author as
    a 512-dimensional vector).

    :param image: a file path (str) or a ``PIL.Image.Image`` instance
    :return: the feature vector as a NumPy array
    :raises TypeError: if ``image`` is neither a str nor a PIL image
    """
    if isinstance(image, str):
        # Close the file deterministically; convert() returns an independent
        # in-memory copy, so the image stays usable after the handle is gone.
        with Image.open(image) as opened:
            img = opened.convert("RGB")
    elif isinstance(image, Image.Image):
        img = image.convert("RGB")
    else:
        raise TypeError("image 必须是文件路径(str) 或 PIL.Image 对象")
    # Add the batch dimension and move the tensor to the model's device
    input_tensor = preprocess(img).unsqueeze(0).to(device)
    with torch.no_grad():
        feature = model(input_tensor)
    return feature.squeeze().cpu().numpy()
def connect_to_db():
    """Open a MySQL connection from ``db_config``.

    :return: the connection when it reports itself as connected; ``None``
        when the connector raises ``Error`` or the connection is not live
    """
    try:
        connection = mysql.connector.connect(**db_config)
        alive = connection.is_connected()
    except Error as exc:
        print(f"Error connecting to MySQL: {exc}")
        return None
    if not alive:
        return None
    print('Connected to MySQL database')
    return connection
# Populate the image library by walking a folder
def build_index_and_save_to_db(image_folder):
    """Index every .jpg/.jpeg/.png file under ``image_folder`` into ``image_library``.

    Files whose path is already stored are skipped. A failure on a single
    file is printed and does not stop the walk. All inserts are committed
    once at the end, and the cursor and connection are always closed.

    :param image_folder: root directory to walk recursively
    :return: None (also when the database connection cannot be opened)
    """
    conn = connect_to_db()
    if not conn:
        return
    cursor = conn.cursor()
    # Table definition (kept for reference; create it manually if it does not exist)
    # create_table_query = """
    # CREATE TABLE `image_library` (
    # `id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'primary key',
    # `model_num` varchar(255) NOT NULL COMMENT 'product model number',
    # `path` varchar(500) NOT NULL COMMENT 'image path',
    # `feature` blob NOT NULL COMMENT 'bytes of the 512-dimensional image feature vector',
    # PRIMARY KEY (`id`) USING BTREE
    # ) ENGINE=InnoDB AUTO_INCREMENT=11353 DEFAULT CHARSET=utf8mb4 COMMENT='image library used for similar-image search';
    # """
    # cursor.execute(create_table_query)
    # conn.commit()
    try:
        for root, _, files in os.walk(image_folder):
            for file in files:
                if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    continue
                path = os.path.join(root, file)
                try:
                    # Check for an existing row first so known images are not
                    # pushed through the model again.
                    cursor.execute("SELECT COUNT(*) FROM image_library WHERE path = %s", (path,))
                    if cursor.fetchone()[0] != 0:
                        print(f"Already exists in DB: {path}")
                        continue
                    # Serialise the feature vector to bytes for the blob column
                    feat_bytes = extract_feature(path).tobytes()
                    # model_num is NOT NULL in the table definition above; folder
                    # imports carry no model number, so store an empty string
                    # (the same value the upload endpoint passes).
                    cursor.execute(
                        "INSERT INTO image_library (path, feature, model_num) VALUES (%s, %s, %s)",
                        (path, feat_bytes, ''),
                    )
                    print(f"Added to DB: {path}")
                except Exception as e:
                    print(f"Skip {path}: {e}")
        conn.commit()
    finally:
        cursor.close()
        conn.close()
# Populate the image library with a single image
def build_index_for_single_image(image, model_num, path_hint: Optional[str] = None):
    """Insert one image into ``image_library`` unless its path key already exists.

    :param image: a ``PIL.Image.Image`` or a file path (str)
    :param model_num: product model number stored with the row
    :param path_hint: key to store as ``path`` when a PIL image is passed;
        when omitted, a key derived from a SHA-256 digest of the pixel bytes
        is used
    :return: None (also when the database is unreachable or the key exists)
    :raises TypeError: if ``image`` is neither a str nor a PIL image
    """
    # Function-scope import keeps this block self-contained.
    import hashlib

    conn = connect_to_db()
    if not conn:
        return
    cursor = conn.cursor()
    # Table definition (kept for reference; create it manually if it does not exist)
    # create_table_query = """
    # CREATE TABLE `image_library` (
    # `id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'primary key',
    # `model_num` varchar(255) NOT NULL COMMENT 'product model number',
    # `path` varchar(500) NOT NULL COMMENT 'image path',
    # `feature` blob NOT NULL COMMENT 'bytes of the 512-dimensional image feature vector',
    # PRIMARY KEY (`id`) USING BTREE
    # ) ENGINE=InnoDB AUTO_INCREMENT=11353 DEFAULT CHARSET=utf8mb4 COMMENT='image library used for similar-image search';
    # """
    # cursor.execute(create_table_query)
    # conn.commit()
    try:
        # 1. Normalise the input to an RGB PIL image and derive the path key
        if isinstance(image, str):
            path = os.path.abspath(image)
            with Image.open(path) as opened:
                img = opened.convert('RGB')
        elif isinstance(image, Image.Image):
            # The built-in hash() of bytes is randomised per process, so it
            # cannot serve as a persistent dedupe key; a content digest is
            # stable across runs.
            path = path_hint or f"pil_upload_{hashlib.sha256(image.tobytes()).hexdigest()}.jpg"
            img = image.convert('RGB')
        else:
            raise TypeError("image 必须是文件路径(str) 或 PIL.Image")
        # 2. Dedupe on the path key
        cursor.execute("SELECT COUNT(*) FROM image_library WHERE path = %s", (path,))
        if cursor.fetchone()[0]:
            print(f"Already exists: {path}")
            return
        # 3. Extract the feature vector and serialise it for the blob column
        feat_bytes = extract_feature(img).tobytes()
        # 4. Insert and commit
        cursor.execute(
            "INSERT INTO image_library (path, feature, model_num) VALUES (%s, %s, %s)",
            (path, feat_bytes, model_num),
        )
        conn.commit()
        print(f"Added to DB: {path}")
    finally:
        # Release the cursor and connection on every exit path, including errors
        cursor.close()
        conn.close()
def cosine_similarity(a, b):
    """Return the cosine similarity between two vectors.

    :param a: first vector (array-like)
    :param b: second vector (array-like)
    :return: ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector has
        zero norm (the quotient is undefined there and would otherwise be
        NaN, which breaks sorting by similarity)
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator
# Similar-image search based on an image path
def search_similar(query_image, top_k=5):
    """Return the ``top_k`` library images most similar to ``query_image``.

    Every row of ``image_library`` is scanned and compared with the query
    feature by cosine similarity.

    :param query_image: the query image, passed to ``extract_feature``
    :param top_k: number of results to return
    :return: list of ``(path, similarity)`` sorted by similarity, highest
        first; similarities are plain Python floats, matching
        ``search_similar_by_image``; ``[]`` when the database is unreachable
    """
    conn = connect_to_db()
    if not conn:
        return []
    cursor = conn.cursor(dictionary=True)
    try:
        # Feature vector of the query image
        query_feat = extract_feature(query_image)
        # Load every stored feature and decode the blob back into a vector
        cursor.execute("SELECT path, feature FROM image_library")
        similarities = [
            (
                row['path'],
                float(cosine_similarity(query_feat, np.frombuffer(row['feature'], dtype=np.float32))),
            )
            for row in cursor.fetchall()
        ]
    finally:
        # Always release the cursor and connection, even if extraction or the query fails
        cursor.close()
        conn.close()
    # Highest similarity first, then keep the requested number of results
    similarities.sort(key=lambda x: x[1], reverse=True)
    return similarities[:top_k]
# Similar-image search from an image object
def search_similar_by_image(image, top_k=5):
    """Search the library with an image object (or path) as the query.

    :param image: a ``PIL.Image.Image`` or an image path (str)
    :param top_k: number of most similar images to return
    :return: list of ``(path, similarity)`` in descending similarity order;
        ``[]`` when the database is unreachable
    :raises TypeError: if ``image`` is neither a str nor a PIL image
    """
    conn = connect_to_db()
    if not conn:
        return []
    cursor = conn.cursor(dictionary=True)
    try:
        # Normalise the query to an RGB PIL image
        if isinstance(image, str):
            with Image.open(image) as opened:
                query_img = opened.convert('RGB')
        elif isinstance(image, Image.Image):
            query_img = image.convert('RGB')
        else:
            raise TypeError("image 必须是文件路径(str) 或 PIL.Image 对象")
        # 1. Feature vector of the query
        query_feat = extract_feature(query_img)
        # 2. Full scan of the library table
        cursor.execute("SELECT path, feature FROM image_library")
        similarities = []
        for row in cursor.fetchall():
            db_feat = np.frombuffer(row['feature'], dtype=np.float32)
            similarities.append((row['path'], float(cosine_similarity(query_feat, db_feat))))
    finally:
        # Release the cursor and connection on every exit path, including the TypeError above
        cursor.close()
        conn.close()
    # 3. Sort and truncate
    similarities.sort(key=lambda x: x[1], reverse=True)
    return similarities[:top_k]
# ==========================
# Added: find high-resolution images that are not in the library yet
# ==========================
def find_missing_images() -> List[Tuple[str, str]]:
    """List the ``products.high_image`` entries that have no row in ``image_library``.

    ``high_image`` is parsed as a JSON array. Entries starting with "http"
    are compared as-is; anything else is turned into an absolute local path
    before the comparison. Rows that fail to parse are reported and skipped.

    :return: list of ``(path, model_num)``; ``[]`` when the database is unreachable
    """
    conn = connect_to_db()
    if not conn:
        return []
    cursor = conn.cursor(dictionary=True)
    try:
        # 1. Paths already present in the library
        cursor.execute("SELECT path FROM image_library")
        known_paths = set()
        for lib_row in cursor.fetchall():
            known_paths.add(lib_row["path"])
        # 2. Walk the active products that carry a high_image value
        cursor.execute("""
            SELECT id, model_num, high_image
            FROM products
            WHERE high_image IS NOT NULL
            AND high_image != ''
            AND status = 1
        """)
        missing = []
        for product in cursor.fetchall():
            try:
                urls = json.loads(product["high_image"])
                if isinstance(urls, list):
                    for url in urls:
                        if url:
                            key = url if url.startswith("http") else os.path.abspath(url)
                            if key not in known_paths:
                                missing.append((key, product["model_num"]))
            except Exception as e:
                print(f"[WARN] 解析 high_image 失败 id={product['id']}: {e}")
        return missing
    finally:
        cursor.close()
        conn.close()
# Download timeout, in seconds, passed to every HTTP request
DOWNLOAD_TIMEOUT = 60


def sync_aliyun_images_to_library(batch_size: int = 50) -> List[Tuple[str, str]]:
    """Download every image reported by ``find_missing_images`` and index it.

    Images are downloaded into memory (nothing is written to disk) and handed
    to ``build_index_for_single_image`` with the URL as the path key. A
    failure on one image is printed and does not stop the run.

    :param batch_size: progress reporting only; an extra summary line is
        printed after every ``batch_size`` images (no summary when <= 0)
    :return: ``[(url, model_num), ...]`` for the images processed without an exception
    """
    missing: List[Tuple[str, str]] = find_missing_images()  # [(url, model_num), ...]
    total = len(missing)
    print(f"[sync] 共 {total} 张阿里云图片待入库(内存方式)")
    success: List[Tuple[str, str]] = []
    # One session for the whole run so HTTP connections are reused, and the
    # constant headers are built once instead of once per image.
    with requests.Session() as session:
        session.headers.update({"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"})
        for idx, (url, model_num) in enumerate(missing, 1):
            try:
                # 1. Download into memory
                rsp = session.get(url, timeout=DOWNLOAD_TIMEOUT)
                rsp.raise_for_status()
                img = Image.open(BytesIO(rsp.content)).convert("RGB")
                # 2. Index directly (the URL is the path key, which keeps it unique)
                build_index_for_single_image(
                    image=img,
                    model_num=model_num,
                    path_hint=url
                )
                success.append((url, model_num))
                print(f"[sync][{idx}/{total}] ✔ {model_num} -> {url}")
            except Exception as e:
                print(f"[sync][{idx}/{total}] ✘ {model_num} -> {url} 失败:{e}")
            if batch_size > 0 and idx % batch_size == 0:
                print(f"[sync] 进度 {idx}/{total},已成功 {len(success)} 张")
    print(f"[sync] 完成,成功入库 {len(success)}/{total} 张")
    return success
# Script entry point: runs one sample similarity query and prints the matches.
if __name__ == "__main__":
# NOTE(review): these are raw strings, so every "\\" stays as two backslash
# characters in the resulting path — confirm that this is intended.
IMAGE_FOLDER = r"D:\\python\\tools\\similarImages\\image\\db"
QUERY_IMAGE = r"D:\\python\\tools\\similarImages\\image\\test1.jpg"
# First run: build the index and store it in MySQL
# build_index_and_save_to_db(IMAGE_FOLDER)
# Query similar images
similar = search_similar(QUERY_IMAGE, top_k=10)
print("\n🔍 Top similar images:")
for path, similarity in similar:
print(f"Similarity: {similarity:.4f} → {path}")
2.nacos相关配置文件,config.py,因为已有项目使用的是springcloud,所以需要接入springcloud,而springcloud使用的配置中心和服务注册中心是nacos,所以要使用nacos
# config.py
import os
class NacosConfig:
    """Nacos connection settings resolved from the ``ENV`` environment variable.

    The shared defaults in ``_BASE`` are merged with the profile selected by
    ``ENV`` (case-insensitive). An unset or unknown ``ENV`` falls back to the
    ``local`` profile instead of failing with a ``KeyError`` on ``namespace``.
    """

    # Shared defaults
    _BASE = {
        "server": "{nacos服务地址}",
        "user": "{用户名,没有启用鉴权可去掉}",
        "pwd": "{密码,没有启用鉴权可去掉}",
        "group": "DEFAULT_GROUP",
        "cluster": "DEFAULT",
        "service": "python-service",
        "port": int(os.getenv("PORT", 5000)),
    }
    # Per-environment overrides
    _PROFILE = {
        "prod": {"namespace": "prod"},
        "test": {"namespace": "test"},
        "dev": {"namespace": "dev"},
        "local": {"namespace": "local"},  # default profile
    }
    # Profile used when ENV is unset or not listed in _PROFILE
    _DEFAULT_ENV = "local"

    def __init__(self):
        env = os.getenv("ENV", self._DEFAULT_ENV).strip().lower()
        profile = self._PROFILE.get(env)
        if profile is None:
            print(f"[NacosConfig] unknown ENV '{env}', falling back to '{self._DEFAULT_ENV}'")
            profile = self._PROFILE[self._DEFAULT_ENV]
        # Shared defaults first, then the environment-specific overrides
        cfg = {**self._BASE, **profile}
        # Expose everything as instance attributes so callers need no knowledge of profiles
        self.server = cfg["server"]
        self.namespace = cfg["namespace"]
        self.user = cfg["user"]
        self.pwd = cfg["pwd"]
        self.group = cfg["group"]
        self.cluster = cfg["cluster"]
        self.service = cfg["service"]
        self.port = cfg["port"]


# Module-level singleton
nacos_conf = NacosConfig()
3.nacos相关功能的文件,nacos_wrapper.py,包括服务注册,启动心跳,服务注销,获取配置
"""
Nacos 统一封装
提供:注册、注销、心跳线程、配置获取
业务侧无需再 import nacos
"""
import atexit
import threading
import time
import json
import yaml
from typing import Callable, Optional, Literal, Any
from nacos import NacosClient
from config import nacos_conf # 仅依赖配置
# ---------------- Global client (singleton) ----------------
# Created once at import time from the values held by nacos_conf.
_client = NacosClient(
server_addresses=nacos_conf.server,
namespace=nacos_conf.namespace,
username=nacos_conf.user,
password=nacos_conf.pwd,
)
# ---------------- Heartbeat control ----------------
# Background heartbeat thread; stays None until nacos_register starts one.
_beat_thread: Optional[threading.Thread] = None
# Set by nacos_unregister so the loop in _heartbeat_entry stops.
_stop_flag = threading.Event()
def _heartbeat_entry(service: str, ip: str, port: int):
    """Background thread body: send a heartbeat every 5 seconds until stopped.

    :param service: service name to send the heartbeat for
    :param ip: registered instance IP
    :param port: registered instance port

    Heartbeat errors are printed and the loop keeps going.
    """
    while not _stop_flag.is_set():
        try:
            _client.send_heartbeat(service, ip, port,
                                   group_name=nacos_conf.group,
                                   cluster_name=nacos_conf.cluster)
        except Exception as e:
            print(f"[Nacos] heartbeat error: {e}")
        # Wait on the stop flag instead of sleeping: the thread wakes up as
        # soon as nacos_unregister sets the flag, so its 2-second join is not
        # outlasted by a 5-second sleep.
        _stop_flag.wait(5)
# ---------------- 对外 API ----------------
def nacos_register(ip: str, port: int) -> None:
    """Register this instance with Nacos and make sure the heartbeat is running.

    Safe to call repeatedly: the heartbeat thread is started only when none
    is alive, and the exit-time unregistration hook is installed once per
    ``(ip, port)`` pair instead of once per call.

    :param ip: IP address to register
    :param port: port to register
    """
    global _beat_thread
    # 1. Register the instance
    _client.add_naming_instance(
        service_name=nacos_conf.service,
        ip=ip,
        port=port,
        group_name=nacos_conf.group,
        cluster_name=nacos_conf.cluster,
        ephemeral=True
    )
    print(f"[Nacos] registered {nacos_conf.service} -> {ip}:{port}")
    # 2. Start the heartbeat (at most one thread at a time)
    if _beat_thread is None or not _beat_thread.is_alive():
        _stop_flag.clear()
        _beat_thread = threading.Thread(
            target=_heartbeat_entry,
            args=(nacos_conf.service, ip, port),
            daemon=True
        )
        _beat_thread.start()
    # 3. Graceful unregistration at interpreter exit. The endpoints that
    #    already have a hook are remembered on the function object so that
    #    repeated calls do not stack duplicate atexit handlers.
    hooked = nacos_register.__dict__.setdefault("_exit_hooks", set())
    if (ip, port) not in hooked:
        hooked.add((ip, port))
        atexit.register(nacos_unregister, ip, port)
def nacos_unregister(ip: str, port: int) -> None:
    """Stop the heartbeat loop and remove the instance from Nacos.

    :param ip: IP address of the instance to remove
    :param port: port of the instance to remove

    Errors raised while removing the instance are printed, not propagated.
    """
    # Signal the heartbeat thread and give it up to two seconds to finish
    _stop_flag.set()
    worker = _beat_thread
    if worker is not None and worker.is_alive():
        worker.join(timeout=2)
    # Remove the instance
    try:
        instance = dict(
            service_name=nacos_conf.service,
            ip=ip,
            port=port,
            group_name=nacos_conf.group,
            cluster_name=nacos_conf.cluster,
        )
        _client.remove_naming_instance(**instance)
        print(f"[Nacos] unregistered {nacos_conf.service} -> {ip}:{port}")
    except Exception as exc:
        print(f"[Nacos] unregister error: {exc}")
# ---------------- Local settings (optional) ----------------
def nacos_config() -> dict:
    """Return the attribute dictionary of ``nacos_conf`` so callers can read individual fields.

    The returned object is the instance's own ``__dict__``, not a copy.
    """
    settings = vars(nacos_conf)
    return settings
# ---------------- Fetch configuration ----------------
def nacos_get_config(data_id: str,
                     group: Optional[str] = None,
                     tenant: Optional[str] = None,
                     fmt: Literal["str", "json", "yaml"] = "str") -> Any:
    """Fetch a configuration entry from the Nacos config centre.

    :param data_id: configuration ID
    :param group: configuration group; defaults to ``nacos_conf.group``
    :param tenant: namespace ID; accepted for backward compatibility but not
        forwarded to the client call — the shared client is constructed with
        ``nacos_conf.namespace``
    :param fmt: return format: ``str`` | ``json`` | ``yaml``
    :return: the raw content for ``str`` (whatever the client returned),
        otherwise the parsed content
    :raises ValueError: if ``fmt`` is not one of the supported formats
    :raises RuntimeError: if the fetch fails, or if the client returns no
        content while ``json``/``yaml`` parsing was requested
    """
    # Reject an unsupported format before spending a network round trip
    if fmt not in ("str", "json", "yaml"):
        raise ValueError("fmt must be str/json/yaml")
    group = group or nacos_conf.group
    try:
        content = _client.get_config(data_id=data_id, group=group)
    except Exception as e:
        raise RuntimeError(f"[Nacos] get_config error: {e}") from e
    if fmt == "str":
        return content
    if content is None:
        # Parsing None would fail with an unrelated TypeError/AttributeError;
        # report the real problem instead.
        raise RuntimeError(f"[Nacos] config not found: data_id={data_id}, group={group}")
    if fmt == "json":
        return json.loads(content)
    return yaml.safe_load(content)
4.功能接口文件,image_service.py,提供接口给前端或其他服务调用
import os
import socket
from PIL import Image
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
from search_similar_images import build_index_and_save_to_db, search_similar, build_index_for_single_image, \
search_similar_by_image, sync_aliyun_images_to_library
from nacos_wrapper import nacos_register, nacos_config
# Flask application object that the route decorators in this module attach to
app = Flask(__name__)
# Upload directory; IMG_UPLOAD_DIR overrides the default
UPLOAD_FOLDER = os.getenv("IMG_UPLOAD_DIR", "./uploads") # can be mounted from outside the container
# File extensions accepted by allowed_file (compared in lower case)
ALLOWED_EXT = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'}
# Create the upload directory at import time if it does not exist
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
def allowed_file(filename):
    """Return True when the lower-cased extension of ``filename`` is in ``ALLOWED_EXT``."""
    _, extension = os.path.splitext(filename)
    return extension.lower() in ALLOWED_EXT
@app.route('/image/similar/insert', methods=['POST'])
def insert_image_library():
    """Upload images (form field ``images``) and index them one by one.

    Files with a disallowed extension are reported under ``failed`` with
    "illegal ext"; files whose processing raises are reported with the
    exception text. A file is listed under ``saved`` whenever
    ``build_index_for_single_image`` returns without raising.
    """
    if 'images' not in request.files:
        return jsonify(status="fail", msg="No image part"), 400
    saved = []
    failed = []
    for upload in request.files.getlist('images'):
        if not (upload and allowed_file(upload.filename)):
            failed.append({"file": upload.filename, "error": "illegal ext"})
            continue
        try:
            # Decode straight from the upload stream; nothing is written to disk
            pil_img = Image.open(upload.stream).convert('RGB')
            # The sanitised original file name is the unique key (a timestamp could be added against clashes)
            key = secure_filename(upload.filename)
            build_index_for_single_image(image=pil_img, path_hint=key, model_num='')
            saved.append(upload.filename)
        except Exception as exc:
            failed.append({"file": upload.filename, "error": str(exc)})
    return jsonify(status="success", saved=saved, failed=failed)
@app.route('/image/similar/search', methods=['POST'])
def search_similar_images():
    """Search by image (uploaded file).

    Form fields:
        image : the image file (single)
        top_k : optional, default 10; the legacy field name ``top`` is still
                accepted when ``top_k`` is absent

    Returns a JSON list of ``{"path", "similarity"}`` objects, or a 400 JSON
    error for a missing/invalid image or a non-integer/negative ``top_k``.
    """
    if 'image' not in request.files:
        return jsonify({"error": "No image part"}), 400
    file = request.files['image']
    if file.filename == '':
        return jsonify({"error": "Empty filename"}), 400
    try:
        img = Image.open(file.stream).convert('RGB')
    except Exception as e:
        return jsonify({"error": f"Invalid image: {e}"}), 400
    # The documented field is top_k; the code historically read `top`, so both are honoured
    raw_top = request.form.get('top_k', request.form.get('top', 10))
    try:
        top = int(raw_top)
    except (TypeError, ValueError):
        # A malformed value is a client error, not a server failure
        return jsonify({"error": "top_k must be an integer"}), 400
    if top < 0:
        return jsonify({"error": "top_k must not be negative"}), 400
    matches = search_similar_by_image(img, top_k=top)
    # Uniform response format
    results = [{"path": path, "similarity": float(sim)} for path, sim in matches]
    return jsonify(results)
def convert_float32_to_float(obj):
    """Recursively replace NumPy-style values with plain Python values.

    Dicts, lists and tuples are traversed (a tuple stays a tuple). Objects
    exposing ``tolist()`` — NumPy scalars and arrays — are converted with it,
    which also handles multi-element arrays where ``item()`` would raise.
    Objects that only expose ``item()`` are converted with that. Everything
    else is returned unchanged.

    :param obj: arbitrary, possibly nested value
    :return: the same structure containing only plain Python values
    """
    if isinstance(obj, dict):
        return {k: convert_float32_to_float(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [convert_float32_to_float(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(convert_float32_to_float(item) for item in obj)
    if hasattr(obj, 'item'):
        # NumPy scalars and arrays both provide tolist(); fall back to item()
        # for scalar-like objects that only offer the latter.
        to_list = getattr(obj, 'tolist', None)
        return to_list() if callable(to_list) else obj.item()
    return obj
# Determine the IP address of the current server
def get_host_ip():
    """Return the local address of a UDP socket connected to 8.8.8.8:80."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        probe.connect(('8.8.8.8', 80))
        local_address = probe.getsockname()
        return local_address[0]
    finally:
        probe.close()
@app.route('/ping')
def ping():
    """Liveness endpoint: always answers with the text 'pong'."""
    return 'pong'
# --------------------------
# Added endpoint
# --------------------------
@app.route('/image/sync/aliyun', methods=['POST'])
def sync_aliyun_images():
    """Sync the Aliyun high-resolution images missing from the library.

    Response JSON:
    {
        "status": "success",
        "total": 123,      // images found missing before the sync started
        "success": 120,
        "failed": 3,
        "details": [(url, model_num), ...]   // the successful ones
    }

    ``sync_aliyun_images_to_library`` only returns the successes, so
    ``total`` is taken from a separate ``find_missing_images`` call made just
    before the sync. It can differ slightly from what the sync itself sees if
    the tables change in between; it is never reported lower than ``success``.
    """
    try:
        # Function-scope import: this module's import list does not include it
        from search_similar_images import find_missing_images

        total = len(find_missing_images())
        # To run asynchronously, hand this off to a thread or queue; currently synchronous
        success_list = sync_aliyun_images_to_library()  # [(url, model_num), ...]
        ok_cnt = len(success_list)
        total = max(total, ok_cnt)
        return jsonify(
            status="success",
            total=total,
            success=ok_cnt,
            failed=total - ok_cnt,
            details=success_list
        )
    except Exception as e:
        return jsonify(status="fail", error=str(e)), 500
if __name__ == '__main__':
    # Resolve the address to advertise and the port taken from the Nacos settings
    ip = get_host_ip()
    port = nacos_config()['port']
    nacos_register(ip, port)  # registration + heartbeat + exit-time unregistration
    # Debug mode exposes the interactive Werkzeug debugger, which must not be
    # reachable on 0.0.0.0 by default; enable it explicitly with FLASK_DEBUG=1.
    debug = os.getenv("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    # The reloader re-executes this module in a child process, which would
    # register the instance with Nacos a second time, so it stays off.
    # NOTE: do not use app.run() in production; pick a more robust deployment.
    app.run(host='0.0.0.0', port=port, debug=debug, use_reloader=False)
5.最后,正式使用时在springcloud的其他服务调用python-service服务的接口即可
6.构建docker容器
6.1.目录结构
project/
├── Dockerfile
├── docker-compose.yml
├── requirements.txt
├── app/
│ ├── search_similar_images.py
│ ├── image_service.py
│ ├── nacos_wrapper.py
│ └── config.py
└── uploads/ # 运行时自动生成,可外挂卷
6.2.需要的第三方库:requirements.txt(已含 CPU 版 PyTorch)
# 先装 PyTorch 官方 CPU 版(直链)
https://download.pytorch.org/whl/cpu/torch-1.13.1%2Bcpu-cp38-cp38-linux_x86_64.whl
https://download.pytorch.org/whl/cpu/torchvision-0.14.1%2Bcpu-cp38-cp38-linux_x86_64.whl
# 其余包装国内源
--index-url https://pypi.tuna.tsinghua.edu.cn/simple
flask>=2.3.2
mysql-connector-python==8.0.33
nacos-sdk-python>=0.9.12
requests>=2.28
PyYAML>=6.0
numpy>=1.21
Pillow>=9.0.0
waitress>=2.1
6.3.Dockerfile(单阶段构建,镜像 < 600 MB)
# Single-stage image built on python:3.8-slim
FROM python:3.8-slim
# Unbuffered Python output, default ENV/PORT values, and pip index/timeout/retry settings
ENV PYTHONUNBUFFERED=1 \
ENV=dev \
PORT=5000 \
PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple \
PIP_TIMEOUT=120 \
PIP_RETRIES=5
WORKDIR /app
# 1) Upgrade pip on its own first (using the index configured above)
RUN python -m pip install -i $PIP_INDEX_URL --upgrade pip
# 2) Then install the dependencies
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY app/ ./
# Start the service by running the image_service module
CMD ["python", "-m", "image_service"]
6.4.docker-compose.yml(含 MySQL 示例,可去掉)
version: "3.9"
services:
  mysql:
    image: mysql:8.0
    container_name: img_mysql
    environment:
      MYSQL_ROOT_PASSWORD: root
      MYSQL_DATABASE: imgdb
    volumes:
      - mysql_data:/var/lib/mysql
    ports:
      - "3306:3306"
    networks:
      - img_net
  img_service:
    build: .
    container_name: img_service
    environment:
      # Must be one of the profiles defined in config.py (prod / test / dev / local);
      # any other value has no namespace entry there.
      ENV: dev
      PORT: 5000
      # Database connection pointing at the mysql container; change to the real
      # address when using an external database.
      # NOTE(review): search_similar_images.py reads its db settings from Nacos
      # (python.yaml -> db), not from these variables — confirm they are still needed.
      DB_HOST: mysql
      DB_PORT: 3306
      DB_USER: root
      DB_PASS: root
      DB_NAME: imgdb
    ports:
      - "5000:5000"
    volumes:
      - ./uploads:/app/uploads # persist the upload directory
    depends_on:
      - mysql
    networks:
      - img_net
volumes:
  mysql_data:
networks:
  img_net:
6.5.启动命令
# 1. 进入项目根目录(含 docker-compose.yml)
cd project
# 2. 一键构建 & 启动
docker-compose up -d --build
# 3. 查看日志
docker-compose logs -f img_service
# 4. 停止/删除
docker-compose down -v # -v 会把 mysql 数据也清掉,谨慎
6.6.验证
curl http://localhost:5000/ping # 应返回 pong
curl -X POST http://localhost:5000/image/sync/aliyun
1716

被折叠的 条评论
为什么被折叠?



