1. 余弦相似度
from PIL import Image
from numpy import average, dot, linalg
# Normalize an image so that different pictures can be compared pixel by pixel.
def get_thum(img_path, size=(64, 64), greyscale=False):
    """Open an image and return a resized (optionally greyscale) copy.

    Args:
        img_path: Path of the image file to open.
        size: Target ``(width, height)`` passed to ``Image.resize``.
        greyscale: When true, convert the resized image to mode ``'L'``
            (8-bit greyscale).

    Returns:
        A new ``PIL.Image.Image`` of the requested size.
    """
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # high-quality filter under its current name.
    # The context manager closes the underlying file once the resized copy
    # has been produced.
    with Image.open(img_path) as source:
        thumb = source.resize(size, Image.LANCZOS)
    if greyscale:
        thumb = thumb.convert('L')
    return thumb
# Cosine similarity between two images.
def image_cosin_similarity(img1_path, img2_path):
    """Return the cosine similarity of two images.

    Each image is normalized with ``get_thum`` (default size), every pixel is
    reduced to the average of its channel values, and the two resulting
    vectors are compared by the dot product of their L2-normalized forms.

    Args:
        img1_path: Path of the first image.
        img2_path: Path of the second image.

    Returns:
        The cosine similarity of the two pixel vectors. If a vector has zero
        length (every averaged pixel is 0) the cosine is undefined; in that
        case 1.0 is returned when both vectors are zero and 0.0 when only one
        is, instead of dividing by zero and producing NaN.
    """
    vectors = []
    norms = []
    for path in (img1_path, img2_path):
        thumb = get_thum(path)
        # One scalar per pixel: the mean over the pixel's channel values.
        vector = [average(pixel) for pixel in thumb.getdata()]
        vectors.append(vector)
        # L2 (Euclidean) norm of the pixel vector.
        norms.append(linalg.norm(vector, 2))

    # Guard against division by zero for all-zero images.
    if norms[0] == 0 or norms[1] == 0:
        return 1.0 if norms[0] == norms[1] else 0.0

    # Dot product of the two unit vectors.
    return dot(vectors[0] / norms[0], vectors[1] / norms[1])
2. 其他计算
2.1 汉明距离
def hamming_distance(str1, str2):
    """Count the positions at which two equal-length sequences differ.

    Args:
        str1: First sequence (for example a hash string).
        str2: Second sequence.

    Returns:
        The number of differing positions, or ``None`` when the two
        sequences have different lengths.
    """
    if len(str1) != len(str2):
        return None
    # Lengths are equal here, so zip pairs every position exactly once.
    return sum(1 for a, b in zip(str1, str2) if a != b)
2.2 计算md5值
import hashlib
def md5_score(img_path='22.png'):
    """Return the hexadecimal MD5 digest of a file's contents.

    Args:
        img_path: Path of the file to hash. Defaults to ``'22.png'``.

    Returns:
        The MD5 digest as a 32-character lowercase hex string.
    """
    digest = hashlib.md5()
    with open(img_path, "rb") as f:
        # Feed the hash in fixed-size chunks so large files are never held
        # in memory all at once; the resulting digest is unchanged.
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
2.3 哈希算法
2.3.1 均值哈希算法
import cv2
def ahash(img_path):
    """Compute the average hash (aHash) of an image.

    The image is scaled to 8x8, converted to greyscale, and every pixel is
    compared with the mean grey level: brighter pixels contribute a ``1``
    bit, all others a ``0`` bit. The 64 bits are read row by row.

    Args:
        img_path: Path of the image file to hash.

    Returns:
        The 64-bit hash as a 16-character lowercase hex string.
    """
    # Read the caller's path; the argument used to be ignored in favour of a
    # hard-coded (and invalid) keyword. cv2.imread returns None for an
    # unreadable file, in which case the resize call below fails.
    image = cv2.imread(img_path)
    # Shrink to 8x8 so the hash only captures coarse structure.
    image = cv2.resize(image, (8, 8), interpolation=cv2.INTER_CUBIC)
    # cv2.imread yields BGR channel order, so convert with COLOR_BGR2GRAY.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # ndarray.mean accumulates in floating point; adding uint8 pixels one by
    # one can wrap around at 255 under NumPy 2 promotion rules.
    avg = gray.mean()
    # Row-major flatten keeps the same bit order as an (i, j) nested loop.
    bits = ''.join('1' if above else '0' for above in (gray > avg).flatten())
    # 64 bits -> 16 zero-padded hex digits.
    return format(int(bits, 2), '016x')
2.3.2 差异值哈希算法
import cv2
def dhash(img_path):
    """Compute the difference hash (dHash) of an image.

    The image is scaled to 9 columns by 8 rows and converted to greyscale.
    Each pixel is compared with its right-hand neighbour: a ``1`` bit is
    emitted when the left pixel is brighter, otherwise ``0``. This yields
    8 x 8 = 64 bits, read row by row.

    Args:
        img_path: Path of the image file to hash.

    Returns:
        The 64-bit hash as a 16-character lowercase hex string.
    """
    # cv2.imread returns None for an unreadable file, in which case the
    # resize call below fails.
    image = cv2.imread(img_path)
    # dsize is (width, height): 9 columns give 8 horizontal differences per row.
    image = cv2.resize(image, (9, 8), interpolation=cv2.INTER_CUBIC)
    # cv2.imread yields BGR channel order, so convert with COLOR_BGR2GRAY.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Compare columns 0..7 with columns 1..8 in a single array operation.
    brighter_than_right = gray[:, :-1] > gray[:, 1:]
    bits = ''.join('1' if flag else '0' for flag in brighter_than_right.flatten())
    # 64 bits -> 16 zero-padded hex digits.
    return format(int(bits, 2), '016x')
2.3.3 感知哈希算法(简化实现:基于灰度均值)
import cv2
def phash(img_path):
    """Compute a 64-bit mean-threshold hash of an image.

    The image is scaled to 8x8, converted to greyscale, and every pixel is
    compared with the mean grey level: pixels below the mean contribute a
    ``0`` bit, all others (including pixels equal to the mean) a ``1`` bit.
    The 64 bits are read row by row.

    NOTE(review): despite its name this function does not apply a DCT; it
    thresholds against the mean, like an average hash. Confirm whether a
    DCT-based perceptual hash was intended.

    Args:
        img_path: Path of the image file to hash.

    Returns:
        The 64-bit hash as a 16-character lowercase hex string.
    """
    # cv2.imread returns None for an unreadable file, in which case the
    # resize call below fails.
    image = cv2.imread(img_path)
    image = cv2.resize(image, (8, 8), interpolation=cv2.INTER_CUBIC)
    # cv2.imread yields BGR channel order, so convert with COLOR_BGR2GRAY.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # ndarray.mean accumulates in floating point; the built-in sum over
    # uint8 elements can wrap around at 255 under NumPy 2 promotion rules.
    avg = gray.mean()
    # ">=" mirrors the original rule: '0' only when strictly below the mean.
    bits = ''.join('1' if flag else '0' for flag in (gray >= avg).flatten())
    # 64 bits -> 16 zero-padded hex digits.
    return format(int(bits, 2), '016x')
3. 深度神经网络提取图片向量,计算相似度
检索出与指定图片相似度最高的n张图片