# Python image deduplication
from PIL import Image
import imagehash
import os
import numpy as np
def similar_images(dirname, similarity=0.9):
    """Find pairs of visually similar images directly inside *dirname*.

    Every regular file whose name ends with .jpg, .jpeg, .png or .webp
    (matched case-insensitively) is reduced to an average hash with
    ``imagehash.average_hash`` and compared against every image hashed
    before it. Two images count as similar when the number of differing
    hash elements is at most ``int((1 - similarity) * hash_size ** 2)``.

    The path of each hashed image is printed as progress output.

    Args:
        dirname: Directory to scan; sub-directories are not descended into.
        similarity: Required fraction of matching hash elements. Higher
            values demand closer matches.

    Returns:
        A list of ``[earlier_path, later_path]`` lists, one per similar
        pair, where "earlier" refers to sorted file-name order.
    """
    hash_size = 8
    extensions = (".jpg", ".png", ".jpeg", ".webp")
    # Largest number of differing hash elements still considered "similar".
    diff_limit = int((1 - similarity) * hash_size ** 2)

    image_list = []
    hash_list = []
    duplicates = []
    # Sort so the result does not depend on the file system's listing order.
    for filename in sorted(os.listdir(dirname)):
        # Lower-case the name so "IMG_001.JPG" is treated like "img_001.jpg".
        if not filename.lower().endswith(extensions):
            continue
        img_path = os.path.join(dirname, filename)
        # A directory that happens to be named like an image cannot be opened.
        if not os.path.isfile(img_path):
            continue
        # The context manager closes the underlying file once it is hashed.
        with Image.open(img_path) as img:
            img_hash = imagehash.average_hash(img, hash_size).hash
        for seen_path, seen_hash in zip(image_list, hash_list):
            if np.count_nonzero(img_hash != seen_hash) <= diff_limit:
                duplicates.append([seen_path, img_path])
        hash_list.append(img_hash)
        image_list.append(img_path)
        print(img_path)
    return duplicates
if __name__ == "__main__":
    # Scan the folder, then for every similar pair keep the path that sorts
    # first and delete the path that sorts last.
    result = similar_images("/Users/mac/Desktop/data/guanxi", 0.8)
    for pair in result:
        sim_imgs = sorted(set(pair))
        keep_path = sim_imgs[0]
        remove_path = sim_imgs[-1]
        if not os.path.exists(keep_path):
            # The would-be survivor was already deleted for an earlier pair;
            # deleting its partner too could leave no copy of the image.
            continue
        if os.path.exists(remove_path):
            # Only report a deletion that actually happens.
            print(f"delete duplicates{remove_path}")
            os.remove(remove_path)