numpy和scipy计算向量欧氏距离性能对比

最新推荐文章于 2025-08-13 08:29:23 发布

原创最新推荐文章于 2025-08-13 08:29:23 发布 · 1.7k 阅读

4 ·

CC 4.0 BY-SA版权

文章标签：

#python #向量欧氏距离 #scipy #numpy

python编程专栏收录该内容

50 篇文章

订阅专栏

本文通过实测对比了使用 NumPy 和 SciPy 计算大规模数据集（50 万个二维点到同一参考点）欧氏距离的效率：两种实现各重复运行 100 次取平均耗时，结果显示 SciPy 的 cdist（约 10 ms）明显快于 NumPy 的向量化实现（约 25 ms）。

# coding:utf-8

import datetime
import time
from functools import wraps

import numpy as np
from scipy import spatial


def print_execution_time(func, iter=100):
    """Decorate ``func`` so every call runs it ``iter`` times and prints the mean latency.

    Args:
        func: The callable to benchmark.
        iter: Number of repetitions per call; must be at least 1. (The name
            shadows the builtin but is kept so keyword callers keep working.)

    Returns:
        A wrapper with ``func``'s metadata. Calling it invokes ``func`` ``iter``
        times with the same arguments, prints the average elapsed time in
        milliseconds, and returns the result of the last invocation.

    Raises:
        ValueError: If ``iter`` is smaller than 1 (the average would be undefined).
    """
    if iter < 1:
        raise ValueError("iter must be a positive integer, got %r" % (iter,))

    @wraps(func)
    def wrapper(*args, **kwargs):
        total_ms = 0.0
        res = None
        for _ in range(iter):
            # perf_counter is a monotonic, high-resolution clock. The elapsed
            # time is kept as a float: ``timedelta.microseconds`` only holds the
            # sub-second component, and flooring to whole milliseconds would
            # report 0 for any call faster than 1 ms.
            started = time.perf_counter()
            res = func(*args, **kwargs)
            total_ms += (time.perf_counter() - started) * 1000.0
        avg_time = total_ms / iter
        print("function name -> %s, elapse time -> %s ms" % (func.__name__, avg_time))
        return res

    return wrapper


@print_execution_time
def distance_euclidean_numpy(vec1, vec2):
    """Return the Euclidean distance along axis 1 between ``vec1`` and ``vec2``.

    The operands are subtracted element-wise, the differences are squared and
    summed along axis 1, and the square root of each sum is returned. Because
    of the ``print_execution_time`` decorator, a call also prints timing info.
    """
    delta = vec1 - vec2
    squared_sums = np.sum(np.power(delta, 2), axis=1)
    return np.sqrt(squared_sums)


@print_execution_time
def distance_euclidean_scipy(vec1, vec2, distance="euclidean"):
    """Return the result of ``scipy.spatial.distance.cdist`` for ``vec1`` and ``vec2``.

    ``distance`` is forwarded to ``cdist`` as the metric. Because of the
    ``print_execution_time`` decorator, a call also prints timing info.
    """
    pairwise = spatial.distance.cdist(vec1, vec2, metric=distance)
    return pairwise


# Benchmark data: 500,000 random 2-D points scaled to [0, 100) and cast to int16.
x = (np.random.rand(500000, 2) * 100).astype(np.int16)

# Single reference point with shape (1, 2), compared against every row of x.
y = np.array([[1, 2]])

print("starting")

distance_numpy = distance_euclidean_numpy(x, y)
distance_scipy = distance_euclidean_scipy(x, y, "euclidean")

# Print the same ten entries of both results so they can be compared by eye.
for _distances in (distance_numpy, distance_scipy):
    print(_distances[500:510])

输出结果：

starting
function name -> distance_euclidean_numpy, elapse time -> 24.62 ms
function name -> distance_euclidean_scipy, elapse time -> 10.21 ms
[ 53.60037313 50.28916384 77.62087348 52.63078947 47.38143096
75.16648189 113.27841807 101.67103816 118.1058847 65.8559033 ]
[[ 53.60037313]
[ 50.28916384]
[ 77.62087348]
[ 52.63078947]
[ 47.38143096]
[ 75.16648189]
[113.27841807]
[101.67103816]
[118.1058847 ]
[ 65.8559033 ]]