Pixel-by-Pixel Parallel Partial Correlation Analysis for Raster Data

1. Background (what I want to do):

Independent variables (4): tem, pre, iet, iep

Dependent variables (3): SOS, EOS, LOS

Goal: compute, pixel by pixel and in parallel, the partial correlation coefficients for all 3 x 4 = 12 combinations above.

Attribution: this code builds on an earlier article about multi-threaded per-pixel partial correlation analysis in Python. I added a significance test (p-value) for each partial correlation coefficient and, adapting it to my own data, output the results for all 12 combinations in one pass. Many thanks to the author of that article!

2. Code

The code is split into two .py files run in PyCharm: process.py (the functions) and a driver script that actually runs the analysis. Keeping the worker in its own importable module, and guarding the driver with if __name__ == '__main__':, avoids the 'overflow' error I otherwise ran into: on Windows, multiprocessing re-imports the main module in each child process, so the worker function must live in a module that can be imported.

Part 1-process.py

#%%
import numpy as np
from sklearn.linear_model import LinearRegression
from scipy import stats

# Ignore runtime warnings such as division by zero
np.seterr(divide='ignore', invalid='ignore')

'''
Purpose: compute the partial correlation coefficient plus a t-test for its significance
Input:   two variables (x and y) and the control variables to hold fixed
Output:  (partial correlation coefficient, p-value)
'''
def calculate_partial_correlation(x, y, controls):
    # Skip series with missing values
    if np.isnan(x).any() or np.isnan(y).any() or np.isnan(controls).any():
        return np.nan, np.nan
    # Regress y and x on the controls and keep the residuals
    model = LinearRegression().fit(controls, y)
    residuals_y = y - model.predict(controls)
    model = LinearRegression().fit(controls, x)
    residuals_x = x - model.predict(controls)
    # Partial correlation = correlation of the two residual series
    # (np.corrcoef returns a 2x2 matrix; element [0, 1] is the correlation between residuals_x and residuals_y)
    partial_corr = np.corrcoef(residuals_x, residuals_y)[0, 1]
    # Sample size and number of control variables
    n = len(controls)
    k = controls.shape[1]
    # t statistic for the partial correlation, with df = n - k - 2
    df = n - k - 2
    t_value = partial_corr * np.sqrt(df / (1 - partial_corr ** 2))
    # Two-tailed p-value
    p_value = 2 * (1 - stats.t.cdf(abs(t_value), df=df))

    return partial_corr, p_value
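
# Quick sanity check on synthetic data (illustrative only, not part of the original workflow):
#   rng = np.random.default_rng(0)
#   controls = rng.normal(size=(30, 3))
#   x = controls @ np.array([0.5, -0.2, 0.1]) + rng.normal(size=30)
#   y = 0.8 * x + rng.normal(size=30)
#   r, p = calculate_partial_correlation(x, y, controls)  # expect a clearly positive r with a small p
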
'''
Purpose: compute the 12 partial correlations (and p-values) for one pixel
Input:   (i, j, data), where data holds the per-pixel time series of sos/eos/los/tem/pre/iet/iep
Output:  (i, j) followed by the 12 partial correlation coefficients and the 12 p-values
'''
def worker(pixel_data):
    i, j, data = pixel_data
    x_sos = data['sos']
    x_eos = data['eos']
    x_los = data['los']
    y_tem = data['tem']
    y_pre = data['pre']
    y_iet = data['iet']
    y_iep = data['iep']

    # Check for NaN or infinite values in the data
    invalid_data = (np.isnan(x_sos) | np.isnan(x_eos) | np.isnan(x_los) |
                    np.isnan(y_tem) | np.isnan(y_pre) | np.isnan(y_iet) | np.isnan(y_iep) |
                    np.isinf(x_sos) | np.isinf(x_eos) | np.isinf(x_los) |
                    np.isinf(y_tem) | np.isinf(y_pre) | np.isinf(y_iet) | np.isinf(y_iep))

    if invalid_data.any():
        # Skip this pixel and return NaN for all 12 correlations and 12 p-values
        return (i, j) + (np.nan,) * 24

    controls_tem = np.column_stack([y_pre, y_iet, y_iep])
    controls_pre = np.column_stack([y_tem, y_iet, y_iep])
    controls_iet = np.column_stack([y_tem, y_pre, y_iep])
    controls_iep = np.column_stack([y_tem, y_pre, y_iet])

    # Check for NaN or infinite values in the control variables
    invalid_controls = (np.isnan(controls_tem) | np.isnan(controls_pre) |
                        np.isnan(controls_iet) | np.isnan(controls_iep) |
                        np.isinf(controls_tem) | np.isinf(controls_pre) |
                        np.isinf(controls_iet) | np.isinf(controls_iep))

    if invalid_controls.any():
        # Skip this pixel and return NaN for all 12 correlations and 12 p-values
        return (i, j) + (np.nan,) * 24

    #sos
    sos_tem_corr,sos_tem_p = calculate_partial_correlation(x_sos, y_tem, controls_tem)
    sos_pre_corr,sos_pre_p = calculate_partial_correlation(x_sos, y_pre, controls_pre)
    sos_iet_corr,sos_iet_p = calculate_partial_correlation(x_sos, y_iet, controls_iet)
    sos_iep_corr,sos_iep_p = calculate_partial_correlation(x_sos, y_iep, controls_iep)
    # eos
    eos_tem_corr, eos_tem_p = calculate_partial_correlation(x_eos, y_tem, controls_tem)
    eos_pre_corr, eos_pre_p = calculate_partial_correlation(x_eos, y_pre, controls_pre)
    eos_iet_corr, eos_iet_p = calculate_partial_correlation(x_eos, y_iet, controls_iet)
    eos_iep_corr, eos_iep_p = calculate_partial_correlation(x_eos, y_iep, controls_iep)
    # los
    los_tem_corr, los_tem_p = calculate_partial_correlation(x_los, y_tem, controls_tem)
    los_pre_corr, los_pre_p = calculate_partial_correlation(x_los, y_pre, controls_pre)
    los_iet_corr, los_iet_p = calculate_partial_correlation(x_los, y_iet, controls_iet)
    los_iep_corr, los_iep_p = calculate_partial_correlation(x_los, y_iep, controls_iep)

    return (i, j, \
            sos_tem_corr, sos_pre_corr, sos_iet_corr, sos_iep_corr, \
            eos_tem_corr, eos_pre_corr, eos_iet_corr, eos_iep_corr, \
            los_tem_corr, los_pre_corr, los_iet_corr, los_iep_corr, \
            sos_tem_p, sos_pre_p, sos_iet_p, sos_iep_p, \
            eos_tem_p, eos_pre_p, eos_iet_p, eos_iep_p, \
            los_tem_p, los_pre_p, los_iet_p, los_iep_p)
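
Before launching the full run, the worker can be exercised on a single synthetic "pixel" to confirm the shape of its output. This is a minimal sketch (assuming process.py is on the Python path); the seven yearly values and the (10, 20) pixel index are made up for illustration:

import numpy as np
from process import worker

rng = np.random.default_rng(1)
fake_series = {k: rng.normal(size=7) for k in ['sos', 'eos', 'los', 'tem', 'pre', 'iet', 'iep']}
result = worker((10, 20, fake_series))
print(len(result))  # 26: i, j, then 12 partial correlations followed by 12 p-values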

Part 2-实际运行.py (the driver script)

#%%
import itertools
import os
import sys

import numpy as np
import rasterio
from multiprocessing import Pool
from tqdm import tqdm

lib_path = r'D:\code\MCD12Q2.006_01-19'  # folder containing process.py
sys.path.append(os.path.abspath(lib_path))
from process import worker, calculate_partial_correlation
# Ignore runtime warnings such as division by zero
np.seterr(divide='ignore', invalid='ignore')
#%%
'''
Purpose: read a raster file
Input:   file path
Output:  the first band as a 2-D array, plus the image metadata (dtype, extent, etc.)
'''
def read_raster(file_path):
    with rasterio.open(file_path) as src:
        return src.read(1), src.meta
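
# Optional variant (an assumption about the inputs, not part of the original post): if the rasters mark
# missing data with a nodata value rather than NaN, read a masked array and fill the mask with NaN so
# that the NaN checks in worker() catch those pixels.
def read_raster_masked(file_path):
    with rasterio.open(file_path) as src:
        return src.read(1, masked=True).astype('float32').filled(np.nan), src.meta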
#%%
# Main entry point
def main():
    # Read the data
    years = range(2013, 2020)  # 2013-2019, one raster per variable per year
    # File paths, keyed by variable
    file_paths = {
        var: [f'{var}\\data_{var}_{year}.tif' for year in years]
        for var in ['sos', 'eos', 'los', 'tem', 'pre', 'iet', 'iep']}
    # print('file_paths', file_paths)

    # Read the TIFFs into a dict of (years, rows, cols) arrays
    rasters = {var: np.array([read_raster(fp)[0] for fp in fps]) for var, fps in file_paths.items()}
    # print('rasters', rasters)

    # Minimum common shape across the stacks (here taken from the sos and iet stacks)
    min_shape = tuple(min(sizes) for sizes in zip(*(r.shape for r in [rasters['sos'], rasters['iet']])))

    # Prepare the per-pixel work items
    '''
    pixel_data is a list of tuples (i, j, data):
    i and j are the pixel's row and column, and data is a dict that, for each key k in rasters,
    holds the time series v[:, i, j] extracted from the corresponding stacked array v.
    '''
    pixel_data = [(i, j, {k: v[:, i, j] for k, v in rasters.items()}) for i, j in
                  itertools.product(range(min_shape[1]), range(min_shape[2]))]  # note: min_shape[1]/[2] must be the row/column dimensions
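    # For illustration, each element of pixel_data looks like (row, col, per-pixel time series), e.g.
    #   (0, 0, {'sos': array of len(years) values, ..., 'iep': array of len(years) values})
    # so worker() receives one pixel's full multi-year record per call.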

    # Initialize the result grids (filled with NaN) for the 12 combinations
    drivers = ['tem', 'pre', 'iet', 'iep']
    # partial correlation coefficients
    sos_partial_corrs = {f'sos_{d}': np.full(min_shape[1:], np.nan) for d in drivers}
    eos_partial_corrs = {f'eos_{d}': np.full(min_shape[1:], np.nan) for d in drivers}
    los_partial_corrs = {f'los_{d}': np.full(min_shape[1:], np.nan) for d in drivers}
    # p-values
    sos_partial_ps = {f'sos_{d}': np.full(min_shape[1:], np.nan) for d in drivers}
    eos_partial_ps = {f'eos_{d}': np.full(min_shape[1:], np.nan) for d in drivers}
    los_partial_ps = {f'los_{d}': np.full(min_shape[1:], np.nan) for d in drivers}

    # Process each pixel in parallel; print progress every 1000 pixels.
    with Pool() as pool:
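        # Optional: since tqdm is imported above, the loop below could instead be wrapped as
        #   for idx, result in enumerate(tqdm(pool.imap(worker, pixel_data), total=len(pixel_data)), 1):
        # to get a live progress bar rather than the periodic print.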
        for idx, result in enumerate(pool.imap(worker, pixel_data), 1):
            i, j, \
            sos_tem_corr, sos_pre_corr, sos_iet_corr, sos_iep_corr, \
            eos_tem_corr, eos_pre_corr, eos_iet_corr, eos_iep_corr, \
            los_tem_corr, los_pre_corr, los_iet_corr, los_iep_corr, \
            sos_tem_p, sos_pre_p, sos_iet_p, sos_iep_p, \
            eos_tem_p, eos_pre_p, eos_iet_p, eos_iep_p, \
            los_tem_p, los_pre_p, los_iet_p, los_iep_p = result  # unpack the worker output
            # store the correlation coefficients
            # sos
            sos_partial_corrs['sos_tem'][i, j] = sos_tem_corr
            sos_partial_corrs['sos_pre'][i, j] = sos_pre_corr
            sos_partial_corrs['sos_iet'][i, j] = sos_iet_corr
            sos_partial_corrs['sos_iep'][i, j] = sos_iep_corr
            # eos
            eos_partial_corrs['eos_tem'][i, j] = eos_tem_corr
            eos_partial_corrs['eos_pre'][i, j] = eos_pre_corr
            eos_partial_corrs['eos_iet'][i, j] = eos_iet_corr
            eos_partial_corrs['eos_iep'][i, j] = eos_iep_corr
            # los
            los_partial_corrs['los_tem'][i, j] = los_tem_corr
            los_partial_corrs['los_pre'][i, j] = los_pre_corr
            los_partial_corrs['los_iet'][i, j] = los_iet_corr
            los_partial_corrs['los_iep'][i, j] = los_iep_corr

            # store the p-values
            # sos
            sos_partial_ps['sos_tem'][i, j] = sos_tem_p
            sos_partial_ps['sos_pre'][i, j] = sos_pre_p
            sos_partial_ps['sos_iet'][i, j] = sos_iet_p
            sos_partial_ps['sos_iep'][i, j] = sos_iep_p
            # eos
            eos_partial_ps['eos_tem'][i, j] = eos_tem_p
            eos_partial_ps['eos_pre'][i, j] = eos_pre_p
            eos_partial_ps['eos_iet'][i, j] = eos_iet_p
            eos_partial_ps['eos_iep'][i, j] = eos_iep_p
            # los
            los_partial_ps['los_tem'][i, j] = los_tem_p
            los_partial_ps['los_pre'][i, j] = los_pre_p
            los_partial_ps['los_iet'][i, j] = los_iet_p
            los_partial_ps['los_iep'][i, j] = los_iep_p

            if idx % 1000 == 0:
                print(f'Processed {idx} pixels')
    # outputs: sos_tem/pre/iet/iep; eos_tem/pre/iet/iep; los_tem/pre/iet/iep

    # Settings for saving the results
    # Take the metadata from one of the phenology input files
    _, meta = read_raster(file_paths['sos'][0])

    # Update the output dtype and band count
    meta['dtype'] = 'float32'
    meta['count'] = 1
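    # Optionally (not in the original post) also record NaN as the nodata value so that GIS software
    # masks missing pixels automatically:
    #   meta['nodata'] = np.nan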

    # correlation coefficients
    # Write each partial correlation grid to a GeoTIFF
    # sos
    for var in ['sos_tem', 'sos_pre', 'sos_iet', 'sos_iep']:
        with rasterio.open(f'{var}_partial_corr.tif', 'w', **meta) as dst:
            dst.write(sos_partial_corrs[var].astype('float32'), 1)  
    # eos
    for var in ['eos_tem', 'eos_pre', 'eos_iet', 'eos_iep']:
        with rasterio.open(f'{var}_partial_corr.tif', 'w', **meta) as dst:
            dst.write(eos_partial_corrs[var].astype('float32'), 1)
    # los
    for var in ['los_tem', 'los_pre', 'los_iet', 'los_iep']:
        with rasterio.open(f'{var}_partial_corr.tif', 'w', **meta) as dst:
            dst.write(los_partial_corrs[var].astype('float32'), 1)
   
    # p-values
    # Write each p-value grid to a GeoTIFF
    # sos
    for var in ['sos_tem', 'sos_pre', 'sos_iet', 'sos_iep']:
        with rasterio.open(f'{var}_partial_p.tif', 'w', **meta) as dst:
            dst.write(sos_partial_ps[var].astype('float32'), 1)  
    # eos
    for var in ['eos_tem', 'eos_pre', 'eos_iet', 'eos_iep']:
        with rasterio.open(f'{var}_partial_p.tif', 'w', **meta) as dst:
            dst.write(eos_partial_ps[var].astype('float32'), 1)
    # los
    for var in ['los_tem', 'los_pre', 'los_iet', 'los_iep']:
        with rasterio.open(f'{var}_partial_p.tif', 'w', **meta) as dst:
            dst.write(los_partial_ps[var].astype('float32'), 1)


if __name__ == '__main__':
    main()
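
A common follow-up once the 24 GeoTIFFs are written is to mask each correlation map by its p-value. A minimal sketch (the input names follow the naming convention used above, the output name and the conventional 0.05 threshold are my own choices):

import numpy as np
import rasterio

def mask_by_significance(corr_path, p_path, out_path, alpha=0.05):
    # Keep correlation values only where the two-tailed p-value is below alpha
    with rasterio.open(corr_path) as csrc, rasterio.open(p_path) as psrc:
        corr = csrc.read(1)
        p = psrc.read(1)
        meta = csrc.meta.copy()
    corr = np.where(p < alpha, corr, np.nan)
    with rasterio.open(out_path, 'w', **meta) as dst:
        dst.write(corr.astype('float32'), 1)

mask_by_significance('sos_tem_partial_corr.tif', 'sos_tem_partial_p.tif',
                     'sos_tem_partial_corr_sig05.tif')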

That's all. Questions and discussion are welcome!
