逐栅格并行计算偏相关分析

部署运行你感兴趣的模型镜像

1、背景(我要干的事儿):

自变量(4个):tem、pre、iet、iep

因变量(3个):SOS、EOS、LOS 

目标:逐像素并行计算上述3x4=12个组合的偏相关系数

出处声明:本代码是在借鉴这篇Python多线程计算逐像元偏相关性分析文章的基础上,加入偏相关系数显著性(p值)的计算,并结合自己情况,一口气输出上述12个组合的结果。此处感谢该文大佬~

2、代码 

代码分为 ‘process(函数)’ 和 ‘实际运行’ 两个.py文件在pycharm中运行(这样避免了‘溢出’报错)

Part 1-process.py

#%%
import rasterio
import numpy as np
from sklearn.linear_model import LinearRegression
from multiprocessing import Pool
import itertools
import numpy as np
import pickle
from scipy import stats
# 忽略除以零等运行时警告
np.seterr(divide='ignore', invalid='ignore')

'''
目标:计算偏相关系数+t检验显著性
输入:两个变量(x和y)在控制其他变量的情况下的偏相关系数
输出:偏相关系数+p值
'''
def calculate_partial_correlation(x, y, controls):
    """Partial correlation between x and y, controlling for `controls`.

    Parameters
    ----------
    x, y : 1-D arrays of length n (one observation per year for a pixel).
    controls : 2-D array of shape (n, k) holding the k control variables.

    Returns
    -------
    (partial_corr, p_value) : tuple of floats.
        (nan, nan) when any input contains NaN.  BUG FIX: the original
        returned a bare scalar ``np.nan`` here, which broke the two-value
        tuple unpacking at every call site.
    """
    if np.isnan(x).any() or np.isnan(y).any() or np.isnan(controls).any():
        return np.nan, np.nan

    # OLS residuals of y and x regressed on the controls (with intercept).
    # np.linalg.lstsq on a design matrix with a ones column is exactly
    # equivalent to sklearn's LinearRegression(fit_intercept=True).
    n, k = controls.shape
    design = np.column_stack([np.ones(n), controls])
    residuals_y = y - design @ np.linalg.lstsq(design, y, rcond=None)[0]
    residuals_x = x - design @ np.linalg.lstsq(design, x, rcond=None)[0]

    # The partial correlation is the plain correlation of the residuals.
    partial_corr = np.corrcoef(residuals_x, residuals_y)[0, 1]

    # Two-tailed t-test: t = r * sqrt(df) / sqrt(1 - r^2), df = n - k - 2.
    # BUG FIX: the original scaled r by the Fisher-z standard error
    # 1/sqrt(n-k-3), which is not the t statistic for a partial
    # correlation; stats.t.sf is also numerically safer than 1 - cdf.
    df = n - k - 2
    t_value = partial_corr * np.sqrt(df) / np.sqrt(1.0 - partial_corr ** 2)
    p_value = 2 * stats.t.sf(abs(t_value), df=df)

    return partial_corr, p_value
'''
目的:计算偏相关系数
输入:sos/eos/los/tem/pre/iet/iep-stacked三维数据
输出:sos/eos/los的偏相关系数
'''
def worker(pixel_data):
    """Compute the 12 phenology-vs-climate partial correlations for one pixel.

    Parameters
    ----------
    pixel_data : tuple (i, j, data)
        Pixel row/column indices plus a dict mapping each variable name
        ('sos', 'eos', 'los', 'tem', 'pre', 'iet', 'iep') to its
        per-year 1-D series at this pixel.

    Returns
    -------
    A 26-tuple ``(i, j, 12 correlations, 12 p-values)`` ordered as
    sos/eos/los crossed with tem/pre/iet/iep.  A pixel containing any
    NaN or inf yields NaN for all 24 statistics.  BUG FIX: the original
    returned ``(i, j) + (None,) * 12`` for invalid pixels — a 14-tuple,
    while the caller unpacks 26 values (ValueError), and None cannot be
    stored in float result grids anyway.
    """
    i, j, data = pixel_data

    pheno_series = [data['sos'], data['eos'], data['los']]
    climate_names = ['tem', 'pre', 'iet', 'iep']
    climate_series = {name: data[name] for name in climate_names}

    # Reject the whole pixel if any series contains NaN or inf; the
    # result grids are NaN-initialised, so 24 NaNs leave them untouched.
    all_series = pheno_series + list(climate_series.values())
    if any(np.isnan(s).any() or np.isinf(s).any() for s in all_series):
        return (i, j) + (np.nan,) * 24

    # For each climate driver, the controls are the other three drivers
    # (same column order as the original explicit column_stack calls).
    controls = {
        name: np.column_stack([climate_series[m]
                               for m in climate_names if m != name])
        for name in climate_names
    }

    corrs = []
    ps = []
    for pheno in pheno_series:          # sos, eos, los
        for name in climate_names:      # tem, pre, iet, iep
            r, p = calculate_partial_correlation(
                pheno, climate_series[name], controls[name])
            corrs.append(r)
            ps.append(p)

    return (i, j, *corrs, *ps)

Part 2-实际运行.py

#%%
import rasterio
from sklearn.linear_model import LinearRegression
from multiprocessing import Pool
import itertools
import numpy as np
import pickle
import os
import sys
from PIL import Image
import matplotlib.pyplot as plt
from tifffile import imread, imwrite
from osgeo import gdal
import os
from tqdm import tqdm
lib_path = r'D:\code\MCD12Q2.006_01-19'#process.py所在文件夹路径
sys.path.append(os.path.abspath(lib_path))
from partial_corr_process import worker, calculate_partial_correlation
# 忽略除以零等运行时警告
np.seterr(divide='ignore', invalid='ignore')
#%%
'''
目的:读取栅格数据
输入:文件路径
输出:读取数组、图像元数据,例如数据类型、数据范围等
'''
def read_raster(file_path):
    """Open the raster at ``file_path`` and return (band-1 array, metadata).

    The metadata dict carries dtype, CRS, transform, etc., as provided
    by rasterio; the dataset is closed before returning.
    """
    with rasterio.open(file_path) as dataset:
        band = dataset.read(1)
        meta = dataset.meta
    return band, meta
#%%
#主函数
def main():
    """Read the yearly rasters, run the per-pixel partial-correlation
    workers in parallel, and write one single-band float32 GeoTIFF per
    statistic (12 correlation grids + 12 p-value grids)."""
    phenos = ['sos', 'eos', 'los']
    climates = ['tem', 'pre', 'iet', 'iep']
    # Combination keys in output order: sos_tem, sos_pre, ..., los_iep.
    combos = [f'{p}_{c}' for p in phenos for c in climates]

    # One input file per variable per year.
    years = range(2013, 2020)
    file_paths = {
        var: [f'{var}\\data_{var}_{year}.tif' for year in years]
        for var in phenos + climates}

    # Stack each variable into a (years, rows, cols) array.
    rasters = {var: np.array([read_raster(fp)[0] for fp in fps])
               for var, fps in file_paths.items()}

    # BUG FIX: the original computed the minimum shape over only 'sos'
    # and 'iet'; take it over every raster so v[:, i, j] can never index
    # out of bounds when the other variables are smaller.
    min_shape = tuple(min(sizes) for sizes
                      in zip(*(r.shape for r in rasters.values())))

    # One work item per pixel: (row, col, {var: per-year 1-D series}).
    pixel_data = [(i, j, {k: v[:, i, j] for k, v in rasters.items()})
                  for i, j in itertools.product(range(min_shape[1]),
                                                range(min_shape[2]))]

    # NaN-initialised result grids, one per combination (NaN marks
    # pixels the worker skipped because of invalid input data).
    corr_grids = {key: np.full(min_shape[1:], np.nan) for key in combos}
    p_grids = {key: np.full(min_shape[1:], np.nan) for key in combos}

    # Fan the pixels out over a process pool; report every 1000 pixels.
    with Pool() as pool:
        for idx, result in enumerate(pool.imap(worker, pixel_data), 1):
            i, j, *values = result
            if len(values) == 24:
                # 12 correlations followed by 12 p-values, in combo order.
                for key, corr, p in zip(combos, values[:12], values[12:]):
                    # Tolerate None from a worker that flags invalid
                    # pixels with None: float grids reject None values.
                    corr_grids[key][i, j] = np.nan if corr is None else corr
                    p_grids[key][i, j] = np.nan if p is None else p
            # else: the worker returned a short "invalid pixel" tuple —
            # leave the NaN initial values in place.
            if idx % 1000 == 0:
                print(f'Processed {idx} pixels')

    # Reuse the metadata of one phenology input for every output file.
    _, meta = read_raster(file_paths['sos'][0])
    meta['dtype'] = 'float32'   # output pixel type
    meta['count'] = 1           # single band per output

    # Write the 12 correlation grids, then the 12 p-value grids.
    for key in combos:
        with rasterio.open(f'{key}_partial_corr.tif', 'w', **meta) as dst:
            dst.write(corr_grids[key].astype('float32'), 1)
    for key in combos:
        with rasterio.open(f'{key}_partial_p.tif', 'w', **meta) as dst:
            dst.write(p_grids[key].astype('float32'), 1)


if __name__ == '__main__':
    main()

完结撒花~欢迎交流~

您可能感兴趣的与本文相关的镜像

Python3.9

Python3.9

Conda
Python

Python 是一种高级、解释型、通用的编程语言,以其简洁易读的语法而闻名,适用于广泛的应用,包括Web开发、数据分析、人工智能和自动化脚本

### 偏相关分析栅格数据中的应用 偏相关分析是一种用于评估两个变量之间的关系强度的方法,同时控制其他可能影响该关系的因素。对于栅格数据而言,这种分析通常涉及多个地理空间变量的数据集。以下是实现这一目标的具体方法: #### 数据准备阶段 为了确保栅格数据能够顺利进行偏相关分析,需先完成必要的预处理工作。这一步骤包括但不限于统一输入数据的空间参考、像元大小(分辨率)、投影等内容[^2]。 - **投影一致性**:如果不同源的栅格数据具有不同的投影方式,则需要通过工具如ArcGIS或其他软件将其转换至同一投影体系下。 - **重采样操作**:当各层栅格图像像素尺寸存在差异时,应采用适当算法调整它们达到相同尺度水平。 - **掩模裁剪**:限定研究范围内的有效区域,排除无关背景干扰项的影响。 #### 使用 ArcGIS 进行初步探索性分析 利用 ArcGIS 的内置功能可以快速获取有关多组栅格间的统计特性描述信息。例如,“Spatial Analyst -> 多元分析 -> 波段集统计”命令可以帮助我们了解每一对感兴趣属性之间是否存在显著关联趋势[^3]。 然而需要注意的是,上述过程仅提供了关于总体模式的一个概览视角,并未深入探讨潜在混杂因子的作用机制。 #### 实施高级建模 - R语言解决方案 针对更复杂的场景需求,推荐借助编程环境来构建定制化的工作流方案。下面将以R为例展示具体流程: 1. 加载必要库并读取所需文件; ```r library(raster) ndvi <- raster("path/to/ndvi.tif") precipitation <- raster("path/to/precipitation.tif") temperature <- raster("path/to/temperature.tif") #假设温度作为第三个变量参与调节作用 ``` 2. 将所有要素叠加形成单一对象以便后续运算; ```r stack_data <- stack(ndvi, precipitation, temperature) ``` 3. 提取对应位置处的实际观测值序列; ```r values_df <- as.data.frame(stack_data, xy=FALSE, na.rm=TRUE) colnames(values_df) <- c('NDVI', 'Precipitation', 'Temperature') ``` 4. 应用corpcor::cor2pcor函数计算净效应系数估计量; ```r partial_correlation_matrix <- corpcor::cor2pcor(cor(values_df)) print(partial_correlation_matrix) ``` 以上代码片段展示了如何在给定条件设定的前提下衡量任意两者间独立于其余因素之外的真实联系程度。 --- ### 注意事项 尽管本文介绍了两种主要途径来进行此类任务,但在实际项目实施过程中还需考虑诸多细节问题,比如缺失值填补策略的选择、异常点检测与修正措施等都会不同程度地影响最终结论的有效性和可靠性。
评论 4
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值