遥感图像处理:读取Tiff数据的一些方法

原创于 2025-06-11 09:28:08 发布 · 718 阅读

8 ·

CC 4.0 BY-SA版权

文章标签：

#图像处理 #人工智能

深度学习研究——遥感视觉专栏收录该内容

3 篇文章

订阅专栏

部署运行你感兴趣的模型镜像

遥感图像处理:读取Tiff数据的一些方法

文章目录

遥感图像处理:读取Tiff数据的一些方法
前言
一、读取tiff图像以及将shp数据转为分割图的方法
总结

前言

深度学习中遥感图像的处理其实很麻烦，毕竟是独立的一些数据，本文提供的方法仅供参考，在读取数据的过程中，可能会碰到一下子读取全部tiff数据导致内存爆炸的情况，所以写了一个tiffreader的类，用于防止这一个情况的发生。

一、读取tiff图像以及将shp数据转为分割图的方法

import os
from fiona.transform import transform_geom
from osgeo import gdal
import geopandas as gpd
import psutil
from rasterio.windows import Window
import rasterio
from rasterio.mask import mask
from rasterio.warp import transform_geom
import fiona
import numpy as np
import cv2
from shapely.geometry import shape, Polygon, mapping


class TiffReader:
    def __init__(self, max_memory_usage=2.0, chunk_size=(512, 512),log=False):
        self.max_memory_usage = max_memory_usage
        self.chunk_size = chunk_size
        self.log = log
    def check_memory_usage(self):
        memory = psutil.virtual_memory()
        return memory.available / (1024 ** 3)  # 转换为GB
    def can_read_chunk(self, bands, window):
        bytes_per_pixel = 4  # 假设使用 float32
        required_memory = bytes_per_pixel * window.width * window.height * len(bands) / (1024 ** 2)  # 转换为MB
        available_memory_mb = self.check_memory_usage() * 1024
        if required_memory > available_memory_mb:
            raise MemoryError(
                f"无法读取此数据块，内存不足。\n"
                f"需要的内存: {required_memory:.2f} MB，可用内存: {available_memory_mb:.2f} MB。\n"
                f"请尝试减少波段数或增大分块大小。"
            )
        return True
    def read_chunk(self, src, bands, window):
        if isinstance(window, tuple):
            window = Window(window[0], window[1], window[2] - window[0], window[3] - window[1])

        # 检查窗口尺寸是否合法
        if window.height <= 0 or window.width <= 0:
            raise ValueError(f"窗口尺寸无效。窗口高度: {window.height}, 窗口宽度: {window.width}。窗口的尺寸必须大于0。")

        if window.col_off < 0 or window.row_off < 0:
            raise ValueError(f"窗口起始位置无效。列偏移: {window.col_off}, 行偏移: {window.row_off}。起始位置必须为非负值。")

        height, width = src.shape
        if window.row_off + window.height > height or window.col_off + window.width > width:
            raise ValueError(
                f"窗口超出图像边界。\n"
                f"图像尺寸: 高度={height}, 宽度={width}。\n"
                f"请求的窗口起始点: ({window.row_off}, {window.col_off})，窗口尺寸: 高度={window.height}, 宽度={window.width}。"
            )

        self.can_read_chunk(bands, window)

        band_data = []
        for band in bands:
            try:
                band_array = src.read(band, window=window)
                band_data.append(band_array)
            except IndexError:
                raise ValueError(
                    f"请求的波段 {band} 超出了文件的波段数。\n"
                    f"文件包含的波段数为 {src.count}，但请求的波段为 {bands}。"
                )
            except Exception as e:
                raise RuntimeError(f"读取波段 {band} 时发生未知错误: {e}")
        return np.stack(band_data, axis=0)
    def read_tif(self, filepath, bands=[1, 2, 3], window=None):
        available_memory_gb = self.check_memory_usage()
        if self.log:
            print(f"可用内存: {available_memory_gb:.2f} GB")

        try:
            with rasterio.open(filepath) as src:
                total_bands = src.count
                height, width = src.shape
                # 检查请求的波段是否超出范围
                if any(band > total_bands for band in bands):
                    invalid_bands = [band for band in bands if band > total_bands]
                    raise ValueError(
                        f"请求的波段 {invalid_bands} 超出了文件的波段数 ({total_bands})。\n"
                        f"文件波段范围是 1 到 {total_bands}，但请求的波段是 {bands}。"
                    )

                # 如果指定了窗口，则读取窗口中的数据
                if window is not None:
                    if self.log:
                        print(f"正在读取指定窗口中的数据: {window}")
                    return self.read_chunk(src, bands, window)

                # 如果图像小于分块大小，则直接读取整个图像
                chunk_height, chunk_width = self.chunk_size
                if height <= chunk_height and width <= chunk_width:
                    if self.log:
                        print("图像大小小于分块大小，直接读取整个图像。")
                    return self.read_chunk(src, bands, Window(0, 0, width, height))

                # 进行分块读取
                chunk_count_vertical = (height + chunk_height - 1) // chunk_height
                chunk_count_horizontal = (width + chunk_width - 1) // chunk_width
                full_data = np.zeros((len(bands), height, width), dtype=np.float32)

                for i in range(chunk_count_vertical):
                    for j in range(chunk_count_horizontal):
                        top = i * chunk_height
                        left = j * chunk_width
                        bottom = min(top + chunk_height, height)
                        right = min(left + chunk_width, width)

                        chunk_window = Window(left, top, right - left, bottom - top)
                        if self.log:
                            print(f"正在读取区域: {chunk_window}")

                        # 检查是否可以读取该块数据
                        if not self.can_read_chunk(bands, chunk_window):
                            raise MemoryError(
                                f"内存不足，无法读取区域 ({left}, {top}, {right}, {bottom}) 的数据。\n"
                                f"请尝试减小分块大小或减少波段数。"
                            )

                        chunk_data = self.read_chunk(src, bands, chunk_window)
                        full_data[:, top:bottom, left:right] = chunk_data
                return full_data

        except FileNotFoundError:
            raise FileNotFoundError(f"文件未找到: {filepath}。请确认路径是否正确。")
        except rasterio.errors.RasterioError as e:
            raise RuntimeError(f"读取 TIFF 文件时发生错误: {e}")
        except MemoryError as e:
            raise MemoryError(f"内存不足: {e}。请尝试减少波段数或减小分块大小。")
        except ValueError as e:
            raise ValueError(f"参数无效: {e}")
        except Exception as e:
            raise RuntimeError(f"发生未知错误: {e}。")
    def export_shp_tif_segment(self, shp_path, tif_path,bands=[1,2,3,4]):
        """
        从Shapefile文件中提取面数据并将其与GeoTIFF文件进行空间对齐，生成分割图和边缘图。
        """
        # 1. 读取Shapefile数据
        shp_data = gpd.read_file(shp_path)
        shp_crs = shp_data.crs

        # 2. 读取GeoTIFF数据
        with rasterio.open(tif_path) as src:
            tif_crs = src.crs
            tif_transform = src.transform  # 仿射变换矩阵
            tif_width = src.width
            tif_height = src.height
            tif_data = src.read(bands)  # 读取指定波段的数据
            tif_bounds = src.bounds  # 获取GeoTIFF的范围（左下和右上坐标）

        # 3. 检查坐标系是否一致
        if shp_crs != tif_crs:
            raise ValueError("坐标系不一致，无法进行处理。Shapefile的坐标系与GeoTIFF的坐标系不一致。")

        # 4. 将Shapefile中的面数据转换为像素坐标
        # 获取与tif范围重合的面数据
        shp_data = shp_data.cx[tif_bounds[0]:tif_bounds[2], tif_bounds[1]:tif_bounds[3]]

        # 将面数据的坐标从地理坐标系转换为像素坐标
        def transform_to_pixel_coords(geometry):
            """将面数据的地理坐标转换为像素坐标"""
            if isinstance(geometry, Polygon):
                coords = list(geometry.exterior.coords)
                # print(coords)
                pixel_coords = [~tif_transform * (x, y) for x, y,*rest in coords]
                pixel_coords = np.array(pixel_coords).astype(int)
                return pixel_coords
            return None

        all_pixel_coords = []
        for _, row in shp_data.iterrows():
            geometry = row['geometry']
            pixel_coords = transform_to_pixel_coords(geometry)
            if pixel_coords is not None:
                all_pixel_coords.append(pixel_coords)

        # 5. 创建分割图和边缘图
        segmentation_image = np.zeros((tif_height, tif_width), dtype=np.uint8)
        edge_image = np.zeros((tif_height, tif_width), dtype=np.uint8)

        for pixel_coords in all_pixel_coords:
            # 在分割图上填充面数据
            cv2.fillPoly(segmentation_image, [pixel_coords], 255)

            # 提取边缘图
            cv2.polylines(edge_image, [pixel_coords], isClosed=True, color=255, thickness=1)

        # 6. 返回处理结果
        return tif_data, segmentation_image, edge_image