pygdal-基础操作

最新推荐文章于 2024-09-22 19:33:16 发布

风吴痕

最新推荐文章于 2024-09-22 19:33:16 发布

阅读量1.3k

点赞数 2

分类专栏： gdal 文章标签： gdal

gdal 专栏收录该内容

8 篇文章

订阅专栏

参考：

1、https://github.com/ceholden/open-geo-tutorial/tree/master/Python/chapters

2、http://pcjericks.github.io/py-gdalogr-cookbook/index.html

3、http://gdal.org/python/

4、http://blog.youkuaiyun.com/liminlu0314/article/category/777646

5、http://download.youkuaiyun.com/download/andreying/9555476# gdal源码剖析与开发指南

Chapter 1: Exploring the GDALDataset class

# Import the Python 3 print function
from __future__ import print_function

# Import the "gdal" submodule from within the "osgeo" module
from osgeo import gdal

# We can check which version we're running by printing the "__version__" variable
print("GDAL's version is: " + gdal.__version__)
print(gdal)
"""
GDAL's version is: 2.1.3
<module 'osgeo.gdal' from 'E:\\Program Files\\Anaconda\\lib\\site-packages\\osgeo\\gdal.py'>
"""

# Let's print out the value of the GDAL Byte data type (GDT_Byte)
#     the number printed out is how GDAL keeps track of the various data types
#     this variable, which has a fixed numeric value, is what's called an enumerated type, or enum

# Works
print(gdal.GDT_Byte) # 1
# Doesn't work
# print(GDT_Byte)

Examples

Open an image

#!/usr/bin/python3
# -*- coding: UTF-8 -*-

# Import the Python 3 print function
from __future__ import print_function

# Import the "gdal" submodule from within the "osgeo" module
from osgeo import gdal

# Open an image
# Open a GDAL dataset
dataset = gdal.Open(r'D:\test.tiff', gdal.GA_ReadOnly)

print(dataset)

Image attributes

# How many bands does this image have?
num_bands = dataset.RasterCount # 波段数
print('Number of bands in image: {n}\n'.format(n=num_bands))

# How many rows and columns?
rows = dataset.RasterYSize  # 图像高
cols = dataset.RasterXSize  # 图像宽
print('Image size is: {r} rows x {c} columns\n'.format(r=rows, c=cols))

# Does the raster have a description or metadata?
desc = dataset.GetDescription()
metadata = dataset.GetMetadata() # 得到的元数据

print('Raster description: {desc}'.format(desc=desc))
print('Raster metadata:')
print(metadata) # {'AREA_OR_POINT': 'Area'}
print('\n')

# What driver was used to open the raster?
driver = dataset.GetDriver()
print('Raster driver: {d}\n'.format(d=driver.ShortName)) # GTiff

# What is the raster's projection?
proj = dataset.GetProjection() # 投影信息
print('Image projection:')
print(proj + '\n')

# What is the raster's "geo-transform"
gt = dataset.GetGeoTransform() # 地理变换
print('Image geo-transform: {gt}\n'.format(gt=gt))

Image raster bands

# Open the blue band in our image
blue = dataset.GetRasterBand(1) # 得到第一个波段

print(blue)


# What is the band's datatype?
datatype = blue.DataType
print('Band datatype: {dt}'.format(dt=blue.DataType)) # 2

# If you recall from our discussion of enumerated types, this "3" we printed has a more useful definition for us to use
datatype_name = gdal.GetDataTypeName(blue.DataType)
print('Band datatype: {dt}'.format(dt=datatype_name))  # UInt16

# We can also ask how much space does this datatype take up
bytes = gdal.GetDataTypeSize(blue.DataType)
print('Band datatype size: {b} bytes\n'.format(b=bytes)) # 16 bytes

# How about some band statistics?
band_max, band_min, band_mean, band_stddev = blue.GetStatistics(0, 1)
print('Band range: {minimum} - {maximum}'.format(maximum=band_max,
                                                 minimum=band_min)) # 636.0 - 0.0
print('Band mean, stddev: {m}, {s}\n'.format(m=band_mean, s=band_stddev)) # 99.14346956521084, 66.95114925438804

读取所有波段到数组，使用默认数据类型

# No alias
import numpy
print(numpy.__version__)

# Alias or rename to "np" -- a very common practice
import numpy as np
print(np.__version__)

help(blue.ReadAsArray)

blue_data = blue.ReadAsArray() # 读取第一个波段转成numpy array

print(blue_data)
print()
print('Blue band mean is: {m}'.format(m=blue_data.mean()))
print('Size is: {sz}'.format(sz=blue_data.shape))

# Initialize a 3d array -- use the size properties of our image for portability!
image = np.zeros((dataset.RasterYSize, dataset.RasterXSize, dataset.RasterCount))

# Loop over all bands in dataset
for b in range(dataset.RasterCount):
    # Remember, GDAL index is on 1, but Python is on 0 -- so we add 1 for our GDAL calls
    band = dataset.GetRasterBand(b + 1)

    # Read in the band's data into the third dimension of our array
    image[:, :, b] = band.ReadAsArray()

print(image)
print(image.dtype)

dataset.GetRasterBand(1).DataType

读取所有波段到数值中并指定数据类型

from osgeo import gdal_array

# DataType is a property of the individual raster bands
image_datatype = dataset.GetRasterBand(1).DataType

# Allocate our array, but in a more efficient way
image_correct = np.zeros((dataset.RasterYSize, dataset.RasterXSize, dataset.RasterCount),
                         dtype=gdal_array.GDALTypeCodeToNumericTypeCode(image_datatype)) # 指定数据类型

# Loop over all bands in dataset
for b in range(dataset.RasterCount):
    # Remember, GDAL index is on 1, but Python is on 0 -- so we add 1 for our GDAL calls
    band = dataset.GetRasterBand(b + 1)

    # Read in the band's data into the third dimension of our array
    image_correct[:, :, b] = band.ReadAsArray()

print("Compare datatypes: ")
print("    when unspecified: {dt}".format(dt=image.dtype))
print("    when specified: {dt}".format(dt=image_correct.dtype))

关闭数据

dataset = None

image = None
image_correct = None

Chapter 2: Your first remote sensing vegetation index

Introduction

# Import the Python 3 print function
from __future__ import print_function

# Import the "gdal" and "gdal_array" submodules from within the "osgeo" module
from osgeo import gdal
from osgeo import gdal_array

# Import the NumPy module
import numpy as np

# Open a GDAL dataset
dataset = gdal.Open('D:/LE70220491999322EDC01_stack.gtif', gdal.GA_ReadOnly)

# Allocate our array using the first band's datatype
image_datatype = dataset.GetRasterBand(1).DataType

image = np.zeros((dataset.RasterYSize, dataset.RasterXSize, dataset.RasterCount),
                 dtype=gdal_array.GDALTypeCodeToNumericTypeCode(image_datatype)) # numpy.int16

# Loop over all bands in dataset
for b in range(dataset.RasterCount):
    # Remember, GDAL index is on 1, but Python is on 0 -- so we add 1 for our GDAL calls
    band = dataset.GetRasterBand(b + 1)

    # Read in the band's data into the third dimension of our array
    image[:, :, b] = band.ReadAsArray()

print('Red band mean: {r}'.format(r=image[:, :, 2].mean())) # 589.379808
print('NIR band mean: {nir}'.format(nir=image[:, :, 3].mean())) # 3442.297712

NDVI（归一化植被指数）

${\mbox{NDVI}}={\frac {({\mbox{NIR}}-{\mbox{Red}})}{({\mbox{NIR}}+{\mbox{Red}})}}$

b_red = 2 # 红光谱
b_nir = 3 # 近红外光谱

ndvi = (image[:, :, b_nir] - image[:, :, b_red]) / (image[:, :, b_red] + image[:, :, b_nir])

print(ndvi)
print(ndvi.max()) # 0.904601300891

ndvi = (image[:, :, b_nir] - image[:, :, b_red]) / \
        (image[:, :, b_nir] + image[:, :, b_red]).astype(np.float64)

print('NDVI matrix: ')
print(ndvi)

print('\nMax NDVI: {m}'.format(m=ndvi.max())) # 0.9046013008913515
print('Mean NDVI: {m}'.format(m=ndvi.mean())) # 0.7088133953809207
print('Median NDVI: {m}'.format(m=np.median(ndvi))) # 0.7319195214790647
print('Min NDVI: {m}'.format(m=ndvi.min())) # 0.09470304975922954

Chapter 3: Plotting and visualizing your data with `matplotlib`

Image display

# Import the Python 3 print function
from __future__ import print_function

# Import the "gdal" and "gdal_array" submodules from within the "osgeo" module
from osgeo import gdal
from osgeo import gdal_array

# Import the NumPy module
import numpy as np

# Open a GDAL dataset
dataset = gdal.Open('D:/LE70220491999322EDC01_stack.gtif', gdal.GA_ReadOnly)

# Allocate our array using the first band's datatype
image_datatype = dataset.GetRasterBand(1).DataType # 获取数据类型

image = np.zeros((dataset.RasterYSize, dataset.RasterXSize, dataset.RasterCount),
                 dtype=gdal_array.GDALTypeCodeToNumericTypeCode(image_datatype))

# Loop over all bands in dataset
for b in range(dataset.RasterCount):
    # Remember, GDAL index is on 1, but Python is on 0 -- so we add 1 for our GDAL calls
    band = dataset.GetRasterBand(b + 1)

    # Read in the band's data into the third dimension of our array
    image[:, :, b] = band.ReadAsArray()

ndvi = (image[:, :, 3] - image[:, :, 2]) / \
       (image[:, :, 3] + image[:, :, 2]).astype(np.float64)

Basic plotting

import matplotlib.pyplot as plt
# %matplotlib inline
# Array of 0 - 9
x = np.arange(10)
# 10 random numbers, between 0 and 10
y = np.random.randint(0, 10, size=10)

# plot them as lines
plt.plot(x, y)

# plot them as just points -- specify "ls" ("linestyle") as a null string
plt.plot(x, y, 'ro', ls='')

Plotting 2D arrays

import matplotlib.pyplot as plt
print('Array shape before: {shp} (size is {sz})'.format(shp=image[:, :, 3].shape, sz=image[:, :, 3].size))

red = np.ndarray.flatten(image[:, :, 2])
nir = np.ndarray.flatten(image[:, :, 3])

print('Array shape after: {shp} (size is {sz})'.format(shp=nir.shape, sz=nir.size))

# Make the plot
plt.scatter(red, nir, color='r', marker='o')

# Add some axis labels
plt.xlabel('Red Reflectance')
plt.ylabel('NIR label')

# Add a title
plt.title('Tasseled Cap, eh?')
plt.show()


# Make the plot
plt.scatter(red, nir, color='r', marker='o')

# Calculate min and max
plot_min = min(red.min(), nir.min())
plot_max = max(red.max(), nir.max())

plt.xlim((plot_min, plot_max))
plt.ylim((plot_min, plot_max))

# Add some axis labels
plt.xlabel('Red Reflectance')
plt.ylabel('NIR label')

# Add a title
plt.title('Tasseled Cap, eh?')

Plotting 2D arrays - images

# use "imshow" for an image -- nir at first
plt.imshow(image[:, :, 3])

# use "imshow" for an image -- nir at first
plt.imshow(image[:, :, 3])
plt.colorbar()

# use "imshow" for an image -- nir in first subplot, red in second
plt.subplot(121)
plt.imshow(image[:, :, 3], cmap=plt.cm.Greys)
plt.colorbar()

# Now red band in the second subplot (indicated by last of the 3 numbers)
plt.subplot(122)
plt.imshow(image[:, :, 2], cmap=plt.cm.Greys)
plt.colorbar()

Plotting 3D arrays - multispectral images

# Extract reference to SWIR1, NIR, and Red bands
index = np.array([4, 3, 2])
colors = image[:, :, index].astype(np.float64)

max_val = 8000
min_val = 0

# Enforce maximum and minimum values
colors[colors[:, :, :] > max_val] = max_val
colors[colors[:, :, :] < min_val] = min_val

for b in range(colors.shape[2]):
    colors[:, :, b] = colors[:, :, b] * 1 / (max_val - min_val)

plt.subplot(121)
plt.imshow(colors)

# Show NDVI
plt.subplot(122)
plt.imshow(ndvi, cmap=plt.cm.Greys_r)

Chapter 4: Importing and using vector data -- the OGR library

Opening an ESRI Shapefile

# Import Python 3 print function
from __future__ import print_function

# Import OGR -
from osgeo import ogr

# Open the dataset from the file
dataset = ogr.Open('D:/training_data/training_data.shp')

# Make sure the dataset exists -- it would be None if we couldn't open it
if not dataset:
    print('Error: could not open dataset')


### Let's get the driver from this file
driver = dataset.GetDriver()
print('Dataset driver is: {n}\n'.format(n=driver.name)) # ESRI Shapefile

### How many layers are contained in this Shapefile?
layer_count = dataset.GetLayerCount()
print('The shapefile has {n} layer(s)\n'.format(n=layer_count)) # 1

### What is the name of the 1 layer?
layer = dataset.GetLayerByIndex(0) # 获取第一层
print('The layer is named: {n}\n'.format(n=layer.GetName())) # training_data

### What is the layer's geometry? is it a point? a polyline? a polygon?
# First read in the geometry - but this is the enumerated type's value
geometry = layer.GetGeomType() # 几何类型

# So we need to translate it to the name of the enum
geometry_name = ogr.GeometryTypeToName(geometry)
print("The layer's geometry is: {geom}\n".format(geom=geometry_name)) # Polygon

### What is the layer's projection?
# Get the spatial reference
spatial_ref = layer.GetSpatialRef() # 参考系

# Export this spatial reference to something we can read... like the Proj4
proj4 = spatial_ref.ExportToProj4()
print('Layer projection is: {proj4}\n'.format(proj4=proj4)) #

### How many features are in the layer?
feature_count = layer.GetFeatureCount() # 特征数
print('Layer has {n} features\n'.format(n=feature_count)) # 30

### How many fields are in the shapefile, and what are their names?
# First we need to capture the layer definition
defn = layer.GetLayerDefn()

# How many fields
field_count = defn.GetFieldCount() # 图层字段数
print('Layer has {n} fields'.format(n=field_count)) # 2

# What are their names?
print('Their names are: ')
for i in range(field_count):
    field_defn = defn.GetFieldDefn(i)
    print('\t{name} - {datatype}'.format(name=field_defn.GetName(),
                                         datatype=field_defn.GetTypeName()))
# id - Integer64
# class - String

Tie-in with our Raster dataset

Pure Python version -- gdal.RasterizeLayer

# Import GDAL
from osgeo import gdal

# Import OGR -
from osgeo import ogr

# Open the dataset from the file
dataset = ogr.Open('D:/training_data/training_data.shp')

# Make sure the dataset exists -- it would be None if we couldn't open it
if not dataset:
    print('Error: could not open dataset')

layer = dataset.GetLayerByIndex(0)

# First we will open our raster image, to understand how we will want to rasterize our vector
raster_ds = gdal.Open('D:/LE70220491999322EDC01_stack.gtif', gdal.GA_ReadOnly)

# Fetch number of rows and columns
ncol = raster_ds.RasterXSize
nrow = raster_ds.RasterYSize

# Fetch projection and extent
proj = raster_ds.GetProjectionRef() # 坐标系
ext = raster_ds.GetGeoTransform() # 地理转换

raster_ds = None

# Create the raster dataset # 创建掩膜
memory_driver = gdal.GetDriverByName('GTiff') # 创建栅格驱动
out_raster_ds = memory_driver.Create('D:/training_data.gtif', ncol, nrow, 1, gdal.GDT_Byte) # 创建栅格数据

# Set the ROI image's projection and extent to our input raster's projection and extent
out_raster_ds.SetProjection(proj) # 设置投影
out_raster_ds.SetGeoTransform(ext) # 设置地理变换

# Fill our output band with the 0 blank, no class label, value
b = out_raster_ds.GetRasterBand(1) # 获取第一个波段
b.Fill(0) # 全部填充 0

# Rasterize the shapefile layer to our new dataset   # 矢量栅格化
status = gdal.RasterizeLayer(out_raster_ds,  # output to our new dataset
                             [1],  # output to our new dataset's first band
                             layer,  # rasterize this layer
                             None, None,  # don't worry about transformations since we're in same projection
                             [255],  # burn value 255
                             ['ALL_TOUCHED=TRUE',  # rasterize all pixels touched by polygons
                              'ATTRIBUTE=id']  # put raster values according to the 'id' field values
                             )

# status=gdal.RasterizeLayer(out_raster_ds, [1], layer, burn_values=[255])  # 几何体内的值全设置为255
out_raster_ds.FlushCache()

# Close dataset
out_raster_ds = None

if status != 0:
    print("I don't think it worked...")
else:
    print("Success")

Check rasterized layer

# Import NumPy for some statistics
import numpy as np
from osgeo import gdal

roi_ds = gdal.Open('D:/training_data.gtif', gdal.GA_ReadOnly)

roi = roi_ds.GetRasterBand(1).ReadAsArray()

# x=set([i for j in roi for i in j]) # set也具有去重复
# How many pixels are in each class?classes = np.unique(roi) # 去除重复的# Iterate over all class labels in the ROI image, printing out some informationfor c in classes:    print('Class {c} contains {n} pixels'.format(c=c,                                                 n=(roi == c).sum()))

Chapter 5: Classification of land cover

scikit-learn

from IPython.display import Image
img = Image('http://scikit-learn.org/stable/_images/plot_classifier_comparison_001.png')
img
print(type(img))

Preparing the dataset

Opening the images

# Import Python 3's print function and division
from __future__ import print_function, division

# Import GDAL, NumPy, and matplotlib
from osgeo import gdal, gdal_array
import numpy as np
import matplotlib.pyplot as plt
# % matplotlib
# inline

# Tell GDAL to throw Python exceptions, and register all drivers
gdal.UseExceptions()
gdal.AllRegister()

# Read in our image and ROI image
img_ds = gdal.Open('D:/LE70220491999322EDC01_stack.gtif', gdal.GA_ReadOnly)
roi_ds = gdal.Open('D:/training_data.gtif', gdal.GA_ReadOnly)

img = np.zeros((img_ds.RasterYSize, img_ds.RasterXSize, img_ds.RasterCount),
               gdal_array.GDALTypeCodeToNumericTypeCode(img_ds.GetRasterBand(1).DataType))
for b in range(img.shape[2]):
    img[:, :, b] = img_ds.GetRasterBand(b + 1).ReadAsArray()

roi = roi_ds.GetRasterBand(1).ReadAsArray().astype(np.uint8)

# Display them
plt.subplot(121)
plt.imshow(img[:, :, 4], cmap=plt.cm.Greys_r)
plt.title('SWIR1')

plt.subplot(122)
plt.imshow(roi, cmap=plt.cm.Spectral)
plt.title('ROI Training Data')

plt.show()

Pairing Y with X

# Find how many non-zero entries we have -- i.e. how many training data samples?
n_samples = (roi > 0).sum()
print('We have {n} samples'.format(n=n_samples)) # 1100

# What are our classification labels?
labels = np.unique(roi[roi > 0])
print('The training data include {n} classes: {classes}'.format(n=labels.size,
                                                                classes=labels)) # [1 2 3 4 5]
# We will need a "X" matrix containing our features, and a "y" array containing our labels
#     These will have n_samples rows
#     In other languages we would need to allocate these and them loop to fill them, but NumPy can be faster

X = img[roi > 0, :]  # include 8th band, which is Fmask, for now
y = roi[roi > 0]

print('Our X matrix is sized: {sz}'.format(sz=X.shape)) # (1100, 8)
print('Our y array is sized: {sz}'.format(sz=y.shape)) # (1100,)

# Mask out clouds, cloud shadows, and snow using Fmask
clear = X[:, 7] <= 1

X = X[clear, :7]  # we can ditch the Fmask band now
y = y[clear]

print('After masking, our X matrix is sized: {sz}'.format(sz=X.shape)) # (1100, 7)
print('After masking, our y array is sized: {sz}'.format(sz=y.shape)) # (1100,)

Training the Random Forest

from sklearn.ensemble import RandomForestClassifier

# Initialize our model with 500 trees
rf = RandomForestClassifier(n_estimators=500, oob_score=True)

# Fit our model to training data
rf = rf.fit(X, y)

Random Forest diagnostics

from sklearn.ensemble import RandomForestClassifier

# Initialize our model with 500 trees
rf = RandomForestClassifier(n_estimators=500, oob_score=True)

# Fit our model to training data
rf = rf.fit(X, y)

print('Our OOB prediction of accuracy is: {oob}%'.format(oob=rf.oob_score_ * 100))

bands = [1, 2, 3, 4, 5, 7, 6]

for b, imp in zip(bands, rf.feature_importances_):
    print('Band {b} importance: {imp}'.format(b=b, imp=imp))
    
import pandas as pd

# Setup a dataframe -- just like R
df = pd.DataFrame()
df['truth'] = y
df['predict'] = rf.predict(X)

# Cross-tabulate predictions
print(pd.crosstab(df['truth'], df['predict'], margins=True))

Predicting the rest of the image

# Take our full image, ignore the Fmask band, and reshape into long 2d array (nrow * ncol, nband) for classification
new_shape = (img.shape[0] * img.shape[1], img.shape[2] - 1)

img_as_array = img[:, :, :7].reshape(new_shape)
print('Reshaped from {o} to {n}'.format(o=img.shape,
                                        n=img_as_array.shape))

# Now predict for each pixel
class_prediction = rf.predict(img_as_array)

# Reshape our classification map
class_prediction = class_prediction.reshape(img[:, :, 0].shape)


# Visualize

# First setup a 5-4-3 composite
def color_stretch(image, index, minmax=(0, 10000)):
    colors = image[:, :, index].astype(np.float64)

    max_val = minmax[1]
    min_val = minmax[0]

    # Enforce maximum and minimum values
    colors[colors[:, :, :] > max_val] = max_val
    colors[colors[:, :, :] < min_val] = min_val

    for b in range(colors.shape[2]):
        colors[:, :, b] = colors[:, :, b] * 1 / (max_val - min_val)

    return colors


img543 = color_stretch(img, [4, 3, 2], (0, 8000))

# See https://github.com/matplotlib/matplotlib/issues/844/
n = class_prediction.max()
# Next setup a colormap for our map
colors = dict((
    (0, (0, 0, 0, 255)),  # Nodata
    (1, (0, 150, 0, 255)),  # Forest
    (2, (0, 0, 255, 255)),  # Water
    (3, (0, 255, 0, 255)),  # Herbaceous
    (4, (160, 82, 45, 255)),  # Barren
    (5, (255, 0, 0, 255))  # Urban
))
# Put 0 - 255 as float 0 - 1
for k in colors:
    v = colors[k]
    _v = [_v / 255.0 for _v in v]
    colors[k] = _v

index_colors = [colors[key] if key in colors else
                (255, 255, 255, 0) for key in range(1, n + 1)]
cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n)

# Now show the classmap next to the image
plt.subplot(121)
plt.imshow(img543)

plt.subplot(122)
plt.imshow(class_prediction, cmap=cmap, interpolation='none')

plt.show()