一、Cascaded Shadow Maps
阴影贴图是在游戏引擎中广泛使用的实现实时阴影的技术。基础的阴影贴图方法对于大型场景渲染显得力不从心,很容易出现阴影抖动和锯齿边缘现象。Cascaded Shadow Maps(CSM)方法根据对象到观察者的距离提供不同分辨率的深度纹理来解决上述问题。它将相机的视锥体分割成若干部分,然后为分割的每一部分生成独立的深度贴图。
CSM通常用来在大型场景模拟太阳投射的阴影。在一张阴影贴图中捕捉所有对象需要阴影贴图具有非常高的分辨率。使用多张阴影贴图就可以解决这个问题,对于近处的场景使用较高分辨率的阴影贴图,对于远处的场景使用粗糙的阴影贴图,在两张阴影贴图过渡的地方选择其中一张使用。因为远处的对象只占画面的很少一部分像素,而近处的对象占据了画面的很大一部分,进行这样的处理显然非常合理。
因此,级联阴影贴图是一种使用多个阴影贴图来进一步提高实时渲染阴影的分辨率的技术。它通过划分视锥体并创建一个轴对齐边界框来工作,该边界框用于创建光视图投影矩阵。
它的工作原理是:
- 计算视锥体(每个切片)的各个角点在世界空间中的位置
- 求出角点到切片中心的最大距离,作为包围球的半径
- 以该半径的包围球为基础构造一个 AABB(轴对齐包围盒)
- 使用该 AABB 创建正交投影
下图说明了上述过程:
二、C++端实现
2.1 深度贴图创建
首先,我们需要创建用来保存深度值的纹理。由于要使用多张阴影贴图,我们创建一个分层的深度图像(纹理数组),每一层对应一个级联;同时为每个级联各创建一个只含深度附件(不使用颜色附件)的帧缓冲区,把深度值写入纹理数组中对应的那一层:
/*
设置深度通道使用的资源,深度图像是分层的,每一层存储一个阴影映射级联
*/
void prepareDepthPass()
{
VkFormat depthFormat = vulkanDevice->getSupportedDepthFormat(true);
/*
深度图创建
*/
VkAttachmentDescription attachmentDescription{};
attachmentDescription.format = depthFormat;
attachmentDescription.samples = VK_SAMPLE_COUNT_1_BIT;
attachmentDescription.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachmentDescription.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachmentDescription.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachmentDescription.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachmentDescription.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachmentDescription.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
VkAttachmentReference depthReference = {};
depthReference.attachment = 0;
depthReference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
VkSubpassDescription subpass = {};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = 0;
subpass.pDepthStencilAttachment = &depthReference;
// 使用子传递依赖项进行布局转换
std::array<VkSubpassDependency, 2> dependencies;
dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL;
dependencies[0].dstSubpass = 0;
dependencies[0].srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
dependencies[0].dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
dependencies[0].srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
dependencies[0].dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
dependencies[1].srcSubpass = 0;
dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL;
dependencies[1].srcStageMask = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
dependencies[1].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
dependencies[1].srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
dependencies[1].dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
VkRenderPassCreateInfo renderPassCreateInfo = vks::initializers::renderPassCreateInfo();
renderPassCreateInfo.attachmentCount = 1;
renderPassCreateInfo.pAttachments = &attachmentDescription;
renderPassCreateInfo.subpassCount = 1;
renderPassCreateInfo.pSubpasses = &subpass;
renderPassCreateInfo.dependencyCount = static_cast<uint32_t>(dependencies.size());
renderPassCreateInfo.pDependencies = dependencies.data();
VK_CHECK_RESULT(vkCreateRenderPass(device, &renderPassCreateInfo, nullptr, &depthPass.renderPass));
/*
分层深度图像和视图
*/
VkImageCreateInfo imageInfo = vks::initializers::imageCreateInfo();
imageInfo.imageType = VK_IMAGE_TYPE_2D;
imageInfo.extent.width = SHADOWMAP_DIM;
imageInfo.extent.height = SHADOWMAP_DIM;
imageInfo.extent.depth = 1;
imageInfo.mipLevels = 1;
imageInfo.arrayLayers = SHADOW_MAP_CASCADE_COUNT;
imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imageInfo.format = depthFormat;
imageInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
VK_CHECK_RESULT(vkCreateImage(device, &imageInfo, nullptr, &depth.image));
VkMemoryAllocateInfo memAlloc = vks::initializers::memoryAllocateInfo();
VkMemoryRequirements memReqs;
vkGetImageMemoryRequirements(device, depth.image, &memReqs);
memAlloc.allocationSize = memReqs.size;
memAlloc.memoryTypeIndex = vulkanDevice->getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
VK_CHECK_RESULT(vkAllocateMemory(device, &memAlloc, nullptr, &depth.mem));
VK_CHECK_RESULT(vkBindImageMemory(device, depth.image, depth.mem, 0));
//所有深度图图层视图
VkImageViewCreateInfo viewInfo = vks::initializers::imageViewCreateInfo();
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
viewInfo.format = depthFormat;
viewInfo.subresourceRange = {};
viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
viewInfo.subresourceRange.baseMipLevel = 0;
viewInfo.subresourceRange.levelCount = 1;
viewInfo.subresourceRange.baseArrayLayer = 0;
viewInfo.subresourceRange.layerCount = SHADOW_MAP_CASCADE_COUNT;
viewInfo.image = depth.image;
VK_CHECK_RESULT(vkCreateImageView(device, &viewInfo, nullptr, &depth.view));
// One image and framebuffer per cascade
// 每个级联一个贴图和帧缓冲
for (uint32_t i = 0; i < SHADOW_MAP_CASCADE_COUNT; i++) {
//图层的图像视图(在深度贴图里面)这个视图用于渲染特定深度的图像层
VkImageViewCreateInfo viewInfo = vks::initializers::imageViewCreateInfo();
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
viewInfo.format = depthFormat;
viewInfo.subresourceRange = {};
viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
viewInfo.subresourceRange.baseMipLevel = 0;
viewInfo.subresourceRange.levelCount = 1;
viewInfo.subresourceRange.baseArrayLayer = i;
viewInfo.subresourceRange.layerCount = 1;
viewInfo.image = depth.image;
VK_CHECK_RESULT(vkCreateImageView(device, &viewInfo, nullptr, &cascades[i].view));
// Framebuffer
VkFramebufferCreateInfo framebufferInfo = vks::initializers::framebufferCreateInfo();
framebufferInfo.renderPass = depthPass.renderPass;
framebufferInfo.attachmentCount = 1;
framebufferInfo.pAttachments = &cascades[i].view;
framebufferInfo.width = SHADOWMAP_DIM;
framebufferInfo.height = SHADOWMAP_DIM;
framebufferInfo.layers = 1;
VK_CHECK_RESULT(vkCreateFramebuffer(device, &framebufferInfo, nullptr, &cascades[i].frameBuffer));
}
// Shared sampler for cascade depth reads
// 级联深度采样器
VkSamplerCreateInfo sampler = vks::initializers::samplerCreateInfo();
sampler.magFilter = VK_FILTER_LINEAR;
sampler.minFilter = VK_FILTER_LINEAR;
sampler.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
sampler.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
sampler.addressModeV = sampler.addressModeU;
sampler.addressModeW = sampler.addressModeU;
sampler.mipLodBias = 0.0f;
sampler.maxAnisotropy = 1.0f;
sampler.minLod = 0.0f;
sampler.maxLod = 1.0f;
sampler.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
VK_CHECK_RESULT(vkCreateSampler(device, &sampler, nullptr, &depth.sampler));
}
2.2 视锥体拆分与矩阵设置
此部分为CSM算法核心:
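Nvidia 在《GPU Gems 3》中详细解释了计算分割距离的算法:https://developer.nvidia.com/gpugems/gpugems3 。该方法把对数分割与均匀分割两种方案按权重 λ(代码中的 cascadeSplitLambda)线性混合,得到折中的分割距离:$d_i = \lambda\, n\,(f/n)^{i/N} + (1-\lambda)\,\bigl(n + (f-n)\,i/N\bigr)$,其中 $n$、$f$ 分别为相机近、远裁剪面的距离,$N$ 为级联数量,$i = 1,\dots,N$。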
代码表示:
void updateCascades()
{
float cascadeSplits[SHADOW_MAP_CASCADE_COUNT];
float nearClip = camera.getNearClip();
float farClip = camera.getFarClip();
float clipRange = farClip - nearClip;
float minZ = nearClip;
float maxZ = nearClip + clipRange;
float range = maxZ - minZ;
float ratio = maxZ / minZ;
// 基于视图摄像机截锥体计算分割深度
// https://developer.nvidia.com/gpugems/GPUGems3/gpugems3_ch10.html
for (uint32_t i = 0; i < SHADOW_MAP_CASCADE_COUNT; i++) {
float p = (i + 1) / static_cast<float>(SHADOW_MAP_CASCADE_COUNT);
float log = minZ * std::pow(ratio, p);
float uniform = minZ + range * p;
float d = cascadeSplitLambda * (log - uniform) + uniform;
cascadeSplits[i] = (d - nearClip) / clipRange;
}
//一旦计算出分割距离,我们将创建正交矩阵:
//计算每个级联的正交投影矩阵
float lastSplitDist = 0.0;
for (uint32_t i = 0; i < SHADOW_MAP_CASCADE_COUNT; i++) {
float splitDist = cascadeSplits[i];
glm::vec3 frustumCorners[8] = {
glm::vec3(-1.0f, 1.0f, -1.0f),
glm::vec3( 1.0f, 1.0f, -1.0f),
glm::vec3( 1.0f, -1.0f, -1.0f),
glm::vec3(-1.0f, -1.0f, -1.0f),
glm::vec3(-1.0f, 1.0f, 1.0f),
glm::vec3( 1.0f, 1.0f, 1.0f),
glm::vec3( 1.0f, -1.0f, 1.0f),
glm::vec3(-1.0f, -1.0f, 1.0f),
};
// Project frustum corners into world space 将截角投影到世界空间中
glm::mat4 invCam = glm::inverse(camera.matrices.perspective * camera.matrices.view);
for (uint32_t i = 0; i < 8; i++) {
glm::vec4 invCorner = invCam * glm::vec4(frustumCorners[i], 1.0f);
frustumCorners[i] = invCorner / invCorner.w;
}
for (uint32_t i = 0; i < 4; i++) {
glm::vec3 dist = frustumCorners[i + 4] - frustumCorners[i];
frustumCorners[i + 4] = frustumCorners[i] + (dist * splitDist);
frustumCorners[i] = frustumCorners[i] + (dist * lastSplitDist);
}
// Get frustum center 得到平截头体中心
glm::vec3 frustumCenter = glm::vec3(0.0f);
for (uint32_t i = 0; i < 8; i++) {
frustumCenter += frustumCorners[i];
}
frustumCenter /= 8.0f;
float radius = 0.0f;
for (uint32_t i = 0; i < 8; i++) {
float distance = glm::length(frustumCorners[i] - frustumCenter);
radius = glm::max(radius, distance);
}
radius = std::ceil(radius * 16.0f) / 16.0f;
glm::vec3 maxExtents = glm::vec3(radius);
glm::vec3 minExtents = -maxExtents;
//我们首先将我们的归一化设备坐标乘以我们的 inverseViewProjection 矩阵以获得世界空间中的截锥角。一旦我们有了角,我们就可以在近角和对应的远角之间创建一条射线,对其进行归一化,然后将其乘以新的长度,然后我们之前的长度就成为下一个分区的起点。然后我们得到这个切片的最长半径,并将其用作我们 AABB 的基础。
glm::vec3 lightDir = normalize(-lightPos);
glm::mat4 lightViewMatrix = glm::lookAt(frustumCenter - lightDir * -minExtents.z, frustumCenter, glm::vec3(0.0f, 1.0f, 0.0f));
glm::mat4 lightOrthoMatrix = glm::ortho(minExtents.x, maxExtents.x, minExtents.y, maxExtents.y, 0.0f, maxExtents.z - minExtents.z);
//我们使用位于视锥体中心的任意光方向设置我们的视图矩阵。使用 Y 轴作为向上向量。为了避免光线闪烁,我们需要创建一个舍入矩阵,以便我们以纹素大小的增量移动。你可以把它想象成找出我们需要多少移动正交矩阵,以便它与阴影贴图匹配,它是这样完成的:
// 将分离距离和矩阵级联存储
cascades[i].splitDepth = (camera.getNearClip() + splitDist * clipRange) * -1.0f;
cascades[i].viewProjMatrix = lightOrthoMatrix * lightViewMatrix;
lastSplitDist = cascadeSplits[i];
}
}
三、着色器实现
首先我们需要获取不同的深度贴图:
3.1 depth pass
顶点着色器:
#version 450

// Depth-pass vertex shader: offsets the vertex by the push-constant position and
// transforms it with the view-projection matrix of the cascade being rendered.

#define SHADOW_MAP_CASCADE_COUNT 4

layout (location = 0) in vec3 inPos;
layout (location = 1) in vec2 inUV;

layout (location = 0) out vec2 outUV;

layout(push_constant) uniform PushConsts {
    vec4 position;
    uint cascadeIndex;
} pushConsts;

layout (binding = 0) uniform UBO {
    mat4 cascadeViewProjMat[SHADOW_MAP_CASCADE_COUNT];
} ubo;

out gl_PerVertex {
    vec4 gl_Position;
};

void main()
{
    // The texture coordinate is passed through unchanged.
    outUV = inUV;

    // Pick the matrix of the cascade selected by the push constant.
    mat4 cascadeViewProj = ubo.cascadeViewProjMat[pushConsts.cascadeIndex];
    gl_Position = cascadeViewProj * vec4(inPos + pushConsts.position.xyz, 1.0);
}
片元着色器:
#version 450

// Depth-pass fragment shader: writes no color output. Fragments whose color-map
// alpha is below 0.5 are discarded.

layout (set = 1, binding = 0) uniform sampler2D colorMap;

layout (location = 0) in vec2 inUV;

void main()
{
    if (texture(colorMap, inUV).a < 0.5) {
        discard;
    }
}
此阶段只是把场景深度渲染到深度贴图中,与常规的单张 Shadow Map 做法相同;唯一的不同在于第一步创建深度图像时,需要把数组层数设置为级联数量:
...
VkImageCreateInfo imageInfo = vks::initializers::imageCreateInfo();
// One array layer per shadow cascade (SHADOW_MAP_CASCADE_COUNT is 4 in this example).
imageInfo.arrayLayers = SHADOW_MAP_CASCADE_COUNT;
...
因此在生成深度贴图时,需要对每个级联循环执行一次渲染通道:
...
for (uint32_t j = 0; j < SHADOW_MAP_CASCADE_COUNT; j++)
{
renderPassBeginInfo.framebuffer = cascades[j].frameBuffer;
vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
vkCmdBindPipeline(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, depthPass.pipeline);
renderScene(drawCmdBuffers[i], depthPass.pipelineLayout, cascades[j].descriptorSet, j);
vkCmdEndRenderPass(drawCmdBuffers[i]);
}
...
3.2 csm pass
本阶段,根据片元所在位置确定使用哪一级阴影贴图。
顶点着色器:
#version 450

// Scene vertex shader: forwards normal, color and UV, and outputs the offset
// vertex position both as-is (outPos) and transformed by the view matrix
// (outViewPos) for use in the fragment stage.

layout (location = 0) in vec3 inPos;
layout (location = 1) in vec2 inUV;
layout (location = 2) in vec3 inColor;
layout (location = 3) in vec3 inNormal;

layout (location = 0) out vec3 outNormal;
layout (location = 1) out vec3 outColor;
layout (location = 2) out vec3 outViewPos;
layout (location = 3) out vec3 outPos;
layout (location = 4) out vec2 outUV;

layout (binding = 0) uniform UBO {
    mat4 projection;
    mat4 view;
    mat4 model;
} ubo;

layout(push_constant) uniform PushConsts {
    vec4 position;
    uint cascadeIndex;
} pushConsts;

out gl_PerVertex {
    vec4 gl_Position;
};

void main()
{
    // Vertex position shifted by the per-draw offset from the push constants.
    vec3 shiftedPos = inPos + pushConsts.position.xyz;
    vec4 shiftedPos4 = vec4(shiftedPos, 1.0);

    // Attributes passed through unchanged.
    outNormal = inNormal;
    outColor = inColor;
    outUV = inUV;

    // NOTE(review): outPos and outViewPos do not apply ubo.model, while
    // gl_Position does; they only agree if model is the identity. Confirm.
    outPos = shiftedPos;
    outViewPos = (ubo.view * shiftedPos4).xyz;

    gl_Position = ubo.projection * ubo.view * ubo.model * shiftedPos4;
}
片元着色器:
#version 450

// Scene fragment shader: selects a shadow cascade from the fragment's
// view-space depth, looks the fragment up in that cascade's layer of the
// shadow map array (optionally with 3x3 PCF), and applies directional lighting.

#define SHADOW_MAP_CASCADE_COUNT 4

layout (set = 0, binding = 1) uniform sampler2DArray shadowMap;
layout (set = 1, binding = 0) uniform sampler2D colorMap;

layout (location = 0) in vec3 inNormal;
layout (location = 1) in vec3 inColor;
layout (location = 2) in vec3 inViewPos;
layout (location = 3) in vec3 inPos;
layout (location = 4) in vec2 inUV;

// Specialization constant: 1 enables percentage-closer filtering.
layout (constant_id = 0) const int enablePCF = 0;

layout (location = 0) out vec4 outFragColor;

// Shadow factor of a shadowed sample; also the lower bound of the diffuse term.
#define ambient 0.3

layout (set = 0, binding = 2) uniform UBO {
    vec4 cascadeSplits;
    mat4 cascadeViewProjMat[SHADOW_MAP_CASCADE_COUNT];
    mat4 inverseViewMat;
    vec3 lightDir;
    float _pad;
    int colorCascades;
} ubo;

// Maps clip-space xy from [-1, 1] to [0, 1] texture coordinates; z is left unchanged.
const mat4 biasMat = mat4(
    0.5, 0.0, 0.0, 0.0,
    0.0, 0.5, 0.0, 0.0,
    0.0, 0.0, 1.0, 0.0,
    0.5, 0.5, 0.0, 1.0
);

// Single shadow-map lookup.
// shadowCoord  - shadow coordinate (callers pass it already divided by w)
// offset       - texture-coordinate offset added to shadowCoord.st
// cascadeIndex - layer of the shadow map array to sample
// Returns 1.0 when lit and `ambient` when the stored depth is closer than the
// fragment depth minus a fixed bias.
float textureProj(vec4 shadowCoord, vec2 offset, uint cascadeIndex)
{
    float shadow = 1.0;
    float bias = 0.005;

    // Only fragments whose depth lies strictly between -1 and 1 are tested.
    if ( shadowCoord.z > -1.0 && shadowCoord.z < 1.0 ) {
        float dist = texture(shadowMap, vec3(shadowCoord.st + offset, cascadeIndex)).r;
        if (shadowCoord.w > 0 && dist < shadowCoord.z - bias) {
            shadow = ambient;
        }
    }
    return shadow;
}

// Percentage-closer filtering: averages textureProj over a 3x3 neighbourhood
// whose step is 0.75 of a texel of the shadow map.
float filterPCF(vec4 sc, uint cascadeIndex)
{
    ivec2 texDim = textureSize(shadowMap, 0).xy;
    float scale = 0.75;
    float dx = scale * 1.0 / float(texDim.x);
    float dy = scale * 1.0 / float(texDim.y);

    float shadowFactor = 0.0;
    int count = 0;
    int range = 1;

    for (int x = -range; x <= range; x++) {
        for (int y = -range; y <= range; y++) {
            shadowFactor += textureProj(sc, vec2(dx*x, dy*y), cascadeIndex);
            count++;
        }
    }
    return shadowFactor / count;
}

void main()
{
    // Alpha test against the color map.
    vec4 color = texture(colorMap, inUV);
    if (color.a < 0.5)
        discard;

    // Get the cascade index for the current fragment's view position.
    // The loop does not break, so the last split the fragment lies beyond wins.
    uint cascadeIndex = 0;
    for(uint i = 0; i < SHADOW_MAP_CASCADE_COUNT - 1; ++i) {
        if(inViewPos.z < ubo.cascadeSplits[i]) {
            cascadeIndex = i + 1;
        }
    }

    // Depth compare for shadowing, using the selected cascade's matrix.
    vec4 shadowCoord = (biasMat * ubo.cascadeViewProjMat[cascadeIndex]) * vec4(inPos, 1.0);

    float shadow = 0.0;
    if (enablePCF == 1) {
        shadow = filterPCF(shadowCoord / shadowCoord.w, cascadeIndex);
    } else {
        shadow = textureProj(shadowCoord / shadowCoord.w, vec2(0.0), cascadeIndex);
    }

    // Directional light: diffuse term clamped from below by the ambient factor.
    vec3 N = normalize(inNormal);
    vec3 L = normalize(-ubo.lightDir);
    float diffuse = max(dot(N, L), ambient);
    vec3 lightColor = vec3(1.0);
    outFragColor.rgb = max(lightColor * (diffuse * color.rgb), vec3(0.0));
    outFragColor.rgb *= shadow;
    outFragColor.a = color.a;

    // Debug view: tint the output with a distinct color per cascade.
    if (ubo.colorCascades == 1) {
        switch(cascadeIndex) {
            case 0 :
                outFragColor.rgb *= vec3(1.0f, 0.25f, 0.25f);
                break;
            case 1 :
                outFragColor.rgb *= vec3(0.25f, 1.0f, 0.25f);
                break;
            case 2 :
                outFragColor.rgb *= vec3(0.25f, 0.25f, 1.0f);
                break;
            case 3 :
                outFragColor.rgb *= vec3(1.0f, 1.0f, 0.25f);
                break;
        }
    }
}
其中片元着色器的判断尤为重要:
...
// Get the cascade index for the current fragment's view position.
// Each split the fragment's view-space z lies below moves the index to the
// next cascade; the loop does not break, so the last matching split wins.
uint cascadeIndex = 0;
for(uint i = 0; i < SHADOW_MAP_CASCADE_COUNT - 1; ++i) {
if(inViewPos.z < ubo.cascadeSplits[i]) {
cascadeIndex = i + 1;
}
}
...
在选择正确的纹理后,您可以按照通常的方式进行阴影计算。