对比时间多帧深度 ,位置,法向量差距
进行周围像素7X7加权
然后5X5 滤波
3X3高斯模糊
时间积累
为了重用前一帧中的样本,我们将每个像素样本重新投影到其先前帧,并计算其屏幕空间坐标。这项工作按以下步骤完成:
在G-缓冲区中查找当前帧的世界空间位置。
使用存储的摄像机视图矩阵将当前世界空间转换为以前的剪辑空间。
使用透视投影将以前的剪辑空间转换为以前的屏幕空间。
对于每个重投影样本,我们通过比较当前和以前的G-缓冲区数据(法线、位置、对象ID)来检验其一致性。如果测试拒绝,则丢弃相应像素的颜色和瞬间历史,并将历史长度设置为零。
空间滤波
空间滤波采用à-trous小波变换。如下图所示,随着迭代次数增加,à-trous小波逐级扩大滤波核的覆盖范围,但核中非零元素的数量保持不变:每次迭代只是在非零核权重之间插入更多零点,因此计算量不会随核尺寸二次增长。
// NOTE: A byte-identical duplicate of the ATrousFilter kernel previously lived
// here, ABOVE the #include block — so glm, GBufferTexel and the CUDA headers
// were not yet in scope, and it redefined the kernel declared later in this
// file (a compile error on both counts). The duplicate has been removed; the
// single authoritative definition of ATrousFilter appears after the includes.
#include <cstdio>
#include <cuda.h>
#include <cmath>
#include <thrust/execution_policy.h>
#include <thrust/random.h>
#include <thrust/remove.h>
#include <thrust/partition.h>
#include <chrono>
#include "denoise.h"
#include "main.h"
// ---- CPU-side state ----
// (BUG FIX: the bare tokens `CPU` / `GPU` were garbled section comments and
// did not compile; restored as comments.)
static 场景 * hst_场景 = NULL;
static glm::mat4 view_matrix_prev; // previous frame's view matrix (CPU copy)
// ---- GPU-side buffers (one entry per pixel unless noted) ----
static glm::vec2 * dev_moment_history = NULL;   // luminance moment history (mu1, mu2)
static glm::vec3 * dev_color_history = NULL;    // color history
static glm::vec3 * dev_颜色积累 = NULL;          // accumulated color (history + current frame)
static glm::vec2 * dev_moment_acc = NULL;       // accumulated moments (history + current frame)
static int * dev_发展历史长度 = NULL;            // history length before back-projection
static int * dev_发展历史长度更新 = NULL;        // history length after back-projection
static GBufferTexel * dev_gbuffer_prev = NULL;  // previous frame's G-buffer (for reprojection tests)
static float * dev_发展方差 = NULL;              // per-pixel variance estimate
static glm::vec3 * dev_temp[2] = { NULL };      // ping-pong buffers for the a-trous wavelet passes
///
// Allocates (and zero-initializes where required) every device buffer the
// denoiser needs, sized to the scene camera's resolution. Must be called once
// before denoise(); pair with denoiseFree().
void denoiseInit(场景 *场景) {
    hst_场景 = 场景;
    const 摄像机 &cam = hst_场景->state.摄像机;
    const int 像素数 = cam.resolution.x * cam.resolution.y;
    const size_t vec3Bytes = 像素数 * sizeof(glm::vec3);
    const size_t vec2Bytes = 像素数 * sizeof(glm::vec2);
    const size_t intBytes  = 像素数 * sizeof(int);

    // Temporary ping-pong buffers used by the iterated a-trous filter.
    cudaMalloc(&dev_temp[0], vec3Bytes);
    cudaMalloc(&dev_temp[1], vec3Bytes);

    // Per-pixel history length; zeroed so the first frame starts with no history.
    cudaMalloc(&dev_发展历史长度, intBytes);
    cudaMemset(dev_发展历史长度, 0, intBytes);
    cudaMalloc(&dev_发展历史长度更新, intBytes);

    // Moment history and moment accumulation (history + current -> accumulation).
    cudaMalloc(&dev_moment_history, vec2Bytes);
    cudaMemset(dev_moment_history, 0, vec2Bytes);
    cudaMalloc(&dev_moment_acc, vec2Bytes);
    cudaMemset(dev_moment_acc, 0, vec2Bytes);

    // Color history and color accumulation (history + current -> accumulation).
    // Not zeroed: the reprojection kernel never reads history while the
    // per-pixel history length is still zero.
    cudaMalloc(&dev_color_history, vec3Bytes);
    cudaMalloc(&dev_颜色积累, vec3Bytes);

    // Copy of the previous frame's G-buffer, used by the reprojection tests.
    cudaMalloc(&dev_gbuffer_prev, 像素数 * sizeof(GBufferTexel));

    // Per-pixel variance estimate, zero-initialized.
    cudaMalloc(&dev_发展方差, 像素数 * sizeof(float));
    cudaMemset(dev_发展方差, 0, 像素数 * sizeof(float));
}
// Releases every device buffer allocated by denoiseInit().
// BUG FIX: the original called `cuda释放`, a machine-translation artifact that
// is not a CUDA API; the correct call is cudaFree. cudaFree(NULL) is a no-op,
// so this is safe even if denoiseInit() was never called.
void denoiseFree() {
    cudaFree(dev_temp[0]);
    cudaFree(dev_temp[1]);
    cudaFree(dev_发展历史长度);
    cudaFree(dev_发展历史长度更新);
    cudaFree(dev_moment_acc);
    cudaFree(dev_颜色积累);
    cudaFree(dev_moment_history);
    cudaFree(dev_color_history);
    cudaFree(dev_gbuffer_prev);
    cudaFree(dev_发展方差);
}
// One a-trous wavelet filtering pass (the SVGF spatial filter), one thread per pixel.
//   colorin  : input color buffer
//   着色     : output (filtered) color buffer
//   方差     : per-pixel variance; read for the edge-stopping weight and
//              overwritten with the filtered variance
//   gBuffer  : current-frame G-buffer (position / normal / albedo)
//   res      : image resolution
//   level    : iteration index; taps are dilated by step = 2^level
//   is_last  : true on the final iteration
//   sigma_c / sigma_n / sigma_x : edge-stopping sensitivities for
//              luminance / normal / position
//   blur_方差: pre-blur the variance with a 3x3 Gaussian before use
//   addcolor : on the last pass, re-modulate the output by albedo
// NOTE(review): this kernel reads neighbors' 方差[q] while other threads write
// 方差[p] in the same launch — an in-launch read/write race inherited from the
// original; consider a separate variance output buffer. TODO confirm intent.
__global__ void ATrousFilter(glm::vec3 * colorin, glm::vec3 * 着色, float * 方差,
                             GBufferTexel * gBuffer, glm::ivec2 res, int level, bool is_last,
                             float sigma_c, float sigma_n, float sigma_x, bool blur_方差, bool addcolor)
{
    // 5x5 a-trous kernel: outer product of the B3-spline taps [1/16, 1/4, 3/8, 1/4, 1/16].
    // (BUG FIX: literals carry the `f` suffix now; the originals were double
    // and forced double-precision arithmetic inside a float kernel.)
    const float h[25] = { 1.0f / 256.0f, 1.0f / 64.0f, 3.0f / 128.0f, 1.0f / 64.0f, 1.0f / 256.0f,
                          1.0f / 64.0f,  1.0f / 16.0f, 3.0f / 32.0f,  1.0f / 16.0f, 1.0f / 64.0f,
                          3.0f / 128.0f, 3.0f / 32.0f, 9.0f / 64.0f,  3.0f / 32.0f, 3.0f / 128.0f,
                          1.0f / 64.0f,  1.0f / 16.0f, 3.0f / 32.0f,  1.0f / 16.0f, 1.0f / 64.0f,
                          1.0f / 256.0f, 1.0f / 64.0f, 3.0f / 128.0f, 1.0f / 64.0f, 1.0f / 256.0f };
    // 3x3 Gaussian kernel used to pre-blur the variance.
    const float gaussian[9] = { 1.0f / 16.0f, 1.0f / 8.0f, 1.0f / 16.0f,
                                1.0f / 8.0f,  1.0f / 4.0f, 1.0f / 8.0f,
                                1.0f / 16.0f, 1.0f / 8.0f, 1.0f / 16.0f };
    int x = (blockIdx.x * blockDim.x) + threadIdx.x;
    int y = (blockIdx.y * blockDim.y) + threadIdx.y;
    if (x >= res.x || y >= res.y) return;
    int p = x + y * res.x;
    int step = 1 << level; // tap dilation for this wavelet level

    // --- Variance driving the luminance edge-stopping weight ---
    float var;
    if (blur_方差) {
        // 3x3 Gaussian blur of the variance; renormalize at image borders.
        float sum = 0.0f;
        float sumw = 0.0f;
        for (int dy = -1; dy <= 1; dy++) {
            for (int dx = -1; dx <= 1; dx++) {
                int lx = x + dx;
                int ly = y + dy;
                if (lx >= 0 && ly >= 0 && lx < res.x && ly < res.y) {
                    float wg = gaussian[(dx + 1) + (dy + 1) * 3];
                    sum += wg * 方差[lx + ly * res.x];
                    sumw += wg;
                }
            }
        }
        var = max(sum / sumw, 0.0f); // center tap is always in-bounds, so sumw > 0
    }
    else {
        var = max(方差[p], 0.0f);
    }

    // --- Center pixel data ---
    // Rec.709 luminance of the center pixel.
    float lp = 0.2126f * colorin[p].x + 0.7152f * colorin[p].y + 0.0722f * colorin[p].z;
    glm::vec3 pp = gBuffer[p].位置;
    glm::vec3 np = gBuffer[p].法向;
    glm::vec3 color_sum = glm::vec3(0.0f);
    float variance_sum = 0.0f;
    float weight_sum = 0.0f;
    float weight_sq_sum = 0.0f;

    // --- 5x5 dilated neighborhood ---
    for (int i = -2; i <= 2; i++) {
        for (int j = -2; j <= 2; j++) {
            int xq = x + step * i;
            int yq = y + step * j;
            if (xq < 0 || xq >= res.x || yq < 0 || yq >= res.y) continue;
            int q = xq + yq * res.x;
            float lq = 0.2126f * colorin[q].x + 0.7152f * colorin[q].y + 0.0722f * colorin[q].z;
            glm::vec3 pq = gBuffer[q].位置;
            glm::vec3 nq = gBuffer[q].法向;
            // Edge-stopping weights (luminance / normal / position).
            // BUG FIX: `glm::距离` is not a glm symbol; use fabsf for the scalar
            // luminance difference and glm::distance for the vectors. Also
            // sqrt -> sqrtf to stay in single precision.
            float wl = expf(-fabsf(lp - lq) / (sqrtf(var) * sigma_c + 1e-6f));
            float wn = min(1.0f, expf(-glm::distance(np, nq) / (sigma_n + 1e-6f)));
            float wx = min(1.0f, expf(-glm::distance(pp, pq) / (sigma_x + 1e-6f)));
            // Kernel weight for this tap.
            int k = (2 + i) + (2 + j) * 5;
            float weight = h[k] * wl * wn * wx;
            weight_sum += weight;
            weight_sq_sum += weight * weight;
            color_sum += (colorin[q] * weight);
            variance_sum += (方差[q] * weight * weight); // variance propagates with w^2
        }
    }

    // --- Normalize and write out ---
    if (weight_sum > 1e-5f) { // same threshold as the original's 10e-6
        着色[p] = color_sum / weight_sum;
        方差[p] = variance_sum / weight_sq_sum;
    }
    else {
        着色[p] = colorin[p]; // degenerate weights: pass the input through
    }
    if (is_last && addcolor) {
        // Final pass: re-apply the (demodulated) albedo.
        着色[p] *= gBuffer[p].albedo * gBuffer[p].ialbedo;
    }
}
// Reprojection consistency test: returns true when the previous-frame pixel at
// `prev_coord` may supply temporal history for the current pixel at `curr_coord`.
// Coordinates arrive as vec2 but are used as integer pixel indices (truncated).
__device__ bool isReprj有效(glm::ivec2 res, glm::vec2 curr_coord, glm::vec2 prev_coord, GBufferTexel * curr_gbuffer, GBufferTexel * prev_gbuffer) {
    // Reject if the reprojected pixel lies off-screen (checked before indexing).
    if (prev_coord.x < 0 || prev_coord.x >= res.x || prev_coord.y < 0 || prev_coord.y >= res.y) return false;
    int p = (int)curr_coord.x + (int)curr_coord.y * res.x;
    int q = (int)prev_coord.x + (int)prev_coord.y * res.x;
    // Reject if the previous pixel hit nothing or a different object.
    if (prev_gbuffer[q].geomId == -1 || prev_gbuffer[q].geomId != curr_gbuffer[p].geomId) return false;
    // Reject if the surface normals deviate too much.
    // BUG FIX: the unqualified `距离(...)` does not exist; use glm::distance.
    if (glm::distance(prev_gbuffer[q].法向, curr_gbuffer[p].法向) > 1e-1f) return false;
    return true;
}
// Temporal back-projection (SVGF temporal accumulation), one thread per pixel:
// reprojects each pixel into the previous frame, validates the history with
// G-buffer consistency tests (bilinear 2x2 taps, then a 3x3 fallback search),
// and exponentially blends color and luminance moments over time. Writes the
// per-pixel luminance variance derived from the accumulated moments.
__global__ void 反投影(float * variacne_out, int * 历史长度, int * 历史长度更新, glm::vec2 * moment_history, glm::vec3 * color_history, glm::vec2 * moment_acc, glm::vec3 * 颜色积累,
                       glm::vec3 * 现在颜色, GBufferTexel * current_gbuffer, GBufferTexel * prev_gbuffer, glm::mat4 prev_viewmat, glm::ivec2 res,
                       float color_alpha_min, float moment_alpha_min)
{
    int x = (blockIdx.x * blockDim.x) + threadIdx.x;
    int y = (blockIdx.y * blockDim.y) + threadIdx.y;
    if (x >= res.x || y >= res.y) return;
    int p = x + y * res.x;
    int N = 历史长度[p];
    glm::vec3 sample = 现在颜色[p];
    // Rec.709 luminance of this frame's sample.
    float luminance = 0.2126f * sample.x + 0.7152f * sample.y + 0.0722f * sample.z;
    if (N > 0 && current_gbuffer[p].geomId != -1) {
        // --- Screen coordinates of this pixel in the previous frame ---
        // (BUG FIX: two stray '/' lines here were syntax errors; restored as comments.)
        // NOTE(review): assumes a 90-degree FOV (the tanf(PI/4) factor is 1); confirm.
        glm::vec4 viewspace_位置 = prev_viewmat * glm::vec4(current_gbuffer[p].位置, 1.0f);
        float clipx = viewspace_位置.x / viewspace_位置.z /** tanf(PI / 4)*/;
        float clipy = viewspace_位置.y / viewspace_位置.z /** tanf(PI / 4)*/;
        float ndcx = -clipx * 0.5f + 0.5f;
        float ndcy = -clipy * 0.5f + 0.5f;
        float prevx = ndcx * res.x - 0.5f;
        float prevy = ndcy * res.y - 0.5f;
        // --- 2x2-tap bilinear resampling of the history buffers ---
        bool v[4];
        float floorx = floorf(prevx);
        float floory = floorf(prevy);
        float fracx = prevx - floorx;
        float fracy = prevy - floory;
        bool valid = (floorx >= 0 && floory >= 0 && floorx < res.x && floory < res.y);
        glm::ivec2 offsets[4] = { glm::ivec2(0,0), glm::ivec2(1,0), glm::ivec2(0,1), glm::ivec2(1,1) };
        // All four taps must individually pass the consistency test.
        for (int sampleIdx = 0; sampleIdx < 4; sampleIdx++) {
            glm::ivec2 loc = glm::ivec2(floorx, floory) + offsets[sampleIdx];
            v[sampleIdx] = isReprj有效(res, glm::ivec2(x, y), loc, current_gbuffer, prev_gbuffer);
            valid = valid && v[sampleIdx];
        }
        glm::vec3 prevColor = glm::vec3(0.0f);
        glm::vec2 prevMoments = glm::vec2(0.0f);
        float prevHistoryLength = 0.0f;
        if (valid) {
            // Bilinear interpolation over the valid taps.
            float sumw = 0.0f;
            float w[4] = { (1 - fracx) * (1 - fracy),
                           fracx * (1 - fracy),
                           (1 - fracx) * fracy,
                           fracx * fracy };
            for (int sampleIdx = 0; sampleIdx < 4; sampleIdx++) {
                glm::ivec2 loc = glm::ivec2(floorx, floory) + offsets[sampleIdx];
                int locq = loc.x + loc.y * res.x;
                if (v[sampleIdx]) {
                    prevColor += w[sampleIdx] * color_history[locq];
                    prevMoments += w[sampleIdx] * moment_history[locq];
                    prevHistoryLength += w[sampleIdx] * (float)历史长度[locq];
                    sumw += w[sampleIdx];
                }
            }
            // Renormalize; guard against a degenerate weight sum (all taps valid
            // implies sumw == 1, so the else branch is defensive only).
            if (sumw >= 0.01f) {
                prevColor /= sumw;
                prevMoments /= sumw;
                prevHistoryLength /= sumw;
            }
            else {
                valid = false;
            }
        }
        // --- Fallback: uniform 3x3 search around the reprojected position ---
        if (!valid) {
            float cnt = 0.0f;
            const int radius = 1;
            for (int yy = -radius; yy <= radius; yy++) {
                for (int xx = -radius; xx <= radius; xx++) {
                    glm::vec2 loc = glm::vec2(floorx, floory) + glm::vec2(xx, yy);
                    int q = loc.x + res.x * loc.y;
                    if (isReprj有效(res, glm::ivec2(x, y), loc, current_gbuffer, prev_gbuffer)) {
                        prevColor += color_history[q];
                        prevMoments += moment_history[q];
                        prevHistoryLength += 历史长度[q];
                        cnt += 1.0f;
                    }
                }
            }
            if (cnt > 0.0f) {
                prevColor /= cnt;
                prevMoments /= cnt;
                prevHistoryLength /= cnt;
                valid = true;
            }
        }
        if (valid) {
            // Blend weight: at least 1/(N+1) so early frames converge quickly,
            // clamped below by the user-supplied minimum alpha (controls fading).
            float color_alpha = max(1.0f / (float)(N + 1), color_alpha_min);
            float moment_alpha = max(1.0f / (float)(N + 1), moment_alpha_min);
            // Grow the history length.
            历史长度更新[p] = (int)prevHistoryLength + 1;
            // Exponential moving average: alpha weights the CURRENT frame.
            颜色积累[p] = 现在颜色[p] * color_alpha + prevColor * (1.0f - color_alpha);
            // BUG FIX: the moment blend was inverted relative to the color blend
            // (alpha weighted the history instead of the current sample). Made
            // consistent so moments fade at the same rate as color.
            float first_moment = (1.0f - moment_alpha) * prevMoments.x + moment_alpha * luminance;
            float second_moment = (1.0f - moment_alpha) * prevMoments.y + moment_alpha * luminance * luminance;
            moment_acc[p] = glm::vec2(first_moment, second_moment);
            // Variance = E[L^2] - E[L]^2, clamped to be non-negative.
            float variance = second_moment - first_moment * first_moment;
            variacne_out[p] = variance > 0.0f ? variance : 0.0f;
            return;
        }
    }
    // No usable history: restart accumulation from the current frame alone and
    // report a large variance so the spatial filter smooths aggressively.
    历史长度更新[p] = 1;
    颜色积累[p] = 现在颜色[p];
    moment_acc[p] = glm::vec2(luminance, luminance * luminance);
    variacne_out[p] = 100.0f;
}
// Spatial variance estimate used when temporal accumulation is disabled.
// Currently a placeholder: assigns a large constant variance to every pixel
// (the `color` buffer is accepted but not yet read).
__global__ void 估计方差(float * variacne, glm::vec3 * color, glm::vec2 res) {
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= res.x || y >= res.y) return;
    // TODO: estimate variance from a local neighborhood of `color`.
    variacne[x + y * res.x] = 10.0f;
}
// Debug visualization: writes each per-pixel scalar as a grayscale color,
// i.e. 着色[p] = vec3(value[p] / scale).
template <typename T>
__global__ void DebugView(glm::ivec2 res, glm::vec3 * 着色, T * value, float scale) {
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= res.x || y >= res.y) return;
    const int idx = x + y * res.x;
    着色[idx] = glm::vec3((float)value[idx] / scale);
}
// Builds the world-to-camera (view) matrix by assembling the camera-to-world
// basis (right / up / view / position as columns) and inverting it.
glm::mat4 GetViewMatrix(const 摄像机& cam) {
    glm::mat4 cameraToWorld(glm::vec4(cam.right, 0.f),
                            glm::vec4(cam.up, 0.f),
                            glm::vec4(cam.view, 0.f),
                            glm::vec4(cam.位置, 1.f));
    return glm::inverse(cameraToWorld);
}
// Runs one full denoising frame:
//  1) temporal back-projection & accumulation (or a variance fallback when disabled),
//  2) optional debug views of history length / variance,
//  3) iterated a-trous spatial filtering through ping-pong buffers,
//  4) saves this frame's G-buffer, moments, history length and view matrix
//     for next frame's reprojection.
// `output`, `input` and `gbuffer` are device buffers of
// cam.resolution.x * cam.resolution.y entries.
void denoise(glm::vec3 * output, glm::vec3 * input, GBufferTexel * gbuffer) {
const 摄像机 &cam = hst_场景->state.摄像机;
const int 像素数 = cam.resolution.x * cam.resolution.y;
// 2D launch configuration: 8x8 thread blocks tiling the full image.
const dim3 分块2d(8, 8);
const dim3 区块PerGrid2d(
(cam.resolution.x + 分块2d.x - 1) / 分块2d.x,
(cam.resolution.y + 分块2d.y - 1) / 分块2d.y);
/* Temporal accumulation / variance estimation */
// With temporal reuse disabled, alpha = 1 means "use only the current frame".
float color_alpha = ui_temporal_enable ? ui_color_alpha : 1.0f;
float moment_alpha = ui_temporal_enable ? ui_moment_alpha : 1.0f;
if (ui_temporal_enable) {
// Reproject last frame's history onto this frame, accumulate color/moments,
// then the accumulated color becomes the new color history.
反投影 << <区块PerGrid2d, 分块2d >> >(dev_发展方差, dev_发展历史长度, dev_发展历史长度更新, dev_moment_history, dev_color_history,
dev_moment_acc, dev_颜色积累, input, gbuffer, dev_gbuffer_prev, view_matrix_prev, cam.resolution,
color_alpha, moment_alpha);
cudaMemcpy(dev_color_history, dev_颜色积累, 像素数 * sizeof(glm::vec3), cudaMemcpyDeviceToDevice);
}
else {
// No temporal reuse: estimate variance spatially and treat the raw input as history.
// NOTE(review): 估计方差 is passed dev_颜色积累, which was not written this
// frame — harmless while the kernel ignores its `color` argument, but
// confirm the intended source buffer before implementing the TODO there.
估计方差 << <区块PerGrid2d, 分块2d >> >(dev_发展方差, dev_颜色积累, cam.resolution);
cudaMemcpy(dev_color_history, input, 像素数 * sizeof(glm::vec3), cudaMemcpyDeviceToDevice);
}
if (UI右视图选项 == 1) {
// Debug view 1: per-pixel history length (scaled down by 100).
DebugView << <区块PerGrid2d, 分块2d >> >(cam.resolution, output, dev_发展历史长度, 100.0f);
}
else if (UI右视图选项 == 2) {
// Debug view 2: per-pixel variance (scaled down by 0.1).
DebugView << <区块PerGrid2d, 分块2d >> >(cam.resolution, output, dev_发展方差, 0.1f);
}
else {
if (ui_atrous_nlevel == 0 || !UI空间启用) {
/* Spatial filtering disabled: pass the accumulated color straight through */
cudaMemcpy(output, dev_color_history, sizeof(glm::vec3) * 像素数, cudaMemcpyDeviceToDevice);
}
else {
/* Apply the iterated A-Trous filter, ping-ponging between temp buffers */
for (int level = 1; level <= ui_atrous_nlevel; level++) {
// Level 1 reads the accumulated history; later levels read the prior level's output.
glm::vec3* src = (level == 1) ? dev_color_history : dev_temp[level % 2];
// The final level writes directly to `output`; others alternate temp buffers.
glm::vec3* dst = (level == ui_atrous_nlevel) ? output : dev_temp[(level + 1) % 2];
ATrousFilter << <区块PerGrid2d, 分块2d >> >(src, dst, dev_发展方差, gbuffer, cam.resolution, level, (level == ui_atrous_nlevel),
ui_sigmal, ui_sigman, ui_sigmax, ui_blur方差, (ui_sepcolor && ui_addcolor));
// Optionally feed a chosen intermediate level back into the color history.
if (level == ui_history_level) cudaMemcpy(dev_color_history, dst, 像素数 * sizeof(glm::vec3), cudaMemcpyDeviceToDevice);
}
}
}
// Save this frame's state for next frame's reprojection.
cudaMemcpy(dev_gbuffer_prev, gbuffer, sizeof(GBufferTexel) * 像素数, cudaMemcpyDeviceToDevice);
cudaMemcpy(dev_moment_history, dev_moment_acc, sizeof(glm::vec2) * 像素数, cudaMemcpyDeviceToDevice);
cudaMemcpy(dev_发展历史长度, dev_发展历史长度更新, sizeof(int) * 像素数, cudaMemcpyDeviceToDevice);
view_matrix_prev = GetViewMatrix(cam);
cudaDeviceSynchronize();
}
总结:SVGF并非简单的逐帧高斯模糊。它每帧先做时间重投影与历史累积(按G-缓冲区一致性检验拒绝无效历史),再用方差引导的、保边的à-trous小波滤波进行空间降噪;3x3高斯模糊仅用于平滑方差估计,而不是直接模糊图像。