This article is going to talk about GaussBlur1D, which is an one dimensional kernel to convolve the image.
a horizontal (flag_dir=0)or vertical direction to convolve image is decided by the name flag_dir.
在文章中是分别做了水平方向上和竖直向上的倦积。由于高斯函数的可分离性,即是对图像进行高斯模糊。
/* 1D Convolve image with a Gaussian of width sigma and store result back
in image. This routine creates the Gaussian kernel, and then applies
it in horizontal (flag_dir=0) OR vertical directions (flag_dir!=0).
*/
void GaussianBlur1D(vector<float>& image, int width, int height, float sigma, int flag_dir)
{
float x, kernel[100], sum = 0.0;
int ksize, i;
/* The Gaussian kernel is truncated at GaussTruncate sigmas from
center. The kernel size should be odd.
GaussTruncate1=4.0
*/
ksize = (int)(2.0 * GaussTruncate1 * sigma + 1.0);
ksize = MAX(3, ksize); /* Kernel must be at least 3. */
if (ksize % 2 == 0) /* Make kernel size odd. */
ksize++;
assert(ksize < 100);
/* Fill in kernel values. */
for (i = 0; i <= ksize; i++)
{ x = i - ksize / 2;
kernel[i] = exp(- x * x / (2.0 * sigma * sigma));
sum += kernel[i]; }
/* Normalize kernel values to sum to 1.0. */
for (i = 0; i < ksize; i++)
kernel[i] /= sum;
if (flag_dir == 0)
{
ConvHorizontal(image, width, height, kernel, ksize);
}
else
{ ConvVertical(image, width, height, kernel, ksize); }
}
//Horizontal convolve
/* Convolve image with the 1-D kernel vector along image rows. This
is designed to be as efficient as possible. Pixels outside the
image are set to the value of the closest image pixel.
*/
void ConvHorizontal(vector<float>& image, int width, int height, float *kernel, int ksize)
{
int rows, cols, r, c, i, halfsize;
float buffer[4000];
vector<float> pixels(width*height);
rows = height;
cols = width;
halfsize = ksize / 2;
pixels = image;
assert(cols + ksize < 4000);
for (r = 0; r < rows; r++) {
/* Copy the row into buffer with pixels at ends replicated for
half the mask size. This avoids need to check for ends
within inner loop. */
for (i = 0; i < halfsize; i++)
buffer[i] = pixels[r*cols];
for (i = 0; i < cols; i++)
buffer[halfsize + i] = pixels[r*cols+i];
for (i = 0; i < halfsize; i++)
buffer[halfsize + cols + i] = pixels[r*cols+cols-1];
ConvBufferFast(buffer, kernel, cols, ksize);
for (c = 0; c < cols; c++)
pixels[r*cols+c] = buffer[c];
}
image = pixels;
}
convBufferFast
/* Same as ConvBuffer, but implemented with loop unrolling for increased
82 speed. This is the most time intensive routine in keypoint detection,
83 so deserves careful attention to efficiency. Loop unrolling simply
84 sums 5 multiplications at a time to allow the compiler to schedule
85 operations better and avoid loop overhead. This almost triples
86 speed of previous version on a Pentium with gcc.
87 */
88 void ConvBufferFast(float *buffer, float *kernel, int rsize, int ksize)
89 {
90 int i;
91 float *bp, *kp, *endkp;
92 float sum;
93
94 for (i = 0; i < rsize; i++) {
95 sum = 0.0;
96 bp = &buffer[i];
97 kp = &kernel[0];
98 endkp = &kernel[ksize];
99
100 /* Loop unrolling: do 5 multiplications at a time. */
101 // while (kp + 4 < endkp) {
102 // sum += (double) bp[0] * (double) kp[0] + (double) bp[1] * (double) kp[1] + (double) bp[2] * (double) kp[2] +
103 // (double) bp[3] * (double) kp[3] + (double) bp[4] * (double) kp[4];
104 // bp += 5;
105 // kp += 5;
106 // }
107 // /* Do 2 multiplications at a time on remaining items. */
108 // while (kp + 1 < endkp) {
109 // sum += (double) bp[0] * (double) kp[0] + (double) bp[1] * (double) kp[1];
110 // bp += 2;
111 // kp += 2;
112 // }
113 // /* Finish last one if needed. */
114 // if (kp < endkp) {
115 // sum += (double) *bp * (double) *kp;
116 // }
117
118 while (kp < endkp) {
119 sum += *bp++ * *kp++;
120 }
121
122 buffer[i] = sum;
123 }
124 }