双线性内插法进行图像旋转算法的C代码实现

原创已于 2022-12-09 18:34:12 修改 · 置顶 · 3k 阅读

5 ·

CC 4.0 BY-SA版权

文章标签：

#图像算法

于 2022-03-20 06:45:08 首次发布

嵌入式系统同时被 2 个专栏收录

354 篇文章

订阅专栏

Linux

217 篇文章

订阅专栏

这篇博客介绍了如何使用双线性内插法实现图像旋转，并通过C代码展示了具体实现过程。同时，讨论了利用GPU进行计算加速的原因及其在图像处理中的优势，包括矩阵乘法的并行计算特性。文章最后提到了图像转置操作的算法效果。

图像做rotate的数学原理如下，它等效于二维空间中的线性变换（旋转）.

原图：

代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/ioctl.h>
#include <math.h>

/*
 * Debug trace: prints the enclosing function name and the source line number,
 * then the printf-style message `fmt` and a newline, to stdout.
 * `fmt` must be a string literal because it is pasted between two literals.
 * `##__VA_ARGS__` is a GNU extension that drops the preceding comma when the
 * macro is invoked without extra arguments.
 */
#define DBG(fmt, ...)   do { printf("%s line %d, "fmt"\n", __func__, __LINE__, ##__VA_ARGS__); } while (0)

/*
 * 8-bit planar image. Sample (x, y) of channel c is stored at
 * data[c*h*w + y*w + x] (see get_pixel / set_pixel).
 */
typedef struct image {
    int w;                  /* width in pixels */
    int h;                  /* height in pixels */
    int c;                  /* number of channels (planes) */
    unsigned char *data;    /* w*h*c samples; 0 when nothing is allocated */
} image;

/*
 * Hex-dump `len` bytes starting at `buf` to stdout, 16 bytes per row, each row
 * prefixed with the address of its first byte. Nothing but the header and a
 * trailing line break is printed when `len` is zero or negative.
 */
static void dump_memory(uint8_t *buf, int32_t len)
{
    int32_t i;

    printf("\n\rdump file memory:");
    for (i = 0; i < len; i++) {
        if ((i % 16) == 0) {
            /* %p requires a pointer to void; any other pointer type is undefined. */
            printf("\n\r%p: ", (void *)(buf + i));
        }
        printf("0x%02x ", buf[i]);
    }

    printf("\n\r");
}

image make_empty_image(int w, int h, int c)
{
    image out;             
    out.data = 0;
    out.h = h;                                                                                                                                                                                              
    out.w = w;
    out.c = c;
    return out;
}

/*
 * Return a deep copy of `p`: same dimensions, freshly allocated pixel buffer.
 *
 * The buffer holds p.w * p.h * p.c samples of type unsigned char, the same
 * element size make_image() uses. Sizing the copy with sizeof(float) would make
 * memcpy read four times past the end of the source buffer.
 *
 * On allocation failure the returned image has data == NULL. If `p` has no
 * buffer the copy is left zero-filled. The caller owns the returned buffer and
 * releases it with free_image().
 */
image copy_image(image p)
{
    image copy = p;
    size_t count = (size_t)p.w * (size_t)p.h * (size_t)p.c;

    copy.data = calloc(count, sizeof *copy.data);
    if (copy.data != NULL && p.data != NULL) {
        memcpy(copy.data, p.data, count * sizeof *copy.data);
    }
    return copy;
}

/*
 * Allocate a zero-filled image holding w*h*c one-byte samples.
 * The data pointer is whatever calloc returned; it is not checked here.
 */
image make_image(int w, int h, int c)
{
    image img = make_empty_image(w, h, c);
    unsigned char *pixels = (unsigned char *)calloc(h * w * c, sizeof(char));

    img.data = pixels;
    return img;
}

/*
 * Store `val` into sample (x, y) of channel c of `m`.
 *
 * Coordinates outside the image are silently ignored. `val` is clamped to
 * [0, 255] before the float -> unsigned char conversion, because converting an
 * out-of-range (or NaN) floating value to an integer type is undefined
 * behaviour. In-range values are truncated toward zero.
 */
static void set_pixel(image m, int x, int y, int c, float val)
{
    if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return;

    if (!(val > 0.0f)) {
        val = 0.0f;             /* negative values and NaN */
    } else if (val > 255.0f) {
        val = 255.0f;
    }
    m.data[c*m.h*m.w + y*m.w + x] = (unsigned char)val;
}

/*
 * Read sample (x, y) of channel c of `m` and return it as a float.
 *
 * The coordinates must lie inside the image. This is checked by assert only;
 * the check covers negative indices as well as the upper bounds, since a
 * negative index would read before the start of the buffer.
 */
static float get_pixel(image m, int x, int y, int c)
{
    assert(x >= 0 && y >= 0 && c >= 0);
    assert(x < m.w && y < m.h && c < m.c);
    return m.data[c*m.h*m.w + y*m.w + x];
}

/*
 * Release the pixel buffer of `m`. The struct is passed by value, so the
 * caller's copy of the pointer is not modified.
 */
void free_image(image m)
{
    /* free() ignores a null pointer, so no guard is needed. */
    free(m.data);
}

/*
 * Add `val` to sample (x, y) of channel c of `m`.
 *
 * The coordinates must lie inside the image (assert-checked, including
 * negative indices). The sum is computed in float and clamped to [0, 255]
 * before it is converted back to unsigned char, because converting an
 * out-of-range floating value to an integer type is undefined behaviour.
 * In-range sums are truncated toward zero.
 */
static void add_pixel(image m, int x, int y, int c, float val)
{
    assert(x >= 0 && y >= 0 && c >= 0);
    assert(x < m.w && y < m.h && c < m.c);

    unsigned char *sample = &m.data[c*m.h*m.w + y*m.w + x];
    float sum = *sample + val;

    if (!(sum > 0.0f)) {
        sum = 0.0f;             /* negative values and NaN */
    } else if (sum > 255.0f) {
        sum = 255.0f;
    }
    *sample = (unsigned char)sum;
}

/*
 * Resize `im` to w x h using separable linear interpolation: a horizontal pass
 * into a temporary w x im.h image, then a vertical pass into the result.
 *
 * Returns a newly allocated image (release with free_image()). When the size
 * is unchanged a plain copy is returned. If an allocation fails the returned
 * image has data == NULL.
 *
 * Edge handling:
 *  - a target width or height of 1 does not divide by zero; the scale factor is
 *    forced to 0 in that case;
 *  - the last output column and the last output row are taken directly from
 *    the last source column / row, so float rounding of (h-1)*h_scale cannot
 *    make the final row fall just short of the last source row.
 */
image resize_image(image im, int w, int h)
{
    if (im.w == w && im.h == h) return copy_image(im);

    image resized = make_image(w, h, im.c);
    image part = make_image(w, im.h, im.c);     /* horizontally resized intermediate */
    int r, c, k;

    if (resized.data == NULL || part.data == NULL) {
        free_image(part);
        free_image(resized);
        resized.data = NULL;
        return resized;
    }

    /* Step between source samples per output sample; 0 when there is no step. */
    float w_scale = (w > 1) ? (float)(im.w - 1) / (w - 1) : 0.0f;
    float h_scale = (h > 1) ? (float)(im.h - 1) / (h - 1) : 0.0f;

    /* Pass 1: interpolate along x for every source row. */
    for (k = 0; k < im.c; ++k) {
        for (r = 0; r < im.h; ++r) {
            for (c = 0; c < w; ++c) {
                float val = 0;
                if (c == w-1 || im.w == 1) {
                    val = get_pixel(im, im.w-1, r, k);
                } else {
                    float sx = c*w_scale;
                    int ix = (int) sx;
                    float dx = sx - ix;
                    val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
                }
                set_pixel(part, c, r, k, val);
            }
        }
    }

    /* Pass 2: interpolate along y between rows of the intermediate image. */
    for (k = 0; k < im.c; ++k) {
        for (r = 0; r < h; ++r) {
            int last_row = (r == h-1 || im.h == 1);
            int iy;
            float dy;

            if (last_row) {
                iy = im.h - 1;
                dy = 0;
            } else {
                float sy = r*h_scale;
                iy = (int) sy;
                dy = sy - iy;
            }

            for (c = 0; c < w; ++c) {
                float val = (1-dy) * get_pixel(part, c, iy, k);
                set_pixel(resized, c, r, k, val);
            }
            if (last_row) continue;     /* there is no row iy+1 to blend with */
            for (c = 0; c < w; ++c) {
                float val = dy * get_pixel(part, c, iy+1, k);
                add_pixel(resized, c, r, k, val);
            }
        }
    }

    free_image(part);
    return resized;
}

/*
 * Bounds-tolerant pixel read.
 * A position outside the image yields 0xff (a constant fill rather than the
 * nearest edge sample); an invalid channel yields 0; otherwise the stored
 * sample is returned.
 */
static float get_pixel_extend(image m, int x, int y, int c)
{
    int inside_x = (x >= 0 && x < m.w);
    int inside_y = (y >= 0 && y < m.h);

    if (!inside_x || !inside_y) {
        return 0xff;
    }
    if (c < 0 || c >= m.c) {
        return 0;
    }
    return get_pixel(m, x, y, c);
}

/*
 * Sample channel c of `im` at the fractional position (x, y) by blending the
 * four surrounding samples with weights given by the fractional parts of x
 * and y. Neighbours are fetched through get_pixel_extend().
 */
float bilinear_interpolate(image im, float x, float y, int c)
{
    int left = (int) floorf(x);
    int top  = (int) floorf(y);

    float fx = x - left;
    float fy = y - top;

    float top_left     = get_pixel_extend(im, left,     top,     c);
    float bottom_left  = get_pixel_extend(im, left,     top + 1, c);
    float top_right    = get_pixel_extend(im, left + 1, top,     c);
    float bottom_right = get_pixel_extend(im, left + 1, top + 1, c);

    return (1-fy) * (1-fx) * top_left +
           fy     * (1-fx) * bottom_left +
           (1-fy) *   fx   * top_right +
           fy     *   fx   * bottom_right;
}

/*
 * Return a new image of the same size as `im` in which every destination
 * pixel (x, y) is sampled, with bilinear interpolation, from the source
 * position obtained by rotating (x, y) by `rad` radians about the image
 * centre (w/2, h/2).
 *
 * The caller releases the result with free_image(). If the allocation fails
 * the returned image has data == NULL.
 */
image rotate_image(image im, float rad)
{
    const float cx = im.w/2.;
    const float cy = im.h/2.;
    /* The angle is constant for the whole image: evaluate the trig functions
       once instead of four times per pixel. Kept in double so the arithmetic
       below matches a direct cos(rad)/sin(rad) call bit for bit. */
    const double cos_r = cos(rad);
    const double sin_r = sin(rad);
    image rot = make_image(im.w, im.h, im.c);
    int x, y, c;

    if (rot.data == NULL) {
        return rot;
    }

    for (c = 0; c < im.c; ++c) {
        for (y = 0; y < im.h; ++y) {
            for (x = 0; x < im.w; ++x) {
                float rx = cos_r*(x-cx) - sin_r*(y-cy) + cx;
                float ry = sin_r*(x-cx) + cos_r*(y-cy) + cy;
                float val = bilinear_interpolate(im, rx, ry, c);
                set_pixel(rot, x, y, c, val);
            }
        }
    }
    return rot;
}

/*
 * Rotate a planar three-plane (Y, U, V) frame by 0.2 rad and write the result
 * to ./output.yuv.
 *
 * Usage: program <input.yuv> <width> <height> <input_width> <input_height>
 *
 * The input file must hold three consecutive planes of
 * input_width*input_height bytes each. rotate_image() keeps the input
 * dimensions, so width*height bytes of each rotated plane are written and
 * width*height may not exceed the input plane size.
 *
 * Returns 0 on success and -1 on any error.
 */
int main(int argc, char **argv)
{
    FILE *file = NULL;
    unsigned char *p = NULL;
    image orig[3] = { { 0 } };
    image rotated[3] = { { 0 } };
    int width, height, input_width, input_height;
    size_t in_plane, out_plane;
    long filelen;
    int32_t dump_len;
    int i;
    int status = -1;

    DBG("in");

    if (argc != 6) {
        DBG("input error, you should use this program like that: program xxxx.yuv width height input_width input_height.");
        return -1;
    }

    width  = atoi(argv[2]);
    height = atoi(argv[3]);
    input_width  = atoi(argv[4]);
    input_height = atoi(argv[5]);

    if (width <= 0 || height <= 0 || input_width <= 0 || input_height <= 0) {
        DBG("input error, all dimensions must be positive integers.");
        return -1;
    }

    in_plane  = (size_t)input_width * (size_t)input_height;
    out_plane = (size_t)width * (size_t)height;

    /* Each rotated plane only holds in_plane bytes. */
    if (out_plane > in_plane) {
        DBG("input error, output size %dx%d is larger than input size %dx%d.",
            width, height, input_width, input_height);
        return -1;
    }
    if (width != input_width || height != input_height) {
        DBG("warning, output size differs from input size, the output rows will not line up.");
    }

    DBG("scale to width %d, height %d.", width, height);

    file = fopen(argv[1], "rb");
    if (file == NULL) {
        DBG("fatal error, open file %s failure, please check the file status.", argv[1]);
        return -1;
    }

    filelen = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        filelen = ftell(file);
    }
    if (filelen < 0 || fseek(file, 0, SEEK_SET) != 0) {
        DBG("fatal error, can not determine the size of file %s.", argv[1]);
        goto cleanup;
    }

    DBG("file %s len %ld byets.", argv[1], filelen);

    /* Three full planes are copied out of the buffer below. */
    if ((size_t)filelen / 3 < in_plane) {
        DBG("file %s is too small for three %dx%d planes.", argv[1], input_width, input_height);
        goto cleanup;
    }

    p = malloc((size_t)filelen);
    if (p == NULL) {
        DBG("malloc buffer failure for %s len %ld.", argv[1], filelen);
        goto cleanup;
    }

    if (fread(p, 1, (size_t)filelen, file) != (size_t)filelen) {
        DBG("read file failure, size wrong.");
        goto cleanup;
    }

    fclose(file);
    file = NULL;

    /* Show the head and tail of the file without running off a short buffer. */
    dump_len = (filelen < 32) ? (int32_t)filelen : 32;
    dump_memory(p, dump_len);
    dump_memory(p + filelen - dump_len, dump_len);

    for (i = 0; i < 3; i++) {
        orig[i] = make_image(input_width, input_height, 1);
        if (orig[i].data == NULL) {
            DBG("malloc image plane %d failure.", i);
            goto cleanup;
        }
        memcpy(orig[i].data, p + (size_t)i * in_plane, in_plane);

        rotated[i] = rotate_image(orig[i], 0.2);
        if (rotated[i].data == NULL) {
            DBG("rotate image plane %d failure.", i);
            goto cleanup;
        }
    }

    file = fopen("./output.yuv", "wb+");
    if (file == NULL) {
        DBG("fatal error, open output file failure, please check the file status.");
        goto cleanup;
    }

    for (i = 0; i < 3; i++) {
        if (fwrite(rotated[i].data, 1, out_plane, file) != out_plane) {
            DBG("write file failure, size wrong.");
            goto cleanup;
        }
    }

    if (fflush(file) != 0) {
        DBG("flush output file failure.");
        goto cleanup;
    }
    (void)fsync(fileno(file));     /* best effort, as before */

    status = 0;

cleanup:
    if (file != NULL && fclose(file) != 0 && status == 0) {
        DBG("close file failure.");
        status = -1;
    }
    for (i = 0; i < 3; i++) {
        free_image(rotated[i]);
        free_image(orig[i]);
    }
    free(p);

    return status;
}

执行命令：

$ffmpeg -i 165823915.jpg -pix_fmt yuv444p xuanwumen.yuv444p.yuv
$gcc main.c -lm
$./a.out xuanwumen.yuv444p.yuv 1920 1080 1920 1080
$ffplay -pix_fmt yuv444p -f rawvideo -video_size 1920x1080 ./output.yuv

算法执行后的效果：

大礼堂：

旋转60度的大礼堂：

旋转+Crop

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/ioctl.h>
#include <math.h>

/*
 * Debug trace: prints the enclosing function name and the source line number,
 * then the printf-style message `fmt` and a newline, to stdout.
 * `fmt` must be a string literal because it is pasted between two literals.
 * `##__VA_ARGS__` is a GNU extension that drops the preceding comma when the
 * macro is invoked without extra arguments.
 */
#define DBG(fmt, ...)   do { printf("%s line %d, "fmt"\n", __func__, __LINE__, ##__VA_ARGS__); } while (0)

/*
 * 8-bit planar image. Sample (x, y) of channel c is stored at
 * data[c*h*w + y*w + x] (see get_pixel / set_pixel).
 */
typedef struct image {
    int w;                  /* width in pixels */
    int h;                  /* height in pixels */
    int c;                  /* number of channels (planes) */
    unsigned char *data;    /* w*h*c samples; 0 when nothing is allocated */
} image;

/*
 * Hex-dump `len` bytes starting at `buf` to stdout, 16 bytes per row, each row
 * prefixed with the address of its first byte. Nothing but the header and a
 * trailing line break is printed when `len` is zero or negative.
 */
static void dump_memory(uint8_t *buf, int32_t len)
{
    int32_t i;

    printf("\n\rdump file memory:");
    for (i = 0; i < len; i++) {
        if ((i % 16) == 0) {
            /* %p requires a pointer to void; any other pointer type is undefined. */
            printf("\n\r%p: ", (void *)(buf + i));
        }
        printf("0x%02x ", buf[i]);
    }

    printf("\n\r");
}

image make_empty_image(int w, int h, int c)
{
    image out;             
    out.data = 0;
    out.h = h;                                                                                                                                                                                              
    out.w = w;
    out.c = c;
    return out;
}

/*
 * Return a deep copy of `p`: same dimensions, freshly allocated pixel buffer.
 *
 * The buffer holds p.w * p.h * p.c samples of type unsigned char, the same
 * element size make_image() uses. Sizing the copy with sizeof(float) would make
 * memcpy read four times past the end of the source buffer.
 *
 * On allocation failure the returned image has data == NULL. If `p` has no
 * buffer the copy is left zero-filled. The caller owns the returned buffer and
 * releases it with free_image().
 */
image copy_image(image p)
{
    image copy = p;
    size_t count = (size_t)p.w * (size_t)p.h * (size_t)p.c;

    copy.data = calloc(count, sizeof *copy.data);
    if (copy.data != NULL && p.data != NULL) {
        memcpy(copy.data, p.data, count * sizeof *copy.data);
    }
    return copy;
}

/*
 * Allocate a zero-filled image holding w*h*c one-byte samples.
 * The data pointer is whatever calloc returned; it is not checked here.
 */
image make_image(int w, int h, int c)
{
    image img = make_empty_image(w, h, c);
    unsigned char *pixels = (unsigned char *)calloc(h * w * c, sizeof(char));

    img.data = pixels;
    return img;
}

/*
 * Store `val` into sample (x, y) of channel c of `m`.
 *
 * Coordinates outside the image are silently ignored. `val` is clamped to
 * [0, 255] before the float -> unsigned char conversion, because converting an
 * out-of-range (or NaN) floating value to an integer type is undefined
 * behaviour. In-range values are truncated toward zero.
 */
static void set_pixel(image m, int x, int y, int c, float val)
{
    if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return;

    if (!(val > 0.0f)) {
        val = 0.0f;             /* negative values and NaN */
    } else if (val > 255.0f) {
        val = 255.0f;
    }
    m.data[c*m.h*m.w + y*m.w + x] = (unsigned char)val;
}

/*
 * Read sample (x, y) of channel c of `m` and return it as a float.
 *
 * The coordinates must lie inside the image. This is checked by assert only;
 * the check covers negative indices as well as the upper bounds, since a
 * negative index would read before the start of the buffer.
 */
static float get_pixel(image m, int x, int y, int c)
{
    assert(x >= 0 && y >= 0 && c >= 0);
    assert(x < m.w && y < m.h && c < m.c);
    return m.data[c*m.h*m.w + y*m.w + x];
}

/*
 * Release the pixel buffer of `m`. The struct is passed by value, so the
 * caller's copy of the pointer is not modified.
 */
void free_image(image m)
{
    /* free() ignores a null pointer, so no guard is needed. */
    free(m.data);
}

/*
 * Add `val` to sample (x, y) of channel c of `m`.
 *
 * The coordinates must lie inside the image (assert-checked, including
 * negative indices). The sum is computed in float and clamped to [0, 255]
 * before it is converted back to unsigned char, because converting an
 * out-of-range floating value to an integer type is undefined behaviour.
 * In-range sums are truncated toward zero.
 */
static void add_pixel(image m, int x, int y, int c, float val)
{
    assert(x >= 0 && y >= 0 && c >= 0);
    assert(x < m.w && y < m.h && c < m.c);

    unsigned char *sample = &m.data[c*m.h*m.w + y*m.w + x];
    float sum = *sample + val;

    if (!(sum > 0.0f)) {
        sum = 0.0f;             /* negative values and NaN */
    } else if (sum > 255.0f) {
        sum = 255.0f;
    }
    *sample = (unsigned char)sum;
}

/*
 * Resize `im` to w x h using separable linear interpolation: a horizontal pass
 * into a temporary w x im.h image, then a vertical pass into the result.
 *
 * Returns a newly allocated image (release with free_image()). When the size
 * is unchanged a plain copy is returned. If an allocation fails the returned
 * image has data == NULL.
 *
 * Edge handling:
 *  - a target width or height of 1 does not divide by zero; the scale factor is
 *    forced to 0 in that case;
 *  - the last output column and the last output row are taken directly from
 *    the last source column / row, so float rounding of (h-1)*h_scale cannot
 *    make the final row fall just short of the last source row.
 */
image resize_image(image im, int w, int h)
{
    if (im.w == w && im.h == h) return copy_image(im);

    image resized = make_image(w, h, im.c);
    image part = make_image(w, im.h, im.c);     /* horizontally resized intermediate */
    int r, c, k;

    if (resized.data == NULL || part.data == NULL) {
        free_image(part);
        free_image(resized);
        resized.data = NULL;
        return resized;
    }

    /* Step between source samples per output sample; 0 when there is no step. */
    float w_scale = (w > 1) ? (float)(im.w - 1) / (w - 1) : 0.0f;
    float h_scale = (h > 1) ? (float)(im.h - 1) / (h - 1) : 0.0f;

    /* Pass 1: interpolate along x for every source row. */
    for (k = 0; k < im.c; ++k) {
        for (r = 0; r < im.h; ++r) {
            for (c = 0; c < w; ++c) {
                float val = 0;
                if (c == w-1 || im.w == 1) {
                    val = get_pixel(im, im.w-1, r, k);
                } else {
                    float sx = c*w_scale;
                    int ix = (int) sx;
                    float dx = sx - ix;
                    val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
                }
                set_pixel(part, c, r, k, val);
            }
        }
    }

    /* Pass 2: interpolate along y between rows of the intermediate image. */
    for (k = 0; k < im.c; ++k) {
        for (r = 0; r < h; ++r) {
            int last_row = (r == h-1 || im.h == 1);
            int iy;
            float dy;

            if (last_row) {
                iy = im.h - 1;
                dy = 0;
            } else {
                float sy = r*h_scale;
                iy = (int) sy;
                dy = sy - iy;
            }

            for (c = 0; c < w; ++c) {
                float val = (1-dy) * get_pixel(part, c, iy, k);
                set_pixel(resized, c, r, k, val);
            }
            if (last_row) continue;     /* there is no row iy+1 to blend with */
            for (c = 0; c < w; ++c) {
                float val = dy * get_pixel(part, c, iy+1, k);
                add_pixel(resized, c, r, k, val);
            }
        }
    }

    free_image(part);
    return resized;
}

/*
 * Bounds-tolerant pixel read.
 * A position outside the image yields 0xff (a constant fill rather than the
 * nearest edge sample); an invalid channel yields 0; otherwise the stored
 * sample is returned.
 */
static float get_pixel_extend(image m, int x, int y, int c)
{
    int inside_x = (x >= 0 && x < m.w);
    int inside_y = (y >= 0 && y < m.h);

    if (!inside_x || !inside_y) {
        return 0xff;
    }
    if (c < 0 || c >= m.c) {
        return 0;
    }
    return get_pixel(m, x, y, c);
}

/*
 * Sample channel c of `im` at the fractional position (x, y) by blending the
 * four surrounding samples with weights given by the fractional parts of x
 * and y. Neighbours are fetched through get_pixel_extend().
 */
float bilinear_interpolate(image im, float x, float y, int c)
{
    int left = (int) floorf(x);
    int top  = (int) floorf(y);

    float fx = x - left;
    float fy = y - top;

    float top_left     = get_pixel_extend(im, left,     top,     c);
    float bottom_left  = get_pixel_extend(im, left,     top + 1, c);
    float top_right    = get_pixel_extend(im, left + 1, top,     c);
    float bottom_right = get_pixel_extend(im, left + 1, top + 1, c);

    return (1-fy) * (1-fx) * top_left +
           fy     * (1-fx) * bottom_left +
           (1-fy) *   fx   * top_right +
           fy     *   fx   * bottom_right;
}

/*
 * Produce a new w x h image sampled from `im` with bilinear interpolation.
 *
 * For every output pixel (x, y) the source position is computed as:
 *   u = (x - w/2)/s*aspect + dx/s*aspect      (horizontal offset)
 *   v = (y - h/2)/s        + dy/s             (vertical offset)
 * then (u, v) is rotated by `rad` radians and translated to the centre of the
 * source image (im.w/2, im.h/2).
 *
 * `s` is used as a divisor and must be non-zero. The caller releases the
 * result with free_image(). If the allocation fails the returned image has
 * data == NULL.
 */
image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect)
{
    const float cx = im.w/2.;
    const float cy = im.h/2.;
    /* The angle is constant for the whole image: evaluate the trig functions
       once instead of four times per pixel. Kept in double so the arithmetic
       below matches a direct cos(rad)/sin(rad) call bit for bit. */
    const double cos_r = cos(rad);
    const double sin_r = sin(rad);
    image rot = make_image(w, h, im.c);
    int x, y, c;

    if (rot.data == NULL) {
        return rot;
    }

    for (c = 0; c < im.c; ++c) {
        for (y = 0; y < h; ++y) {
            /* The vertical offset only depends on the row. */
            const double v = (y - h/2.)/s + dy/s;

            for (x = 0; x < w; ++x) {
                const double u = (x - w/2.)/s*aspect + dx/s*aspect;
                float rx = cos_r*u - sin_r*v + cx;
                float ry = sin_r*u + cos_r*v + cy;
                float val = bilinear_interpolate(im, rx, ry, c);
                set_pixel(rot, x, y, c, val);
            }
        }
    }
    return rot;
}

/*
 * Return a new image of the same size as `im` in which every destination
 * pixel (x, y) is sampled, with bilinear interpolation, from the source
 * position obtained by rotating (x, y) by `rad` radians about the image
 * centre (w/2, h/2).
 *
 * The caller releases the result with free_image(). If the allocation fails
 * the returned image has data == NULL.
 */
image rotate_image(image im, float rad)
{
    const float cx = im.w/2.;
    const float cy = im.h/2.;
    /* The angle is constant for the whole image: evaluate the trig functions
       once instead of four times per pixel. Kept in double so the arithmetic
       below matches a direct cos(rad)/sin(rad) call bit for bit. */
    const double cos_r = cos(rad);
    const double sin_r = sin(rad);
    image rot = make_image(im.w, im.h, im.c);
    int x, y, c;

    if (rot.data == NULL) {
        return rot;
    }

    for (c = 0; c < im.c; ++c) {
        for (y = 0; y < im.h; ++y) {
            for (x = 0; x < im.w; ++x) {
                float rx = cos_r*(x-cx) - sin_r*(y-cy) + cx;
                float ry = sin_r*(x-cx) + cos_r*(y-cy) + cy;
                float val = bilinear_interpolate(im, rx, ry, c);
                set_pixel(rot, x, y, c, val);
            }
        }
    }
    return rot;
}

/*
 * Crop/scale a planar three-plane (Y, U, V) frame through
 * rotate_crop_image() (angle 0, scale 1, no offset) and write the width x
 * height result to ./output.yuv.
 *
 * Usage: program <input.yuv> <width> <height> <input_width> <input_height> <aspect>
 *
 * The input file must hold three consecutive planes of
 * input_width*input_height bytes each. <aspect> is passed to
 * rotate_crop_image() as its horizontal stretch factor.
 *
 * Returns 0 on success and -1 on any error.
 */
int main(int argc, char **argv)
{
    FILE *file = NULL;
    unsigned char *p = NULL;
    image orig[3] = { { 0 } };
    image cropped[3] = { { 0 } };
    int width, height, input_width, input_height;
    float aspect_ratio;
    size_t in_plane, out_plane;
    long filelen;
    int32_t dump_len;
    int i;
    int status = -1;

    DBG("in");

    if (argc != 7) {
        DBG("input error, you should use this program like that: program xxxx.yuv width height input_width input_height aspect.");
        return -1;
    }

    width  = atoi(argv[2]);
    height = atoi(argv[3]);
    input_width  = atoi(argv[4]);
    input_height = atoi(argv[5]);
    aspect_ratio = atof(argv[6]);

    if (width <= 0 || height <= 0 || input_width <= 0 || input_height <= 0) {
        DBG("input error, all dimensions must be positive integers.");
        return -1;
    }

    in_plane  = (size_t)input_width * (size_t)input_height;
    out_plane = (size_t)width * (size_t)height;

    DBG("scale to width %d, height %d.", width, height);

    file = fopen(argv[1], "rb");
    if (file == NULL) {
        DBG("fatal error, open file %s failure, please check the file status.", argv[1]);
        return -1;
    }

    filelen = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        filelen = ftell(file);
    }
    if (filelen < 0 || fseek(file, 0, SEEK_SET) != 0) {
        DBG("fatal error, can not determine the size of file %s.", argv[1]);
        goto cleanup;
    }

    DBG("file %s len %ld byets.", argv[1], filelen);

    /* Three full planes are copied out of the buffer below. */
    if ((size_t)filelen / 3 < in_plane) {
        DBG("file %s is too small for three %dx%d planes.", argv[1], input_width, input_height);
        goto cleanup;
    }

    p = malloc((size_t)filelen);
    if (p == NULL) {
        DBG("malloc buffer failure for %s len %ld.", argv[1], filelen);
        goto cleanup;
    }

    if (fread(p, 1, (size_t)filelen, file) != (size_t)filelen) {
        DBG("read file failure, size wrong.");
        goto cleanup;
    }

    fclose(file);
    file = NULL;

    /* Show the head and tail of the file without running off a short buffer. */
    dump_len = (filelen < 32) ? (int32_t)filelen : 32;
    dump_memory(p, dump_len);
    dump_memory(p + filelen - dump_len, dump_len);

    for (i = 0; i < 3; i++) {
        orig[i] = make_image(input_width, input_height, 1);
        if (orig[i].data == NULL) {
            DBG("malloc image plane %d failure.", i);
            goto cleanup;
        }
        memcpy(orig[i].data, p + (size_t)i * in_plane, in_plane);

        /* The result is allocated as width x height, i.e. out_plane bytes. */
        cropped[i] = rotate_crop_image(orig[i], 0.00, 1.0, width, height, 0, 0, aspect_ratio);
        if (cropped[i].data == NULL) {
            DBG("rotate/crop image plane %d failure.", i);
            goto cleanup;
        }
    }

    file = fopen("./output.yuv", "wb+");
    if (file == NULL) {
        DBG("fatal error, open output file failure, please check the file status.");
        goto cleanup;
    }

    for (i = 0; i < 3; i++) {
        if (fwrite(cropped[i].data, 1, out_plane, file) != out_plane) {
            DBG("write file failure, size wrong.");
            goto cleanup;
        }
    }

    if (fflush(file) != 0) {
        DBG("flush output file failure.");
        goto cleanup;
    }
    (void)fsync(fileno(file));     /* best effort, as before */

    status = 0;

cleanup:
    if (file != NULL && fclose(file) != 0 && status == 0) {
        DBG("close file failure.");
        status = -1;
    }
    for (i = 0; i < 3; i++) {
        free_image(cropped[i]);
        free_image(orig[i]);
    }
    free(p);

    return status;
}

$gcc main.c -lm
$./a.out xuanwumen.yuv444p.yuv 640 1080 1920 1080 1.0
$ffplay -pix_fmt yuv444p -f rawvideo -video_size 640x1080 ./output.yuv

可以看到CROP的效果：

上面的ASPECT为1，可以看到画面两边有裁减，现在我们调整ASPECT，让有限的窗口装进更多的画面，调整aspect:

$./a.out xuanwumen.yuv444p.yuv 640 1080 1920 1080 3.2
$ffplay -pix_fmt yuv444p -f rawvideo -video_size 640x1080 ./output.yuv

旋转pi/6rad

图像转置(transpose)操作

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/ioctl.h>
#include <math.h>
 
/*
 * Debug trace: prints the enclosing function name and the source line number,
 * then the printf-style message `fmt` and a newline, to stdout.
 * `fmt` must be a string literal because it is pasted between two literals.
 * `##__VA_ARGS__` is a GNU extension that drops the preceding comma when the
 * macro is invoked without extra arguments.
 */
#define DBG(fmt, ...)   do { printf("%s line %d, "fmt"\n", __func__, __LINE__, ##__VA_ARGS__); } while (0)
 
/*
 * 8-bit planar image. Sample (x, y) of channel c is stored at
 * data[c*h*w + y*w + x] (see get_pixel / set_pixel).
 */
typedef struct image {
    int w;                  /* width in pixels */
    int h;                  /* height in pixels */
    int c;                  /* number of channels (planes) */
    unsigned char *data;    /* w*h*c samples; 0 when nothing is allocated */
} image;
 
/*
 * Hex-dump `len` bytes starting at `buf` to stdout, 16 bytes per row, each row
 * prefixed with the address of its first byte. Nothing but the header and a
 * trailing line break is printed when `len` is zero or negative.
 */
static void dump_memory(uint8_t *buf, int32_t len)
{
    int32_t i;

    printf("\n\rdump file memory:");
    for (i = 0; i < len; i++) {
        if ((i % 16) == 0) {
            /* %p requires a pointer to void; any other pointer type is undefined. */
            printf("\n\r%p: ", (void *)(buf + i));
        }
        printf("0x%02x ", buf[i]);
    }

    printf("\n\r");
}
 
image make_empty_image(int w, int h, int c)
{
    image out;             
    out.data = 0;
    out.h = h;                                                                                                                                                                                              
    out.w = w;
    out.c = c;
    return out;
}
 
/*
 * Return a deep copy of `p`: same dimensions, freshly allocated pixel buffer.
 *
 * The buffer holds p.w * p.h * p.c samples of type unsigned char, the same
 * element size make_image() uses. Sizing the copy with sizeof(float) would make
 * memcpy read four times past the end of the source buffer.
 *
 * On allocation failure the returned image has data == NULL. If `p` has no
 * buffer the copy is left zero-filled. The caller owns the returned buffer and
 * releases it with free_image().
 */
image copy_image(image p)
{
    image copy = p;
    size_t count = (size_t)p.w * (size_t)p.h * (size_t)p.c;

    copy.data = calloc(count, sizeof *copy.data);
    if (copy.data != NULL && p.data != NULL) {
        memcpy(copy.data, p.data, count * sizeof *copy.data);
    }
    return copy;
}
 
/*
 * Allocate a zero-filled image holding w*h*c one-byte samples.
 * The data pointer is whatever calloc returned; it is not checked here.
 */
image make_image(int w, int h, int c)
{
    image img = make_empty_image(w, h, c);
    unsigned char *pixels = (unsigned char *)calloc(h * w * c, sizeof(char));

    img.data = pixels;
    return img;
}
 
/*
 * Store `val` into sample (x, y) of channel c of `m`.
 *
 * Coordinates outside the image are silently ignored. `val` is clamped to
 * [0, 255] before the float -> unsigned char conversion, because converting an
 * out-of-range (or NaN) floating value to an integer type is undefined
 * behaviour. In-range values are truncated toward zero.
 */
static void set_pixel(image m, int x, int y, int c, float val)
{
    if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return;

    if (!(val > 0.0f)) {
        val = 0.0f;             /* negative values and NaN */
    } else if (val > 255.0f) {
        val = 255.0f;
    }
    m.data[c*m.h*m.w + y*m.w + x] = (unsigned char)val;
}
 
/*
 * Read sample (x, y) of channel c of `m` and return it as a float.
 *
 * The coordinates must lie inside the image. This is checked by assert only;
 * the check covers negative indices as well as the upper bounds, since a
 * negative index would read before the start of the buffer.
 */
static float get_pixel(image m, int x, int y, int c)
{
    assert(x >= 0 && y >= 0 && c >= 0);
    assert(x < m.w && y < m.h && c < m.c);
    return m.data[c*m.h*m.w + y*m.w + x];
}
 
/*
 * Release the pixel buffer of `m`. The struct is passed by value, so the
 * caller's copy of the pointer is not modified.
 */
void free_image(image m)
{
    /* free() ignores a null pointer, so no guard is needed. */
    free(m.data);
}
 
/*
 * Add `val` to sample (x, y) of channel c of `m`.
 *
 * The coordinates must lie inside the image (assert-checked, including
 * negative indices). The sum is computed in float and clamped to [0, 255]
 * before it is converted back to unsigned char, because converting an
 * out-of-range floating value to an integer type is undefined behaviour.
 * In-range sums are truncated toward zero.
 */
static void add_pixel(image m, int x, int y, int c, float val)
{
    assert(x >= 0 && y >= 0 && c >= 0);
    assert(x < m.w && y < m.h && c < m.c);

    unsigned char *sample = &m.data[c*m.h*m.w + y*m.w + x];
    float sum = *sample + val;

    if (!(sum > 0.0f)) {
        sum = 0.0f;             /* negative values and NaN */
    } else if (sum > 255.0f) {
        sum = 255.0f;
    }
    *sample = (unsigned char)sum;
}
 
/*
 * Resize `im` to w x h using separable linear interpolation: a horizontal pass
 * into a temporary w x im.h image, then a vertical pass into the result.
 *
 * Returns a newly allocated image (release with free_image()). When the size
 * is unchanged a plain copy is returned. If an allocation fails the returned
 * image has data == NULL.
 *
 * Edge handling:
 *  - a target width or height of 1 does not divide by zero; the scale factor is
 *    forced to 0 in that case;
 *  - the last output column and the last output row are taken directly from
 *    the last source column / row, so float rounding of (h-1)*h_scale cannot
 *    make the final row fall just short of the last source row.
 */
image resize_image(image im, int w, int h)
{
    if (im.w == w && im.h == h) return copy_image(im);

    image resized = make_image(w, h, im.c);
    image part = make_image(w, im.h, im.c);     /* horizontally resized intermediate */
    int r, c, k;

    if (resized.data == NULL || part.data == NULL) {
        free_image(part);
        free_image(resized);
        resized.data = NULL;
        return resized;
    }

    /* Step between source samples per output sample; 0 when there is no step. */
    float w_scale = (w > 1) ? (float)(im.w - 1) / (w - 1) : 0.0f;
    float h_scale = (h > 1) ? (float)(im.h - 1) / (h - 1) : 0.0f;

    /* Pass 1: interpolate along x for every source row. */
    for (k = 0; k < im.c; ++k) {
        for (r = 0; r < im.h; ++r) {
            for (c = 0; c < w; ++c) {
                float val = 0;
                if (c == w-1 || im.w == 1) {
                    val = get_pixel(im, im.w-1, r, k);
                } else {
                    float sx = c*w_scale;
                    int ix = (int) sx;
                    float dx = sx - ix;
                    val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
                }
                set_pixel(part, c, r, k, val);
            }
        }
    }

    /* Pass 2: interpolate along y between rows of the intermediate image. */
    for (k = 0; k < im.c; ++k) {
        for (r = 0; r < h; ++r) {
            int last_row = (r == h-1 || im.h == 1);
            int iy;
            float dy;

            if (last_row) {
                iy = im.h - 1;
                dy = 0;
            } else {
                float sy = r*h_scale;
                iy = (int) sy;
                dy = sy - iy;
            }

            for (c = 0; c < w; ++c) {
                float val = (1-dy) * get_pixel(part, c, iy, k);
                set_pixel(resized, c, r, k, val);
            }
            if (last_row) continue;     /* there is no row iy+1 to blend with */
            for (c = 0; c < w; ++c) {
                float val = dy * get_pixel(part, c, iy+1, k);
                add_pixel(resized, c, r, k, val);
            }
        }
    }

    free_image(part);
    return resized;
}
 
/*
 * Bounds-tolerant pixel read.
 * A position outside the image yields 0xff (a constant fill rather than the
 * nearest edge sample); an invalid channel yields 0; otherwise the stored
 * sample is returned.
 */
static float get_pixel_extend(image m, int x, int y, int c)
{
    int inside_x = (x >= 0 && x < m.w);
    int inside_y = (y >= 0 && y < m.h);

    if (!inside_x || !inside_y) {
        return 0xff;
    }
    if (c < 0 || c >= m.c) {
        return 0;
    }
    return get_pixel(m, x, y, c);
}
 
/*
 * Sample channel c of `im` at the fractional position (x, y) by blending the
 * four surrounding samples with weights given by the fractional parts of x
 * and y. Neighbours are fetched through get_pixel_extend().
 */
float bilinear_interpolate(image im, float x, float y, int c)
{
    int left = (int) floorf(x);
    int top  = (int) floorf(y);

    float fx = x - left;
    float fy = y - top;

    float top_left     = get_pixel_extend(im, left,     top,     c);
    float bottom_left  = get_pixel_extend(im, left,     top + 1, c);
    float top_right    = get_pixel_extend(im, left + 1, top,     c);
    float bottom_right = get_pixel_extend(im, left + 1, top + 1, c);

    return (1-fy) * (1-fx) * top_left +
           fy     * (1-fx) * bottom_left +
           (1-fy) *   fx   * top_right +
           fy     *   fx   * bottom_right;
}
 
/*
 * Produce a new w x h image sampled from `im` with bilinear interpolation.
 *
 * For every output pixel (x, y) the source position is computed as:
 *   u = (x - w/2)/s*aspect + dx/s*aspect      (horizontal offset)
 *   v = (y - h/2)/s        + dy/s             (vertical offset)
 * then (u, v) is rotated by `rad` radians and translated to the centre of the
 * source image (im.w/2, im.h/2).
 *
 * `s` is used as a divisor and must be non-zero. The caller releases the
 * result with free_image(). If the allocation fails the returned image has
 * data == NULL.
 */
image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect)
{
    const float cx = im.w/2.;
    const float cy = im.h/2.;
    /* The angle is constant for the whole image: evaluate the trig functions
       once instead of four times per pixel. Kept in double so the arithmetic
       below matches a direct cos(rad)/sin(rad) call bit for bit. */
    const double cos_r = cos(rad);
    const double sin_r = sin(rad);
    image rot = make_image(w, h, im.c);
    int x, y, c;

    if (rot.data == NULL) {
        return rot;
    }

    for (c = 0; c < im.c; ++c) {
        for (y = 0; y < h; ++y) {
            /* The vertical offset only depends on the row. */
            const double v = (y - h/2.)/s + dy/s;

            for (x = 0; x < w; ++x) {
                const double u = (x - w/2.)/s*aspect + dx/s*aspect;
                float rx = cos_r*u - sin_r*v + cx;
                float ry = sin_r*u + cos_r*v + cy;
                float val = bilinear_interpolate(im, rx, ry, c);
                set_pixel(rot, x, y, c, val);
            }
        }
    }
    return rot;
}

/*
 * Transpose every channel of a square image in place by swapping each sample
 * above the main diagonal with its mirror below it. The image must be square;
 * this is enforced by assert only.
 */
void transpose_image(image im)
{
    assert(im.w == im.h);

    const int side = im.w;

    for (int ch = 0; ch < im.c; ch++) {
        const int base = im.w*im.h*ch;      /* offset of this channel's plane */

        for (int row = 0; row < side - 1; row++) {
            for (int col = row + 1; col < side; col++) {
                const int upper = base + col + side*row;
                const int lower = base + row + side*col;
                unsigned char tmp = im.data[upper];

                im.data[upper] = im.data[lower];
                im.data[lower] = tmp;
            }
        }
    }
}
 
/*
 * Return a new image of the same size as `im` in which every destination
 * pixel (x, y) is sampled, with bilinear interpolation, from the source
 * position obtained by rotating (x, y) by `rad` radians about the image
 * centre (w/2, h/2).
 *
 * The caller releases the result with free_image(). If the allocation fails
 * the returned image has data == NULL.
 */
image rotate_image(image im, float rad)
{
    const float cx = im.w/2.;
    const float cy = im.h/2.;
    /* The angle is constant for the whole image: evaluate the trig functions
       once instead of four times per pixel. Kept in double so the arithmetic
       below matches a direct cos(rad)/sin(rad) call bit for bit. */
    const double cos_r = cos(rad);
    const double sin_r = sin(rad);
    image rot = make_image(im.w, im.h, im.c);
    int x, y, c;

    if (rot.data == NULL) {
        return rot;
    }

    for (c = 0; c < im.c; ++c) {
        for (y = 0; y < im.h; ++y) {
            for (x = 0; x < im.w; ++x) {
                float rx = cos_r*(x-cx) - sin_r*(y-cy) + cx;
                float ry = sin_r*(x-cx) + cos_r*(y-cy) + cy;
                float val = bilinear_interpolate(im, rx, ry, c);
                set_pixel(rot, x, y, c, val);
            }
        }
    }
    return rot;
}
 
/*
 * Transpose a square planar three-plane (Y, U, V) frame in place and write
 * the result to ./output.yuv.
 *
 * Usage: program <input.yuv> <width> <height> <input_width> <input_height> <aspect>
 *
 * The input file must hold three consecutive planes of
 * input_width*input_height bytes each, and input_width must equal
 * input_height because transpose_image() only handles square images.
 * width*height bytes of each transposed plane are written, so width*height
 * may not exceed the input plane size. The sixth argument is still required
 * for command-line compatibility but is not used.
 *
 * Returns 0 on success and -1 on any error.
 */
int main(int argc, char **argv)
{
    FILE *file = NULL;
    unsigned char *p = NULL;
    image orig[3] = { { 0 } };
    int width, height, input_width, input_height;
    size_t in_plane, out_plane;
    long filelen;
    int32_t dump_len;
    int i;
    int status = -1;

    DBG("in");

    if (argc != 7) {
        DBG("input error, you should use this program like that: program xxxx.yuv width height input_width input_height aspect.");
        return -1;
    }

    width  = atoi(argv[2]);
    height = atoi(argv[3]);
    input_width  = atoi(argv[4]);
    input_height = atoi(argv[5]);

    if (width <= 0 || height <= 0 || input_width <= 0 || input_height <= 0) {
        DBG("input error, all dimensions must be positive integers.");
        return -1;
    }

    /* Checked here as well so the requirement survives builds with NDEBUG. */
    if (input_width != input_height) {
        DBG("input error, transpose needs a square image, got %dx%d.", input_width, input_height);
        return -1;
    }

    in_plane  = (size_t)input_width * (size_t)input_height;
    out_plane = (size_t)width * (size_t)height;

    /* Each transposed plane only holds in_plane bytes. */
    if (out_plane > in_plane) {
        DBG("input error, output size %dx%d is larger than input size %dx%d.",
            width, height, input_width, input_height);
        return -1;
    }

    DBG("scale to width %d, height %d.", width, height);

    file = fopen(argv[1], "rb");
    if (file == NULL) {
        DBG("fatal error, open file %s failure, please check the file status.", argv[1]);
        return -1;
    }

    filelen = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        filelen = ftell(file);
    }
    if (filelen < 0 || fseek(file, 0, SEEK_SET) != 0) {
        DBG("fatal error, can not determine the size of file %s.", argv[1]);
        goto cleanup;
    }

    DBG("file %s len %ld byets.", argv[1], filelen);

    /* Three full planes are copied out of the buffer below. */
    if ((size_t)filelen / 3 < in_plane) {
        DBG("file %s is too small for three %dx%d planes.", argv[1], input_width, input_height);
        goto cleanup;
    }

    p = malloc((size_t)filelen);
    if (p == NULL) {
        DBG("malloc buffer failure for %s len %ld.", argv[1], filelen);
        goto cleanup;
    }

    if (fread(p, 1, (size_t)filelen, file) != (size_t)filelen) {
        DBG("read file failure, size wrong.");
        goto cleanup;
    }

    fclose(file);
    file = NULL;

    /* Show the head and tail of the file without running off a short buffer. */
    dump_len = (filelen < 32) ? (int32_t)filelen : 32;
    dump_memory(p, dump_len);
    dump_memory(p + filelen - dump_len, dump_len);

    for (i = 0; i < 3; i++) {
        orig[i] = make_image(input_width, input_height, 1);
        if (orig[i].data == NULL) {
            DBG("malloc image plane %d failure.", i);
            goto cleanup;
        }
        memcpy(orig[i].data, p + (size_t)i * in_plane, in_plane);

        /* In place: the transposed plane is written straight from orig[i]. */
        transpose_image(orig[i]);
    }

    file = fopen("./output.yuv", "wb+");
    if (file == NULL) {
        DBG("fatal error, open output file failure, please check the file status.");
        goto cleanup;
    }

    for (i = 0; i < 3; i++) {
        if (fwrite(orig[i].data, 1, out_plane, file) != out_plane) {
            DBG("write file failure, size wrong.");
            goto cleanup;
        }
    }

    if (fflush(file) != 0) {
        DBG("flush output file failure.");
        goto cleanup;
    }
    (void)fsync(fileno(file));     /* best effort, as before */

    status = 0;

cleanup:
    if (file != NULL && fclose(file) != 0 && status == 0) {
        DBG("close file failure.");
        status = -1;
    }
    for (i = 0; i < 3; i++) {
        free_image(orig[i]);
    }
    free(p);

    return status;
}

ffmpeg -i ./dog_416x416.jpg -pix_fmt yuv444p dog_416x416.yuv444p.yuv
ffplay -pix_fmt yuv444p -f rawvideo -video_size 416x416 ./dog_416x416.yuv444p.yuv
gcc seu.c  -lm
./a.out dog_416x416.yuv444p.yuv 416 416 416 416 0
ffplay -pix_fmt yuv444p -f rawvideo -video_size 416x416 ./output.yuv

算法效果：

程序原理如下：

为什么可以使用GPU对计算进行加速？

在图形和很多其它应用中，大量数据具有良好的并行特征。这种数据并行特征使得处理器在计算过程中可以安全地对数据以一定的结构化方式同时进行操作，典型的例子就是矩阵乘法运算。由于其良好的数据并行特征，结果矩阵中每个元素的计算可以并行地进行，如下图所示：矩阵乘法的结果矩阵C中的每个元素，都可以由输入矩阵A的一个行向量和另一个输入矩阵B的一个列向量进行点积运算得到。C中每个元素的计算过程都可以独立进行，不存在依赖关系，因此具有良好的数据并行特征；同时C中每个元素的计算规则都是相同的，这个规则可以预先确定（GPU代码编程），所以每个元素的最终输出都需要经历相同次数的点积运算，这为并行编程带来了很好的实现可能性。