#include <cuda_runtime.h> static __device__ void affine_project(float* matrix, float x, float y, float* ox