Android nv21 缩放

missile111

已于 2024-12-10 10:30:00 修改

阅读量744

点赞数 24

文章标签： android

于 2024-11-28 10:58:33 首次发布

本文链接：https://blog.youkuaiyun.com/qq_35506618/article/details/144018730

版权

一、背景分析

1.1 背景：

出图后经过算法，大图是有算法的，但是缩略图没有。结果，缩略图跟大图不一致。

1.2目的：

让缩略图跟大图保持一致。

二、思路：

2.1使用bitmap做缩放

刚开始，想着，既然是Android平台，bitmap缩放是最基本的思路

    /**
     * Decodes the image file at {@code imagePath} as a down-sampled bitmap.
     *
     * The file is decoded twice: once with {@code inJustDecodeBounds} set so that
     * only the dimensions are read, then again with the sample size computed by
     * {@link #calculateInSampleSize} for the thumbnail size reported by
     * {@code ThumbnailHelper}.
     *
     * @param imagePath path of the image file to decode
     * @return whatever the second {@code BitmapFactory.decodeFile} call returns
     */
    public static Bitmap createThumbnail(String imagePath) {
        // Pass 1: bounds only, no pixel data is decoded.
        BitmapFactory.Options decodeOptions = new BitmapFactory.Options();
        decodeOptions.inJustDecodeBounds = true;
        BitmapFactory.decodeFile(imagePath, decodeOptions);

        // Pass 2: real decode with the computed sub-sampling factor.
        decodeOptions.inSampleSize = calculateInSampleSize(
                decodeOptions,
                ThumbnailHelper.getThumbnailWidth(),
                ThumbnailHelper.getThumbnailHeight());
        decodeOptions.inJustDecodeBounds = false;
        return BitmapFactory.decodeFile(imagePath, decodeOptions);
    }

    /**
     * Computes a power-of-two sample size for decoding an image.
     *
     * Starting from 1, the value is doubled for as long as half of the source
     * height and half of the source width, divided by the current value, are both
     * still at least the requested size.
     *
     * @param options   options whose {@code outWidth}/{@code outHeight} hold the source size
     * @param reqWidth  requested width
     * @param reqHeight requested height
     * @return 1 when the source already fits within the request, otherwise a power of two
     */
    public static int calculateInSampleSize(BitmapFactory.Options options, int reqWidth, int reqHeight) {
        final int srcHeight = options.outHeight;
        final int srcWidth = options.outWidth;

        // Nothing to shrink when the source is no larger than the request in both axes.
        if (srcHeight <= reqHeight && srcWidth <= reqWidth) {
            return 1;
        }

        final int halfSrcHeight = srcHeight / 2;
        final int halfSrcWidth = srcWidth / 2;
        int sample = 1;
        while (halfSrcHeight / sample >= reqHeight && halfSrcWidth / sample >= reqWidth) {
            sample <<= 1;
        }
        return sample;
    }

好家伙，耗时达到了100ms+，果断放弃

2.2 直接把缩略图删除，

这样平台会拿大图自己生成缩略图，但是如果把大图拿到其他平台（例如 Windows、Linux）上就可能有问题：看不到缩略图，预览显示的不是预期效果

2.3让缩略图跑一遍算法

首先，本来就有缩略图的配流，拿到缩略图的出图后，直接对缩略图跑一遍算法即可

总耗时7-10ms，

结果合格（1、控制在10ms以下；2、出图跟缩略图保持一致）

虽然能达到预期目的，但是总体下来是跑了两遍算法

2.4使用libyuv缩放nv21

既然使用libyuv，先了解libyuv的提供的接口数据类型，

在libyuv中没有直接对nv21数组直接进行缩放的接口，只有对I420处理的接口

2.41 链接I420的存储方式

具体逻辑是

nv21-->I420-->缩放I420-->转为jpeg

jni方法
 /**
  * JNI entry point for the NV21 to I420 conversion.
  *
  * NOTE(review): arguments are passed to native code by position. The native
  * implementation of this method names its first array parameter {@code src_}
  * and its second {@code dst_}, which is the reverse of the names declared here
  * ({@code output_image} first, {@code input_image} second). Confirm against the
  * call sites which array is really the NV21 source.
  *
  * @param output_image first byte array handed to the native function
  * @param input_image  second byte array handed to the native function
  * @param width        frame width in pixels
  * @param height       frame height in pixels
  * @return status code produced by the native implementation
  */
 public final static native int nv21ToI420(byte[] output_image, byte[] input_image, int width, int height);

jni C++ 方法实现 nv21转I420
extern "C" {
    /**
     * Converts an NV21 frame (Y plane followed by interleaved VU) into I420
     * (Y plane, U plane, V plane) using libyuv.
     *
     * @param src_   Java byte array holding the NV21 frame (read only)
     * @param dst_   Java byte array that receives the I420 frame
     * @param width  frame width in pixels
     * @param height frame height in pixels
     * @return the libyuv status code (0 on success), or -1 when the arguments are
     *         invalid, an array is too small for the frame, or the array
     *         contents cannot be obtained from the VM
     */
    JNIEXPORT jint JNICALL Java_xx_xx_xx_feature_setting_facebeauty_core_MorphoFaceBeauty_nv21ToI420(JNIEnv* env, jobject obj,
    jbyteArray src_, jbyteArray dst_, jint width,jint height){
    if (src_ == nullptr || dst_ == nullptr || width <= 0 || height <= 0) {
        return -1;
    }

    // Chroma planes are sub-sampled by two in each axis; round up so odd
    // dimensions are sized correctly (identical to width >> 1 for even sizes).
    const jint half_width = (width + 1) >> 1;
    const jint half_height = (height + 1) >> 1;

    // 64-bit arithmetic so very large frames cannot overflow the size check.
    const jlong y_size = static_cast<jlong>(width) * height;
    const jlong chroma_size = static_cast<jlong>(half_width) * half_height;
    const jlong frame_size = y_size + 2 * chroma_size;
    if (env->GetArrayLength(src_) < frame_size || env->GetArrayLength(dst_) < frame_size) {
        // Refuse to read or write past the end of either Java array.
        return -1;
    }

    jbyte *Src_data = env->GetByteArrayElements(src_, nullptr);
    if (Src_data == nullptr) {
        return -1;
    }
    jbyte *Dst_data = env->GetByteArrayElements(dst_, nullptr);
    if (Dst_data == nullptr) {
        env->ReleaseByteArrayElements(src_, Src_data, JNI_ABORT);
        return -1;
    }

    const jbyte *src_nv21_y_data = Src_data;
    const jbyte *src_nv21_vu_data = Src_data + y_size;

    jbyte *dst_i420_y_data = Dst_data;
    jbyte *dst_i420_u_data = Dst_data + y_size;
    jbyte *dst_i420_v_data = Dst_data + y_size + chroma_size;

    // Propagate the libyuv status instead of always reporting success.
    const jint ret = libyuv::NV21ToI420(
        reinterpret_cast<const uint8 *>(src_nv21_y_data), width,
        reinterpret_cast<const uint8 *>(src_nv21_vu_data), half_width * 2,
        reinterpret_cast<uint8 *>(dst_i420_y_data), width,
        reinterpret_cast<uint8 *>(dst_i420_u_data), half_width,
        reinterpret_cast<uint8 *>(dst_i420_v_data), half_width,
        width, height);

    // The source was only read: JNI_ABORT skips the pointless copy-back.
    env->ReleaseByteArrayElements(src_, Src_data, JNI_ABORT);
    // Mode 0 commits the converted frame to the Java array and frees the buffer.
    env->ReleaseByteArrayElements(dst_, Dst_data, 0);
    return ret;
}}

缩放I420
/**
 * Scales a planar I420 frame with nearest-neighbour sampling.
 *
 * The Y plane is sampled with the ratio srcWidth/dstWidth and
 * srcHeight/dstHeight; the U and V planes are sampled with the ratio of the
 * halved dimensions. The output holds dstWidth*dstHeight luma bytes followed
 * by two chroma planes of dstWidth*dstHeight/4 bytes each.
 *
 * @param i420Data  source frame (Y plane, then U plane, then V plane)
 * @param srcWidth  source width in pixels
 * @param srcHeight source height in pixels
 * @param dstWidth  output width in pixels
 * @param dstHeight output height in pixels
 * @return the scaled I420 frame
 */
public static byte[] scaleI420(byte[] i420Data, int srcWidth, int srcHeight, int dstWidth, int dstHeight) {
    final int srcLumaSize = srcWidth * srcHeight;
    final int srcChromaSize = srcLumaSize / 4;
    final int dstLumaSize = dstWidth * dstHeight;
    final int dstChromaSize = dstLumaSize / 4;
    final byte[] scaled = new byte[dstLumaSize + dstChromaSize * 2];

    // Luma plane.
    final float lumaStepX = (float) srcWidth / dstWidth;
    final float lumaStepY = (float) srcHeight / dstHeight;
    sampleI420Plane(i420Data, 0, srcWidth, scaled, 0, dstWidth, dstHeight, lumaStepX, lumaStepY);

    // Both chroma planes share the same geometry and sampling steps.
    final float chromaStepX = (float) (srcWidth / 2) / (dstWidth / 2);
    final float chromaStepY = (float) (srcHeight / 2) / (dstHeight / 2);
    final int srcChromaStride = srcWidth / 2;
    final int dstChromaWidth = dstWidth / 2;
    final int dstChromaHeight = dstHeight / 2;
    sampleI420Plane(i420Data, srcLumaSize, srcChromaStride,
            scaled, dstLumaSize, dstChromaWidth, dstChromaHeight, chromaStepX, chromaStepY);
    sampleI420Plane(i420Data, srcLumaSize + srcChromaSize, srcChromaStride,
            scaled, dstLumaSize + dstChromaSize, dstChromaWidth, dstChromaHeight, chromaStepX, chromaStepY);
    return scaled;
}

/** Nearest-neighbour copy of one plane; writes planeWidth*planeHeight bytes starting at dstOffset. */
private static void sampleI420Plane(byte[] src, int srcOffset, int srcStride,
                                    byte[] dst, int dstOffset, int planeWidth, int planeHeight,
                                    float stepX, float stepY) {
    int writePos = dstOffset;
    for (int row = 0; row < planeHeight; row++) {
        final int srcRow = (int) (row * stepY);
        for (int col = 0; col < planeWidth; col++) {
            final int srcCol = (int) (col * stepX);
            dst[writePos++] = src[srcOffset + srcRow * srcStride + srcCol];
        }
    }
}

I420转jpeg
    /**
     * Compresses a planar I420 frame to JPEG (quality 90).
     *
     * {@code YuvImage} is documented to compress NV21 and YUY2 data, so the
     * planar frame is first repacked into NV21 (Y plane followed by interleaved
     * V/U pairs) rather than being passed through as {@code YUV_420_888}.
     *
     * @param i420Data frame laid out as Y plane, U plane, V plane; each chroma
     *                 plane is expected to hold width*height/4 bytes
     * @param width    frame width in pixels
     * @param height   frame height in pixels
     * @return the JPEG bytes, or null when the conversion fails
     */
    public static byte[] i420ToJpeg(byte[] i420Data, int width, int height) {
        try {
            final int lumaSize = width * height;
            final int chromaSize = lumaSize / 4;

            // Repack I420 -> NV21: luma is copied as is, chroma becomes V,U,V,U...
            byte[] nv21 = new byte[lumaSize + chromaSize * 2];
            System.arraycopy(i420Data, 0, nv21, 0, lumaSize);
            final int uPlaneStart = lumaSize;
            final int vPlaneStart = lumaSize + chromaSize;
            for (int k = 0; k < chromaSize; k++) {
                nv21[lumaSize + 2 * k] = i420Data[vPlaneStart + k];
                nv21[lumaSize + 2 * k + 1] = i420Data[uPlaneStart + k];
            }

            YuvImage yuvImage = new YuvImage(nv21, ImageFormat.NV21, width, height, null);
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            if (!yuvImage.compressToJpeg(new Rect(0, 0, width, height), 90, outputStream)) {
                LogHelper.e(TAG, "i420ToJpeg compressToJpeg reported failure");
                return null;
            }
            return outputStream.toByteArray();
        } catch (Exception e) {
            LogHelper.e(TAG, "i420ToJpeg Error converting I420 to JPEG: " + e.getMessage());
            return null;
        }
    }

上述流程的耗时大概在8-15ms，主要耗时在第一步，耗时占95%（mt6789直出12.5M），剩余三个步骤加起来总耗时1-3ms，基本没有优化空间

此方案耗时有点大，改进为直接缩放nv21，如下

三、自己手搓

3.1 了解NV21 格式的存储方式

(0  ~  3) Y00  Y01  Y02  Y03  
(4  ~  7) Y10  Y11  Y12  Y13  
(8  ~ 11) Y20  Y21  Y22  Y23  
(12 ~ 15) Y30  Y31  Y32  Y33  

(16 ~ 19) V00  U00  V01  U01 
(20 ~ 23) V10  U10  V11  U11

3.2梳理nv21的缩放逻辑

3.3 代码逻辑

注意：这里的大图宽和高，与缩放后的宽和高之间的比例必须为8的整数倍，否则会因不是8的整数倍导致uv采样不准确，而出现出图紫绿相间或者杂乱的情况；

或者因为必须是8的整数倍，导致出图的size并不是我们想要的

    /**
     * Compresses an NV21 frame to JPEG (quality 90).
     *
     * @param i420Data the NV21 frame; despite the parameter name the buffer is
     *                 handed to {@code YuvImage} as {@code ImageFormat.NV21}
     * @param width    frame width in pixels
     * @param height   frame height in pixels
     * @return the JPEG bytes, or null when compression fails
     */
    public static byte[] nv21ToJpeg(byte[] i420Data, int width, int height) {
        try {
            YuvImage yuvImage = new YuvImage(i420Data, ImageFormat.NV21, width, height, null);
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            // compressToJpeg signals failure through its return value as well.
            if (!yuvImage.compressToJpeg(new Rect(0, 0, width, height), 90, outputStream)) {
                LogHelper.e(TAG, "nv21ToJpeg compressToJpeg reported failure");
                return null;
            }
            return outputStream.toByteArray();
        } catch (Exception e) {
            // The message now names this method and format instead of i420ToJpeg/I420.
            LogHelper.e(TAG, "nv21ToJpeg Error converting NV21 to JPEG: " + e.getMessage());
            return null;
        }
    }

    /**
     * Down-scales an NV21 frame with {@code nv21DoScale} and encodes the result
     * as JPEG with {@code nv21ToJpeg}.
     *
     * @return the JPEG bytes produced by {@code nv21ToJpeg}
     */
    public static byte[] scaleNv21(byte[] nv21Data, int srcWidth, int srcHeight, int dstWidth, int dstHeight) {
        return nv21ToJpeg(
                nv21DoScale(nv21Data, srcWidth, srcHeight, dstWidth, dstHeight),
                dstWidth,
                dstHeight);
    }

    /**
     * Down-scales an NV21 frame by point sampling with integer steps.
     *
     * The horizontal step is originalWidth / newWidth and the vertical step is
     * originalHeight / newHeight (integer division), so the source size should be
     * an integer multiple of the output size; any remainder is left unsampled at
     * the right/bottom edge.
     *
     * @param nv21Data       source frame (Y plane followed by interleaved VU)
     * @param originalWidth  source width in pixels
     * @param originalHeight source height in pixels
     * @param newWidth       output width in pixels
     * @param newHeight      output height in pixels
     * @return the scaled NV21 frame of newWidth * newHeight * 3 / 2 bytes
     */
    public static byte[] nv21DoScale(byte[] nv21Data, int originalWidth , int originalHeight, int newWidth, int newHeight) {
        byte[] scaledNv21Data = new byte[newWidth * newHeight * 3 / 2];
        // Each step is derived from its own axis; the previous code had the two
        // swapped, which only worked when both ratios happened to be equal.
        int scaleX = originalWidth / newWidth;
        int scaleY = originalHeight / newHeight;

        int yIndex = 0;
        for (int i = 0; i < newHeight; i++) {
            int srcRowStart = i * scaleY * originalWidth;
            for (int j = 0; j < newWidth; j++) {
                scaledNv21Data[yIndex++] = nv21Data[srcRowStart + j * scaleX];
            }
        }

        int originalYSize = originalWidth * originalHeight;
        int uvIndex = newWidth * newHeight;
        for (int i = 0; i < newHeight / 2; i++) {
            // A chroma row is originalWidth bytes wide (originalWidth / 2 VU pairs).
            int srcRowStart = originalYSize + i * scaleY * originalWidth;
            // j advances by two so j * scaleX is even and always lands on a V byte.
            for (int j = 0; j < newWidth; j += 2) {
                int srcPair = srcRowStart + j * scaleX;
                scaledNv21Data[uvIndex++] = nv21Data[srcPair];
                scaledNv21Data[uvIndex++] = nv21Data[srcPair + 1];
            }
        }
        return scaledNv21Data;
    }

继续改进，如下

3.4 裁切后缩放

具体逻辑如下，

分别用大图和小图的宽和高，做判断

裁切

    /**
     * Builds the EXIF thumbnail from the processed full-size NV21 frame: crops a
     * centred region whose size is (8 * k) times the thumbnail size, scales it
     * down and stores the JPEG in {@code mExif}.
     *
     * The thumbnail height is paired with the frame width and the thumbnail
     * width with the frame height, exactly as in the original code.
     *
     * @param nv21   full-size NV21 frame
     * @param width  frame width in pixels
     * @param height frame height in pixels
     */
    private void effectThumb(byte[] nv21, int width, int height){
        int thumbnailWidth = ThumbnailHelper.getThumbnailWidth();
        int thumbnailHeight = ThumbnailHelper.getThumbnailHeight();
        if (thumbnailWidth <= 0 || thumbnailHeight <= 0) {
            return;
        }

        // Largest k such that an (8k x thumbnail) region still fits in the frame.
        // The former search loop left k at 0 both when not even 8x fits and when
        // every candidate up to 9 fits, which produced an empty crop.
        int scale8 = Math.min(width / (8 * thumbnailHeight), height / (8 * thumbnailWidth));
        if (scale8 < 1) {
            // Frame is smaller than 8x the thumbnail: this path cannot scale it.
            return;
        }

        int newWidth = scale8 * 8 * thumbnailHeight;
        int newHeight = scale8 * 8 * thumbnailWidth;
        // Offsets that centre the crop region inside the frame.
        int cropLeft = (width - newWidth) / 2;
        int cropTop = (height - newHeight) / 2;
        byte[] clipNV21Bytes = clipNV21(nv21, width, height, cropLeft, cropTop, newWidth, newHeight);
        byte[] bytes = CameraUtil.scaleNv21(clipNV21Bytes, newWidth, newHeight, thumbnailHeight, thumbnailWidth);
        mExif.setCompressedThumbnail(bytes);
    }

    /**
     * Copies a rectangular region out of an NV21 frame.
     *
     * The offset and the size of the region are each rounded down to a multiple
     * of four before copying.
     *
     * @param src    source NV21 frame
     * @param width  source width in pixels
     * @param height source height in pixels
     * @param left   x offset of the region
     * @param top    y offset of the region
     * @param clip_w requested region width
     * @param clip_h requested region height
     * @return the cropped NV21 frame, or null when the region does not fit in the source
     */
    public static byte[] clipNV21(byte[] src, int width, int height, int left, int top, int clip_w, int clip_h) {
        final boolean fitsHorizontally = left <= width && left + clip_w <= width;
        final boolean fitsVertically = top <= height && top + clip_h <= height;
        if (!(fitsHorizontally && fitsVertically)) {
            return null;
        }

        // Round everything down to a multiple of four.
        final int alignedX = left / 4 * 4;
        final int alignedY = top / 4 * 4;
        final int outW = clip_w / 4 * 4;
        final int outH = clip_h / 4 * 4;

        final int lumaSize = outW * outH;
        final byte[] cropped = new byte[lumaSize + lumaSize / 2];

        // Base offsets of the chroma data; the per-row term is added in the loop.
        final int chromaDstBase = lumaSize - alignedY / 2 * outW;
        final int chromaSrcBase = width * height + alignedX;

        for (int r = 0; r < outH; r++) {
            final int srcRow = alignedY + r;
            // Luma row.
            System.arraycopy(src, srcRow * width + alignedX, cropped, r * outW, outW);
            // One chroma row for every even source row.
            if ((srcRow & 1) == 0) {
                final int chromaRow = srcRow >> 1;
                System.arraycopy(src, chromaSrcBase + chromaRow * width,
                        cropped, chromaDstBase + chromaRow * outW, outW);
            }
        }
        return cropped;
    }

缩放



    /**
     * Down-scales an NV21 frame with {@code doScaleNv21} and encodes the result
     * as JPEG.
     *
     * The scaled buffer is still NV21, so it is encoded with {@code nv21ToJpeg};
     * the previous call to {@code i420ToJpeg} labelled the data with the wrong
     * pixel format.
     *
     * @return the JPEG bytes, or null when encoding fails
     */
    public static byte[] scaleNv21(byte[] nv21Data, int srcWidth, int srcHeight, int dstWidth, int dstHeight) {
        byte[] scaledNv21Data = doScaleNv21(nv21Data, srcWidth, srcHeight, dstWidth, dstHeight);
        return nv21ToJpeg(scaledNv21Data, dstWidth, dstHeight);
    }

    /**
     * Down-scales an NV21 frame by point sampling with integer steps.
     *
     * The horizontal step is originalWidth / newWidth and the vertical step is
     * originalHeight / newHeight (integer division), so the source size should be
     * an integer multiple of the output size.
     *
     * @param nv21Data       source frame (Y plane followed by interleaved VU)
     * @param originalWidth  source width in pixels
     * @param originalHeight source height in pixels
     * @param newWidth       output width in pixels
     * @param newHeight      output height in pixels
     * @return the scaled NV21 frame of newWidth * newHeight * 3 / 2 bytes
     */
    public static byte[] doScaleNv21(byte[] nv21Data, int originalWidth , int originalHeight, int newWidth, int newHeight) {
        byte[] scaledNv21Data = new byte[newWidth * newHeight * 3 / 2];
        // Horizontal step from the widths, vertical step from the heights. They
        // used to be swapped, which broke any scale that is not uniform.
        int scaleX = originalWidth / newWidth;
        int scaleY = originalHeight / newHeight;

        int yIndex = 0;
        for (int i = 0; i < newHeight; i++) {
            int lumaRowStart = i * scaleY * originalWidth;
            for (int j = 0; j < newWidth; j++) {
                scaledNv21Data[yIndex++] = nv21Data[lumaRowStart + j * scaleX];
            }
        }

        int originalYSize = originalWidth * originalHeight;
        int uvIndex = newWidth * newHeight;
        for (int i = 0; i < newHeight / 2; i++) {
            // Chroma rows are originalWidth bytes wide, like luma rows.
            int chromaRowStart = originalYSize + i * scaleY * originalWidth;
            // Even j keeps j * scaleX even, so every read starts on a V byte.
            for (int j = 0; j < newWidth; j += 2) {
                int pairStart = chromaRowStart + j * scaleX;
                scaledNv21Data[uvIndex++] = nv21Data[pairStart];
                scaledNv21Data[uvIndex++] = nv21Data[pairStart + 1];
            }
        }
        return scaledNv21Data;
    }

转jpeg

    /**
     * Encodes an NV21 frame as a JPEG image with quality 90.
     *
     * @param i420Data NV21 pixel data (the name is historical; the buffer is
     *                 passed to {@code YuvImage} as {@code ImageFormat.NV21})
     * @param width    frame width in pixels
     * @param height   frame height in pixels
     * @return the encoded JPEG, or null if encoding fails
     */
    public static byte[] nv21ToJpeg(byte[] i420Data, int width, int height) {
        try {
            YuvImage yuvImage = new YuvImage(i420Data, ImageFormat.NV21, width, height, null);
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            boolean compressed = yuvImage.compressToJpeg(new Rect(0, 0, width, height), 90, outputStream);
            if (!compressed) {
                // Do not hand a possibly truncated stream back to the caller.
                LogHelper.e(TAG, "nv21ToJpeg compressToJpeg reported failure");
                return null;
            }
            return outputStream.toByteArray();
        } catch (Exception e) {
            // Log text corrected: it previously claimed to come from i420ToJpeg.
            LogHelper.e(TAG, "nv21ToJpeg Error converting NV21 to JPEG: " + e.getMessage());
            return null;
        }
    }

这样永远都是想要的目标宽高了

但是因为多了一个裁切的流程，用时高达6ms

所以总体流程的用时在7-10ms，虽然说可以接受，但如果把上述的裁剪、缩放、转jpeg的流程都放到C++里面，使用jni的方式去调用，应该能压缩到3ms

还是有优化空间，

3.5 裁切时缩放

对数据源的nv21数组，在裁切的时候直接缩放，不用如上图2.5.4一样，先裁切，再缩放

把上图中的如下两个步骤合并为一步

具体代码逻辑如下

    /**
     * Builds the EXIF thumbnail from the processed full-size NV21 frame by
     * cropping and scaling in a single pass, then stores the JPEG in
     * {@code mExif}.
     *
     * The thumbnail height is paired with the frame width and the thumbnail
     * width with the frame height, exactly as in the original code.
     *
     * @param nv21   full-size NV21 frame
     * @param width  frame width in pixels
     * @param height frame height in pixels
     */
    private void effectThumb(byte[] nv21, int width, int height){
        int thumbnailWidth = ThumbnailHelper.getThumbnailWidth();
        int thumbnailHeight = ThumbnailHelper.getThumbnailHeight();
        if (thumbnailWidth <= 0 || thumbnailHeight <= 0) {
            return;
        }

        // Largest k for which an (8k x thumbnail) region fits in the frame. The
        // former search loop yielded 0 when nothing fit and also when every
        // candidate up to 9 fit, and 0 later becomes a division by zero.
        int scale8 = Math.min(width / (8 * thumbnailHeight), height / (8 * thumbnailWidth));
        if (scale8 < 1) {
            return;
        }

        int newWidth = scale8 * 8 * thumbnailHeight;
        int newHeight = scale8 * 8 * thumbnailWidth;
        int corpHeight = (height - newHeight) / 2;
        int corpWidth = (width - newWidth) / 2;
        // NV21 chroma is stored as VU pairs shared by 2x2 pixels, so the crop
        // origin has to be even in both axes to keep the planes aligned.
        corpHeight = corpHeight >> 1 << 1;
        corpWidth = corpWidth >> 1 << 1;

        byte[] clipNV21Bytes = CameraUtil.clipAndScaleNV21(nv21, width, height, corpWidth, corpHeight, newWidth, newHeight, scale8 * 8);
        byte[] bytes = CameraUtil.NV21ToJpeg(clipNV21Bytes, thumbnailHeight, thumbnailWidth);
        mExif.setCompressedThumbnail(bytes);
    }

    /**
     * Turns an NV21 buffer into JPEG bytes (quality 90).
     *
     * @param i420Data buffer that is interpreted as NV21, regardless of its name
     * @param width    frame width in pixels
     * @param height   frame height in pixels
     * @return JPEG bytes on success, null on any failure
     */
    public static byte[] NV21ToJpeg(byte[] i420Data, int width, int height) {
        try {
            YuvImage yuvImage = new YuvImage(i420Data, ImageFormat.NV21, width, height, null);
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            if (yuvImage.compressToJpeg(new Rect(0, 0, width, height), 90, outputStream)) {
                return outputStream.toByteArray();
            }
            // A false result means the encoder did not produce a usable image.
            LogHelper.e(TAG, "NV21ToJpeg compressToJpeg reported failure");
            return null;
        } catch (Exception e) {
            // Message fixed to name this method and the NV21 format.
            LogHelper.e(TAG, "NV21ToJpeg Error converting NV21 to JPEG: " + e.getMessage());
            return null;
        }
    }
    /**
     * Crops a region out of an NV21 frame and down-scales it in one pass by
     * taking every {@code scale8}-th pixel.
     *
     * @param src    source NV21 frame (Y plane followed by interleaved VU)
     * @param width  source width in pixels
     * @param height source height in pixels
     * @param left   x offset of the region
     * @param top    y offset of the region
     * @param clip_w region width; the output width is clip_w / scale8
     * @param clip_h region height; the output height is clip_h / scale8
     * @param scale8 sampling step in pixels, must not be 0
     * @return the scaled NV21 frame
     */
    public static byte[] clipAndScaleNV21(byte[] src, int width, int height, int left, int top, int clip_w, int clip_h, int scale8) {
        int newHeight = clip_h / scale8;
        int newWidth = clip_w / scale8;

        byte[] scaledNv21Data = new byte[newWidth * newHeight * 3 / 2];
        int yIndex = 0;
        for (int i = 0; i < newHeight; i++) {
            int lumaRowStart = (top + i * scale8) * width + left;
            for (int j = 0; j < newWidth; j++) {
                scaledNv21Data[yIndex++] = src[lumaRowStart + j * scale8];
            }
        }

        // Chroma origin: one chroma row covers two luma rows, and a VU pair covers
        // two luma columns. Using (top >> 1) whole rows and an even column keeps
        // the reads on pair boundaries even when top or left is odd; for even
        // offsets this equals the previous top * width / 2 + left.
        int chromaOrigin = width * height + (top >> 1) * width + (left & ~1);
        int uvIndex = newWidth * newHeight;
        for (int i = 0; i < newHeight / 2; i++) {
            int chromaRowStart = chromaOrigin + i * scale8 * width;
            for (int j = 0; j < newWidth; j += 2) {
                int pairStart = chromaRowStart + j * scale8;
                scaledNv21Data[uvIndex++] = src[pairStart];
                scaledNv21Data[uvIndex++] = src[pairStart + 1];
            }
        }
        return scaledNv21Data;
    }

此流程可以把整体时间缩减到3ms以下，经测试一般缩小16倍一个13M的nv21，用时1ms，压测偶现2ms，压测100次，出现4次3ms，其余都是3ms以下

理论上说，如上代码放到C层，使用jni调用可以进一步压缩耗时，但是，目前已经压缩到3ms以下了，又觉得再放到C层有点鸡肋。

3.6偏移量优化

上面的采样都是从左上角采样，这样会出现偏差，应该是从正中间采样

优化的逻辑如下图

    /**
     * Builds the EXIF thumbnail from the processed full-size NV21 frame, sampling
     * from the middle of each source cell instead of its top-left corner, and
     * stores the JPEG in {@code mExif}.
     *
     * The thumbnail height is paired with the frame width and the thumbnail
     * width with the frame height, exactly as in the original code.
     *
     * @param nv21   full-size NV21 frame
     * @param width  frame width in pixels
     * @param height frame height in pixels
     */
    private void effectThumb(byte[] nv21,int width,int height){
        int thumbnailWidth = ThumbnailHelper.getThumbnailWidth();
        int thumbnailHeight = ThumbnailHelper.getThumbnailHeight();
        if (thumbnailWidth <= 0 || thumbnailHeight <= 0) {
            // Would otherwise divide by zero below.
            return;
        }

        int scale8 = Math.min((width / thumbnailHeight), (height / thumbnailWidth));
        if (scale8 < 1) {
            // Frame smaller than the thumbnail: a zero step would divide by zero
            // inside clipAndScaleNv21.
            return;
        }
        int newWidth = scale8 * thumbnailHeight;
        int newHeight = scale8 * thumbnailWidth;
        // Half a step, so that each sample is taken near the centre of its cell.
        int dsc = scale8 >> 1;
        int corpHeight = ((height - newHeight) >> 1) + dsc;
        int corpWidth = ((width - newWidth) >> 1) + dsc;

        // Round down to even so the crop origin stays aligned with the VU pairs.
        corpWidth = corpWidth >> 1 << 1;
        corpHeight = corpHeight >> 1 << 1;

        byte[] clipNV21Bytes = CameraUtil.clipAndScaleNv21(nv21,width,height,corpWidth,corpHeight,newWidth,newHeight,scale8);
        byte[] bytes = CameraUtil.nv21ToJpeg(clipNV21Bytes,thumbnailHeight,thumbnailWidth);
        mExif.setCompressedThumbnail(bytes);
    }

3.7 使用jni

在上面3.6的基础上，把clipAndScaleNv21方法的计算放到c++中，

如下：

/**
 * JNI implementation of clipAndScaleNv21JNI: crops a region out of an NV21
 * frame and down-scales it in one pass by taking every scale8-th pixel.
 *
 * @param src     NV21 frame (Y plane followed by interleaved VU), read only
 * @param width   source width in pixels
 * @param height  source height in pixels
 * @param left    x offset of the first sampled pixel
 * @param top     y offset of the first sampled pixel
 * @param clip_w  region width; the output width is clip_w / scale8
 * @param clip_h  region height; the output height is clip_h / scale8
 * @param scale8  sampling step in pixels
 * @return a new byte array holding the scaled NV21 frame, or nullptr (Java
 *         null) when the arguments are invalid, a sample would fall outside the
 *         source frame, or the VM cannot provide the array memory
 */
extern "C" JNIEXPORT jbyteArray JNICALL
Java_ImageProcessor_clipAndScaleNv21JNI(JNIEnv *env, jclass cls, jbyteArray src, jint width, jint height,
                                         jint left, jint top, jint clip_w, jint clip_h, jint scale8) {
    // scale8 <= 0 would divide by zero; negative offsets would read before the buffer.
    if (src == nullptr || width <= 0 || height <= 0 || left < 0 || top < 0 ||
        clip_w <= 0 || clip_h <= 0 || scale8 <= 0) {
        return nullptr;
    }

    // Output dimensions.
    const int newHeight = clip_h / scale8;
    const int newWidth = clip_w / scale8;
    // The chroma loop writes whole VU pairs, so an odd output width would write
    // past the end of the output buffer.
    if (newWidth <= 0 || newHeight <= 0 || (newWidth & 1) != 0) {
        return nullptr;
    }

    // Validate the farthest luma sample in 64-bit arithmetic. With an even
    // output width the chroma samples are then inside the frame as well.
    const jlong lastLumaRow = static_cast<jlong>(top) + static_cast<jlong>(newHeight - 1) * scale8;
    const jlong lastLumaCol = static_cast<jlong>(left) + static_cast<jlong>(newWidth - 1) * scale8;
    const jlong frameSize = static_cast<jlong>(width) * height * 3 / 2;
    if (lastLumaRow >= height || lastLumaCol >= width || env->GetArrayLength(src) < frameSize) {
        return nullptr;
    }

    // Byte length of the scaled NV21 frame.
    const int scaledDataLength = newWidth * newHeight * 3 / 2;
    jbyteArray scaledNv21Data = env->NewByteArray(scaledDataLength);
    if (scaledNv21Data == nullptr) {
        return nullptr;  // allocation failed; an OutOfMemoryError is pending
    }

    jbyte *srcData = env->GetByteArrayElements(src, nullptr);
    if (srcData == nullptr) {
        return nullptr;
    }
    jbyte *scaledData = env->GetByteArrayElements(scaledNv21Data, nullptr);
    if (scaledData == nullptr) {
        env->ReleaseByteArrayElements(src, srcData, JNI_ABORT);
        return nullptr;
    }

    // Y (luma) plane.
    int yIndex = 0;
    for (int i = 0; i < newHeight; i++) {
        const jbyte *lumaRow = srcData + (top + i * scale8) * width + left;
        for (int j = 0; j < newWidth; j++) {
            scaledData[yIndex++] = lumaRow[j * scale8];
        }
    }

    // VU (chroma) plane. One chroma row covers two luma rows and one VU pair
    // covers two luma columns, so start from (top >> 1) whole rows and an even
    // column; for even offsets this equals top * width / 2 + left.
    const jbyte *chromaOrigin = srcData + width * height + (top >> 1) * width + (left & ~1);
    int uvIndex = newWidth * newHeight;
    for (int i = 0; i < newHeight / 2; i++) {
        const jbyte *chromaRow = chromaOrigin + i * scale8 * width;
        for (int j = 0; j < newWidth; j += 2) {
            const jbyte *pair = chromaRow + j * scale8;
            scaledData[uvIndex++] = pair[0];
            scaledData[uvIndex++] = pair[1];
        }
    }

    // The source was only read, so nothing needs to be copied back.
    env->ReleaseByteArrayElements(src, srcData, JNI_ABORT);
    // Mode 0 commits the result to the Java array and frees the native buffer.
    env->ReleaseByteArrayElements(scaledNv21Data, scaledData, 0);

    return scaledNv21Data;
}

结果：耗时还是在1-3ms之间，并没有很大的效率提升（也没有多少提升的空间），好消息是重压环境下的时间也很稳定，就是1或者2ms

3.8对比

把3.6和3.7各执行一万次，做耗时对比

次数	java	jni
1	12918	9691
2	10241	10812
3	10992	11598
4	10921	11485
5	11509	10002
6	10774	11555
7	10427	11320
8	10698	11697
9	10292	11618
10	11026	10890
11	10481	10208
12	12150	11269
13	10894	9539
14	10792	10766
15	10594	10699
16	10930	11267
17	10381	10238
18	10504	10620
19	11392	11471
20	10047	10443
21	11145	9799
22	10094	11725
23	11747	11201
24	10941	10068
25	11834	10679
26	10497	11152