一、背景分析
1.1 背景:
拍照出图后,大图会经过算法处理,但缩略图没有经过算法处理,结果缩略图与大图的效果不一致。
1.2目的:
让缩略图跟大图保持一致。
二、思路:
2.1使用bitmap做缩放
刚开始,想着,既然是Android平台,bitmap缩放是最基本的思路
/**
 * Decodes the file at {@code imagePath} into a bitmap sub-sampled for the thumbnail.
 *
 * The file is decoded twice with the same options object: once with
 * {@code inJustDecodeBounds} enabled so the sample size can be derived from
 * the reported dimensions, and once more with that sample size applied.
 *
 * @param imagePath path of the image file to decode
 * @return whatever {@code BitmapFactory.decodeFile} returns for the second decode
 */
public static Bitmap createThumbnail(String imagePath) {
    final BitmapFactory.Options decodeOptions = new BitmapFactory.Options();

    // Pass 1: bounds-only decode, fills outWidth/outHeight on the options.
    decodeOptions.inJustDecodeBounds = true;
    BitmapFactory.decodeFile(imagePath, decodeOptions);

    // Pass 2: real decode using the sample size derived from the thumbnail target.
    final int targetWidth = ThumbnailHelper.getThumbnailWidth();
    final int targetHeight = ThumbnailHelper.getThumbnailHeight();
    decodeOptions.inSampleSize = calculateInSampleSize(decodeOptions, targetWidth, targetHeight);
    decodeOptions.inJustDecodeBounds = false;
    return BitmapFactory.decodeFile(imagePath, decodeOptions);
}
/**
 * Picks a power-of-two sample size for decoding an image down towards a requested size.
 *
 * Starting from 1, the sample size is doubled for as long as half of the
 * source height and half of the source width, divided by the current sample
 * size, both remain at least the requested dimensions.
 *
 * @param options   decode options whose {@code outWidth}/{@code outHeight} hold the source size
 * @param reqWidth  requested width
 * @param reqHeight requested height
 * @return 1 when the source already fits inside the request, otherwise the power of two found
 */
public static int calculateInSampleSize(BitmapFactory.Options options, int reqWidth, int reqHeight) {
    final int srcHeight = options.outHeight;
    final int srcWidth = options.outWidth;

    // Nothing to shrink when the source is no larger than the request.
    if (srcHeight <= reqHeight && srcWidth <= reqWidth) {
        return 1;
    }

    final int halfSrcHeight = srcHeight / 2;
    final int halfSrcWidth = srcWidth / 2;
    int sample = 1;
    while (halfSrcHeight / sample >= reqHeight && halfSrcWidth / sample >= reqWidth) {
        sample *= 2;
    }
    return sample;
}
好家伙,耗时达到了100ms+,果断放弃
2.2 直接把缩略图删除,
这样平台会拿大图自己生成缩略图,但是如果把大图拿到其他平台(例如Windows、Linux)上就可能有问题:看不到缩略图,或者预览显示的不是预期效果
2.3让缩略图跑一遍算法
首先,本来就有缩略图的配流,拿到缩略图的出图后,直接对缩略图跑一遍算法即可
总耗时7-10ms,
结果合格(1、控制在10ms以下;2、出图跟缩略图保持一致)
虽然能达到预期目的,但是总体下来是跑了两遍算法
2.4使用libyuv缩放nv21
既然使用libyuv,先了解libyuv提供的接口和数据类型。
在libyuv中没有直接对nv21数组进行缩放的接口,只有对I420进行缩放的接口
2.4.1 了解I420的存储方式
具体逻辑是
nv21-->I420-->缩放I420-->转为jpeg
jni方法
/**
 * Native NV21 to I420 conversion.
 *
 * NOTE(review): the parameter names here put the output array first and the
 * input array second, but the C++ implementation shown with this declaration
 * names its first array {@code src_} and its second {@code dst_}. Confirm
 * which order callers actually pass before relying on these names.
 *
 * @param output_image first byte array handed to the native code
 * @param input_image  second byte array handed to the native code
 * @param width        frame width in pixels
 * @param height       frame height in pixels
 * @return status code produced by the native implementation
 */
public final static native int nv21ToI420(byte[] output_image, byte[] input_image, int width, int height);
jni C++ 方法实现 nv21转I420
extern "C" {
/**
 * Converts an NV21 frame to I420 with libyuv::NV21ToI420.
 *
 * The first array (src_) is read as NV21: a width*height Y plane followed by
 * interleaved VU rows with a stride of `width` bytes. The second array (dst_)
 * receives I420: the Y plane, then the U plane, then the V plane, with both
 * chroma planes using a stride of width/2. The plane sizes are computed with
 * truncating shifts, i.e. the layout assumes even dimensions.
 *
 * NOTE(review): the Java declaration names its first parameter output_image
 * and its second input_image, while this function treats the first array as
 * the source. Confirm which order the callers really use.
 *
 * @return 0 on success; -1 when an array is null, a dimension is not
 *         positive, an array is shorter than one full frame, or the JVM
 *         could not provide the array elements.
 */
JNIEXPORT jint JNICALL Java_xx_xx_xx_feature_setting_facebeauty_core_MorphoFaceBeauty_nv21ToI420(JNIEnv* env, jobject obj,
        jbyteArray src_, jbyteArray dst_, jint width, jint height) {
    if (src_ == nullptr || dst_ == nullptr || width <= 0 || height <= 0) {
        return -1;
    }

    const jint y_size = width * height;
    const jint chroma_size = (width >> 1) * (height >> 1);
    const jint frame_size = y_size + 2 * chroma_size;
    // Reject undersized buffers before libyuv reads or writes past their end.
    if (env->GetArrayLength(src_) < frame_size || env->GetArrayLength(dst_) < frame_size) {
        return -1;
    }

    jbyte *src_data = env->GetByteArrayElements(src_, nullptr);
    if (src_data == nullptr) {
        return -1;
    }
    jbyte *dst_data = env->GetByteArrayElements(dst_, nullptr);
    if (dst_data == nullptr) {
        env->ReleaseByteArrayElements(src_, src_data, JNI_ABORT);
        return -1;
    }

    // Source planes (NV21).
    const uint8 *nv21_y = reinterpret_cast<const uint8 *>(src_data);
    const uint8 *nv21_vu = nv21_y + y_size;
    // Destination planes (I420).
    uint8 *i420_y = reinterpret_cast<uint8 *>(dst_data);
    uint8 *i420_u = i420_y + y_size;
    uint8 *i420_v = i420_u + chroma_size;

    libyuv::NV21ToI420(nv21_y, width,
                       nv21_vu, width,
                       i420_y, width,
                       i420_u, width >> 1,
                       i420_v, width >> 1,
                       width, height);

    // The source was only read: JNI_ABORT frees the buffer without copying it back.
    env->ReleaseByteArrayElements(src_, src_data, JNI_ABORT);
    // Mode 0 copies the converted frame back into the Java array and frees the buffer.
    env->ReleaseByteArrayElements(dst_, dst_data, 0);
    return 0;
}
}
缩放I420
/**
 * Nearest-neighbour scaling of an I420 frame (Y plane, then U plane, then V plane).
 *
 * Each destination sample is taken from the source sample at the truncated
 * position {@code (int) (index * srcSize / dstSize)}. The chroma planes are
 * treated as (width / 2) x (height / 2) images.
 *
 * @param i420Data  source frame
 * @param srcWidth  source width
 * @param srcHeight source height
 * @param dstWidth  destination width
 * @param dstHeight destination height
 * @return a new array of dstWidth * dstHeight + 2 * (dstWidth * dstHeight / 4) bytes
 */
public static byte[] scaleI420(byte[] i420Data, int srcWidth, int srcHeight, int dstWidth, int dstHeight) {
    final int srcLumaSize = srcWidth * srcHeight;
    final int srcChromaSize = srcLumaSize / 4;
    final int dstLumaSize = dstWidth * dstHeight;
    final int dstChromaSize = dstLumaSize / 4;
    final byte[] scaled = new byte[dstLumaSize + dstChromaSize * 2];

    // Y plane.
    final float lumaStepX = (float) srcWidth / dstWidth;
    final float lumaStepY = (float) srcHeight / dstHeight;
    sampleI420Plane(i420Data, 0, srcWidth, scaled, 0, dstWidth, dstHeight, lumaStepX, lumaStepY);

    // U plane, then V plane; both share the same chroma step.
    final float chromaStepX = (float) (srcWidth / 2) / (dstWidth / 2);
    final float chromaStepY = (float) (srcHeight / 2) / (dstHeight / 2);
    sampleI420Plane(i420Data, srcLumaSize, srcWidth / 2,
            scaled, dstLumaSize, dstWidth / 2, dstHeight / 2, chromaStepX, chromaStepY);
    sampleI420Plane(i420Data, srcLumaSize + srcChromaSize, srcWidth / 2,
            scaled, dstLumaSize + dstChromaSize, dstWidth / 2, dstHeight / 2, chromaStepX, chromaStepY);

    return scaled;
}

/** Copies one plane by nearest-neighbour sampling, writing the destination sequentially. */
private static void sampleI420Plane(byte[] src, int srcOffset, int srcStride,
        byte[] dst, int dstOffset, int dstCols, int dstRows, float stepX, float stepY) {
    int writePos = dstOffset;
    for (int row = 0; row < dstRows; row++) {
        final int srcRow = (int) (row * stepY);
        for (int col = 0; col < dstCols; col++) {
            final int srcCol = (int) (col * stepX);
            dst[writePos++] = src[srcOffset + srcRow * srcStride + srcCol];
        }
    }
}
I420转jpeg
/**
 * Compresses an I420 frame (Y plane, U plane, V plane) to JPEG at quality 90.
 *
 * The planar chroma is first repacked into NV21 (interleaved V,U) because
 * {@code YuvImage.compressToJpeg} does not encode planar 4:2:0 input; handing
 * it the I420 buffer tagged as {@code ImageFormat.YUV_420_888} ends in the
 * catch block below. The chroma plane size is taken as width * height / 4,
 * matching the I420 layout used by the scaler in this document.
 *
 * @param i420Data I420 frame
 * @param width    frame width
 * @param height   frame height
 * @return JPEG bytes, or {@code null} when conversion or compression fails
 */
public static byte[] i420ToJpeg(byte[] i420Data, int width, int height) {
    try {
        final int lumaSize = width * height;
        final int chromaSize = lumaSize / 4;

        // Repack I420 -> NV21: Y is copied as-is, chroma becomes V0 U0 V1 U1 ...
        final byte[] nv21 = new byte[lumaSize + chromaSize * 2];
        System.arraycopy(i420Data, 0, nv21, 0, lumaSize);
        final int uPlane = lumaSize;
        final int vPlane = lumaSize + chromaSize;
        for (int k = 0; k < chromaSize; k++) {
            nv21[lumaSize + 2 * k] = i420Data[vPlane + k];
            nv21[lumaSize + 2 * k + 1] = i420Data[uPlane + k];
        }

        YuvImage yuvImage = new YuvImage(nv21, ImageFormat.NV21, width, height, null);
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        if (!yuvImage.compressToJpeg(new Rect(0, 0, width, height), 90, outputStream)) {
            LogHelper.e(TAG, "i420ToJpeg compressToJpeg returned false");
            return null;
        }
        return outputStream.toByteArray();
    } catch (Exception e) {
        LogHelper.e(TAG, "i420ToJpeg Error converting I420 to JPEG: " + e.getMessage());
        return null;
    }
}
上述流程的耗时大概在8-15ms,主要耗时在第一步,耗时占95%(mt6789直出12.5M),剩余三个步骤加起来总耗时1-3ms,基本没有优化空间
此方案耗时有点大,改进为直接缩放nv21,如下
三、自己手搓
3.1 了解NV21 格式的存储方式
(0 ~ 3) Y00 Y01 Y02 Y03
(4 ~ 7) Y10 Y11 Y12 Y13
(8 ~ 11) Y20 Y21 Y22 Y23
(12 ~ 15) Y30 Y31 Y32 Y33
(16 ~ 19) V00 U00 V01 U01
(20 ~ 23) V10 U10 V11 U11
3.2梳理nv21的缩放逻辑
3.3 代码逻辑
注意:这里大图的宽和高,与缩放后的宽和高之间的比例必须为8的整数倍,否则会因为不是8的整数倍导致uv采样不准确,从而出现出图紫绿相间或者杂乱的情况;
或者因为必须是8的整数倍,导致出图的size并不是我们想要的
/**
 * Compresses an NV21 frame to JPEG at quality 90.
 *
 * @param i420Data frame bytes; despite the parameter name they are handed to
 *                 {@code YuvImage} as {@code ImageFormat.NV21}
 * @param width    frame width
 * @param height   frame height
 * @return JPEG bytes, or {@code null} when compression fails or throws
 */
public static byte[] nv21ToJpeg(byte[] i420Data, int width, int height) {
    try {
        YuvImage yuvImage = new YuvImage(i420Data, ImageFormat.NV21, width, height, null);
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        // compressToJpeg reports failure through its return value; do not hand
        // back a partial stream in that case.
        if (!yuvImage.compressToJpeg(new Rect(0, 0, width, height), 90, outputStream)) {
            LogHelper.e(TAG, "nv21ToJpeg compressToJpeg returned false");
            return null;
        }
        return outputStream.toByteArray();
    } catch (Exception e) {
        // The original message named i420ToJpeg/I420, which pointed at the wrong method.
        LogHelper.e(TAG, "nv21ToJpeg Error converting NV21 to JPEG: " + e.getMessage());
        return null;
    }
}
/**
 * Scales an NV21 frame with {@code nv21DoScale} and compresses the result with {@code nv21ToJpeg}.
 *
 * @param nv21Data  source NV21 frame
 * @param srcWidth  source width
 * @param srcHeight source height
 * @param dstWidth  destination width
 * @param dstHeight destination height
 * @return the value returned by {@code nv21ToJpeg} for the scaled frame
 */
public static byte[] scaleNv21(byte[] nv21Data, int srcWidth, int srcHeight, int dstWidth, int dstHeight) {
    final byte[] scaledNv21 = nv21DoScale(nv21Data, srcWidth, srcHeight, dstWidth, dstHeight);
    return nv21ToJpeg(scaledNv21, dstWidth, dstHeight);
}
/**
 * Down-samples an NV21 frame by integer steps, taking the top-left sample of every step.
 *
 * The horizontal step is originalWidth / newWidth and the vertical step is
 * originalHeight / newHeight (integer division). The original code derived
 * the horizontal step from the heights and the vertical step from the widths,
 * which only worked when both ratios happened to be equal.
 *
 * Chroma is copied as whole V,U pairs: the destination column index advances
 * by two, so the source column stays even and the V/U order is preserved.
 *
 * @param nv21Data       source NV21 frame
 * @param originalWidth  source width
 * @param originalHeight source height
 * @param newWidth       destination width
 * @param newHeight      destination height
 * @return a new array of newWidth * newHeight * 3 / 2 bytes
 */
public static byte[] nv21DoScale(byte[] nv21Data, int originalWidth , int originalHeight, int newWidth, int newHeight) {
    final byte[] scaledNv21Data = new byte[newWidth * newHeight * 3 / 2];
    final int scaleX = originalWidth / newWidth;    // columns skipped per output column
    final int scaleY = originalHeight / newHeight;  // rows skipped per output row

    // Y plane.
    int yIndex = 0;
    for (int i = 0; i < newHeight; i++) {
        final int srcRowStart = i * scaleY * originalWidth;
        for (int j = 0; j < newWidth; j++) {
            scaledNv21Data[yIndex++] = nv21Data[srcRowStart + j * scaleX];
        }
    }

    // Interleaved VU plane: each chroma row is originalWidth bytes wide.
    final int originalYSize = originalWidth * originalHeight;
    int uvIndex = newWidth * newHeight;
    for (int i = 0; i < newHeight / 2; i++) {
        final int srcRowStart = originalYSize + i * scaleY * originalWidth;
        for (int j = 0; j < newWidth; j += 2) {
            final int srcPair = srcRowStart + j * scaleX;
            scaledNv21Data[uvIndex++] = nv21Data[srcPair];
            scaledNv21Data[uvIndex++] = nv21Data[srcPair + 1];
        }
    }
    return scaledNv21Data;
}
继续改进,如下
3.4 裁切后缩放
具体逻辑如下,
分别用大图和小图的宽和高,做判断
裁切
/**
 * Builds the EXIF thumbnail from the processed NV21 frame: centre-crop, then scale, then JPEG.
 *
 * The crop size is the thumbnail size multiplied by the largest multiple of 8
 * that still fits inside the frame. Note that the frame width is paired with
 * {@code ThumbnailHelper.getThumbnailHeight()} and the frame height with
 * {@code getThumbnailWidth()} throughout.
 *
 * The factor is computed directly instead of with the original bounded loop,
 * which left it at 0 whenever the loop ran to completion (frame at least
 * 72 times the thumbnail size) and then failed on an empty crop.
 * Nothing is written when the frame is too small to fit even an 8x crop.
 *
 * @param nv21   processed full-size NV21 frame
 * @param width  frame width
 * @param height frame height
 */
private void effectThumb(byte[] nv21, int width, int height){
    final int thumbnailWidth = ThumbnailHelper.getThumbnailWidth();
    final int thumbnailHeight = ThumbnailHelper.getThumbnailHeight();
    if (thumbnailWidth <= 0 || thumbnailHeight <= 0) {
        return;
    }

    // Largest k with thumbnailWidth * k * 8 <= height and thumbnailHeight * k * 8 <= width.
    final int scale8 = Math.min(height / (thumbnailWidth * 8), width / (thumbnailHeight * 8));
    if (scale8 < 1) {
        return;
    }

    final int newWidth = scale8 * 8 * thumbnailHeight;
    final int newHeight = scale8 * 8 * thumbnailWidth;
    // Centre the crop window inside the frame.
    final int cropLeft = (width - newWidth) / 2;
    final int cropTop = (height - newHeight) / 2;

    byte[] clipNV21Bytes = clipNV21(nv21, width, height, cropLeft, cropTop, newWidth, newHeight);
    if (clipNV21Bytes == null) {
        // clipNV21 rejected the window; there is nothing to scale.
        return;
    }
    byte[] bytes = CameraUtil.scaleNv21(clipNV21Bytes, newWidth, newHeight, thumbnailHeight, thumbnailWidth);
    mExif.setCompressedThumbnail(bytes);
}
/**
 * Copies a rectangular window out of an NV21 frame.
 *
 * The window origin and size are each rounded down to a multiple of 4 before
 * copying. Luma rows are copied one by one; for every even source row the
 * matching interleaved VU row is copied as well.
 *
 * @param src    source NV21 frame
 * @param width  source width
 * @param height source height
 * @param left   requested window left edge
 * @param top    requested window top edge
 * @param clip_w requested window width
 * @param clip_h requested window height
 * @return the cropped NV21 frame, or {@code null} when the requested window
 *         starts or ends beyond the frame
 */
public static byte[] clipNV21(byte[] src, int width, int height, int left, int top, int clip_w, int clip_h) {
    final boolean outsideFrame = left > width
            || top > height
            || left + clip_w > width
            || top + clip_h > height;
    if (outsideFrame) {
        return null;
    }

    // Round the window down to a multiple of 4 on every edge.
    final int alignedLeft = left / 4 * 4;
    final int alignedTop = top / 4 * 4;
    final int outWidth = clip_w / 4 * 4;
    final int outHeight = clip_h / 4 * 4;

    final int lumaSize = outWidth * outHeight;
    final byte[] cropped = new byte[lumaSize + lumaSize / 2];

    // Chroma bases are biased so they can be indexed with the absolute source chroma row.
    final int srcChromaBase = width * height + alignedLeft;
    final int dstChromaBase = lumaSize - alignedTop / 2 * outWidth;

    for (int row = 0; row < outHeight; row++) {
        final int srcRow = alignedTop + row;
        // Luma row.
        System.arraycopy(src, srcRow * width + alignedLeft, cropped, row * outWidth, outWidth);
        // One VU row for every pair of luma rows.
        if ((srcRow & 1) == 0) {
            final int chromaRow = srcRow >> 1;
            System.arraycopy(src, srcChromaBase + chromaRow * width,
                    cropped, dstChromaBase + chromaRow * outWidth, outWidth);
        }
    }
    return cropped;
}
缩放
/**
 * Scales an NV21 frame with {@code doScaleNv21} and compresses the result to JPEG.
 *
 * The scaled buffer is still NV21, so it is encoded with {@code nv21ToJpeg};
 * the original routed it through {@code i420ToJpeg}, which expects planar
 * I420 input.
 *
 * @param nv21Data  source NV21 frame
 * @param srcWidth  source width
 * @param srcHeight source height
 * @param dstWidth  destination width
 * @param dstHeight destination height
 * @return the value returned by {@code nv21ToJpeg} for the scaled frame
 */
public static byte[] scaleNv21(byte[] nv21Data, int srcWidth, int srcHeight, int dstWidth, int dstHeight) {
    final byte[] scaledNv21 = doScaleNv21(nv21Data, srcWidth, srcHeight, dstWidth, dstHeight);
    return nv21ToJpeg(scaledNv21, dstWidth, dstHeight);
}
/**
 * Down-samples an NV21 frame by integer steps, taking the top-left sample of every step.
 *
 * The horizontal step is originalWidth / newWidth and the vertical step is
 * originalHeight / newHeight (integer division). The original code had the
 * two swapped, which only produced a correct image when both ratios were equal.
 *
 * Chroma is copied as whole V,U pairs: the destination column index advances
 * by two, so the source column stays even and the V/U order is preserved.
 *
 * @param nv21Data       source NV21 frame
 * @param originalWidth  source width
 * @param originalHeight source height
 * @param newWidth       destination width
 * @param newHeight      destination height
 * @return a new array of newWidth * newHeight * 3 / 2 bytes
 */
public static byte[] doScaleNv21(byte[] nv21Data, int originalWidth , int originalHeight, int newWidth, int newHeight) {
    final byte[] scaledNv21Data = new byte[newWidth * newHeight * 3 / 2];
    final int scaleX = originalWidth / newWidth;    // columns skipped per output column
    final int scaleY = originalHeight / newHeight;  // rows skipped per output row

    // Y plane.
    int yIndex = 0;
    for (int i = 0; i < newHeight; i++) {
        final int srcRowStart = i * scaleY * originalWidth;
        for (int j = 0; j < newWidth; j++) {
            scaledNv21Data[yIndex++] = nv21Data[srcRowStart + j * scaleX];
        }
    }

    // Interleaved VU plane: each chroma row is originalWidth bytes wide.
    final int originalYSize = originalWidth * originalHeight;
    int uvIndex = newWidth * newHeight;
    for (int i = 0; i < newHeight / 2; i++) {
        final int srcRowStart = originalYSize + i * scaleY * originalWidth;
        for (int j = 0; j < newWidth; j += 2) {
            final int srcPair = srcRowStart + j * scaleX;
            scaledNv21Data[uvIndex++] = nv21Data[srcPair];
            scaledNv21Data[uvIndex++] = nv21Data[srcPair + 1];
        }
    }
    return scaledNv21Data;
}
转jpeg
/**
 * Compresses an NV21 frame to JPEG at quality 90.
 *
 * @param i420Data frame bytes; despite the parameter name they are handed to
 *                 {@code YuvImage} as {@code ImageFormat.NV21}
 * @param width    frame width
 * @param height   frame height
 * @return JPEG bytes, or {@code null} when compression fails or throws
 */
public static byte[] nv21ToJpeg(byte[] i420Data, int width, int height) {
    try {
        YuvImage yuvImage = new YuvImage(i420Data, ImageFormat.NV21, width, height, null);
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        // compressToJpeg reports failure through its return value; do not hand
        // back a partial stream in that case.
        if (!yuvImage.compressToJpeg(new Rect(0, 0, width, height), 90, outputStream)) {
            LogHelper.e(TAG, "nv21ToJpeg compressToJpeg returned false");
            return null;
        }
        return outputStream.toByteArray();
    } catch (Exception e) {
        // The original message named i420ToJpeg/I420, which pointed at the wrong method.
        LogHelper.e(TAG, "nv21ToJpeg Error converting NV21 to JPEG: " + e.getMessage());
        return null;
    }
}
这样永远都是想要的目标宽高了
但是因为多了一个裁切的流程,用时高达6ms
所以总体流程的用时在7-10ms,虽然说可以接受,但如果把上述的裁剪、缩放、转jpeg的流程都放到C++里面,使用jni的方式去调用,应该能压缩到3ms
还是有优化空间,
3.5 裁切时缩放
对数据源的nv21数组,在裁切的时候直接缩放,不用像上面3.4那样先裁切、再缩放
把上图中的如下两个步骤合并为一步
具体代码逻辑如下
/**
 * Builds the EXIF thumbnail from the processed NV21 frame, cropping and scaling in a single pass.
 *
 * The crop size is the thumbnail size multiplied by the largest multiple of 8
 * that still fits inside the frame, and that same multiple is the sampling
 * step passed to {@code CameraUtil.clipAndScaleNV21}. Note that the frame
 * width is paired with {@code ThumbnailHelper.getThumbnailHeight()} and the
 * frame height with {@code getThumbnailWidth()} throughout.
 *
 * The factor is computed directly instead of with the original bounded loop,
 * which left it at 0 whenever the loop ran to completion (frame at least
 * 72 times the thumbnail size); a zero step then divided by zero in the scaler.
 * Nothing is written when the frame is too small to fit even an 8x crop.
 *
 * @param nv21   processed full-size NV21 frame
 * @param width  frame width
 * @param height frame height
 */
private void effectThumb(byte[] nv21, int width, int height){
    final int thumbnailWidth = ThumbnailHelper.getThumbnailWidth();
    final int thumbnailHeight = ThumbnailHelper.getThumbnailHeight();
    if (thumbnailWidth <= 0 || thumbnailHeight <= 0) {
        return;
    }

    // Largest k with thumbnailWidth * k * 8 <= height and thumbnailHeight * k * 8 <= width.
    final int scale8 = Math.min(height / (thumbnailWidth * 8), width / (thumbnailHeight * 8));
    if (scale8 < 1) {
        return;
    }

    final int newWidth = scale8 * 8 * thumbnailHeight;
    final int newHeight = scale8 * 8 * thumbnailWidth;
    // Centre the crop window inside the frame.
    final int corpHeight = (height - newHeight) / 2;
    final int corpWidth = (width - newWidth) / 2;

    byte[] clipNV21Bytes = CameraUtil.clipAndScaleNV21(nv21, width, height,
            corpWidth, corpHeight, newWidth, newHeight, scale8 * 8);
    byte[] bytes = CameraUtil.NV21ToJpeg(clipNV21Bytes, thumbnailHeight, thumbnailWidth);
    mExif.setCompressedThumbnail(bytes);
}
/**
 * Compresses an NV21 frame to JPEG at quality 90.
 *
 * @param i420Data frame bytes; despite the parameter name they are handed to
 *                 {@code YuvImage} as {@code ImageFormat.NV21}
 * @param width    frame width
 * @param height   frame height
 * @return JPEG bytes, or {@code null} when compression fails or throws
 */
public static byte[] NV21ToJpeg(byte[] i420Data, int width, int height) {
    try {
        YuvImage yuvImage = new YuvImage(i420Data, ImageFormat.NV21, width, height, null);
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        // compressToJpeg reports failure through its return value; do not hand
        // back a partial stream in that case.
        if (!yuvImage.compressToJpeg(new Rect(0, 0, width, height), 90, outputStream)) {
            LogHelper.e(TAG, "NV21ToJpeg compressToJpeg returned false");
            return null;
        }
        return outputStream.toByteArray();
    } catch (Exception e) {
        // The original message named i420ToJpeg/I420, which pointed at the wrong method.
        LogHelper.e(TAG, "NV21ToJpeg Error converting NV21 to JPEG: " + e.getMessage());
        return null;
    }
}
/**
 * Crops a window out of an NV21 frame and down-samples it in the same pass.
 *
 * Every {@code scale8}-th sample of the window is kept, starting at
 * ({@code left}, {@code top}); the output is (clip_w / scale8) x (clip_h / scale8).
 *
 * Chroma addressing is aligned to whole V,U pairs and whole chroma rows:
 * the chroma row is {@code top / 2} and the chroma column is {@code left}
 * rounded down to an even value. The original used {@code top * width / 2},
 * which lands in the middle of a chroma row when {@code top} is odd, and an
 * odd {@code left} swapped V and U. Even offsets behave exactly as before.
 *
 * @param src    source NV21 frame
 * @param width  source width
 * @param height source height
 * @param left   window left edge
 * @param top    window top edge
 * @param clip_w window width
 * @param clip_h window height
 * @param scale8 sampling step in pixels (must be non-zero)
 * @return a new NV21 array of newWidth * newHeight * 3 / 2 bytes
 */
public static byte[] clipAndScaleNV21(byte[] src, int width, int height, int left, int top, int clip_w, int clip_h, int scale8) {
    final int newHeight = clip_h / scale8;
    final int newWidth = clip_w / scale8;
    final byte[] scaledNv21Data = new byte[newWidth * newHeight * 3 / 2];

    // Y plane.
    int yIndex = 0;
    for (int i = 0; i < newHeight; i++) {
        final int srcRowStart = (top + i * scale8) * width + left;
        for (int j = 0; j < newWidth; j++) {
            scaledNv21Data[yIndex++] = src[srcRowStart + j * scale8];
        }
    }

    // Interleaved VU plane.
    final int originalYSize = width * height;
    final int chromaTop = top / 2;        // chroma row covering luma row `top`
    final int chromaLeft = left / 2 * 2;  // keep the column on a V,U pair boundary
    int uvIndex = newWidth * newHeight;
    for (int i = 0; i < newHeight / 2; i++) {
        final int srcRowStart = originalYSize + (chromaTop + i * scale8) * width + chromaLeft;
        for (int j = 0; j < newWidth; j += 2) {
            final int srcPair = srcRowStart + j * scale8;
            scaledNv21Data[uvIndex++] = src[srcPair];
            scaledNv21Data[uvIndex++] = src[srcPair + 1];
        }
    }
    return scaledNv21Data;
}
此流程可以把整体时间缩减到3ms以下,经测试一般缩小16倍一个13M的nv21,用时1ms,压测偶现2ms,压测100次,出现4次3ms,其余都是3ms以下
理论上说,如上代码放到C层,使用jni调用可以更进一步地压缩耗时,但是,目前已经压缩到3ms以下了,又觉得再放到C层有点鸡肋。
3.6偏移量优化
上面的采样都是从左上角采样,这样会出现偏差,应该是从正中间采样
优化的逻辑如下图
/**
 * Builds the EXIF thumbnail from the processed NV21 frame, sampling from the centre of each step.
 *
 * The sampling step is the largest integer factor by which the thumbnail fits
 * inside the frame. The crop origin is the centred window origin plus half a
 * step, so each kept sample sits near the middle of the area it represents
 * rather than at its top-left corner. Both offsets are then rounded down to
 * an even value. Note that the frame width is paired with
 * {@code ThumbnailHelper.getThumbnailHeight()} and the frame height with
 * {@code getThumbnailWidth()} throughout.
 *
 * Nothing is written when the frame is smaller than the thumbnail: the step
 * would be 0, which the scaler divides by.
 *
 * @param nv21   processed full-size NV21 frame
 * @param width  frame width
 * @param height frame height
 */
private void effectThumb(byte[] nv21,int width,int height){
    final int thumbnailWidth = ThumbnailHelper.getThumbnailWidth();
    final int thumbnailHeight = ThumbnailHelper.getThumbnailHeight();
    if (thumbnailWidth <= 0 || thumbnailHeight <= 0) {
        return;
    }

    final int scale8 = Math.min((width / thumbnailHeight), (height / thumbnailWidth));
    if (scale8 < 1) {
        return;
    }

    final int newWidth = scale8 * thumbnailHeight;
    final int newHeight = scale8 * thumbnailWidth;

    // Shift by half a step so sampling starts in the middle of the first cell.
    final int dsc = scale8 >> 1;
    int corpHeight = ((height - newHeight) >> 1) + dsc;
    int corpWidth = ((width - newWidth) >> 1) + dsc;
    // Round both offsets down to an even value.
    corpWidth = corpWidth >> 1 << 1;
    corpHeight = corpHeight >> 1 << 1;

    byte[] clipNV21Bytes = CameraUtil.clipAndScaleNv21(nv21, width, height,
            corpWidth, corpHeight, newWidth, newHeight, scale8);
    byte[] bytes = CameraUtil.nv21ToJpeg(clipNV21Bytes, thumbnailHeight, thumbnailWidth);
    mExif.setCompressedThumbnail(bytes);
}
3.7 使用jni
在上面3.6的基础上,把clipAndScaleNv21方法的计算放到c++中,
如下:
// JNI implementation of clipAndScaleNv21JNI: crops a window out of an NV21
// frame and down-samples it in the same pass.
//
// Every scale8-th sample of the window is kept, starting at (left, top); the
// result is a new NV21 byte array of (clip_w / scale8) x (clip_h / scale8).
// Chroma is addressed by whole chroma rows (top / 2) and whole V,U pairs
// (left rounded down to even), so odd offsets no longer read from the middle
// of a chroma row or swap V and U.
//
// Returns nullptr (null on the Java side) when an argument is invalid, when
// any sample would fall outside the source array, or when the JVM cannot
// provide the arrays. The original performed no checks, so a zero scale8
// divided by zero and a bad window read out of bounds in native memory.
extern "C" JNIEXPORT jbyteArray JNICALL
Java_ImageProcessor_clipAndScaleNv21JNI(JNIEnv *env, jclass cls, jbyteArray src, jint width, jint height,
                                        jint left, jint top, jint clip_w, jint clip_h, jint scale8) {
    if (src == nullptr || width <= 0 || height <= 0 || left < 0 || top < 0 ||
        clip_w <= 0 || clip_h <= 0 || scale8 <= 0) {
        return nullptr;
    }

    // Output dimensions.
    const int newHeight = clip_h / scale8;
    const int newWidth = clip_w / scale8;
    if (newWidth <= 0 || newHeight <= 0) {
        return nullptr;
    }

    const int chromaTop = top / 2;                // chroma row covering luma row `top`
    const int chromaLeft = left & ~1;             // keep the column on a V,U pair boundary
    const int chromaRows = newHeight / 2;
    const int lastPairCol = (newWidth - 1) & ~1;  // largest even j below newWidth

    // Verify the highest index each loop will read before touching the data.
    const long long srcLength = env->GetArrayLength(src);
    const long long lumaSize = static_cast<long long>(width) * height;
    const long long maxLumaIndex =
            (static_cast<long long>(top) + static_cast<long long>(newHeight - 1) * scale8) * width +
            left + static_cast<long long>(newWidth - 1) * scale8;
    long long maxChromaIndex = -1;
    if (chromaRows > 0) {
        maxChromaIndex = lumaSize +
                (static_cast<long long>(chromaTop) + static_cast<long long>(chromaRows - 1) * scale8) * width +
                chromaLeft + static_cast<long long>(lastPairCol) * scale8 + 1;
    }
    if (maxLumaIndex >= srcLength || maxChromaIndex >= srcLength) {
        return nullptr;
    }

    // Size of the scaled NV21 frame.
    const int scaledDataLength = newWidth * newHeight * 3 / 2;

    jbyte *srcData = env->GetByteArrayElements(src, nullptr);
    if (srcData == nullptr) {
        return nullptr;
    }
    jbyteArray scaledNv21Data = env->NewByteArray(scaledDataLength);
    if (scaledNv21Data == nullptr) {
        env->ReleaseByteArrayElements(src, srcData, JNI_ABORT);
        return nullptr;
    }
    jbyte *scaledData = env->GetByteArrayElements(scaledNv21Data, nullptr);
    if (scaledData == nullptr) {
        env->ReleaseByteArrayElements(src, srcData, JNI_ABORT);
        return nullptr;
    }

    // Y (luma) plane.
    int yIndex = 0;
    for (int i = 0; i < newHeight; i++) {
        const int srcRowStart = (top + i * scale8) * width + left;
        for (int j = 0; j < newWidth; j++) {
            scaledData[yIndex++] = srcData[srcRowStart + j * scale8];
        }
    }

    // Interleaved VU plane.
    const int originalYSize = width * height;
    int uvIndex = newWidth * newHeight;
    for (int i = 0; i < chromaRows; i++) {
        const int srcRowStart = originalYSize + (chromaTop + i * scale8) * width + chromaLeft;
        // The second condition stops an odd newWidth from writing past the output array.
        for (int j = 0; j < newWidth && uvIndex + 1 < scaledDataLength; j += 2) {
            const int srcPair = srcRowStart + j * scale8;
            scaledData[uvIndex++] = srcData[srcPair];
            scaledData[uvIndex++] = srcData[srcPair + 1];
        }
    }

    // The source was only read: JNI_ABORT frees the buffer without copying it back.
    env->ReleaseByteArrayElements(src, srcData, JNI_ABORT);
    // Mode 0 copies the result into the Java array and frees the buffer.
    env->ReleaseByteArrayElements(scaledNv21Data, scaledData, 0);
    return scaledNv21Data;
}
结果:耗时还是在1-3ms之间,并没有很大的效率提升(也没有多少提升的空间),好消息是重压环境下的耗时也很稳定,就是1ms或者2ms
3.8对比
把3.6和3.7做一万次做耗时对比
次数 | java | jni |
1 | 12918 | 9691 |
2 | 10241 | 10812 |
3 | 10992 | 11598 |
4 | 10921 | 11485 |
5 | 11509 | 10002 |
6 | 10774 | 11555 |
7 | 10427 | 11320 |
8 | 10698 | 11697 |
9 | 10292 | 11618 |
10 | 11026 | 10890 |
11 | 10481 | 10208 |
12 | 12150 | 11269 |
13 | 10894 | 9539 |
14 | 10792 | 10766 |
15 | 10594 | 10699 |
16 | 10930 | 11267 |
17 | 10381 | 10238 |
18 | 10504 | 10620 |
19 | 11392 | 11471 |
20 | 10047 | 10443 |
21 | 11145 | 9799 |
22 | 10094 | 11725 |
23 | 11747 | 11201 |
24 | 10941 | 10068 |
25 | 11834 | 10679 |
26 | 10497 | 11152 |
整理成表格形式
说明:看起来并没有很大的差距,如上 没有对数据做整理,理论上应该是做20组,取每组的平方差做折线图,会更直观。
参考文章:
https://zhuanlan.zhihu.com/p/683452602
YUV420 总结 (YU12、YV12、NV12 和 NV21)-CSDN博客
https://blog.csdn.net/lakebobo/article/details/88762978