首先贴出原文链接
Shape-adaptive DCT with block-based DC separation and /spl Delta/DC correction | IEEE Journals & Magazine | IEEE Xplore
由于这篇论文发表于1998年(比我年纪还大),在中文互联网上的讨论比较少,但又是图像压缩中比较重要的技术,本篇博客对此进行补充。
在讲SA-DCT之前先回顾DCT变换,
https://zhuanlan.zhihu.com/p/85299446。
DCT直接用于图像压缩有一个问题,如果一张图存在前景和后景,只有前景需要压缩,那么DCT无法处理值的空缺和非矩形数据的压缩。
形状自适应离散余弦变换的流程如上,首先以图中8x8的像素图片为例,对前景区域的每一列进行平移,使其顶部对齐,对每一列按数据长度进行一次离散余弦变换;接着平移所有变换过后的系数数据(系数的长度和数据的长度是相等的)左对齐,对每一行进行离散余弦变换得到变换结果。在解码时需要传输原有的前景轮廓图,才能恢复出时域数据。
这个真的写了不止一天,如果对你有帮助,希望能得到你的点赞收藏。
"""
Author: Zoratt
Email: 1720049199@qq.com
Date: 2024-7-26
Description: SADCT implementation based on the paper published in 1995
"""
import numpy as np
import matplotlib.pyplot as plt
class SADCT_encoder:
def __init__(self,cth=0) -> None:
self.cth=cth
self.byteVector=None
self.coeffVector=[]
self.raw_coeff_matrix=None
def encode(self,data,mask):
self.raw_coeff_matrix,final_mask=SADCT(data,mask)
truncated_matrix=self.raw_coeff_matrix[:np.sum(final_mask[:,0]),:np.sum(final_mask[0,:])]
for i in range(truncated_matrix.shape[0]):
for j in reversed(range(truncated_matrix.shape[1])):
if abs(truncated_matrix[i,j])<self.cth:
truncated_matrix[i,j]=0
else:
break
truncated_matrix=np.round(truncated_matrix,3)#模拟量化
value_mask=np.where(truncated_matrix!=0,1,0).astype(np.uint8)
self.byteVector=np.packbits(value_mask)
for i in range(truncated_matrix.shape[0]):
for j in range(truncated_matrix.shape[1]):
if value_mask[i,j]:
self.coeffVector.append(truncated_matrix[i,j])
self.coeffVector=np.array(self.coeffVector).astype(np.float32)
print("valid data",np.sum(value_mask))
return truncated_matrix
def dct(signal):
N=len(signal)
DCTN=np.zeros((N,N))
k=np.arange(N)
for p in range(N):
if p==0:
b=np.sqrt(0.5)
else:
b=1
DCTN[p,:]=b*np.cos(p*(k+0.5)*np.pi/N)
c=2/N*DCTN.dot(signal)
return c
def idct(coeff):
N=len(coeff)
IDCTN=np.zeros((N,N))
k=np.arange(N)
for p in range(N):
if p==0:
b=np.sqrt(0.5)
else:
b=1
IDCTN[p,:]=b*np.cos(p*(k+0.5)*np.pi/N)
x=IDCTN.T.dot(coeff)
return x
def SADCT(data,mask):
"""
data:2D numpy array
mask:2D numpy array,mask of valid data
rtype:2D numpy array and 2D numpy array(the final mask is the valid coeff)
"""
temp_img=np.zeros_like(data)
#将每一列平移到最顶部
for j in range(data.shape[1]):
l=0
for i in range(data.shape[0]):
if mask[i,j]:
temp_img[l,j]=data[i,j]
l+=1
if l!=0:
temp_img[:l,j]=dct(temp_img[:l,j])
mask=np.where(temp_img!=0,1,0)
#print("dct1",temp_img)
#将每一行平移到最左边
temp_img_new=np.zeros_like(temp_img)
for i in range(data.shape[0]):
l=0
for j in range(data.shape[1]):
if mask[i,j]:
temp_img_new[i,l]=temp_img[i,j]
l+=1
if l!=0:
temp_img_new[i,:l]=dct(temp_img_new[i,:l])
mask=np.where(temp_img_new!=0,1,0)
#print("dct2",temp_img_new)
return temp_img_new,mask
def SAIDCT(data,mask):
"""
data:2D numpy array,shape may not be the same as original data
mask:mask of data original position
rtype:2D numpy array
"""
coeff=np.zeros_like(mask).astype(np.float64)
#获得DCT变换后最终的遮罩
_mask=np.zeros_like(mask)#最终遮罩
_mask1=np.zeros_like(mask)#列上移后的遮罩
index_mask1=np.zeros((mask.shape[0],mask.shape[1],2))#列上移后原始位置的坐标
for j in range(mask.shape[1]):
l=0
for i in range(mask.shape[0]):
if mask[i,j]:
_mask1[l,j]=1
index_mask1[l,j]=[i,j]
l+=1
index_mask=np.zeros((mask.shape[0],mask.shape[1],2))#行左移后原始位置的坐标
index_mask2=np.zeros((mask.shape[0],mask.shape[1],2))#行左移后列上移时的坐标
for i in range(mask.shape[0]):
l=0
for j in range(mask.shape[1]):
if _mask1[i,j]:
_mask[i,l]=1
index_mask[i,l]=index_mask1[i,j]
index_mask2[i,l]=[i,j]
l+=1
# print("_mask",_mask)
# print("index_mask",index_mask)
# print("index_mask1",index_mask1)
# print("index_mask2",index_mask2)
#数据有可能被截断,将其填充到原始尺寸
coeff[:data.shape[0],:data.shape[1]]=data
#将每一行逆变换
temp_coeff=np.zeros_like(coeff).astype(np.float64)
for i in range(mask.shape[0]):
valid_num=np.sum(_mask[i,:])
if valid_num!=0:
temp_coeff[i,:valid_num]=idct(coeff[i,:valid_num])
coeff=temp_coeff
#将每一行逆变换后的数据填充到列位置
temp_coeff=np.zeros_like(coeff).astype(np.float64)
for i in range(mask.shape[0]):
for j in range(mask.shape[1]):
if _mask[i,j]:
temp_coeff[int(index_mask2[i,j][0]),int(index_mask2[i,j][1])]=coeff[i,j]
coeff=temp_coeff
#将每一列逆变换
temp_coeff=np.zeros_like(coeff).astype(np.float64)
for j in range(mask.shape[1]):
valid_num=np.sum(_mask1[:,j])
if valid_num!=0:
temp_coeff[:valid_num,j]=idct(coeff[:valid_num,j])
coeff=temp_coeff
#将每一列逆变换后的数据填充到列位置
#print("#将每一列逆变换后的数据填充到列位置",coeff)
real_result=np.zeros_like(mask).astype(np.float64)
for i in range(mask.shape[0]):
for j in range(mask.shape[1]):
if _mask1[i,j]:
real_result[int(index_mask1[i,j][0]),int(index_mask1[i,j][1])]=coeff[i,j]
return real_result
if __name__ == '__main__':
# 生成测试数据
#np.random.seed(0)
scale=400
data = np.random.rand(scale, scale)*245
#data=np.ones((scale,scale))*255
mask = np.random.choice([0, 1], size=(scale, scale), p=[0.5, 0.5])
# print("data",data)
# print("mask",mask)
encoder=SADCT_encoder(0.0)
print("-"*8+"encoder"+"-"*8)
img=encoder.encode(data,mask)
print(img.shape)
# 应用SADCT
coeff,_ = SADCT(data, mask)
#截去一些coefficient从而压缩
# transformed_data=coeff[0,:9]
# transformed_data=np.expand_dims(transformed_data,axis=0)
truncate_num=scale
transformed_data=coeff[:truncate_num,:truncate_num]
ql=0.000001
transformed_data=(transformed_data/ql).astype(np.int32)*ql
np.set_printoptions(suppress=True,precision=3)
print("transformed_data",transformed_data)
np.savetxt('array.csv', transformed_data, delimiter=',', fmt='%.4f')
print("truncated data",img)
# 应用SAIDCT
print("-"*8+"SAIDCT"+"-"*8)
restored_data = SAIDCT(transformed_data, mask)
print("data:",data*mask)
print("restored_data:",restored_data)
print(np.abs(data*mask-restored_data).mean())
# 展示结果
fig, ax = plt.subplots(1, 4, figsize=(15, 5))
ax[0].imshow(data*mask, cmap='gray', vmin=0, vmax=255)
ax[0].set_title('Original Data')
ax[1].imshow(transformed_data, cmap='gray',vmin=-4, vmax=4)
ax[1].set_title('Transformed Data (SADCT)')
ax[2].imshow(img, cmap='gray', vmin=-4, vmax=4)
ax[2].set_title('Truncated Data (SADCT)')
ax[3].imshow(restored_data, cmap='gray', vmin=0, vmax=255)
ax[3].set_title('Restored Data (SAIDCT)')
plt.show()