背景
很多开放平台都使用Webhook的方式进行外部系统数据同步或者通知,对于Webhook请求的对外发送不进行重试显然有点说不过去。使用简单的while加一个条件去重试N次好像达不到什么效果,只能说是有重试而已,而使用消息队列中间件好像依赖又太重,于是索性自己动手写一个抽象泛型延迟重试队列组件。
1. 实现思路及考虑
- 按照重试间隔(RetryDelaySeconds)和重试持续结束时间(RetryTask.retryEndTime)进行重试,以支持较长的重试周期(例如,保证可跨天、持续24小时的重试);
- 通过限制延迟队列的容量上限,避免重试任务积压较多时发生OOM,同时使落盘和恢复的时间不至于太长;
- 重试队列的消费使用由重试任务哈希值(RetryTask.retryHash)选出的线程来执行,以避免不同业务之间的重试任务互相影响,并保证相同业务(相同哈希值)的重试任务串行执行;
- 如果接受重试任务不落盘(程序意外重启时直接丢弃没有达到最大重试持续时间的任务),则 flushTask() 和 recoverTask() 这两个抽象方法只需提供空实现即可(参见下文的 FooRetryQueue);
2. 代码实现
2.1 重试队列组件代码
2.1.1 RetryTask
package retry;
import lombok.Data;
import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;
/**
 * Base class for tasks stored in a delay queue by {@code RetryQueue}.
 * <p>
 * Getters, setters, {@code equals}, {@code hashCode} and {@code toString} are generated by
 * Lombok's {@code @Data}.
 *
 * @author chenx
 */
@Data
public class RetryTask implements Delayed {
    /**
     * Absolute time (epoch milliseconds) at which the task becomes due; compared against
     * {@link System#currentTimeMillis()} in {@link #getDelay(TimeUnit)}.
     */
    protected long expireTime;

    /**
     * Hash used by {@code RetryQueue} to pick the single-thread executor that processes the task.
     */
    protected int retryHash;

    /**
     * Absolute time (epoch milliseconds) after which {@code RetryQueue.enqueue} discards the task.
     */
    protected long retryEndTime;

    /**
     * Returns the remaining delay until {@link #expireTime}, converted to the given unit.
     *
     * @param unit the unit of the returned delay
     * @return the remaining delay; zero or negative once the task is due
     */
    @Override
    public long getDelay(TimeUnit unit) {
        long remainingMillis = this.expireTime - System.currentTimeMillis();
        return unit.convert(remainingMillis, TimeUnit.MILLISECONDS);
    }

    /**
     * Orders tasks by due time, earliest first.
     * <p>
     * Two {@code RetryTask}s are compared by {@link #expireTime} directly. Comparing via two
     * separate {@code getDelay()} calls reads the clock twice, so a clock tick between the reads
     * could make tasks with the same due time compare as unequal in both directions.
     *
     * @param other the element to compare with
     * @return a negative, zero or positive value as this task is due before, together with,
     *         or after {@code other}
     */
    @Override
    public int compareTo(Delayed other) {
        if (other == this) {
            return 0;
        }
        if (other instanceof RetryTask) {
            return Long.compare(this.expireTime, ((RetryTask) other).expireTime);
        }
        // Foreign Delayed implementation: fall back to comparing the remaining delays.
        return Long.compare(this.getDelay(TimeUnit.MILLISECONDS), other.getDelay(TimeUnit.MILLISECONDS));
    }
}
2.1.2 RetryQueue
package retry;
import org.apache.commons.collections.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Abstract, generic delayed-retry queue backed by a {@link DelayQueue}.
 * <ol>
 * <li>Tasks become due at {@code RetryTask.expireTime}; tasks whose {@code RetryTask.retryEndTime}
 * has already passed are discarded on {@link #enqueue(RetryTask)}.</li>
 * <li>The queue size is capped at {@code maxQueueSize}; tasks offered beyond the cap are dropped
 * with a warning.</li>
 * <li>A single take thread moves due tasks to one of {@code processExecutorPoolSize} single-thread
 * executors selected by {@code RetryTask.retryHash}, so tasks with the same hash run serially on
 * the same thread.</li>
 * </ol>
 * Known limitations visible in this class: the executors are shut down by {@link #shutdown()} and
 * never recreated, so an instance cannot be restarted; tasks that were already handed to a process
 * executor but had not started when {@link #shutdown()} ran are discarded by {@code shutdownNow()}
 * and are not passed to {@link #flushTask(List)}.
 *
 * @param <T> the concrete task type
 * @author chenx
 */
public abstract class RetryQueue<T extends RetryTask> {
    private static final Logger LOGGER = LoggerFactory.getLogger(RetryQueue.class);

    private final int maxQueueSize;
    private final int processExecutorPoolSize;
    private final DelayQueue<T> queue;
    private final ExecutorService takeExecutor;
    private final ExecutorService[] taskProcessExecutors;

    /**
     * Written by the thread calling startup()/shutdown() and read by the take thread,
     * hence volatile so that a shutdown is guaranteed to become visible to the take loop.
     */
    private volatile boolean isStart = false;

    /**
     * Creates the queue, the take executor and the per-hash process executors.
     *
     * @param maxQueueSize            maximum number of tasks kept in the delay queue
     * @param processExecutorPoolSize number of single-thread process executors; must be positive
     * @throws IllegalArgumentException if {@code processExecutorPoolSize} is not positive
     */
    protected RetryQueue(int maxQueueSize, int processExecutorPoolSize) {
        if (processExecutorPoolSize <= 0) {
            // Fail fast: a zero pool size would otherwise only surface as an ArithmeticException
            // on the take thread when the first task is dispatched.
            throw new IllegalArgumentException("processExecutorPoolSize must be positive: " + processExecutorPoolSize);
        }

        this.maxQueueSize = maxQueueSize;
        this.processExecutorPoolSize = processExecutorPoolSize;
        this.queue = new DelayQueue<>();
        this.takeExecutor = Executors.newSingleThreadExecutor();
        this.taskProcessExecutors = new ExecutorService[processExecutorPoolSize];
        for (int i = 0; i < processExecutorPoolSize; i++) {
            this.taskProcessExecutors[i] = Executors.newSingleThreadExecutor();
        }
    }

    /**
     * Processes one due retry task. Invoked on the process executor selected by the task's hash.
     *
     * @param task the task that became due
     */
    public abstract void process(T task);

    /**
     * Persists the tasks still waiting in the queue. Called once from {@link #shutdown()}.
     *
     * @param taskList snapshot of the queued tasks (possibly empty)
     */
    public abstract void flushTask(List<T> taskList);

    /**
     * Loads previously persisted tasks. Called once from {@link #startup()}.
     *
     * @return the tasks to re-enqueue; {@code null} or an empty list means nothing to recover
     */
    public abstract List<T> recoverTask();

    /**
     * Recovers persisted tasks and starts the take loop. A second call while started is ignored.
     */
    public void startup() {
        if (this.isStart) {
            LOGGER.warn("<[RetryQueue startup already!]>");
            return;
        }
        this.isStart = true;

        // Re-enqueue tasks persisted by a previous shutdown.
        this.recoverTaskProcess();

        // Take due tasks and dispatch them until shutdown.
        this.takeExecutor.execute(() -> {
            while (this.isStart) {
                try {
                    T task = this.queue.take();
                    this.onRetryTaskTaken(task);
                } catch (InterruptedException ex) {
                    LOGGER.warn("<[RetryQueue.startup() interrupted!]>");
                    Thread.currentThread().interrupt();
                    // With the interrupt flag restored, take() would throw again immediately;
                    // leave the loop instead of spinning.
                    break;
                } catch (Exception ex) {
                    LOGGER.error("retryQueue.startup() error!", ex);
                }
            }
        });
    }

    /**
     * Stops the take loop and all process executors, then hands the tasks still in the queue
     * to {@link #flushTask(List)}. A call while not started is ignored.
     */
    public void shutdown() {
        if (!this.isStart) {
            LOGGER.warn("<[RetryQueue stop already!]>");
            return;
        }
        this.isStart = false;

        this.takeExecutor.shutdownNow();
        for (ExecutorService executor : this.taskProcessExecutors) {
            executor.shutdownNow();
        }

        this.flushTask(this.getFlushTaskList());
    }

    /**
     * Adds a task to the delay queue. Expired tasks and tasks beyond the capacity limit are
     * dropped with a log message; this method never throws.
     *
     * @param task the task to enqueue
     */
    public void enqueue(T task) {
        try {
            if (task.getRetryEndTime() < System.currentTimeMillis()) {
                LOGGER.warn("<[Discarded an expired RetryTask: {}]>", task);
                return;
            }

            if (this.queue.size() >= this.maxQueueSize) {
                LOGGER.warn("<[queue.size() >= maxQueueSize! maxQueueSize: {}]>", this.maxQueueSize);
                return;
            }

            if (!this.queue.offer(task)) {
                LOGGER.error("<[RetryQueue is full!]>");
                return;
            }

            LOGGER.info("<[RetryQueue.enqueue() done, queueSize: {}, task: {}]>", this.queue.size(), task);
        } catch (Exception ex) {
            LOGGER.error("RetryQueue.enqueue() error!", ex);
        }
    }

    /**
     * Dispatches a due task to the process executor selected by its hash.
     *
     * @param task the task taken from the delay queue
     */
    private void onRetryTaskTaken(T task) {
        try {
            if (Objects.isNull(task)) {
                LOGGER.warn("<[RetryTask is null!]>");
                return;
            }

            this.getExecutor(task).execute(() -> {
                try {
                    this.process(task);
                } catch (Exception ex) {
                    // Log through the logger instead of letting the exception escape to the
                    // worker thread's default uncaught-exception handler.
                    LOGGER.error("RetryQueue.process() error!", ex);
                }
            });
        } catch (Exception ex) {
            LOGGER.error("RetryQueue.onRetryTaskTaken() error!", ex);
        }
    }

    /**
     * Selects the process executor for a task from its retry hash.
     *
     * @param task the task to dispatch
     * @return the executor at index {@code abs(retryHash) % processExecutorPoolSize}
     */
    private ExecutorService getExecutor(T task) {
        if (Objects.isNull(task)) {
            throw new RuntimeException("<[RetryTask is null!]>");
        }

        int hashCode = task.getRetryHash();
        // Math.abs(Integer.MIN_VALUE) is still negative, so map that value to index 0.
        int index = (hashCode == Integer.MIN_VALUE) ? 0 : Math.abs(hashCode) % this.processExecutorPoolSize;

        return this.taskProcessExecutors[index];
    }

    /**
     * Re-enqueues the tasks returned by {@link #recoverTask()}.
     */
    private void recoverTaskProcess() {
        List<T> recoverTaskList = this.recoverTask();
        boolean nothingToRecover = CollectionUtils.isEmpty(recoverTaskList);
        LOGGER.warn("<[recoverTask size is: {}]>", (nothingToRecover ? 0 : recoverTaskList.size()));
        if (nothingToRecover) {
            return;
        }

        for (T task : recoverTaskList) {
            this.enqueue(task);
        }
    }

    /**
     * Collects every task currently in the delay queue by iterating it.
     *
     * @return the tasks to persist
     */
    private List<T> getFlushTaskList() {
        List<T> list = new ArrayList<>();
        for (T task : this.queue) {
            list.add(task);
        }
        LOGGER.warn("<[flushTaskList size is: {}]>", list.size());

        return list;
    }
}
- 备注:
虽然DelayQueue提供了drainTo()方法,但它只会取出已经到期的元素,无法一次性取出队列中的全部元素,因此getFlushTaskList()方法中通过queue.iterator()进行遍历,以获取队列中的全部元素(包括尚未到期的)用于重试任务落盘;
2.2 测试代码
2.2.1 FooRetryQueue
package retry;
import java.util.List;
/**
 * Demo retry queue that retries {@link FooRetryTask}s through {@code FooService} and does not
 * persist tasks across restarts.
 *
 * @author chenx
 */
@SuppressWarnings("all")
public class FooRetryQueue extends RetryQueue<FooRetryQueue.FooRetryTask> {
    private static final int MAX_QUEUE_SIZE = 10000;
    private static final int PROCESS_EXECUTOR_POOL_SIZE = 64;

    private FooRetryQueue() {
        super(MAX_QUEUE_SIZE, PROCESS_EXECUTOR_POOL_SIZE);
    }

    /**
     * Returns the lazily created singleton instance.
     *
     * @return the shared queue
     */
    public static FooRetryQueue getInstance() {
        return SingletonHolder.INSTANCE;
    }

    /**
     * Retries the send, passing the original retry end time along.
     *
     * @param task the due task
     */
    @Override
    public void process(FooRetryTask task) {
        FooService.getInstance().sendMessage(task.getMsgId(), task.getRetryEndTime());
    }

    /**
     * Intentionally empty: this demo queue does not persist tasks.
     *
     * @param taskList the tasks still queued at shutdown (ignored)
     */
    @Override
    public void flushTask(List<FooRetryTask> taskList) {
        // nothing to persist
    }

    /**
     * Nothing is persisted, so there is never anything to recover.
     *
     * @return an empty list (rather than {@code null})
     */
    @Override
    public List<FooRetryTask> recoverTask() {
        return java.util.Collections.emptyList();
    }

    /**
     * Holder class that creates the singleton on first access.
     */
    private static class SingletonHolder {
        public static final FooRetryQueue INSTANCE = new FooRetryQueue();
    }

    /**
     * Retry task carrying the id of the message to resend.
     */
    public static class FooRetryTask extends RetryTask {
        private String msgId;

        public String getMsgId() {
            return this.msgId;
        }

        public void setMsgId(String msgId) {
            this.msgId = msgId;
        }

        @Override
        public String toString() {
            return "FooRetryTask{" +
                    "msgId='" + msgId + '\'' +
                    ", expireTime=" + expireTime +
                    ", retryHash=" + retryHash +
                    ", retryEndTime=" + retryEndTime +
                    '}';
        }
    }
}
2.2.2 FooService
package retry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Demo service whose send always "fails", used to exercise {@code FooRetryQueue}.
 *
 * @author chenx
 */
@SuppressWarnings("all")
public class FooService {
    private static final Logger LOGGER = LoggerFactory.getLogger(FooService.class);

    /**
     * Delay before each retry, in seconds (should come from configuration).
     */
    private static final int RETRY_DELAY_SECONDS = 5;

    /**
     * Maximum total retry duration, in seconds (should come from configuration).
     */
    private static final int MAX_RETRY_DURATION = 15;

    /**
     * Milliseconds per second, as a long so that second-to-millisecond conversions
     * are not computed in int and cannot overflow for large configured values.
     */
    private static final long MILLIS_PER_SECOND = 1000L;

    private FooService() {
        // just do nothing
    }

    /**
     * Returns the lazily created singleton instance.
     *
     * @return the shared service
     */
    public static FooService getInstance() {
        return SingletonHolder.INSTANCE;
    }

    /**
     * Starts the retry queue.
     */
    public void startup() {
        FooRetryQueue.getInstance().startup();
    }

    /**
     * Stops the retry queue.
     */
    public void shutdown() {
        FooRetryQueue.getInstance().shutdown();
    }

    /**
     * Simulates a failed send and schedules a retry {@link #RETRY_DELAY_SECONDS} seconds later.
     *
     * @param msgId        id of the message; also the source of the retry hash
     * @param retryEndTime absolute retry deadline in epoch milliseconds; a value {@code <= 0}
     *                     means "first attempt" and the deadline is set to now plus
     *                     {@link #MAX_RETRY_DURATION} seconds
     */
    public void sendMessage(String msgId, long retryEndTime) {
        boolean isRetry = false;
        try {
            // Simulate a failed send.
            isRetry = true;
            LOGGER.info("sendMessage() failed! msgId: {}", msgId);
        } finally {
            if (isRetry) {
                long now = System.currentTimeMillis();
                long effectiveRetryEndTime = retryEndTime <= 0
                        ? now + MAX_RETRY_DURATION * MILLIS_PER_SECOND
                        : retryEndTime;

                FooRetryQueue.FooRetryTask task = new FooRetryQueue.FooRetryTask();
                task.setExpireTime(now + RETRY_DELAY_SECONDS * MILLIS_PER_SECOND);
                task.setRetryHash(msgId.hashCode());
                task.setRetryEndTime(effectiveRetryEndTime);
                task.setMsgId(msgId);
                FooRetryQueue.getInstance().enqueue(task);
            }
        }
    }

    /**
     * Holder class that creates the singleton on first access.
     */
    private static class SingletonHolder {
        public static final FooService INSTANCE = new FooService();
    }

    public static void main(String[] args) throws InterruptedException {
        FooService.getInstance().startup();
        FooService.getInstance().sendMessage("msgId-1", -1L);

        // Keep the process alive long enough for all retries, then simulate a service stop.
        Thread.sleep((MAX_RETRY_DURATION + RETRY_DELAY_SECONDS) * MILLIS_PER_SECOND);
        FooService.getInstance().shutdown();
    }
}
3. 执行结果
FooService中的main方法执行结果如下,从执行结果可以看出确实可以按照预期进行重试:最多持续MAX_RETRY_DURATION秒,每RETRY_DELAY_SECONDS秒进行一次重试。

另外需要补充说明的是,如果大家希望对重试任务进行落盘则需要:
1:实现 void flushTask(List<T> taskList) 和 List<T> recoverTask() 这2个方法即可,方式可以是写本地临时文件或者持久化到DB中。
2:服务需要支持优雅停机(关于java优雅停机的实现一搜一大把),并且需要在停机阶段调用RetryQueue.shutdown()方法。
4. 重试任务落盘打样
以下是一个采用本地临时文件方式落盘的打样,示例中使用了自定义紧凑序列化+零拷贝读写本地文件的方式,目的是降低重试任务落盘及恢复的时间。
- WebhookRetryTask (重试任务实体)
/**
 * Retry task for a webhook call: the webhook type plus the request object to resend.
 */
public class WebhookRetryTask extends RetryTask {
    /**
     * Kind of webhook this task belongs to.
     */
    private WebhookType webhookType;

    /**
     * Request payload to resend; its concrete class depends on {@link #webhookType}.
     */
    private Object webhookRequest;

    /**
     * Creates an empty task; all fields are populated through setters.
     */
    public WebhookRetryTask() {
    }

    /**
     * Creates a task with its retry deadline, hash, type and payload.
     * The inherited {@code expireTime} is not set here.
     *
     * @param retryEndTime   absolute retry deadline
     * @param retryHash      hash used to select the processing thread
     * @param webhookType    kind of webhook
     * @param webhookRequest request payload to resend
     */
    public WebhookRetryTask(long retryEndTime,
                            int retryHash,
                            WebhookType webhookType,
                            Object webhookRequest) {
        this.webhookRequest = webhookRequest;
        this.webhookType = webhookType;
        this.retryHash = retryHash;
        this.retryEndTime = retryEndTime;
    }

    public void setWebhookType(WebhookType webhookType) {
        this.webhookType = webhookType;
    }

    public WebhookType getWebhookType() {
        return this.webhookType;
    }

    public void setWebhookRequest(Object webhookRequest) {
        this.webhookRequest = webhookRequest;
    }

    public Object getWebhookRequest() {
        return this.webhookRequest;
    }

    /**
     * Renders the task as JSON via {@code GsonUtils}.
     */
    @Override
    public String toString() {
        return GsonUtils.toJson(this);
    }
}
- WebhookRetryQueue (重试队列实现)
@SuppressWarnings({"squid:S6548", "squid:S1075"})
public class WebhookRetryQueue extends RetryQueue<WebhookRetryTask> {
private static final Logger LOGGER = LoggerFactory.getLogger(WebhookRetryQueue.class);
private static final int MAX_QUEUE_SIZE = 50000;
private static final int SCHEDULED_EXECUTOR_POOL_SIZE = 128;
private static final String WEBHOOK_RETRY_QUEUE_RECOVER_FILE_PATH = "/tmp/WebhookRetryQueue_recoverFile.data";
private WebhookRetryQueue() {
super(MAX_QUEUE_SIZE, SCHEDULED_EXECUTOR_POOL_SIZE);
}
/**
* getInstance
*/
public static WebhookRetryQueue getInstance() {
return WebhookRetryQueue.SingletonHolder.INSTANCE;
}
@Override
public void process(WebhookRetryTask task) {
try {
if (task.getWebhookType().equals(WebhookType.MESSAGE_WEBHOOK)) {
MessageWebhookProto request = (MessageWebhookProto) task.getWebhookRequest();
SpringContextUtils.getBean(WebhookService.class).sendMessageWebhook(request, task.getRetryEndTime());
} else if (task.getWebhookType().equals(WebhookType.MESSAGE_STATUS_UPDATE)) {
MessageStatusWebhookProto request = (MessageStatusWebhookProto) task.getWebhookRequest();
SpringContextUtils.getBean(WebhookService.class).sendMessageStatusWebhook(request, task.getRetryEndTime());
}
} catch (Exception ex) {
LOGGER.error("WebhookRetryQueue.process() error!", ex);
}
}
@Override
public void flushTask(List<WebhookRetryTask> taskList) {
try {
if (CollectionUtils.isEmpty(taskList)) {
LOGGER.warn("WebhookRetryQueue.flushTask() done, taskList is empty!");
return;
}
byte[] data = WebhookRetryTaskCodec.serialize(taskList);
File recoverFile = this.getRecoverFile();
WebhookRetryTaskCodec.writeFileData(recoverFile, data);
} catch (Exception ex) {
LOGGER.error("WebhookRetryQueue.flushTask() error!", ex);
}
}
@Override
public List<WebhookRetryTask> recoverTask() {
try {
File file = new File(WEBHOOK_RETRY_QUEUE_RECOVER_FILE_PATH);
if (!file.exists()) {
return Collections.emptyList();
}
byte[] date = WebhookRetryTaskCodec.readFileData(file);
return WebhookRetryTaskCodec.deserialize(date);
} catch (Exception ex) {
LOGGER.error("WebhookRetryQueue.recoverTask() error!", ex);
}
return Collections.emptyList();
}
/**
* getRecoverFile
* <p>
* 由于服务更新会导致服务物理根目录发生变化,因此往/tmp/目录下写落盘文件是一种可行的选择。
* @return
* @throws IOException
*/
private File getRecoverFile() throws IOException {
File file = new File(WEBHOOK_RETRY_QUEUE_RECOVER_FILE_PATH);
if (!file.exists()) {
Path path = Paths.get(WEBHOOK_RETRY_QUEUE_RECOVER_FILE_PATH);
Files.createDirectories(path.getParent());
Files.createFile(path);
LOGGER.warn("WebhookRetryQueue.createFile() done, filePath: {}", file.getAbsolutePath());
}
return file;
}
private static class SingletonHolder {
public static final WebhookRetryQueue INSTANCE = new WebhookRetryQueue();
}
}
- WebhookRetryTaskCodec(重试任务序列化)
/**
 * Compact binary codec for lists of {@link WebhookRetryTask} plus helpers to write/read the
 * encoded bytes to/from a file.
 * <p>
 * Layout: {@code int taskCount}, then per task {@code long retryEndTime, int retryHash,
 * int webhookTypeCode, int payloadLength, byte[payloadLength] payload}. The task's
 * {@code expireTime} is not part of the format.
 */
public class WebhookRetryTaskCodec {
    /**
     * Fixed-size part of one task record: retryEndTime + retryHash + type code + payload length.
     */
    private static final int MIN_TASK_RECORD_BYTES = Long.BYTES + 3 * Integer.BYTES;

    private WebhookRetryTaskCodec() {
        // just do nothing
    }

    /**
     * Encodes the tasks into the binary layout described on the class.
     *
     * @param taskList the tasks to encode
     * @return the encoded bytes; an empty array for a null or empty list
     * @throws IOException if writing to the in-memory stream fails
     */
    public static byte[] serialize(List<WebhookRetryTask> taskList) throws IOException {
        if (CollectionUtils.isEmpty(taskList)) {
            return new byte[0];
        }

        try (ByteArrayOutputStream out = new ByteArrayOutputStream();
             DataOutputStream dos = new DataOutputStream(out)) {
            dos.writeInt(taskList.size());
            for (WebhookRetryTask entry : taskList) {
                dos.writeLong(entry.getRetryEndTime());
                dos.writeInt(entry.getRetryHash());
                dos.writeInt(entry.getWebhookType().getCode());

                byte[] webhookRequestData = serializeRequest(entry);
                int webhookRequestDataLength = ArrayUtils.isEmpty(webhookRequestData) ? 0 : webhookRequestData.length;
                dos.writeInt(webhookRequestDataLength);
                // The payload is null for unsupported types; write(null) would throw an NPE.
                if (webhookRequestDataLength > 0) {
                    dos.write(webhookRequestData);
                }
            }

            return out.toByteArray();
        }
    }

    /**
     * Decodes bytes produced by {@link #serialize(List)}.
     *
     * @param data the encoded bytes
     * @return the decoded tasks; an empty list for null/empty input or a non-positive task count
     * @throws IOException             if the data ends before all announced records were read
     * @throws OpenApiRuntimeException if a payload length is negative or exceeds the remaining data
     */
    public static List<WebhookRetryTask> deserialize(byte[] data) throws IOException {
        if (ArrayUtils.isEmpty(data)) {
            return Collections.emptyList();
        }

        try (ByteArrayInputStream in = new ByteArrayInputStream(data);
             DataInputStream inputStream = new DataInputStream(in)
        ) {
            int taskListSize = inputStream.readInt();
            if (taskListSize <= 0) {
                return Collections.emptyList();
            }

            // Never pre-allocate more slots than the data could possibly contain, so a corrupt
            // header cannot trigger a huge allocation.
            int initialCapacity = Math.min(taskListSize, data.length / MIN_TASK_RECORD_BYTES);
            List<WebhookRetryTask> taskList = new ArrayList<>(initialCapacity);
            for (int i = 0; i < taskListSize; i++) {
                long retryEndTime = inputStream.readLong();
                int retryHash = inputStream.readInt();
                int webhookType = inputStream.readInt();
                int webhookRequestDataLength = inputStream.readInt();

                WebhookRetryTask task = new WebhookRetryTask();
                task.setRetryEndTime(retryEndTime);
                task.setRetryHash(retryHash);
                task.setWebhookType(WebhookType.getByCode(webhookType));

                if (webhookRequestDataLength > 0) {
                    byte[] webhookRequestData = readPayload(inputStream, webhookRequestDataLength);
                    Object request = deserializeRequest(webhookType, webhookRequestData);
                    if (request != null) {
                        task.setWebhookRequest(request);
                    }
                } else if (webhookRequestDataLength < 0) {
                    throw new OpenApiRuntimeException("read webhookRequestData failed!");
                }

                taskList.add(task);
            }

            return taskList;
        }
    }

    /**
     * Writes the bytes to an existing file, replacing its previous content.
     *
     * @param destFile the existing destination file
     * @param data     the non-empty bytes to write
     * @throws IOException             if writing fails or stalls before all bytes are written
     * @throws OpenApiRuntimeException if the file does not exist or the data is empty
     */
    public static void writeFileData(File destFile, byte[] data) throws IOException {
        if (Objects.isNull(destFile) || !destFile.exists()) {
            throw new OpenApiRuntimeException("destFile not existed!");
        }

        if (ArrayUtils.isEmpty(data)) {
            throw new OpenApiRuntimeException("data is empty!");
        }

        try (FileOutputStream fos = new FileOutputStream(destFile);
             FileChannel destChannel = fos.getChannel();
             ByteArrayInputStream srcInput = new ByteArrayInputStream(data);
             ReadableByteChannel srcChannel = Channels.newChannel(srcInput)
        ) {
            // transferFrom may move fewer bytes than requested, so repeat until done.
            long position = 0;
            while (position < data.length) {
                long transferred = destChannel.transferFrom(srcChannel, position, data.length - position);
                if (transferred <= 0) {
                    throw new IOException("writeFileData stalled at position " + position + " of " + data.length);
                }
                position += transferred;
            }
        }
    }

    /**
     * Reads the whole content of a file.
     *
     * @param srcFile the existing source file
     * @return the file content
     * @throws IOException             if reading fails
     * @throws OpenApiRuntimeException if the file does not exist
     */
    public static byte[] readFileData(File srcFile) throws IOException {
        if (Objects.isNull(srcFile) || !srcFile.exists()) {
            throw new OpenApiRuntimeException("srcFile not existed!");
        }

        try (FileInputStream fis = new FileInputStream(srcFile);
             FileChannel srcFileChannel = fis.getChannel();
             ByteArrayOutputStream out = new ByteArrayOutputStream();
             WritableByteChannel destChannel = Channels.newChannel(out)
        ) {
            // transferTo may move fewer bytes than requested, so repeat until the end is reached.
            long size = srcFileChannel.size();
            long position = 0;
            while (position < size) {
                long transferred = srcFileChannel.transferTo(position, size - position, destChannel);
                if (transferred <= 0) {
                    break;
                }
                position += transferred;
            }

            return out.toByteArray();
        }
    }

    /**
     * Serializes the task's request according to its webhook type.
     *
     * @return the payload bytes, or {@code null} for an unsupported type
     */
    private static byte[] serializeRequest(WebhookRetryTask entry) {
        int typeCode = entry.getWebhookType().getCode();
        if (typeCode == WebhookType.MESSAGE_WEBHOOK.getCode()) {
            MessageWebhookProto request = (MessageWebhookProto) entry.getWebhookRequest();
            return ProtoStuffUtil.serialize(request);
        }
        if (typeCode == WebhookType.MESSAGE_STATUS_UPDATE.getCode()) {
            MessageStatusWebhookProto request = (MessageStatusWebhookProto) entry.getWebhookRequest();
            return ProtoStuffUtil.serialize(request);
        }

        return null;
    }

    /**
     * Deserializes a payload according to the webhook type code.
     *
     * @return the request object, or {@code null} for an unsupported type code
     */
    private static Object deserializeRequest(int webhookType, byte[] webhookRequestData) {
        if (webhookType == WebhookType.MESSAGE_WEBHOOK.getCode()) {
            return ProtoStuffUtil.deserialize(webhookRequestData, MessageWebhookProto.class);
        }
        if (webhookType == WebhookType.MESSAGE_STATUS_UPDATE.getCode()) {
            return ProtoStuffUtil.deserialize(webhookRequestData, MessageStatusWebhookProto.class);
        }

        return null;
    }

    /**
     * Reads exactly {@code length} payload bytes, validating the length against the remaining
     * data before allocating the buffer.
     */
    private static byte[] readPayload(DataInputStream inputStream, int length) throws IOException {
        if (length > inputStream.available()) {
            throw new OpenApiRuntimeException("read webhookRequestData failed!");
        }

        byte[] payload = new byte[length];
        inputStream.readFully(payload);

        return payload;
    }
}
本文介绍了一个基于Java的自定义延迟重试队列组件,用于处理Webhook请求的重试,考虑了重试间隔、队列容量限制、任务哈希分发执行等因素,以支持长时间的重试周期并避免资源消耗过多。
13万+

被折叠的 条评论
为什么被折叠?



