限流实现方式
/**
 * Per-minute rate-limit check that serializes every caller behind a Redisson lock.
 *
 * <p>A lock named after the limiter is taken, a counter key that embeds the current
 * minute ({@code yyyyMMddHHmm}) is incremented, and the resulting count is compared
 * against a hard-coded ceiling of 12000.
 *
 * @param rateLimiter limiter name; concatenated into both the lock key and the counter key
 * @return {@code false} when the incremented count exceeds 12000 or when any exception
 *         is thrown; {@code true} otherwise (including when the counter call returns null)
 */
public boolean checkRateLimit(String rateLimiter) {
// One lock per limiter name: all requests sharing a limiter contend on this single key.
String lockKey = OPEN_AI_RATE_LIMITER_KEY + rateLimiter + ":lock";
RLock lock = redissonClient.getLock(lockKey);
try {
// RLock.lock(leaseTime, unit): blocks until the lock is acquired; the 3 seconds is the
// lease after which Redisson auto-releases the lock, not a limit on how long to wait.
lock.lock(3, TimeUnit.SECONDS);
Long perMinuteNum = 12000L;
// Counter key changes every minute because the formatted timestamp is part of it.
String rateKey = OPEN_AI_RATE_LIMITER_KEY + rateLimiter + "_" + DateUtil.format(new Date(), "yyyyMMddHHmm");
// Presumably the third argument is the key TTL in seconds - TODO confirm against redisManager.
Long num = redisManager.incrLong(rateKey, 1, DateUtil.ONE_MINUTE_SECONDS + 1);
// A null count does not satisfy this condition, so it falls through to the "normal" path below.
if (Objects.nonNull(num) && num > perMinuteNum) {
log.info("checkRateLimit-超限 rateKey : {} perMinuteNum : {} num : {}", rateKey, perMinuteNum, num);
return false;
}
log.info("checkRateLimit-正常 rateKey : {} perMinuteNum : {} num : {}", rateKey, perMinuteNum, num);
return true;
} catch (Exception e) {
// Any failure (lock or counter) is logged and the method falls through to return false.
log.error("checkRateLimit-失败 rateLimiter : {}", JSON.toJSONString(rateLimiter), e);
} finally {
// NOTE(review): forceUnlock() releases the lock without an ownership check of its own;
// here it is only reached when isHeldByCurrentThread() is true.
if (lock.isLocked() && lock.isHeldByCurrentThread()) {
lock.forceUnlock();
}
}
return false;
}
上线后大量慢请求报警

排查原因
- 通过阿里云监控面板可以看到 /v2/get-data3 接口请求非常慢,达到了分钟级(正常接口响应时间在 200ms 以内)

- 通过方法栈执行时间可以看到接口主要慢在 Redisson 加锁,导致大量请求堵塞

解决方案
回滚代码
压测
- 10 个线程,每个线程调 20 次,时间间隔 0,限流方法就达到 30ms

- 60 个线程,每个线程调 5 次,时间间隔 0,限流方法达到 155ms

- 通过方法栈执行时间和压测,可以确定 Redisson 加全局锁的方式堵塞时间会随着瞬时流量增加而增加
解决方案
incr 实现限流
import com.alibaba.fastjson.JSONObject;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.redis.connection.RedisStringCommands;
import org.springframework.data.redis.connection.RedisZSetCommands;
import org.springframework.data.redis.connection.ReturnType;
import org.springframework.data.redis.core.RedisCallback;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.data.redis.core.ZSetOperations;
import org.springframework.data.redis.core.query.SortQuery;
import org.springframework.data.redis.core.types.Expiration;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
/**
 * Fixed-window rate limiter driven by a single counter increment.
 *
 * <p>The current epoch second is divided by {@code timeWindowInSeconds} to obtain a
 * window index; that index and {@code key} are formatted into the counter key, so each
 * window gets its own counter. The call is allowed when the incremented value is at
 * most {@code maxRequests}.
 *
 * @param key                 caller-chosen limiter name, embedded in the counter key
 * @param maxRequests         number of calls allowed per window; must be greater than 0
 * @param timeWindowInSeconds window length in seconds; must be in {@code (0, ONE_DAY_SECONDS]}
 * @return {@code true} when this call is within the limit; {@code false} when the limit
 *         is exceeded or the counter could not be incremented ({@code incrLong} returned null)
 * @throws IllegalArgumentException if {@code maxRequests} or {@code timeWindowInSeconds}
 *                                  is out of range
 */
public boolean checkRateLimitByIncr(@NonNull String key, int maxRequests, int timeWindowInSeconds) {
    if (maxRequests <= 0) {
        throw new IllegalArgumentException("最大请求数必须大于 0");
    }
    // A zero window would divide by zero below; a negative one yields meaningless windows and TTLs.
    if (timeWindowInSeconds <= 0) {
        throw new IllegalArgumentException("窗口时长必须大于 0");
    }
    if (timeWindowInSeconds > ONE_DAY_SECONDS) {
        throw new IllegalArgumentException("窗口时长最大为 1 天");
    }

    long stTime = System.currentTimeMillis();
    long epochSeconds = stTime / 1000;
    // Every call inside the same window maps to the same suffix, hence the same counter key.
    long suffix = epochSeconds / timeWindowInSeconds;
    String keySplic = String.format(acquire_token_incr_prefix, key, suffix);

    // The extra second is passed as slack on top of the window length.
    Long num = incrLong(keySplic, 1L, timeWindowInSeconds + 1);

    // A null count (increment failed) is treated as "limited".
    boolean res = Objects.nonNull(num) && num <= maxRequests;
    if (res) {
        log.debug("checkRateLimitByIncr-通过 key : {} keySplic : {} timeWindowInSeconds : {} maxRequests : {} num : {} consumeTime : {}",
                key, keySplic, timeWindowInSeconds, maxRequests, num, System.currentTimeMillis() - stTime);
    } else {
        log.warn("checkRateLimitByIncr-限流 key : {} keySplic : {} timeWindowInSeconds : {} maxRequests : {} num : {} consumeTime : {}",
                key, keySplic, timeWindowInSeconds, maxRequests, num, System.currentTimeMillis() - stTime);
    }
    return res;
}
/**
 * Increments a Redis counter and gives it a time-to-live when it is first created.
 *
 * <p>The increment and the expire are two separate Redis calls, so they are not atomic
 * as a pair.
 *
 * @param key   counter key
 * @param delta amount to add
 * @param time  time-to-live in seconds applied when this call produced the first value
 *              of the counter; ignored when not positive
 * @return the counter value after the increment, or {@code null} if the increment failed
 */
public Long incrLong(String key, long delta, long time) {
    Long increment;
    try {
        increment = redisTemplate.opsForValue().increment(key, delta);
    } catch (Exception e) {
        log.error("failed key : {} delta : {} time : {}", key, delta, time, e);
        return null;
    }

    // A result equal to delta means the key did not exist before this call, so this is
    // the moment to attach the TTL; without it the per-window keys would never be removed.
    if (increment != null && increment == delta && time > 0) {
        try {
            redisTemplate.expire(key, time, TimeUnit.SECONDS);
        } catch (Exception e) {
            // The count itself is valid; report the TTL failure but still return the count.
            log.error("expire failed key : {} time : {}", key, time, e);
        }
    }
    return increment;
}
- 注意:该方式并不是原子的,可能导致实际通过的请求数超过设置的最大请求数
lua 实现限流
/**
 * Rate limiter whose read, increment and expire steps run inside one Lua script.
 *
 * <p>The script returns the current count without incrementing once the stored count
 * is already above {@code maxRequests}; otherwise it increments the counter and, when
 * the new value is 1, sets the key to expire after {@code timeWindowInSeconds}. The
 * window therefore starts at the first counted request rather than on a clock boundary.
 *
 * @param key                 caller-chosen limiter name, embedded in the counter key
 * @param maxRequests         number of calls allowed per window; must be greater than 0
 * @param timeWindowInSeconds window length in seconds; must be in {@code (0, ONE_DAY_SECONDS]}
 * @return {@code true} when this call is within the limit; {@code false} when the limit
 *         is exceeded or the script returned no value
 * @throws IllegalArgumentException if {@code maxRequests} or {@code timeWindowInSeconds}
 *                                  is out of range
 */
public Boolean checkRateLimitByLua(@NonNull String key, int maxRequests, int timeWindowInSeconds) {
    if (maxRequests <= 0) {
        throw new IllegalArgumentException("最大请求数必须大于 0");
    }
    // EXPIRE with a non-positive timeout deletes the key at once, which would reset the
    // counter on every call and silently disable the limiter.
    if (timeWindowInSeconds <= 0) {
        throw new IllegalArgumentException("窗口时长必须大于 0");
    }
    if (timeWindowInSeconds > ONE_DAY_SECONDS) {
        throw new IllegalArgumentException("窗口时长最大为 1 天");
    }

    long stTime = System.currentTimeMillis();
    String keySplic = String.format(acquire_token_lua_prefix, key);
    String luaScript = "local key = KEYS[1]\n" +
            "local count = tonumber(ARGV[1])\n" +
            "local time = tonumber(ARGV[2])\n" +
            "local current = redis.call('get', key);\n" +
            "if current and tonumber(current) > count then\n" +
            "    return tonumber(current);\n" +
            "end\n" +
            "current = redis.call('incr', key)\n" +
            "if tonumber(current) == 1 then\n" +
            "    redis.call('expire', key, time)\n" +
            "end\n" +
            "return tonumber(current);";

    // Encode explicitly as UTF-8 so the bytes sent to Redis do not depend on the JVM's
    // default charset.
    byte[] scriptBytes = luaScript.getBytes(StandardCharsets.UTF_8);
    byte[] keyBytes = keySplic.getBytes(StandardCharsets.UTF_8);
    byte[] maxBytes = String.valueOf(maxRequests).getBytes(StandardCharsets.UTF_8);
    byte[] windowBytes = String.valueOf(timeWindowInSeconds).getBytes(StandardCharsets.UTF_8);

    // numKeys = 1: the first trailing argument is KEYS[1], the rest are ARGV[1..2].
    Long num = redisTemplate.execute((RedisCallback<Long>) connection ->
            connection.eval(scriptBytes, ReturnType.INTEGER, 1, keyBytes, maxBytes, windowBytes));

    // A null result is treated as "limited".
    boolean res = Objects.nonNull(num) && num <= maxRequests;
    if (res) {
        log.debug("checkRateLimitByLua-通过 key : {} timeWindowInSeconds : {} maxRequests : {} num : {} consumeTime : {}",
                key, timeWindowInSeconds, maxRequests, num, System.currentTimeMillis() - stTime);
    } else {
        log.warn("checkRateLimitByLua-限流 key : {} timeWindowInSeconds : {} maxRequests : {} num : {} consumeTime : {}",
                key, timeWindowInSeconds, maxRequests, num, System.currentTimeMillis() - stTime);
    }
    return res;
}
分别压测
- 10 个线程,每个线程调 20 次,时间间隔 0,耗时 1~2 ms

- 10 个线程,每个线程调 20 次,时间间隔 0,耗时 1~3 ms

- 60 个线程,每个线程调 5 次,时间间隔 0,incr 和 lua 仍然基本稳定在 1~3 毫秒
总结
- 不能使用 Redisson 锁的方式保证原子性,Redisson 在瞬时流量较大时会导致大量锁堵塞
- 对最大请求数要求不是那么严格的情况下可以使用 incr 的方式限流
- 对最大请求数要求严格情况可使用 lua 脚本的方式限流