外卖霸王餐推荐算法:协同过滤矩阵分解与实时特征存储Redis Feature
背景:霸王餐日均2000万曝光,CTR提升1%即带来百万级GMV增长
传统热门榜单已无法满足“千人千面”需求,团队采用ALS矩阵分解离线训练协同过滤模型,将用户&商户隐向量注入Redis Feature Service,线上通过Lua脚本秒级拼接实时特征,实现召回+排序一体化。线上A/B测试7天,CTR+3.4%,UV价值+5.1%,下文给出完整Java实现与调参细节。

离线训练:Spark MLlib 20行完成ALS
import org.apache.spark.ml.recommendation.ALS
// Train an implicit-feedback ALS model on ratingDF, which must expose the
// columns userId, shopId and weight.
val als = new ALS()
  .setUserCol("userId")
  .setItemCol("shopId")
  .setRatingCol("weight")
  .setRank(64)
  .setMaxIter(15)
  .setRegParam(0.01)
  .setImplicitPrefs(true) // clicks and orders are implicit feedback

val model = als.fit(ratingDF)

// The factors are written to fixed paths. With the default save mode the write
// fails once the path exists, so a recurring (daily) run must overwrite the
// previous output explicitly.
model.userFactors.write.mode("overwrite").parquet("s3://bwc/model/userFactors")
model.itemFactors.write.mode("overwrite").parquet("s3://bwc/model/itemFactors")
- rank=64经网格搜索为最优;继续增大rank会增加线上耗时,且收益递减
- weight=if(click,1)+if(order,5) 引入业务权重
- 全量6500万评分,15分钟训练完成
模型解析:把64维向量压成Redis String
package juwatech.cn.als.parser;
/**
 * Converts float vectors to and from a comma-separated text form
 * (for example {@code "0.123,-0.457"}).
 */
public class VectorEncoder {

    /**
     * Encodes a vector as comma-separated values, each with three decimal places.
     *
     * @param vec the vector to encode; {@code null} is treated as an empty vector
     * @return the encoded text, or {@code ""} for an empty vector
     */
    public static String encode(float[] vec) {
        if (vec == null || vec.length == 0) {
            return "";
        }
        StringBuilder sb = new StringBuilder(vec.length * 7);
        for (int i = 0; i < vec.length; i++) {
            if (i > 0) {
                sb.append(',');
            }
            // Locale.ROOT pins '.' as the decimal separator. With the JVM default
            // locale a comma-decimal locale would emit "0,123", which collides
            // with the ',' element separator and corrupts the encoding.
            sb.append(String.format(java.util.Locale.ROOT, "%.3f", vec[i]));
        }
        return sb.toString();
    }

    /**
     * Decodes text produced by {@link #encode(float[])} back into a vector.
     *
     * @param str comma-separated float values; {@code null} or blank yields an empty vector
     * @return the decoded vector, one element per comma-separated value
     * @throws NumberFormatException if an element is not a parsable float
     */
    public static float[] decode(String str) {
        // "".split(",") yields one empty token, which parseFloat rejects, so the
        // empty encoding needs its own branch to round-trip.
        if (str == null || str.trim().isEmpty()) {
            return new float[0];
        }
        String[] parts = str.split(",");
        float[] v = new float[parts.length];
        for (int i = 0; i < parts.length; i++) {
            v[i] = Float.parseFloat(parts[i]);
        }
        return v;
    }
}
每日凌晨将parquet下载到本地,解析后批量写入Redis Cluster,Key格式:
u:{userId} -> "0.123,0.456,..."
i:{shopId} -> "0.222,0.333,..."
实时特征服务:Redis Feature Service
package juwatech.cn.feature.service;
/**
 * Reads user/shop vectors and per-shop context features from Redis.
 */
@Service
public class FeatureService {

    @Resource
    private RedisClusterClient redis;

    /** Length of the all-zero vector returned when no vector is stored for a key. */
    private static final int VEC_LEN = 64;

    /**
     * @param userId the user whose vector is stored under {@code u:<userId>}
     * @return the stored vector, or a zero vector of length {@code VEC_LEN} if the key is absent
     */
    public float[] getUserVector(long userId) {
        return loadVector("u:" + userId);
    }

    /**
     * @param shopId the shop whose vector is stored under {@code i:<shopId>}
     * @return the stored vector, or a zero vector of length {@code VEC_LEN} if the key is absent
     */
    public float[] getItemVector(long shopId) {
        return loadVector("i:" + shopId);
    }

    /**
     * Real-time context features: distance, score and stock.
     *
     * @return a map with the keys {@code dist}, {@code score} and {@code stock};
     *         {@code score} and {@code stock} are 0.0 when the hash field is absent
     * @throws NumberFormatException if a stored score/stock value is not numeric
     */
    public Map<String,Double> getCtxFeatures(long userId, long shopId) {
        String field = String.valueOf(shopId);
        Map<String,Double> m = new HashMap<>();
        m.put("dist", DistanceUtils.km(userId, shopId));
        m.put("score", readDouble("shop:score", field));
        m.put("stock", readDouble("shop:stock", field));
        return m;
    }

    /** Fetches and decodes the vector stored at {@code key}, defaulting to a zero vector. */
    private float[] loadVector(String key) {
        String val = redis.sync().get(key);
        return val == null ? new float[VEC_LEN] : VectorEncoder.decode(val);
    }

    /**
     * Reads one hash field as a double. The raw reply is converted explicitly
     * because it cannot be stored into a {@code Map<String,Double>} as-is when
     * the client returns text, and a missing field must not surface as a null
     * that callers later unbox.
     */
    private Double readDouble(String hashKey, String field) {
        Object raw = redis.sync().hget(hashKey, field);
        if (raw == null) {
            return 0.0;
        }
        if (raw instanceof Number) {
            return ((Number) raw).doubleValue();
        }
        return Double.parseDouble(raw.toString().trim());
    }
}
Vec与Context特征统一走本地缓存+Redis双级,命中率96%,P99读取1.3ms。
召回+粗排:Lua脚本一次网络往返完成向量点积
-- Scores candidate shops against one user vector with a dot product.
-- KEYS[1] = user vector as comma-separated text (the vector itself, not a key name)
-- ARGV    = list of candidate shopIds
-- Returns a flat array: shopId, score, shopId, score, ...
-- Shops with no stored vector, or with a vector whose length differs from the
-- user vector, are left out of the result.
-- NOTE(review): KEYS[1] carries a value rather than a key, and the 'i:<shopId>'
-- keys read below are not declared in KEYS; confirm this is acceptable for the
-- target deployment (Redis Cluster routes scripts by the declared keys).

-- Parses "0.123,-0.456,..." into an array of numbers.
local function parseVec(s)
  local vec = {}
  for w in string.gmatch(s, "%-?%d+%.?%d*") do
    vec[#vec + 1] = tonumber(w)
  end
  return vec
end

local userVec = parseVec(KEYS[1])
local dim = #userVec
local result = {}
for i = 1, #ARGV do
  local shopVecStr = redis.call('GET', 'i:' .. ARGV[i])
  if shopVecStr then
    local shopVec = parseVec(shopVecStr)
    -- A length mismatch would otherwise raise "arithmetic on a nil value"
    -- and abort the whole script for every candidate.
    if #shopVec == dim then
      local score = 0
      for j = 1, dim do
        score = score + userVec[j] * shopVec[j]
      end
      result[#result + 1] = ARGV[i]
      -- Redis converts a Lua number in a reply to an integer, dropping the
      -- fractional part; return the score as a string to keep its precision.
      result[#result + 1] = tostring(score)
    end
  end
end
return result
Java侧调用:
/**
 * Scores the candidate shops against the user's vector via the Lua script and
 * returns at most the 200 highest-scoring ones, best first.
 *
 * @param userId     the user to recall for
 * @param candidates candidate shop ids; {@code null} or empty yields an empty list
 * @return scored shops in descending score order, at most 200 entries
 */
public List<ScoredShop> recall(long userId, List<Long> candidates) {
    // Nothing to score: skip the vector lookup and the script round trip.
    if (candidates == null || candidates.isEmpty()) {
        return new ArrayList<>();
    }
    String userVec = VectorEncoder.encode(featureService.getUserVector(userId));
    List<String> args = candidates.stream().map(String::valueOf).collect(Collectors.toList());
    List<String> resp = redis.sync().eval(lua, Collections.singletonList(userVec), args.toArray(new String[0]));
    List<ScoredShop> list = new ArrayList<>();
    if (resp == null) {
        return list;
    }
    // The reply is a flat [shopId, score, ...] array; "i + 1 <" ignores a
    // dangling trailing element instead of running past the end.
    for (int i = 0; i + 1 < resp.size(); i += 2) {
        list.add(new ScoredShop(Long.parseLong(resp.get(i)), Double.parseDouble(resp.get(i + 1))));
    }
    // Sort by score descending; the top 200 go on to fine ranking.
    list.sort((a, b) -> Double.compare(b.score, a.score));
    // Copy the head: subList returns a view that keeps the full backing list alive.
    return list.size() > 200 ? new ArrayList<>(list.subList(0, 200)) : list;
}
一次脚本调用(EVAL/EVALSHA)即可把N个候选的N次GET网络往返合并为1次,200候选<5ms。
精排模型:LR拼接实时上下文
/**
 * Fine-ranking score: a linear combination of the recall dot product and the
 * context features returned by {@code featureService.getCtxFeatures}.
 *
 * @param userId the user being ranked for
 * @param shopId the candidate shop
 * @param dot    the user/shop vector dot product from recall
 * @return the combined score; a feature missing from the context map contributes 0
 */
public double rank(long userId, long shopId, double dot) {
    Map<String,Double> ctx = featureService.getCtxFeatures(userId, shopId);
    // Simple linear model. The original note says the weights are updated in
    // real time via Flink in production; here they are fixed constants.
    // NOTE(review): wDist is negative while its feature exp(-dist/2) is largest
    // for the nearest shops, so nearby shops are penalized; confirm the sign.
    double wDist = -0.34, wScore = 0.25, wStock = 0.12;

    // Read the boxed values first: unboxing a null entry directly inside the
    // arithmetic would throw NullPointerException.
    Double dist = ctx.get("dist");
    Double score = ctx.get("score");
    Double stock = ctx.get("stock");

    double total = 0.6 * dot;
    if (dist != null) {
        total += wDist * Math.exp(-dist / 2.0);
    }
    if (score != null) {
        total += wScore * score;
    }
    if (stock != null) {
        total += wStock * stock;
    }
    return total;
}
权重每周离线GridSearch,增量更新Redis Hash,无需重启应用。
在线A/B:FeatureToggle秒级切换召回源
/**
 * Recommendation endpoint. When the {@code bwc.cf.recall} toggle is on, the
 * candidates are recalled, re-scored by {@code rank}, and returned ordered by
 * that final score (highest first); otherwise the baseline hot list is returned.
 *
 * @param userId the requesting user
 * @return the recommended shops
 */
@GetMapping("/rec")
public List<ShopDTO> recommend(@RequestParam long userId) {
    if (!FeatureToggle.isOn("bwc.cf.recall")) {
        return fallbackHotList(); // baseline hot list
    }
    List<Long> candidates = CandidatePicker.get(userId); // simple hot-list filter
    return recall(userId, candidates).stream()
        // Replace the recall score with the fine-ranking score...
        .map(s -> new ScoredShop(s.shopId, rank(userId, s.shopId, s.score)))
        // ...and order by it; without this the response stays in recall order
        // and the fine-ranking score never affects what the user sees first.
        .sorted((a, b) -> Double.compare(b.score, a.score))
        .map(s -> ShopDTO.builder().id(s.shopId).score(s.score).build())
        .collect(Collectors.toList());
}
实验组占5%流量,实时监控CTR、GMV,下跌一键关闭。
冷启动&稀疏:利用Shop向量均值填充
/**
 * Returns the user's vector, falling back to the precomputed average vector
 * stored under {@code i:avg} for users with no vector of their own (new users,
 * or vectors not generated yet).
 *
 * @param userId the user whose vector is stored under {@code u:<userId>}
 * @return the user's vector, else the average vector, else a zero vector of
 *         length {@code VEC_LEN} when {@code i:avg} is also absent
 */
public float[] getUserVector(long userId) {
    String val = redis.sync().get("u:" + userId);
    if (val != null) {
        return VectorEncoder.decode(val);
    }
    // Cold start: the average key can itself be missing (for example before it
    // has been produced), and decoding null would throw.
    String avg = redis.sync().get("i:avg");
    return avg == null ? new float[VEC_LEN] : VectorEncoder.decode(avg);
}
i:avg每日随模型一起产出,新用户首日CTR提升8%。
性能压测:64维向量200候选P99 6ms
单机4C8G容器,JMH 20并发:
Benchmark Mode Cnt Score Error Units
RecallAndRank.recall thrpt 5 28.9 ± 0.4 ops/ms
RecallAndRank.rank thrpt 5 16.7 ± 0.2 ops/ms
线上集群QPS 3.2万,CPU占用18%,Redis带宽300MB/s。
本文著作权归吃喝不愁app开发者团队,转载请注明出处!

被折叠的 条评论
为什么被折叠?



