本文基于 Spring Boot,使用 spring-boot-starter-data-elasticsearch 通过 Java 操作 Elasticsearch(ES)。Maven 项目直接引入以下依赖即可:
<dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-data-elasticsearch</artifactId> </dependency>
索引的数据结构
{"request_record" : {
"mappings" : {
"main" : {
"properties" : {
"body" : {
"type" : "text"
},
"createTime" : {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss"
},
"id" : {
"type" : "keyword"
},
"ip" : {
"type" : "keyword"
},
"method" : {
"type" : "keyword"
},
"params" : {
"type" : "keyword"
},
"requestTime" : {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss"
},
"url" : {
"type" : "keyword"
},
"userId" : {
"type" : "integer"
}
}
}
}
}
}
索引中的文档数据示例如下:
{
"_index" : "request_record",
"_type" : "main",
"_id" : "846ee670-f81c-4e7f-a9a3-c0a4501865b7",
"_score" : 1.030125,
"_source" : {
"body" : """{"userId":"110","cityName":"深圳"}""",
"createTime" : "2024-09-16 10:01:22",
"id" : "846ee670-f81c-4e7f-a9a3-c0a4501865b7",
"ip" : "218.18.76.0",
"method" : "POST",
"requestTime" : "2024-09-16 10:01:12",
"url" : "index/changeDate/getList",
"userId" : 110
}
}
采用多线程 + scroll 滚动查询,按天统计上亿条用户请求记录中各城市的点击量
/**
 * Service contract for counting city clicks found in logged user requests.
 */
public interface CountCityClicksService {
/**
 * Counts city clicks for the time window carried by {@code filterDTO}.
 *
 * <p>The implementation shown alongside this interface reads
 * {@code filterDTO.getStartTime()} and {@code filterDTO.getEndTime()}
 * (both formatted as {@code yyyy-MM-dd HH:mm:ss}), scrolls the
 * {@code request_record} index one day at a time, and reports the elapsed
 * time in the response body.
 *
 * @param filterDTO query filter holding the start and end time
 * @return the response object; on failure its error message is set
 */
CommonResponse getScrollCountCityClicks(DayLogDto filterDTO);
}
/**
 * Counts, per day and per city, how often a {@code cityName} appears in the
 * request payloads stored in the {@code request_record} Elasticsearch index.
 *
 * <p>The requested date range is split into single days. Each day is scrolled
 * on a worker thread, the per-day counts are merged, and the merged rows are
 * written once through {@code esCityClickDataMapper}.
 *
 * <p>NOTE(review): {@code esCityClickDataMapper} is used by
 * {@link #getScrollCountCityClicks(DayLogDto)} but is not declared anywhere in
 * this class as shown; presumably it is an injected mapper field that was
 * omitted from the listing — confirm and declare it.
 */
@Service
public class CountCityClicksServiceImpl implements CountCityClicksService {

    /** Scroll keep-alive in milliseconds, passed to every start/continue scroll call. */
    private static final long SCROLL_TIMEOUT = 3000;

    /** Number of hits requested per scroll page. */
    private static final int SCROLL_PAGE_SIZE = 10000;

    /** Number of worker threads used to scroll different days in parallel. */
    private static final int QUERY_THREAD_COUNT = 10;

    /** Format of the incoming start/end time strings. */
    private static final DateTimeFormatter inputFormatter =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /** Format of the per-day key used in queries and in the stored rows. */
    private static final DateTimeFormatter outputFormatter =
            DateTimeFormatter.ofPattern("yyyy-MM-dd");

    /** Matches the characters stripped from a payload before JSON parsing. */
    private static final java.util.regex.Pattern JSON_NOISE =
            java.util.regex.Pattern.compile("-|\\s+");

    /** Matches every character outside the CJK range U+4E00..U+9FA5. */
    private static final java.util.regex.Pattern NON_CHINESE =
            java.util.regex.Pattern.compile("[^\\u4e00-\\u9fa5]");

    @Autowired
    protected ElasticsearchRestTemplate elasticsearchTemplate;

    /**
     * Maps every search hit to a {@link RequestRecordDto} carrying the HTTP
     * method and the request payload: {@code body} for POST, {@code params}
     * for GET, nothing for any other method.
     *
     * <p>Exactly one DTO is produced per hit. Hits must not be filtered out
     * here: the scroll loop stops as soon as a page has no content, so dropping
     * hits could make a non-empty page look like the end of the scroll.
     */
    public static final SearchResultMapper searchBodyResultMapper = new SearchResultMapper() {
        @Override
        @SuppressWarnings("unchecked") // callers of this mapper always request RequestRecordDto
        public <T> AggregatedPage<T> mapResults(SearchResponse response, Class<T> aClass, Pageable pageable) {
            List<RequestRecordDto> result = new ArrayList<>();
            for (SearchHit hit : response.getHits()) {
                Map<String, Object> source = hit.getSourceAsMap();
                RequestRecordDto record = new RequestRecordDto();
                if (source != null) {
                    String method = (String) source.get("method");
                    record.setMethod(method);
                    String payload = null;
                    if ("POST".equals(method)) {
                        payload = (String) source.get("body");
                    } else if ("GET".equals(method)) {
                        payload = (String) source.get("params");
                    }
                    if (org.apache.commons.lang3.StringUtils.isNotBlank(payload)) {
                        record.setBody(payload);
                    }
                }
                result.add(record);
            }
            return new AggregatedPageImpl<T>((List<T>) result, pageable,
                    response.getHits().getTotalHits(), response.getScrollId());
        }

        @Override
        public <T> T mapSearchHit(SearchHit searchHit, Class<T> aClass) {
            return null;
        }
    };

    /**
     * Counts city clicks for every day between the filter's start and end time
     * (both inclusive) and stores the merged result.
     *
     * @param filterDTO filter whose start/end time use {@code yyyy-MM-dd HH:mm:ss}
     * @return response whose body reports the elapsed time; the error message is
     *         set when a worker failed or the calling thread was interrupted
     */
    @Override
    public CommonResponse getScrollCountCityClicks(DayLogDto filterDTO) {
        long startQueryTime = System.currentTimeMillis();
        CommonResponse commonResponse = new CommonResponse();

        // Only the date part is used; the range is processed one whole day at a time.
        LocalDate startDate = LocalDate.parse(filterDTO.getStartTime(), inputFormatter);
        LocalDate endDate = LocalDate.parse(filterDTO.getEndTime(), inputFormatter);

        ExecutorService executorService = Executors.newFixedThreadPool(QUERY_THREAD_COUNT);
        try {
            List<Future<List<EsCityClickDataDto>>> futures = new ArrayList<>();
            for (LocalDate date = startDate; !date.isAfter(endDate); date = date.plusDays(1)) {
                // One task per calendar day.
                String formattedDate = date.format(outputFormatter);
                String requestStartTime = formattedDate + " 00:00:00";
                String requestEndTime = formattedDate + " 23:59:59";
                Callable<List<EsCityClickDataDto>> task = () ->
                        processDate(elasticsearchTemplate, filterDTO, formattedDate, requestStartTime, requestEndTime);
                futures.add(executorService.submit(task));
            }

            // Future.get() blocks until the task is done, so after this loop every day is finished.
            List<EsCityClickDataDto> allResults = new ArrayList<>();
            for (Future<List<EsCityClickDataDto>> future : futures) {
                allResults.addAll(future.get());
            }

            // Persist exactly once, after all workers have completed.
            if (!allResults.isEmpty()) {
                esCityClickDataMapper.batchTestInsert(allResults);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can react to it.
            Thread.currentThread().interrupt();
            commonResponse.setErrorMsg(ExceptionFormatUtil.buildErrorMessage(e));
        } catch (ExecutionException e) {
            commonResponse.setErrorMsg(ExceptionFormatUtil.buildErrorMessage(e));
        } finally {
            // Always release the pool; on failure this also cancels still-queued days.
            executorService.shutdownNow();
        }

        long duration = System.currentTimeMillis() - startQueryTime;
        commonResponse.setBody("程序耗时: " + duration + " 毫秒");
        return commonResponse;
    }

    /**
     * Scrolls all request records of one day and counts them per city.
     *
     * @param elasticsearchTemplate template used for the scroll calls
     * @param filterDTO             original filter (not read by this method)
     * @param formattedDate         the day as {@code yyyy-MM-dd}, stored as click date
     * @param requestStartTime      lower bound of {@code requestTime}
     * @param requestEndTime        upper bound of {@code requestTime}
     * @return one row per city seen that day; empty when no city was found
     */
    private static List<EsCityClickDataDto> processDate(ElasticsearchRestTemplate elasticsearchTemplate, DayLogDto filterDTO, String formattedDate, String requestStartTime, String requestEndTime) {
        NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
        queryBuilder.withIndices("request_record").withTypes("main");
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        boolQueryBuilder.must(QueryBuilders.rangeQuery("requestTime").from(requestStartTime).to(requestEndTime));
        queryBuilder.withQuery(boolQueryBuilder);
        queryBuilder.withPageable(PageRequest.of(0, SCROLL_PAGE_SIZE));

        Map<String, Integer> cityCountMap = new HashMap<>();
        ScrolledPage<RequestRecordDto> scroll = (ScrolledPage<RequestRecordDto>) elasticsearchTemplate.startScroll(
                SCROLL_TIMEOUT, queryBuilder.build(), RequestRecordDto.class, searchBodyResultMapper);
        try {
            while (scroll.hasContent()) {
                for (RequestRecordDto dto : scroll.getContent()) {
                    countCity(dto.getBody(), cityCountMap);
                }
                // Always continue with the scroll id of the latest page; each call renews the keep-alive.
                scroll = (ScrolledPage<RequestRecordDto>) elasticsearchTemplate.continueScroll(
                        scroll.getScrollId(), SCROLL_TIMEOUT, RequestRecordDto.class, searchBodyResultMapper);
            }
        } finally {
            // Release the server-side scroll context even when a page fails.
            elasticsearchTemplate.clearScroll(scroll.getScrollId());
        }

        List<EsCityClickDataDto> rankList = new ArrayList<>(cityCountMap.size());
        for (Map.Entry<String, Integer> entry : cityCountMap.entrySet()) {
            EsCityClickDataDto cityClickDataDto = new EsCityClickDataDto();
            cityClickDataDto.setCityName(entry.getKey());
            cityClickDataDto.setClickCount(entry.getValue());
            cityClickDataDto.setClickDate(formattedDate);
            cityClickDataDto.setCreateTime(new Date());
            rankList.add(cityClickDataDto);
        }
        return rankList;
    }

    /**
     * Extracts {@code cityName} from one request payload and increments its counter.
     * Blank payloads, payloads that are not valid JSON, and city names without
     * any Chinese character are skipped.
     */
    private static void countCity(String body, Map<String, Integer> cityCountMap) {
        if (StringUtils.isBlank(body)) {
            return;
        }
        try {
            JSONObject jsonObject = JSON.parseObject(cleanJsonData(body));
            if (jsonObject == null || jsonObject.get("cityName") == null) {
                return;
            }
            String cleanedCityName = removeSpecialCharacters(String.valueOf(jsonObject.get("cityName")));
            if (StringUtils.isEmpty(cleanedCityName)) {
                return;
            }
            cityCountMap.merge(cleanedCityName, 1, Integer::sum);
        } catch (RuntimeException ignored) {
            // Best effort: a single malformed payload must not abort the whole day.
        }
    }

    /**
     * Removes hyphens and all whitespace from a JSON string before parsing.
     *
     * @param jsonStr JSON string
     * @return the cleaned JSON string
     */
    private static String cleanJsonData(String jsonStr) {
        return JSON_NOISE.matcher(jsonStr).replaceAll("");
    }

    /**
     * Strips every character that is not a Chinese character (U+4E00..U+9FA5).
     *
     * @param cityName city name
     * @return the city name reduced to its Chinese characters
     */
    private static String removeSpecialCharacters(String cityName) {
        return NON_CHINESE.matcher(cityName).replaceAll("");
    }
}
代码中涉及到的类:
/**
 * Query filter passed to the city-click counting service.
 */
@Data
@Accessors(chain = true)
public class DayLogDto implements Serializable {
private String startTime;// start time; the service parses it as yyyy-MM-dd HH:mm:ss
private String endTime;// end time; the service parses it as yyyy-MM-dd HH:mm:ss
// NOTE(review): count and querySize are not read by the service code shown with this class — confirm their purpose.
private long count;
private Integer querySize;
}
/**
 * One logged request as read back from the request_record index.
 */
@Data
public class RequestRecordDto implements Serializable {
private String url;
private String params;
// The scroll result mapper stores the POST body here, or the GET params for GET requests.
private String body;
private String method;
// NOTE(review): declared as String here while the index mapping declares userId as integer — confirm.
private String userId;
}
/**
 * Click count of one city on one day.
 */
@Data
@Accessors(chain = true)
public class EsCityClickDataDto implements Serializable {
private Integer id; // primary key
private String cityName; // city
private Integer clickCount; // number of clicks
private String clickDate; // click date
private Date createTime;
}
/**
 * Generic response envelope carrying a status, an error message, a payload
 * and a few bookkeeping fields.
 */
public class CommonResponse implements Serializable {
    public int status;
    public String errorMsg;
    public Object body;
    private String code;
    private Long timeStamp;
    private boolean success;

    /**
     * Creates an empty response whose fields are filled in through the setters.
     *
     * <p>Needed explicitly: declaring the four-argument constructor suppresses
     * the implicit default constructor, and callers use {@code new CommonResponse()}.
     */
    public CommonResponse() {
        super();
    }

    /**
     * Creates a fully populated response.
     *
     * @param status    status value
     * @param errorMsg  error message, may be {@code null}
     * @param body      payload, may be {@code null}
     * @param timeStamp time stamp, may be {@code null}
     */
    public CommonResponse(int status, String errorMsg, Object body, Long timeStamp) {
        super();
        this.status = status;
        this.errorMsg = errorMsg;
        this.body = body;
        this.timeStamp = timeStamp;
    }

    public int getStatus() {
        return status;
    }

    public void setStatus(int status) {
        this.status = status;
    }

    public String getErrorMsg() {
        return errorMsg;
    }

    public void setErrorMsg(String errorMsg) {
        this.errorMsg = errorMsg;
    }

    public Object getBody() {
        return body;
    }

    public void setBody(Object body) {
        this.body = body;
    }

    public Long getTimeStamp() {
        return timeStamp;
    }

    public void setTimeStamp(Long timeStamp) {
        this.timeStamp = timeStamp;
    }

    public boolean getSuccess() {
        return success;
    }

    public void setSuccess(boolean success) {
        this.success = success;
    }

    public String getCode() {
        return code;
    }

    public void setCode(String code) {
        this.code = code;
    }
}