java使用多线程快速查询elasticsearch用户请求日志中上亿条数据

本文基于 Spring Boot,通过 spring-boot-starter-data-elasticsearch 在 Java 中操作 ES。Maven 项目直接引入以下依赖即可:

<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>

索引的数据结构

{"request_record" : {
    "mappings" : {
      "main" : {
        "properties" : {
          "body" : {
            "type" : "text"
          },
          "createTime" : {
            "type" : "date",
            "format" : "yyyy-MM-dd HH:mm:ss"
          },
         
          "id" : {
            "type" : "keyword"
          },
          "ip" : {
            "type" : "keyword"
          },
          "method" : {
            "type" : "keyword"
          },
          "params" : {
            "type" : "keyword"
          },
          "requestTime" : {
            "type" : "date",
            "format" : "yyyy-MM-dd HH:mm:ss"
          },
          "url" : {
            "type" : "keyword"
          },
          "userId" : {
            "type" : "integer"
          }
        }
      }
    }
  }
}

索引中的文档数据示例如下:

{
        "_index" : "request_record",
        "_type" : "main",
        "_id" : "846ee670-f81c-4e7f-a9a3-c0a4501865b7",
        "_score" : 1.030125,
        "_source" : {
          "body" : """{"userId":"110","cityName":"深圳"}""",
          "createTime" : "2024-09-16 10:01:22",
          "id" : "846ee670-f81c-4e7f-a9a3-c0a4501865b7",
          "ip" : "218.18.76.0",
          "method" : "POST",
          "requestTime" : "2024-09-16 10:01:12",
          "url" : "index/changeDate/getList",
          "userId" : 110
        }
      }

采用多线程查询上亿条用户请求信息

/**
 * Contract for the city-click statistics job built on the request log stored in Elasticsearch.
 */
public interface CountCityClicksService {
    /**
     * Runs the city-click count for the time window carried by {@code filterDTO}.
     *
     * @param filterDTO filter holding the start and end time of the window
     * @return a {@link CommonResponse} describing the outcome of the run
     */
    CommonResponse getScrollCountCityClicks(DayLogDto filterDTO);
}


/**
 * Counts, per calendar day and per city, how many logged requests in the
 * {@code request_record} index carry a {@code cityName} in their payload.
 *
 * <p>The requested window is split into one task per day. Each task scrolls through that day's
 * documents on a fixed-size thread pool, and the per-day counts are merged and written with a
 * single batch insert.
 */
@Service
public class CountCityClicksServiceImpl implements CountCityClicksService {
    @Autowired
    protected ElasticsearchRestTemplate elasticsearchTemplate;

    // NOTE(review): esCityClickDataMapper is used by getScrollCountCityClicks but its declaration
    // is not part of the code shown here; presumably an injected persistence mapper exposing
    // batchTestInsert(List<EsCityClickDataDto>) -- confirm and declare it alongside this field.

    /** Format of the start/end time strings received in the filter. */
    private static final DateTimeFormatter inputFormatter =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
    /** Format used for the per-day key and for building the day's range bounds. */
    private static final DateTimeFormatter outputFormatter =
            DateTimeFormatter.ofPattern("yyyy-MM-dd");
    /** Scroll keep-alive in milliseconds; it is passed again on every continueScroll call. */
    private static final long SCROLL_TIMEOUT = 3000;
    /** Number of day-tasks that may query Elasticsearch at the same time. */
    private static final int QUERY_THREADS = 10;
    /** Number of documents requested per scroll page. */
    private static final int SCROLL_PAGE_SIZE = 10000;
    /** Hyphens and whitespace runs stripped from a payload before it is parsed as JSON. */
    private static final java.util.regex.Pattern JSON_NOISE =
            java.util.regex.Pattern.compile("-|\\s+");
    /** Everything outside the CJK range U+4E00..U+9FA5; stripped from city names. */
    private static final java.util.regex.Pattern NON_CHINESE =
            java.util.regex.Pattern.compile("[^\\u4e00-\\u9fa5]");

    /**
     * Maps every search hit to a {@link RequestRecordDto} carrying only {@code method} and
     * {@code body}. For POST the body comes from the {@code body} field, for GET from
     * {@code params}.
     *
     * <p>One DTO is produced per hit, including hits without a {@code method}. A scroll loop that
     * stops on the first page without content would otherwise end early whenever a full page of
     * hits happened to be filtered out. Consumers skip DTOs whose body is blank.
     */
    public final static SearchResultMapper searchBodyResultMapper = new SearchResultMapper() {
        @Override
        @SuppressWarnings("unchecked") // every caller in this class requests RequestRecordDto
        public <T> AggregatedPage<T> mapResults(SearchResponse response, Class<T> aClass, Pageable pageable) {
            List<RequestRecordDto> result = new ArrayList<>();
            for (SearchHit hit : response.getHits()) {
                RequestRecordDto dto = new RequestRecordDto();
                Map<String, Object> source = hit.getSourceAsMap();
                if (source != null) {
                    String method = (String) source.get("method");
                    dto.setMethod(method);
                    String payload = null;
                    if ("POST".equals(method)) {
                        payload = (String) source.get("body");
                    } else if ("GET".equals(method)) {
                        payload = (String) source.get("params");
                    }
                    if (org.apache.commons.lang3.StringUtils.isNotBlank(payload)) {
                        dto.setBody(payload);
                    }
                }
                result.add(dto);
            }
            return new AggregatedPageImpl<T>((List<T>) result, pageable,
                    response.getHits().getTotalHits(), response.getScrollId());
        }

        @Override
        public <T> T mapSearchHit(SearchHit searchHit, Class<T> aClass) {
            return null;
        }
    };

    /**
     * Counts city clicks for every day between the filter's start and end time (both inclusive)
     * and stores the merged result with one batch insert.
     *
     * @param filterDTO filter whose {@code startTime} and {@code endTime} are formatted as
     *                  {@code yyyy-MM-dd HH:mm:ss}; only the date part decides which days are
     *                  queried
     * @return a response whose body reports the elapsed time in milliseconds; {@code errorMsg}
     *         is set when a day-task failed or the calling thread was interrupted, and in that
     *         case nothing is inserted
     * @throws java.time.format.DateTimeParseException if a time string does not match the format
     * @throws NullPointerException if {@code filterDTO} or one of its time strings is null
     */
    @Override
    public CommonResponse getScrollCountCityClicks(DayLogDto filterDTO) {
        long startQueryTime = System.currentTimeMillis();
        CommonResponse commonResponse = new CommonResponse();

        // Only the date part of the two timestamps is used; each day is queried in full.
        LocalDate startDate = LocalDate.parse(filterDTO.getStartTime(), inputFormatter);
        LocalDate endDate = LocalDate.parse(filterDTO.getEndTime(), inputFormatter);

        ExecutorService executorService = Executors.newFixedThreadPool(QUERY_THREADS);
        try {
            List<Future<List<EsCityClickDataDto>>> futures = new ArrayList<>();
            // One task per calendar day, each covering 00:00:00 to 23:59:59 of that day.
            for (LocalDate date = startDate; !date.isAfter(endDate); date = date.plusDays(1)) {
                String formattedDate = date.format(outputFormatter);
                String requestStartTime = formattedDate + " 00:00:00";
                String requestEndTime = formattedDate + " 23:59:59";
                Callable<List<EsCityClickDataDto>> task = () -> processDate(
                        elasticsearchTemplate, filterDTO, formattedDate, requestStartTime, requestEndTime);
                futures.add(executorService.submit(task));
            }

            // Future.get() blocks until the task is done, so once this loop ends every day
            // has been processed and no further waiting on the pool is required.
            List<EsCityClickDataDto> allResults = new ArrayList<>();
            for (Future<List<EsCityClickDataDto>> future : futures) {
                allResults.addAll(future.get());
            }

            // Insert exactly once. Doing this inside a "while not terminated" poll would run it
            // zero or many times depending on how quickly the pool shuts down.
            if (!allResults.isEmpty()) {
                esCityClickDataMapper.batchTestInsert(allResults);
            }
        } catch (InterruptedException e) {
            // Restore the flag so code further up the stack can still observe the interruption.
            Thread.currentThread().interrupt();
            commonResponse.setErrorMsg(ExceptionFormatUtil.buildErrorMessage(e));
        } catch (ExecutionException e) {
            commonResponse.setErrorMsg(ExceptionFormatUtil.buildErrorMessage(e));
        } finally {
            // Always release the worker threads. After a failure this also cancels day-tasks
            // that are still queued or running; after success all tasks are already finished.
            executorService.shutdownNow();
        }

        long duration = System.currentTimeMillis() - startQueryTime;
        commonResponse.setBody("程序耗时: " + duration + " 毫秒");
        return commonResponse;
    }

    /**
     * Scrolls through all request records of one day and counts the occurrences of each city.
     *
     * @param elasticsearchTemplate template used to run the scroll query
     * @param filterDTO             original filter; not read by this method
     * @param formattedDate         the day being processed, {@code yyyy-MM-dd}; stored on each row
     * @param requestStartTime      inclusive lower bound for {@code requestTime}
     * @param requestEndTime        inclusive upper bound for {@code requestTime}
     * @return one row per city seen on that day; empty when no city was found
     */
    private static List<EsCityClickDataDto> processDate(ElasticsearchRestTemplate elasticsearchTemplate, DayLogDto filterDTO, String formattedDate, String requestStartTime, String requestEndTime) {
        NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
        queryBuilder.withIndices("request_record").withTypes("main");
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        boolQueryBuilder.must(QueryBuilders.rangeQuery("requestTime").from(requestStartTime).to(requestEndTime));
        queryBuilder.withQuery(boolQueryBuilder);
        queryBuilder.withPageable(PageRequest.of(0, SCROLL_PAGE_SIZE));

        Map<String, Integer> cityCountMap = new HashMap<>();
        String scrollId = null;
        try {
            ScrolledPage<RequestRecordDto> scroll = (ScrolledPage<RequestRecordDto>) elasticsearchTemplate.startScroll(
                    SCROLL_TIMEOUT, queryBuilder.build(), RequestRecordDto.class, searchBodyResultMapper);
            scrollId = scroll.getScrollId();
            while (scroll.hasContent()) {
                for (RequestRecordDto dto : scroll.getContent()) {
                    String cityName = extractCityName(dto.getBody());
                    if (cityName != null) {
                        cityCountMap.merge(cityName, 1, Integer::sum);
                    }
                }
                // The scroll id may change between pages, so always continue with the latest
                // one. Each continueScroll call also renews the keep-alive.
                scroll = (ScrolledPage<RequestRecordDto>) elasticsearchTemplate.continueScroll(
                        scrollId, SCROLL_TIMEOUT, RequestRecordDto.class, searchBodyResultMapper);
                scrollId = scroll.getScrollId();
            }
        } finally {
            // Release the server-side scroll context even when a page request failed.
            if (scrollId != null) {
                elasticsearchTemplate.clearScroll(scrollId);
            }
        }

        List<EsCityClickDataDto> rankList = new ArrayList<>(cityCountMap.size());
        for (Map.Entry<String, Integer> entry : cityCountMap.entrySet()) {
            EsCityClickDataDto cityClickDataDto = new EsCityClickDataDto();
            cityClickDataDto.setCityName(entry.getKey());
            cityClickDataDto.setClickCount(entry.getValue());
            cityClickDataDto.setClickDate(formattedDate);
            cityClickDataDto.setCreateTime(new Date());
            rankList.add(cityClickDataDto);
        }
        return rankList;
    }

    /**
     * Extracts the cleaned {@code cityName} from a request payload.
     *
     * @param body raw payload, expected to be a JSON object; may be null or blank
     * @return the city name reduced to Chinese characters, or {@code null} when the payload is
     *         blank, is not parseable JSON, has no {@code cityName}, or nothing is left after
     *         cleaning
     */
    private static String extractCityName(String body) {
        // Qualified on purpose: the file already qualifies commons-lang3 elsewhere, so the
        // unqualified StringUtils may be a different class without isBlank.
        if (org.apache.commons.lang3.StringUtils.isBlank(body)) {
            return null;
        }
        try {
            JSONObject jsonObject = JSON.parseObject(cleanJsonData(body));
            if (jsonObject == null || jsonObject.get("cityName") == null) {
                return null;
            }
            String cleanedCityName = removeSpecialCharacters(String.valueOf(jsonObject.get("cityName")));
            return cleanedCityName.isEmpty() ? null : cleanedCityName;
        } catch (Exception ignored) {
            // Best effort: payloads that cannot be parsed as JSON simply do not contribute to
            // the statistics; one bad log line must not abort a whole day's count.
            return null;
        }
    }

    /**
     * Strips hyphens and whitespace from a JSON string before parsing.
     *
     * @param jsonStr JSON string
     * @return the string with every {@code -} and every whitespace run removed
     */
    private static String cleanJsonData(String jsonStr) {
        return JSON_NOISE.matcher(jsonStr).replaceAll("");
    }

    /**
     * Removes every character that is not a Chinese character in U+4E00..U+9FA5.
     *
     * @param cityName city name
     * @return the city name reduced to Chinese characters; may be empty
     */
    private static String removeSpecialCharacters(String cityName) {
        return NON_CHINESE.matcher(cityName).replaceAll("");
    }

}

代码中涉及到的类:

/**
 * Filter passed to the city-click statistics job.
 */
@Data
@Accessors(chain = true)
public class DayLogDto implements Serializable {
    private String startTime;// start of the query window; the service parses it as "yyyy-MM-dd HH:mm:ss"
    private String endTime;// end of the query window; the service parses it as "yyyy-MM-dd HH:mm:ss"
    // Not read by the service code shown alongside this class.
    private long count;
    // Not read by the service code shown alongside this class.
    private Integer querySize;
}

/**
 * Projection of one document from the request_record index.
 * The result mapper in CountCityClicksServiceImpl fills only method and body.
 */
@Data
public class RequestRecordDto implements Serializable {
    private String url;
    private String params;
    // Request payload; the result mapper also stores the params of GET requests here.
    private String body;
    // HTTP method of the logged request, e.g. "POST" or "GET".
    private String method;
    private String userId;
}

/**
 * One row of the city-click statistics: how often a city was seen on a given day.
 */
@Data
@Accessors(chain = true)
public class EsCityClickDataDto implements Serializable {

    private Integer id; // primary key

    private String cityName; // city name

    private Integer clickCount; // number of clicks

    private String clickDate; // day the clicks belong to; the service sets it as "yyyy-MM-dd"
    // Time at which the row object was created by the service.
    private Date createTime;
}

/**
 * Generic response envelope returned by service calls.
 *
 * <p>The {@code status}, {@code errorMsg} and {@code body} fields are public and are left that
 * way so existing code that reads them directly keeps working.
 */
public class CommonResponse implements Serializable {

    /** Numeric status; defaults to 0. */
    public int status;

    /** Error description; {@code null} when none was set. */
    public String errorMsg;

    /** Response payload; {@code null} when none was set. */
    public Object body;

    private String code;

    private Long timeStamp;

    private boolean success;

    /**
     * Creates an empty response to be filled through the setters.
     *
     * <p>Needed by callers that build a response step by step with {@code new CommonResponse()};
     * with only the four-argument constructor declared, such code does not compile.
     */
    public CommonResponse() {
        super();
    }

    /**
     * Creates a populated response.
     *
     * @param status    numeric status
     * @param errorMsg  error description, may be {@code null}
     * @param body      response payload, may be {@code null}
     * @param timeStamp time stamp of the response, may be {@code null}
     */
    public CommonResponse(int status, String errorMsg, Object body, Long timeStamp) {
        super();
        this.status = status;
        this.errorMsg = errorMsg;
        this.body = body;
        this.timeStamp = timeStamp;
    }

    public int getStatus() {
        return status;
    }

    public void setStatus(int status) {
        this.status = status;
    }

    public String getErrorMsg() {
        return errorMsg;
    }

    public void setErrorMsg(String errorMsg) {
        this.errorMsg = errorMsg;
    }

    public Object getBody() {
        return body;
    }

    public void setBody(Object body) {
        this.body = body;
    }

    public Long getTimeStamp() {
        return timeStamp;
    }

    public void setTimeStamp(Long timeStamp) {
        this.timeStamp = timeStamp;
    }

    public boolean getSuccess() {
        return success;
    }

    public void setSuccess(boolean success) {
        this.success = success;
    }

    public String getCode() {
        return code;
    }

    public void setCode(String code) {
        this.code = code;
    }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值