MR任务之后提交Hadoop批量索引任务

本文介绍了一种利用MR程序处理数据后,通过Hadoop Indexing将JSON数据提交给Druid的方法。具体实现了从指定目录读取JSON数据,并通过HTTP POST方式向Druid Overlord提交索引任务。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

MR程序运行成功之后,将生成的JSON数据放入指定的目录,然后通过HTTP POST请求向Druid的Overlord提交索引任务:

    /**
     * Submits a Hadoop batch-index task to Druid after the MapReduce job has finished.
     *
     * <p>Each output path is resolved through {@code getPaths}; the non-null results are joined
     * with commas and written into the task spec built by {@link HadoopIndexClient#easyJson}.
     * The spec is then POSTed to the {@code druidTask} URL. Nothing is submitted when no input
     * path could be resolved or when the spec could not be built.
     *
     * @param fileSystem         file system passed to {@code getPaths} for each output path
     * @param dataSource         value written into the spec as the data source name
     * @param intervals          interval string written into the spec's granularity section
     * @param outputPaths        MapReduce output locations to index
     * @param segmentGranularity segment granularity written into the spec
     */
    private static void submitHadoopIndexTask(FileSystem fileSystem, String dataSource,
            String intervals, List<Path> outputPaths, String segmentGranularity) {
        List<String> paths = new ArrayList<>();
        for (Path outputPath : outputPaths) {
            String tmpPath = getPaths(fileSystem, outputPath);
            if (tmpPath != null) {
                paths.add(tmpPath);
            }
        }

        // String.join never returns null, so only emptiness has to be checked.
        String path = String.join(",", paths);
        if (path.isEmpty()) {
            System.out.println("Paths are null");
            return;
        }

        String json = HadoopIndexClient.easyJson(dataSource, path, intervals, segmentGranularity, queryGranularity);
        if (json == null) {
            // easyJson signals failure by returning null; posting that would only fail later
            // inside the HTTP client with a less helpful message.
            System.out.println("Hadoop index task JSON could not be built, task not submitted");
            return;
        }

        // post() returns null when the request itself failed.
        String response = HttpClientUtil.post(druidTask, json);
        if (response == null) {
            System.out.println("Submitting Hadoop index task failed, no response received");
        }
    }

其hadoop-index的json描述文件可以根据设置的参数进行修改:

package net.icsoc.cti.report.druid;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import net.icsoc.cti.report.CtiReportBatch;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.SimpleDateFormat;
import java.util.*;

/*******************************************************************************
 * 版权信息:北京中通天鸿武汉分公司
 * @author xuchang
 * Copyright: Copyright (c) 2007北京中通天鸿武汉分公司,Inc.All Rights Reserved.
 * Description:
 ******************************************************************************/
/**
 * Builds the JSON body of a Druid Hadoop index task from the {@code index-hadoop.json}
 * template found on the classpath.
 */
public class HadoopIndexClient {

    /** Classpath resource that holds the task spec template. */
    private static final String TEMPLATE_RESOURCE = "index-hadoop.json";

    /**
     * Loads the task spec template and overwrites the job-specific fields.
     *
     * <p>The following entries of the template are replaced:
     * {@code spec.dataSchema.dataSource},
     * {@code spec.dataSchema.granularitySpec.intervals} (a one-element list),
     * {@code spec.dataSchema.granularitySpec.segmentGranularity},
     * {@code spec.dataSchema.granularitySpec.queryGranularity} and
     * {@code spec.ioConfig.inputSpec.paths}.
     *
     * @param dataSource         value stored as {@code dataSource}
     * @param paths              value stored as {@code paths} of the input spec
     * @param interval           single interval stored in the {@code intervals} list
     * @param segmentGranularity value stored as {@code segmentGranularity}
     * @param queryGranularity   value stored as {@code queryGranularity}
     * @return the modified spec serialized as JSON, or {@code null} when the template cannot be
     *         read, parsed or does not contain the expected sections
     */
    public static String easyJson(String dataSource, String paths, String interval, String segmentGranularity, String queryGranularity) {
        try {
            Map<String, Object> map = JSON.parseObject(readTemplate(), new TypeReference<Map<String, Object>>() {
            });

            // Patch the job-specific fields of the parsed template.
            Map<String, Object> spec = childObject(map, "spec");
            Map<String, Object> dataSchema = childObject(spec, "dataSchema");
            dataSchema.put("dataSource", dataSource);

            Map<String, Object> granularity = childObject(dataSchema, "granularitySpec");
            List<String> intervals = new ArrayList<>();
            intervals.add(interval);
            granularity.put("intervals", intervals);
            granularity.put("segmentGranularity", segmentGranularity);
            granularity.put("queryGranularity", queryGranularity);

            Map<String, Object> ioConfig = childObject(spec, "ioConfig");
            Map<String, Object> inputSpec = childObject(ioConfig, "inputSpec");
            inputSpec.put("paths", paths);

            return JSON.toJSONString(map);
        } catch (Exception e) {
            // Callers rely on the null return to detect failure, so the error is reported
            // here instead of being propagated. Printing the exception itself (not only
            // getMessage()) keeps the exception type visible when the message is null.
            System.out.println("Failed to build Hadoop index task JSON: " + e);
            return null;
        }
    }

    /** Reads the template resource as UTF-8 text; line terminators are dropped. */
    private static String readTemplate() throws java.io.IOException {
        InputStream inputStream = CtiReportBatch.class.getClassLoader().getResourceAsStream(TEMPLATE_RESOURCE);
        if (inputStream == null) {
            throw new java.io.FileNotFoundException("classpath resource not found: " + TEMPLATE_RESOURCE);
        }
        // Closing the reader also closes the wrapped input stream.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8))) {
            StringBuilder json = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                json.append(line);
            }
            return json.toString();
        }
    }

    /** Returns the JSON object stored under {@code key}, failing clearly when it is absent. */
    @SuppressWarnings("unchecked") // guarded by the instanceof check; JSON object keys are strings
    private static Map<String, Object> childObject(Map<String, Object> parent, String key) {
        Object value = parent.get(key);
        if (!(value instanceof Map)) {
            throw new IllegalStateException(
                    "template " + TEMPLATE_RESOURCE + " has no JSON object named \"" + key + "\"");
        }
        return (Map<String, Object>) value;
    }

    /**
     * Prints the current time in GMT using the pattern {@code yyyy-MM-dd'T'HH:mm:ss.SSSXXX}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
        format.setCalendar(Calendar.getInstance(TimeZone.getTimeZone("GMT")));
        System.out.println(format.format(new Date()));
    }
}

利用HTTPClient实现任务提交:

package net.icsoc.cti.report.utils;

import net.icsoc.cti.report.druid.HadoopIndexClient;
import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.util.CharArrayBuffer;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

/*******************************************************************************
 * 版权信息:北京中通天鸿武汉分公司
 * @author xuchang
 * Copyright: Copyright (c) 2007北京中通天鸿武汉分公司,Inc.All Rights Reserved.
 * Description: Utility class for POSTing JSON payloads over HTTP
 ******************************************************************************/
/**
 * Small helper for sending a JSON document to an HTTP endpoint with a POST request.
 */
public class HttpClientUtil {
    private static final Logger logger = LoggerFactory.getLogger(HttpClientUtil.class);

    /** Charset for encoding the request body and the fallback for decoding the response. */
    private static final String UTF_8 = "UTF-8";

    /**
     * POSTs {@code json} to {@code url} with content type {@code application/json}.
     *
     * <p>A new client is created for every call and its connection manager is shut down before
     * returning. Failures are logged rather than thrown.
     *
     * @param url  target URL
     * @param json request body
     * @return the response body as text (also for non-2xx responses), or {@code null} when an
     *         argument is {@code null}, the request failed or the response had no body
     */
    public static String post(String url, String json) {
        if (url == null || json == null) {
            logger.error("[httpUtils] url and json must not be null, url={}", url);
            return null;
        }

        HttpClient httpclient = new DefaultHttpClient();
        String content = null;
        try {
            HttpPost httppost = new HttpPost(url);
            // Encode the body explicitly as UTF-8: the single-argument StringEntity constructor
            // falls back to the library default charset, which cannot represent non-ASCII text.
            // No Content-Encoding header is set; that header names codings such as gzip,
            // not character sets.
            StringEntity postEntity = new StringEntity(json, UTF_8);
            postEntity.setContentType("application/json");
            httppost.setEntity(postEntity);

            HttpResponse response = httpclient.execute(httppost);
            int status = response.getStatusLine().getStatusCode();
            HttpEntity resEntity = response.getEntity();
            if (resEntity != null) {
                // UTF-8 is only used when the response does not declare its own charset.
                content = EntityUtils.toString(resEntity, UTF_8);
            }
            if (StringUtils.isEmpty(content)) {
                logger.error("[httpUtils] 返回的结果类型不包含结果  返回的结果为空");
            }
            if (status >= HttpStatus.SC_OK && status < HttpStatus.SC_MULTIPLE_CHOICES) {
                logger.debug("[httpUtils] load TaskInfo success");
            } else {
                logger.error("[httpUtils] POST {} returned HTTP status {}: {}", url, status, content);
            }
        } catch (Exception e) {
            // Passing the exception as the last argument lets the logger record the stack trace.
            logger.error("access api url wrong!! url={}", url, e);
        } finally {
            // Release the connection and all resources held by the client.
            httpclient.getConnectionManager().shutdown();
        }
        return content;
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值