【HIVE】Hook

There is already an article that covers Hooks in detail: 【HIVE】Hook(钩子)函数从入门到放弃.

HookRunner

HookRunner holds all of the hooks and is responsible for loading them.
All hooks are loaded in its initialize method:

public void initialize() {
    if (initialized) {
      return;
    }
    initialized = true;
    queryHooks.addAll(loadHooksFromConf(HiveConf.ConfVars.HIVE_QUERY_LIFETIME_HOOKS, QueryLifeTimeHook.class));
    saHooks.addAll(loadHooksFromConf(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK, HiveSemanticAnalyzerHook.class));
    driverRunHooks.addAll(loadHooksFromConf(HiveConf.ConfVars.HIVE_DRIVER_RUN_HOOKS, HiveDriverRunHook.class));
    preExecHooks.addAll(loadHooksFromConf(HiveConf.ConfVars.PREEXECHOOKS, ExecuteWithHookContext.class));
    postExecHooks.addAll(loadHooksFromConf(HiveConf.ConfVars.POSTEXECHOOKS, ExecuteWithHookContext.class));
    onFailureHooks.addAll(loadHooksFromConf(HiveConf.ConfVars.ONFAILUREHOOKS, ExecuteWithHookContext.class));

    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED)) {
      queryHooks.add(new MetricsQueryLifeTimeHook());
    }
  }
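Each conf var above holds a comma-separated list of hook class names, which loadHooksFromConf presumably splits and instantiates reflectively. As a minimal sketch of how a custom pre-execution hook would become visible to this loading step, the class name below is purely illustrative, while the HiveConf.setVar call is standard:

HiveConf conf = new HiveConf();
// PREEXECHOOKS holds a comma-separated list of class names implementing
// ExecuteWithHookContext; "com.example.AuditPreHook" is a made-up example.
conf.setVar(HiveConf.ConfVars.PREEXECHOOKS, "com.example.AuditPreHook");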

QueryLifeTimeHook

It defines four callbacks: two around compilation and two around execution.

/**
 * A type of hook which triggers before query compilation and after query execution.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public interface QueryLifeTimeHook extends Hook {

  /**
   * Invoked before a query enters the compilation phase.
   *
   * @param ctx the context for the hook
   */
  void beforeCompile(QueryLifeTimeHookContext ctx);

  /**
   * Invoked after a query compilation. Note: if 'hasError' is true,
   * the query won't enter the following execution phase.
   *
   * @param ctx the context for the hook
   * @param hasError whether any error occurred during compilation.
   */
  void afterCompile(QueryLifeTimeHookContext ctx, boolean hasError);

  /**
   * Invoked before a query enters the execution phase.
   *
   * @param ctx the context for the hook
   */
  void beforeExecution(QueryLifeTimeHookContext ctx);

  /**
   * Invoked after a query finishes its execution.
   *
   * @param ctx the context for the hook
   * @param hasError whether any error occurred during query execution.
   */
  void afterExecution(QueryLifeTimeHookContext ctx, boolean hasError);

}
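A minimal sketch of an implementation that simply logs the four lifecycle points. The class name is made up for illustration, and ctx.getCommand() is assumed to return the query text:

public class LoggingQueryLifeTimeHook implements QueryLifeTimeHook {

  @Override
  public void beforeCompile(QueryLifeTimeHookContext ctx) {
    // Called before the query enters the compilation phase.
    System.out.println("before compile: " + ctx.getCommand());
  }

  @Override
  public void afterCompile(QueryLifeTimeHookContext ctx, boolean hasError) {
    // If hasError is true, the query will not enter the execution phase.
    System.out.println("after compile, hasError=" + hasError);
  }

  @Override
  public void beforeExecution(QueryLifeTimeHookContext ctx) {
    System.out.println("before execution");
  }

  @Override
  public void afterExecution(QueryLifeTimeHookContext ctx, boolean hasError) {
    System.out.println("after execution, hasError=" + hasError);
  }
}

Such a class would be listed in HiveConf.ConfVars.HIVE_QUERY_LIFETIME_HOOKS so that HookRunner.initialize() above picks it up.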

HiveSemanticAnalyzerHook

It defines two callbacks: one before and one after Hive's own semantic analysis.

@InterfaceAudience.Public
@InterfaceStability.Stable
public interface HiveSemanticAnalyzerHook extends Hook {
  /**
   * Invoked before Hive performs its own semantic analysis on
   * a statement.  The implementation may inspect the statement AST and
   * prevent its execution by throwing a SemanticException.
   * Optionally, it may also augment/rewrite the AST, but must produce
   * a form equivalent to one which could have
   * been returned directly from Hive's own parser.
   *
   * @param context context information for semantic analysis
   *
   * @param ast AST being analyzed and optionally rewritten
   *
   * @return replacement AST (typically the same as the original AST unless the
   * entire tree had to be replaced; must not be null)
   */
  public ASTNode preAnalyze(
    HiveSemanticAnalyzerHookContext context,
    ASTNode ast) throws SemanticException;

  /**
   * Invoked after Hive performs its own semantic analysis on a
   * statement (including optimization).
   * Hive calls postAnalyze on the same hook object
   * as preAnalyze, so the hook can maintain state across the calls.
   *
   * @param context context information for semantic analysis
   * @param rootTasks root tasks produced by semantic analysis;
   * the hook is free to modify this list or its contents
   */
  public void postAnalyze(
    HiveSemanticAnalyzerHookContext context,
    List<Task<? extends Serializable>> rootTasks) throws SemanticException;
}
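A minimal sketch of an implementation, with a made-up class name; it passes the AST through unchanged, which is the least a hook must do (preAnalyze must not return null):

public class PassThroughSemanticAnalyzerHook implements HiveSemanticAnalyzerHook {

  @Override
  public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast)
      throws SemanticException {
    // Inspect or rewrite the AST here; throwing SemanticException blocks the statement.
    return ast;
  }

  @Override
  public void postAnalyze(HiveSemanticAnalyzerHookContext context,
      List<Task<? extends Serializable>> rootTasks) throws SemanticException {
    // rootTasks can be inspected or modified after semantic analysis and optimization.
  }
}

It would be loaded through HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK, as shown in HookRunner.initialize() above.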

ReExecDriver

ReExecDriver wraps a regular Driver, registers one extra semantic analyzer hook (HandleReOptimizationExplain) on its HookRunner, and initializes the configured re-execution plugins:

public ReExecDriver(QueryState queryState, String userName, QueryInfo queryInfo,
      ArrayList<IReExecutionPlugin> plugins) {
    this.queryState = queryState;
    coreDriver = new Driver(queryState, userName, queryInfo, null);
    coreDriver.getHookRunner().addSemanticAnalyzerHook(new HandleReOptimizationExplain());
    this.plugins = plugins;

    for (IReExecutionPlugin p : plugins) {
      p.initialize(coreDriver);
    }
  }

DriverFactory

DriverFactory decides whether a query gets a plain Driver or a ReExecDriver, based on whether query re-execution is enabled and which strategies are configured:

public static IDriver newDriver(QueryState queryState, String userName, QueryInfo queryInfo) {
    boolean enabled = queryState.getConf().getBoolVar(ConfVars.HIVE_QUERY_REEXECUTION_ENABLED);
    if (!enabled) {
      return new Driver(queryState, userName, queryInfo);
    }

    String strategies = queryState.getConf().getVar(ConfVars.HIVE_QUERY_REEXECUTION_STRATEGIES);
    strategies = Strings.nullToEmpty(strategies).trim().toLowerCase();
    ArrayList<IReExecutionPlugin> plugins = new ArrayList<>();
    for (String string : strategies.split(",")) {
      if (string.trim().isEmpty()) {
        continue;
      }
      plugins.add(buildReExecPlugin(string));
    }

    return new ReExecDriver(queryState, userName, queryInfo, plugins);
  }

The two configuration entries referenced above are defined in HiveConf as:

 HIVE_QUERY_REEXECUTION_ENABLED("hive.query.reexecution.enabled", true,
        "Enable query reexecutions"),
    HIVE_QUERY_REEXECUTION_STRATEGIES("hive.query.reexecution.strategies", "overlay,reoptimize",
        "comma separated list of plugin can be used:\n"
            + "  overlay: hiveconf subtree 'reexec.overlay' is used as an overlay in case of an execution errors out\n"
            + "  reoptimize: collects operator statistics during execution and recompile the query after a failure"),

DriverFactory.buildReExecPlugin

 private static IReExecutionPlugin buildReExecPlugin(String name) throws RuntimeException {
    if (name.equals("overlay")) {
      return new ReExecutionOverlayPlugin();
    }
    if (name.equals("reoptimize")) {
      return new ReOptimizePlugin();
    }
    throw new RuntimeException(
        "Unknown re-execution plugin: " + name + " (" + ConfVars.HIVE_QUERY_REEXECUTION_STRATEGIES.varname + ")");
  }

ReExecutionOverlayPlugin

ReExecutionOverlayPlugin registers an on-failure hook and captures the configuration subtree under the reexec.overlay prefix:

 @Override
  public void initialize(Driver driver) {
    this.driver = driver;
    driver.getHookRunner().addOnFailureHook(new LocalHook());
    HiveConf conf = driver.getConf();
    subtree = conf.subtree("reexec.overlay");
  }
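A hedged example of how that subtree would be populated. The property name below is illustrative, and the assumption is that values under reexec.overlay are applied to the session configuration only when the failed query is re-executed:

HiveConf conf = new HiveConf();
// Anything under the "reexec.overlay." prefix ends up in conf.subtree("reexec.overlay");
// this particular memory setting is just an example value.
conf.set("reexec.overlay.tez.am.resource.memory.mb", "8192");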

ReOptimizePlugin

ReOptimizePlugin registers three hooks: a LocalHook as an on-failure hook, plus an OperatorStatsReaderHook as both an on-failure hook and a post-execution hook. It also attaches a StatsSource to the driver:

@Override
  public void initialize(Driver driver) {
    coreDriver = driver;
    coreDriver.getHookRunner().addOnFailureHook(new LocalHook());
    statsReaderHook = new OperatorStatsReaderHook();
    coreDriver.getHookRunner().addOnFailureHook(statsReaderHook);
    coreDriver.getHookRunner().addPostHook(statsReaderHook);
    alwaysCollectStats = driver.getConf().getBoolVar(ConfVars.HIVE_QUERY_REEXECUTION_ALWAYS_COLLECT_OPERATOR_STATS);
    statsReaderHook.setCollectOnSuccess(alwaysCollectStats);

    coreDriver.setStatsSource(StatsSources.getStatsSource(driver.getConf()));
  }
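By default the OperatorStatsReaderHook presumably collects operator statistics only when a query fails; the flag read above changes that. An illustrative setting, using the ConfVars constant that appears in the code:

HiveConf conf = new HiveConf();
// Collect operator statistics even for successful queries, so the re-optimizer
// already has runtime stats available if a later run fails.
conf.setBoolVar(HiveConf.ConfVars.HIVE_QUERY_REEXECUTION_ALWAYS_COLLECT_OPERATOR_STATS, true);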