Hadoop--CLI的解析

这篇博客探讨了如何在Hadoop中使用Apache Commons CLI进行命令行接口的设计,包括定义、解析和询问三个关键步骤。文章详细介绍了GenericOptionsParser在处理命令行参数中的作用,并在ToolRunner中执行解析后的操作。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

基于 Apache Commons CLI 的命令行设计

通常情况下命令行处理有三个步骤:定义,解析和询问阶段

一、定义

package org.apache.hadoop.util;--GenericOptionsParser中

private static Options buildGeneralOptions(Options opts) {
    Option fs = OptionBuilder.withArgName("local|namenode:port")
    .hasArg()
    .withDescription("specify a namenode")
    .create("fs");
    Option jt = OptionBuilder.withArgName("local|jobtracker:port")
    .hasArg()
    .withDescription("specify a job tracker")
    .create("jt");
    Option oconf = OptionBuilder.withArgName("configuration file")
    .hasArg()
    .withDescription("specify an application configuration file")
    .create("conf");
    Option property = OptionBuilder.withArgName("property=value")
    .hasArg()
    .withDescription("use value for given property")
    .create('D');
    Option libjars = OptionBuilder.withArgName("paths")
    .hasArg()
    .withDescription("comma separated jar files to include in the classpath.")
    .create("libjars");
    Option files = OptionBuilder.withArgName("paths")
    .hasArg()
    .withDescription("comma separated files to be copied to the " +
           "map reduce cluster")
    .create("files");
    Option archives = OptionBuilder.withArgName("paths")
    .hasArg()
    .withDescription("comma separated archives to be unarchived" +
                     " on the compute machines.")
    .create("archives");
    // file with security tokens
    Option tokensFile = OptionBuilder.withArgName("tokensFile")
    .hasArg()
    .withDescription("name of the file with the tokens")
    .create("tokenCacheFile");

    opts.addOption(fs);
    opts.addOption(jt);
    opts.addOption(oconf);
    opts.addOption(property);
    opts.addOption(libjars);
    opts.addOption(files);
    opts.addOption(archives);
    opts.addOption(tokensFile);

    return opts;
  }

 二、解析

 /**
   * Parse the user-specified options, get the generic options, and modify
   * configuration accordingly
   * @param conf Configuration to be modified
   * @param args User-specified arguments
   * @return Command-specific arguments
   */
  private String[] parseGeneralOptions(Options opts, Configuration conf, 
      String[] args) throws IOException {
    opts = buildGeneralOptions(opts);
    /**
     * Apache CLI支持多种输入参数格式,主要支持的格式有以下几种
     * POSIX(Portable Operating System Interface of Unix)中的参数形式,例如 tar -zxvf foo.tar.gz
		GNU 中的长参数形式,例如 du --human-readable --max-depth=1
		Java 命令中的参数形式,例如 java -Djava.net.useSystemProxies=true Foo
		短杠参数带参数值的参数形式,例如 gcc -O2 foo.c
		长杠参数不带参数值的形式,例如 ant – projecthelp
     */
    //使用GNU解析
    CommandLineParser parser = new GnuParser();
    try {
      commandLine = parser.parse(opts, args, true);
      processGeneralOptions(conf, commandLine);
      return commandLine.getArgs();
    } catch(ParseException e) {
      LOG.warn("options parsing failed: "+e.getMessage());

      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("general options are: ", opts);
    }
    return args;
  }

 三、询问

 /**
   * Modify configuration according user-specified generic options
   * @param conf Configuration to be modified
   * @param line User-specified generic options
   */
  private void processGeneralOptions(Configuration conf,
      CommandLine line) throws IOException {
    if (line.hasOption("fs")) {
      FileSystem.setDefaultUri(conf, line.getOptionValue("fs"));
    }

    if (line.hasOption("jt")) {
      conf.set("mapred.job.tracker", line.getOptionValue("jt"));
    }
    if (line.hasOption("conf")) {
      String[] values = line.getOptionValues("conf");
      for(String value : values) {
        conf.addResource(new Path(value));
      }
    }
    if (line.hasOption("libjars")) {
      conf.set("tmpjars", 
               validateFiles(line.getOptionValue("libjars"), conf));
      //setting libjars in client classpath
      URL[] libjars = getLibJars(conf);
      if(libjars!=null && libjars.length>0) {
        conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
        Thread.currentThread().setContextClassLoader(
            new URLClassLoader(libjars, 
                Thread.currentThread().getContextClassLoader()));
      }
    }
    if (line.hasOption("files")) {
      conf.set("tmpfiles", 
               validateFiles(line.getOptionValue("files"), conf));
    }
    if (line.hasOption("archives")) {
      conf.set("tmparchives", 
                validateFiles(line.getOptionValue("archives"), conf));
    }
    if (line.hasOption('D')) {
      String[] property = line.getOptionValues('D');
      for(String prop : property) {
        String[] keyval = prop.split("=", 2);
        if (keyval.length == 2) {
          conf.set(keyval[0], keyval[1]);
        }
      }
    }
    conf.setBoolean("mapred.used.genericoptionsparser", true);
    
    // tokensFile
    if(line.hasOption("tokenCacheFile")) {
      String fileName = line.getOptionValue("tokenCacheFile");
      // check if the local file exists
      try 
      {
        FileSystem localFs = FileSystem.getLocal(conf);
        Path p = new Path(fileName);
        if (!localFs.exists(p)) {
          throw new FileNotFoundException("File "+fileName+" does not exist.");
        }

        LOG.debug("setting conf tokensFile: " + fileName);
        conf.set("mapreduce.job.credentials.json", 
                 localFs.makeQualified(p).toString());
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  }

 解析完成之后再执行,执行代码如下,在ToolRunner中

public static int run(Configuration conf, Tool tool, String[] args) 
    throws Exception{
    if(conf == null) {
      conf = new Configuration();
    }
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    //set the configuration back, so that Tool can configure itself
    tool.setConf(conf);
    
    //get the args w/o generic hadoop args
    String[] toolArgs = parser.getRemainingArgs();
    return tool.run(toolArgs);
  }

 

 

 

Warning: No configuration directory set! Use --conf <dir> to override. Info: Including Hadoop libraries found via (/opt/hadoop-3.1.2/bin/hadoop) for HDFS access Info: Including HBASE libraries found via (/opt/hbase-2.2.6/bin/hbase) for HBASE access 错误: 找不到或无法加载主类 org.apache.flume.tools.GetJavaProperty Info: Including Hive libraries found via (/opt/hive-3.1.2) for Hive access + exec /opt/jdk1.8.0_351/bin/java -Xmx20m -cp '/opt/flume-1.9.0/lib/*:/opt/hadoop-3.1.2/etc/hadoop:/opt/hadoop-3.1.2/share/hadoop/common/lib/*:/opt/hadoop-3.1.2/share/hadoop/common/*:/opt/hadoop-3.1.2/share/hadoop/hdfs:/opt/hadoop-3.1.2/share/hadoop/hdfs/lib/*:/opt/hadoop-3.1.2/share/hadoop/hdfs/*:/opt/hadoop-3.1.2/share/hadoop/mapreduce/lib/*:/opt/hadoop-3.1.2/share/hadoop/mapreduce/*:/opt/hadoop-3.1.2/share/hadoop/yarn:/opt/hadoop-3.1.2/share/hadoop/yarn/lib/*:/opt/hadoop-3.1.2/share/hadoop/yarn/*:/opt/hbase-2.2.6/conf:/opt/jdk1.8.0_351//lib/tools.jar:/opt/hbase-2.2.6:/opt/hbase-2.2.6/lib/shaded-clients/hbase-shaded-client-byo-hadoop-2.2.6.jar:/opt/hbase-2.2.6/lib/client-facing-thirdparty/audience-annotations-0.5.0.jar:/opt/hbase-2.2.6/lib/client-facing-thirdparty/commons-logging-1.2.jar:/opt/hbase-2.2.6/lib/client-facing-thirdparty/findbugs-annotations-1.3.9-1.jar:/opt/hbase-2.2.6/lib/client-facing-thirdparty/htrace-core4-4.2.0-incubating.jar:/opt/hbase-2.2.6/lib/client-facing-thirdparty/log4j-1.2.17.jar:/opt/hbase-2.2.6/lib/client-facing-thirdparty/slf4j-api-1.7.25.jar:/opt/hadoop-3.1.2/etc/hadoop:/opt/hadoop-3.1.2/share/hadoop/common/lib/*:/opt/hadoop-3.1.2/share/hadoop/common/*:/opt/hadoop-3.1.2/share/hadoop/hdfs:/opt/hadoop-3.1.2/share/hadoop/hdfs/lib/*:/opt/hadoop-3.1.2/share/hadoop/hdfs/*:/opt/hadoop-3.1.2/share/hadoop/mapreduce/lib/*:/opt/hadoop-3.1.2/share/hadoop/mapreduce/*:/opt/hadoop-3.1.2/share/hadoop/yarn:/opt/hadoop-3.1.2/share/hadoop/yarn/lib/*:/opt/hadoop-3.1.2/share/hadoop/yarn/*:/opt/hadoop-3.1.2/etc/hadoop:/opt/hbase-2.2.6/conf:/opt/hive-3.1.2/lib/*' -Djava.library.path=:/opt/hadoop-3.1.2/lib/native org.apache.flume.node.Application --name a1 --conf/opt/flume-1.9.0/conf --conf-file/opt/flume-1.9.0/conf/dhfsspool.conf-Dflume.root.logger=DEBUG,consol SLF4J: Class path contains multiple SLF4J bindings. SLF4J: Found binding in [jar:file:/opt/flume-1.9.0/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/opt/hadoop-3.1.2/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/opt/hive-3.1.2/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory] 2023-06-08 17:26:46,403 ERROR node.Application: A fatal error occurred while running. Exception follows. org.apache.commons.cli.UnrecognizedOptionException: Unrecognized option: --conf/opt/flume-1.9.0/conf at org.apache.commons.cli.Parser.processOption(Parser.java:363) at org.apache.commons.cli.Parser.parse(Parser.java:199) at org.apache.commons.cli.Parser.parse(Parser.java:85) at org.apache.flume.node.Application.main(Application.java:287)
06-09
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值