Common Software Installation

Hadoop Installation

  1. Extract the archive: tar -zxvf hadoop.tar.gz -C targetDir
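A minimal follow-up sketch of the environment setup, assuming the archive is extracted to /data/software/hadoop-3.3.4 (the directory referenced by hadoop.tmp.dir below); the profile script and JDK paths are placeholders for wherever things live on your hosts:

# extract the archive into the target directory
tar -zxvf hadoop-3.3.4.tar.gz -C /data/software

# expose HADOOP_HOME and the Hadoop scripts to every shell (placeholder paths)
cat >> /etc/profile.d/hadoop.sh <<'EOF'
export HADOOP_HOME=/data/software/hadoop-3.3.4
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
EOF
source /etc/profile.d/hadoop.sh

# Hadoop also needs JAVA_HOME set in hadoop-env.sh on every node
echo 'export JAVA_HOME=/usr/lib/jvm/java-1.8.0' >> $HADOOP_HOME/etc/hadoop/hadoop-env.sh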

2. Edit the configuration files

  1. Configure workers: workers is a file in the Hadoop configuration directory that lists all of the cluster's worker nodes (the nodes that run DataNode and NodeManager), one hostname or IP address per line. The start/stop scripts use this list to reach every worker, so keeping it accurate matters for the reliability and stability of the cluster. Edit it with vim $HADOOP_HOME/etc/hadoop/workers:
bigdata7
bigdata8
bigdata9
  2. core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

  <!-- Group the NameNode addresses under a single logical nameservice, fdCluster1 -->
   <property>
     <name>fs.defaultFS</name>
     <value>hdfs://fdCluster1</value>
   </property>

   <!-- Directory for files Hadoop generates at runtime -->
   <property>
     <name>hadoop.tmp.dir</name>
     <value>/data/software/hadoop-3.3.4/data</value>
   </property>
    <!-- ZooKeeper quorum that ZKFC connects to -->
    <property>
      <name>ha.zookeeper.quorum</name>
      <value>bigdata7:2181,bigdata8:2181,bigdata9:2181</value>
    </property> 

<!-- Restricts the hosts from which the proxy user "admin" may impersonate other users when accessing the cluster; limiting this prevents the proxy user from being abused and improves security. -->
    <property>
        <name>hadoop.proxyuser.admin.hosts</name>
        <value>*</value>
    </property>	
	
    <!-- Groups whose members the proxy user "admin" is allowed to impersonate when acting on their behalf. -->
    <property>
        <name>hadoop.proxyuser.admin.groups</name>
        <value>*</value>
    </property>
    <!-- Static user for the Hadoop web UIs -->
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>admin</value>
    </property>

<!-- Compression codecs available to Hadoop -->
    <property>
        <name>io.compression.codecs</name>
        <value>
            org.apache.hadoop.io.compress.GzipCodec,
            org.apache.hadoop.io.compress.DefaultCodec,
            org.apache.hadoop.io.compress.BZip2Codec,
            org.apache.hadoop.io.compress.SnappyCodec,
            com.hadoop.compression.lzo.LzoCodec,
            com.hadoop.compression.lzo.LzopCodec
        </value>
    </property>
    <!-- Implementation class for the LZO codec -->
    <property>
        <name>io.compression.codec.lzo.class</name>
        <value>com.hadoop.compression.lzo.LzoCodec</value>
    </property>
</configuration>
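The same configuration files must be present on every node. A small sketch for pushing them out and sanity-checking the result, assuming passwordless SSH between the hosts and an identical $HADOOP_HOME everywhere:

# push the configuration directory to the other nodes
for host in bigdata8 bigdata9; do
  rsync -av $HADOOP_HOME/etc/hadoop/ ${host}:$HADOOP_HOME/etc/hadoop/
done

# confirm the nameservice is picked up
hdfs getconf -confKey fs.defaultFS    # expect hdfs://fdCluster1

Note that the com.hadoop.compression.lzo.* codecs are not bundled with Apache Hadoop; they come from the separate hadoop-lzo project, whose jar and native library must be installed on every node, otherwise jobs that reference those codecs fail with ClassNotFoundException.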

  3. hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->
<configuration>

  <!-- NameNode metadata storage directory -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///data/hadoop-3.3.4/nn</value>
  </property>

  <!-- DataNode block storage directory -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///data/hadoop-3.3.4/dn</value>
  </property>

  <!-- JournalNode edits storage directory -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/data/hadoop-3.3.4/journaldata</value>
  </property>

  <!-- Logical nameservice ID of the fully distributed cluster -->
  <property>
    <name>dfs.nameservices</name>
    <value>fdCluster1</value>
  </property>

  <!-- NameNodes that belong to the nameservice -->
  <property>
    <name>dfs.ha.namenodes.fdCluster1</name>
    <value>nn1,nn2,nn3</value>
  </property>

  <!-- RPC addresses of the NameNodes -->
  <property>
    <name>dfs.namenode.rpc-address.fdCluster1.nn1</name>
    <value>bigdata7:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.fdCluster1.nn2</name>
    <value>bigdata8:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.fdCluster1.nn3</name>
    <value>bigdata9:8020</value>
  </property>

  <!-- HTTP addresses of the NameNodes -->
  <property>
    <name>dfs.namenode.http-address.fdCluster1.nn1</name>
    <value>bigdata7:9870</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.fdCluster1.nn2</name>
    <value>bigdata8:9870</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.fdCluster1.nn3</name>
    <value>bigdata9:9870</value>
  </property>

  <!-- Location of the shared edits on the JournalNodes -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://bigdata7:8485;bigdata8:8485;bigdata9:8485/fdCluster1</value>
  </property>

  <!-- Proxy provider the client uses to determine which NameNode is Active -->
  <property>
    <name>dfs.client.failover.proxy.provider.fdCluster1</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>

  <!-- Fencing method, so that only one NameNode serves clients at any time -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>

  <!-- SSH private key required by the sshfence method -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/admin/.ssh/id_rsa</value>
  </property>
<!-- Enable automatic NameNode failover -->
<property>
	 <name>dfs.ha.automatic-failover.enabled</name>
	 <value>true</value>
</property>

    <!-- Whether HDFS permission checking is enforced; when enabled, users without the required permissions cannot access the corresponding paths -->
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>

</configuration>
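With hdfs-site.xml in place, the HA cluster has to be bootstrapped once before the regular start/stop scripts are used. A sketch of the usual first-start sequence, assuming ZooKeeper is already running on bigdata7/8/9 and that bigdata7 hosts nn1:

# 1. start a JournalNode on each of bigdata7, bigdata8 and bigdata9
hdfs --daemon start journalnode

# 2. format and start the first NameNode (bigdata7 only)
hdfs namenode -format
hdfs --daemon start namenode

# 3. copy the formatted metadata to the other NameNodes (run on bigdata8 and bigdata9)
hdfs namenode -bootstrapStandby

# 4. initialise the failover state in ZooKeeper (run once, on any NameNode host)
hdfs zkfc -formatZK

# 5. start the whole HDFS layer (NameNodes, DataNodes, JournalNodes, ZKFCs)
start-dfs.sh

# 6. confirm that exactly one NameNode is active
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
hdfs haadmin -getServiceState nn3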

  4. mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

<!-- Run MapReduce on the YARN resource manager -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- MapReduce JobHistory server address -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>bigdata9:10020</value>
    </property>

    <!-- JobHistory server web UI address -->
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>bigdata9:19888</value>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>$HADOOP_HOME/share/hadoop/common/*,$HADOOP_HOME/share/hadoop/common/lib/*,$HADOOP_HOME/share/hadoop/hdfs/*,$HADOOP_HOME/share/hadoop/hdfs/lib/*,$HADOOP_HOME/share/hadoop/mapreduce/*,$HADOOP_HOME/share/hadoop/mapreduce/lib/*,$HADOOP_HOME/share/hadoop/yarn/*,$HADOOP_HOME/share/hadoop/yarn/lib/*</value>
    </property>

</configuration>
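The MapReduce JobHistory server is not started by start-dfs.sh/start-yarn.sh; on bigdata9 (the host configured above) it can be started and checked roughly as follows:

# on bigdata9
mapred --daemon start historyserver

# verify the process and the web UI port
jps | grep JobHistoryServer
curl -s http://bigdata9:19888 >/dev/null && echo "history server web UI is up"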

  5. yarn-site.xml
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <!-- Enable ResourceManager HA -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
 
    <!-- Logical ID of the ResourceManager HA cluster -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>cluster-yarn1</value>
    </property>

    <!-- Logical IDs of the ResourceManagers -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2,rm3</value>
    </property>
<!-- ========== rm1 ========== -->
    <!-- Hostname of rm1 -->
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>bigdata7</value>
    </property>

    <!-- Web UI address of rm1 -->
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>bigdata7:8088</value>
    </property>

    <!-- Client RPC address of rm1 -->
    <property>
        <name>yarn.resourcemanager.address.rm1</name>
        <value>bigdata7:8032</value>
    </property>

    <!-- Scheduler address ApplicationMasters use to request resources from rm1 -->
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm1</name>  
        <value>bigdata7:8030</value>
    </property>

    <!-- Resource-tracker address that NodeManagers connect to -->
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
        <value>bigdata7:8031</value>
    </property>

<!-- ========== rm2 ========== -->
    <!-- Hostname of rm2 -->
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>bigdata8</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>bigdata8:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>bigdata8:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>bigdata8:8030</value>
    </property>

    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>bigdata8:8031</value>
    </property>

<!-- ========== rm3 ========== -->
    <!-- Hostname of rm3 -->
    <property>
        <name>yarn.resourcemanager.hostname.rm3</name>
        <value>bigdata9</value>
    </property>
    <!-- Web UI address of rm3 -->
    <property>
        <name>yarn.resourcemanager.webapp.address.rm3</name>
        <value>bigdata9:8088</value>
    </property>
    <!-- Client RPC address of rm3 -->
    <property>
        <name>yarn.resourcemanager.address.rm3</name>
        <value>bigdata9:8032</value>
    </property>
    <!-- Scheduler address ApplicationMasters use to request resources from rm3 -->
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm3</name>  
        <value>bigdata9:8030</value>
    </property>

    <!-- Resource-tracker address that NodeManagers connect to -->
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm3</name>
        <value>bigdata9:8031</value>
    </property>

    <!-- ZooKeeper quorum address -->
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>bigdata7:2181,bigdata8:2181,bigdata9:2181</value>
    </property>

    <!-- Enable automatic recovery of ResourceManager state -->
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
 
    <!-- Store the ResourceManager state in the ZooKeeper ensemble -->
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>

    <!-- Environment variables inherited by containers -->
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>

<!-- Disable the YARN physical/virtual memory checks -->
<property>
    <name>yarn.nodemanager.pmem-check-enabled</name>
    <value>false</value>
</property>
<property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
</property>
<!-- Memory (in MB) that the NodeManager on this host may use for containers -->
<property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>16384</value>
</property>
<!-- Maximum memory (in MB) the scheduler may allocate to a single container -->
<property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>16384</value>
</property>

<property>
    <description>
      Enable services rest api on ResourceManager.
    </description>
    <name>yarn.webapp.api-service.enable</name>
    <value>true</value>
  </property>
<!-- JobHistory server addresses -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>bigdata9:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>bigdata9:19888</value>
    </property>
    <!-- Enable log aggregation -->
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <!-- Log retention time, in seconds -->
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>106800</value>
    </property>

</configuration>
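After yarn-site.xml is distributed, YARN can be started and the HA roles verified; a short sketch using standard Hadoop 3.x commands:

# start the ResourceManagers configured above and the NodeManagers from the workers file
start-yarn.sh

# check which ResourceManager is Active and which are Standby
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2
yarn rmadmin -getServiceState rm3

# list the NodeManagers that registered
yarn node -list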

  6. capacity-scheduler.xml
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

  <property>
    <name>yarn.scheduler.capacity.maximum-applications</name>
    <value>10000</value>
    <description>
      Maximum number of applications that can be pending and running.
      Maximum number of applications in pending and running state that YARN will accept.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
    <value>0.75</value>
    <description>
      Maximum percent of resources in the cluster which can be used to run 
      application masters i.e. controls number of concurrent running
      applications.
      Maximum fraction of cluster resources that may be used to run ApplicationMasters.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.resource-calculator</name>
    <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
    <description>
      The ResourceCalculator implementation to be used to compare 
      Resources in the scheduler.
      The default i.e. DefaultResourceCalculator only uses Memory while
      DominantResourceCalculator uses dominant-resource to compare 
      multi-dimensional resources such as Memory, CPU etc.
      The resource calculator used for comparisons in the scheduler: DefaultResourceCalculator
      considers only memory, while DominantResourceCalculator considers memory and CPU.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>default</value>
    <description>
      The queues at the this level (root is the root queue).
      Declares the queues at this level; queue names are separated by commas.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.capacity</name>
    <value>100</value>
    <description>
	Default queue target capacity.
      Target capacity (percentage of cluster resources) of the default queue.
	</description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
    <value>1</value>
    <description>
      Default queue user limit a percentage from 0.0 to 1.0.
      A multiple of the queue capacity that a single user may acquire when resources are idle.
      The default of 1 means a single user can never use more than the queue's configured capacity, no matter how idle the cluster is.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
    <value>100</value>
    <description>
      The maximum capacity of the default queue.
      Upper bound, as a percentage, on the resources the default queue may use.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.state</name>
    <value>RUNNING</value>
    <description>
      The state of the default queue. State can be one of RUNNING or STOPPED.
      State of the default queue; either RUNNING or STOPPED.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
    <value>*</value>
    <description>
      The ACL of who can submit jobs to the default queue.
      Which users may submit jobs to the default queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
    <value>*</value>
    <description>
      The ACL of who can administer jobs on the default queue.
      Which users may administer jobs in the default queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.acl_application_max_priority</name>
    <value>*</value>
    <description>
      The ACL of who can submit applications with configured priority.
      For e.g, [user={name} group={name} max_priority={priority} default_priority={priority}]
      Which users may submit applications with the configured priority.
    </description>
  </property>

   <property>
     <name>yarn.scheduler.capacity.root.default.maximum-application-lifetime
     </name>
     <value>-1</value>
     <description>
        Maximum lifetime of an application which is submitted to a queue
        in seconds. Any value less than or equal to zero will be considered as
        disabled.
        This will be a hard time limit for all applications in this
        queue. If positive value is configured then any application submitted
        to this queue will be killed after exceeds the configured lifetime.
        User can also specify lifetime per application basis in
        application submission context. But user lifetime will be
        overridden if it exceeds queue maximum lifetime. It is point-in-time
        configuration.
        Note : Configuring too low value will result in killing application
        sooner. This feature is applicable only for leaf queue.
        Maximum lifetime of submitted applications; -1 (used here) disables the limit, and limiting it is generally not recommended.
     </description>
   </property>

   <property>
     <name>yarn.scheduler.capacity.root.default.default-application-lifetime
     </name>
     <value>-1</value>
     <description>
        Default lifetime of an application which is submitted to a queue
        in seconds. Any value less than or equal to zero will be considered as
        disabled.
        If the user has not submitted application with lifetime value then this
        value will be taken. It is point-in-time configuration.
        Note : Default lifetime can't exceed maximum lifetime. This feature is
        applicable only for leaf queue.
        Default lifetime applied when an application does not specify one; it cannot exceed the maximum lifetime, and limiting it is generally not recommended.
     </description>
   </property>

  <property>
    <name>yarn.scheduler.capacity.node-locality-delay</name>
    <value>40</value>
    <description>
      Number of missed scheduling opportunities after which the CapacityScheduler 
      attempts to schedule rack-local containers.
      When setting this parameter, the size of the cluster should be taken into account.
      We use 40 as the default value, which is approximately the number of nodes in one rack.
      Note, if this value is -1, the locality constraint in the container request
      will be ignored, which disables the delay scheduling.
      Number of missed scheduling opportunities before rack-local scheduling is attempted; it should reflect the cluster size. The default of 40 approximates the number of nodes in one rack.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.rack-locality-additional-delay</name>
    <value>-1</value>
    <description>
      Number of additional missed scheduling opportunities over the node-locality-delay
      ones, after which the CapacityScheduler attempts to schedule off-switch containers,
      instead of rack-local ones.
      Example: with node-locality-delay=40 and rack-locality-delay=20, the scheduler will
      attempt rack-local assignments after 40 missed opportunities, and off-switch assignments
      after 40+20=60 missed opportunities.
      When setting this parameter, the size of the cluster should be taken into account.
      We use -1 as the default value, which disables this feature. In this case, the number
      of missed opportunities for assigning off-switch containers is calculated based on
      the number of containers and unique locations specified in the resource request,
      as well as the size of the cluster.
      Additional missed scheduling opportunities, beyond node-locality-delay, before off-switch containers are scheduled. With the default of -1 this feature is disabled, and the number of missed opportunities allowed before an off-switch assignment is computed roughly as L * C / N, where L is the number of locations (nodes or racks) in the request, C is the number of requested containers, and N is the cluster size.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.queue-mappings</name>
    <value></value>
    <description>
      A list of mappings that will be used to assign jobs to queues
      The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
      Typically this list will be used to map users to queues,
      for example, u:%user:%user maps all users to queues with the same name
      as the user.
      Maps users or groups to specific queues. Syntax: [u|g]:[name]:[queue_name][,next_mapping]*, where u means the mapping applies to a user and g to a group, name is the user or group name, and queue_name is the target queue. %user refers to the submitting user, so a queue_name of %user maps every user to a queue with the same name; %primary_group and %secondary_group refer to the user's primary and secondary groups.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
    <value>false</value>
    <description>
      If a queue mapping is present, will it override the value specified
      by the user? This can be used by administrators to place jobs in queues
      that are different than the one specified by the user.
      The default is false.
      If enabled, a matching queue mapping overrides the queue the user specifies at submission time, letting administrators force applications into particular queues; if disabled, a queue explicitly specified by the user takes precedence.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.per-node-heartbeat.maximum-offswitch-assignments</name>
    <value>1</value>
    <description>
      Controls the number of OFF_SWITCH assignments allowed
      during a node's heartbeat. Increasing this value can improve
      scheduling rate for OFF_SWITCH containers. Lower values reduce
      "clumping" of applications on particular nodes. The default is 1.
      Legal values are 1-MAX_INT. This config is refreshable.
      Maximum number of OFF_SWITCH container assignments allowed in a single NodeManager heartbeat; the default of 1 allows only one off-switch assignment per heartbeat.
    </description>
  </property>


  <property>
    <name>yarn.scheduler.capacity.application.fail-fast</name>
    <value>false</value>
    <description>
      Whether RM should fail during recovery if previous applications'
      queue is no longer valid.
      Whether the ResourceManager should fail recovery when a recovered application's queue no longer exists.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.workflow-priority-mappings</name>
    <value></value>
    <description>
      A list of mappings that will be used to override application priority.
      The syntax for this list is
      [workflowId]:[full_queue_name]:[priority][,next mapping]*
      where an application submitted (or mapped to) queue "full_queue_name"
      and workflowId "workflowId" (as specified in application submission
      context) will be given priority "priority".
      Rules that override application priority; syntax: [workflowId]:[full_queue_name]:[priority][,next_mapping]*.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.workflow-priority-mappings-override.enable</name>
    <value>false</value>
    <description>
      If a priority mapping is present, will it override the value specified
      by the user? This can be used by administrators to give applications a
      priority that is different than the one specified by the user.
      The default is false.
      Whether a priority mapping, when present, overrides the priority specified by the user.
    </description>
  </property>

</configuration>
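Changes to capacity-scheduler.xml do not require a ResourceManager restart; they can be applied to the running cluster and inspected roughly like this:

# re-read capacity-scheduler.xml on the ResourceManager
yarn rmadmin -refreshQueues

# inspect the resulting queue configuration
yarn queue -status default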

  7. yarn-site HA configuration
    In a Hadoop YARN high-availability (HA) cluster there are two or more ResourceManager instances that manage resources together. To make the ResourceManager highly available, they are configured in Active-Standby mode: only one ResourceManager is Active at a time, accepting client requests and managing cluster resources, while the others remain in Standby, ready to take over.

To configure ResourceManager HA, set the following properties in yarn-site.xml:
yarn.resourcemanager.ha.enabled: enables ResourceManager HA.
yarn.resourcemanager.cluster-id: unique identifier of the cluster.
yarn.resourcemanager.ha.rm-ids: comma-separated identifiers of the ResourceManagers.
yarn.resourcemanager.hostname.rm-id: hostname or IP address of each ResourceManager, where rm-id is one of the identifiers above.
For example, with two ResourceManager nodes, rm1.example.com and rm2.example.com, the configuration could look like this:

<property>
  <name>yarn.resourcemanager.ha.enabled</name>
  <value>true</value>
</property>

<property>
  <name>yarn.resourcemanager.cluster-id</name>
  <value>my_cluster</value>
</property>

<property>
  <name>yarn.resourcemanager.ha.rm-ids</name>
  <value>rm1,rm2</value>
</property>

<property>
  <name>yarn.resourcemanager.hostname.rm1</name>
  <value>rm1.example.com</value>
</property>

<property>
  <name>yarn.resourcemanager.hostname.rm2</name>
  <value>rm2.example.com</value>
</property>
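To see which ResourceManager currently holds the Active role, each RM reports its HA state through the cluster-info REST endpoint; a quick check, assuming the default unsecured web port 8088:

curl -s http://rm1.example.com:8088/ws/v1/cluster/info
curl -s http://rm2.example.com:8088/ws/v1/cluster/info
# the JSON returned by each ResourceManager contains "haState": "ACTIVE" or "STANDBY"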

DolphinScheduler

DolphinScheduler metadata

-- Query the tasks in DolphinScheduler whose workflow and schedule are online
SELECT b.name AS project_name,
        b.code AS project_code,
        d.name AS process_name,
        d.code AS process_code,
        e.name AS task_name,
        e.code AS task_code,
        e.task_type,
        a.crontab,
        e.task_params,
        a.create_time,
        MAX(f.update_time) AS update_time
        FROM dolphinscheduler.t_ds_schedules a
        LEFT JOIN
        dolphinscheduler.t_ds_process_task_relation c
        ON
        a.process_definition_code = c.process_definition_code
        LEFT JOIN
        dolphinscheduler.t_ds_project b
        ON
        b.code = c.project_code
        LEFT JOIN
        dolphinscheduler.t_ds_process_definition d
        ON
        c.process_definition_code = d.code
        LEFT JOIN
        dolphinscheduler.t_ds_task_definition e
        ON
        c.post_task_code = e.code
        LEFT JOIN
        dolphinscheduler.t_ds_process_instance f
        ON
        d.code = f.process_definition_code
        WHERE e.task_type IN (
        <foreach collection="array" item="type" separator=",">
            #{type}
        </foreach>
        )
        AND e.flag = 1
        AND d.release_state = 1
        AND a.release_state = 1
        AND DATE(f.update_time) >= DATE(NOW() - INTERVAL 3 DAY)
        GROUP BY e.code
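The <foreach> block and #{type} placeholder above are MyBatis mapper syntax. To run the query by hand, substitute a literal list of task types; a usage sketch (SQL and SHELL are example task types, any DolphinScheduler task type works):

-- replace the <foreach> block with a literal IN list, e.g.
WHERE e.task_type IN ('SQL', 'SHELL')
  AND e.flag = 1
  AND d.release_state = 1
  AND a.release_state = 1
  AND DATE(f.update_time) >= DATE(NOW() - INTERVAL 3 DAY)
GROUP BY e.code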