hadoop基础搭建(hadoop+hive+hbase+zk)(一)

  • 修改核心配置文件:vi /home/hadoop/module/hadoop-3.2.2/etc/hadoop/core-site.xml 在 <configuration> 标签中间添加配置
<!-- Default filesystem URI; "mycluster" is the HA nameservice ID (defined as dfs.nameservices in hdfs-site.xml) -->
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://mycluster</value>
</property>

<!-- Base directory where Hadoop stores its data -->
<property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop/module/hadoop-3.2.2/data</value>
</property>

<!-- Static user "hadoop" for HDFS web UI access -->
<property>
    <name>hadoop.http.staticuser.user</name>
    <value>hadoop</value>
</property>
<!-- Proxy-user (impersonation) settings, required for Hive integration -->
<property>
    <name>hadoop.proxyuser.hadoop.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.hadoop.groups</name>
    <value>*</value>
</property>
<!-- ZooKeeper quorum used by HDFS HA automatic failover -->
<property>
    <name>ha.zookeeper.quorum</name>
    <value>node10:2181,node11:2181,node12:2181</value>
 </property>
 <!-- ZooKeeper session timeout, in milliseconds -->
 <property>
    <name>ha.zookeeper.session-timeout.ms</name>
    <value>10000</value>
 </property>
  • 修改HDFS配置文件:vi /home/hadoop/module/hadoop-3.2.2/etc/hadoop/hdfs-site.xml 在 <configuration> 标签中添加配置

    <!-- Replication factor; the default is already 3, so this may be omitted -->
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>

    <!-- Local storage directories for NameNode and DataNode data -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/home/hadoop/module/hadoop-3.2.2/namenode/data</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/home/hadoop/module/hadoop-3.2.2/datanode/data</value>
    </property>

    <!-- High-availability (active/standby) configuration -->
    <!-- Logical service name for the NameNode cluster -->
    <property>
        <name>dfs.nameservices</name>
        <value>mycluster</value>
    </property>
    <!-- NameNode IDs participating in this nameservice -->
    <property>
        <name>dfs.ha.namenodes.mycluster</name>
        <value>nn1,nn2</value>
    </property>
    <!-- RPC address of NameNode nn1 -->
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn1</name>
        <value>node10:8020</value>
    </property>
    <!-- RPC address of NameNode nn2 -->
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn2</name>
        <value>node11:8020</value>
    </property>
    <!-- HTTP (web UI) address of nn1 -->
    <!-- NOTE(review): 50070 was the Hadoop 2.x default web port; 3.x defaults to 9870 — an explicit value here still works -->
    <property>
            <name>dfs.namenode.http-address.mycluster.nn1</name>
            <value>node10:50070</value>
    </property>
    <!-- HTTP (web UI) address of nn2 -->
    <property>
            <name>dfs.namenode.http-address.mycluster.nn2</name>
            <value>node11:50070</value>
     </property>

     <!-- JournalNode quorum holding the shared edit log -->
     <property>
            <name>dfs.namenode.shared.edits.dir</name>
            <value>qjournal://node10:8485;node11:8485;node12:8485/mycluster</value>
     </property>
     <!-- Local storage directory for JournalNode data -->
     <property>
            <name>dfs.journalnode.edits.dir</name>
            <value>/home/hadoop/module/hadoop-3.2.2/journalnode/data</value>
     </property>
     <!-- Enable automatic NameNode failover -->
     <property>
            <name>dfs.ha.automatic-failover.enabled</name>
            <value>true</value>
     </property>
     <!-- Fencing: ensures only one NameNode serves clients at any time; shell(/bin/true) is the fallback when sshfence cannot connect -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>
            sshfence
            shell(/bin/true)
        </value>
    </property>

    <!-- Client-side failover: ConfiguredFailoverProxyProvider locates the active NameNode -->
     <property>
            <name>dfs.client.failover.proxy.provider.mycluster</name>
            <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
     </property>

     <!-- Private key used by the sshfence method above -->
     <!-- NOTE(review): path is under /root, but the cluster appears to run as user "hadoop" — confirm the key location -->
     <property>
            <name>dfs.ha.fencing.ssh.private-key-files</name>
            <value>/root/.ssh/id_rsa</value>
      </property>
      <!-- sshfence connect timeout (ms): if the standby cannot reach the failed active within 30 s, it takes over as active -->
      <property>
            <name>dfs.ha.fencing.ssh.connect-timeout</name>
            <value>30000</value>
     </property>

     <!-- Disable HDFS permission checks so directories can be created freely -->
     <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <!-- Skip the IP/hostname consistency check on DataNode registration (for host + hostName style setups) -->
    <property>
        <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
        <value>false</value>
    </property>
  • 修改YARN配置文件:vi /home/hadoop/module/hadoop-3.2.2/etc/hadoop/yarn-site.xml 根据虚拟机内存进行设置,参照:https://blog.csdn.net/u010452388/article/details/98234147 在 <configuration> 标签中添加配置
 <!-- NodeManager resources offered to YARN (defaults: 8 vcores, 8 GB) -->
 <property>
  <name>yarn.nodemanager.resource.cpu-vcores</name>
  <value>12</value>
</property>
<property>
  <name>yarn.nodemanager.resource.memory-mb</name>
  <value>10240</value>
</property>

 <!-- URL of the MapReduce JobHistory log server -->
 <property>
    <name>yarn.log.server.url</name>
    <value>http://node10:19888/jobhistory/logs</value>
</property>
    <!-- Let MapReduce use the shuffle auxiliary service -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- Enable log aggregation -->
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <!-- Log retention: 86400 s = 1 day (the original comment said 7 days, which would be 604800) -->
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>86400</value>
    </property>

    <!-- High-availability (active/standby) configuration -->
    <!-- Enable ResourceManager HA -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>my-yarn-cluster</value>
    </property>
    <!-- IDs of the two ResourceManagers -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>node10</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>node11</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>node10:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>node11:8088</value>
    </property>
    <!-- ZooKeeper quorum address for ResourceManager HA -->
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>node10:2181,node11:2181,node12:2181</value>
    </property>
    <!-- Enable automatic recovery -->
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
    <!-- Store ResourceManager state in the ZooKeeper cluster -->
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>

    <!-- Per-container allocation bounds (MB); min == max here, so every container gets exactly 2 GB -->
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>2048</value>
    </property>
    <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>2048</value>
    </property>
    <!-- Virtual-to-physical memory ratio allowed per container -->
    <property>
        <name>yarn.nodemanager.vmem-pmem-ratio</name>
        <value>2.1</value>
    </property>
    <!-- JVM heap for MapReduce child tasks -->
    <property>
        <name>mapred.child.java.opts</name>
        <value>-Xmx1024m</value>
    </property>

  <!-- Per-RM service addresses: client (8032), scheduler (8030), resource tracker (8031), admin (8033), NM (8041) -->
  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>node10:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>node10:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>node10:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm1</name>
    <value>node10:8033</value>
  </property>
  <property>
    <name>yarn.nodemanager.address.rm1</name>
    <value>node10:8041</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>node11:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>node11:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>node11:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm2</name>
    <value>node11:8033</value>
  </property>
  <property>
    <name>yarn.nodemanager.address.rm2</name>
    <value>node11:8041</value>
  </property>
  <property>
    <name>yarn.nodemanager.localizer.address</name>
    <value>0.0.0.0:8040</value>
  </property>
  <property>
    <description>NM Webapp address.</description>
    <name>yarn.nodemanager.webapp.address</name>
    <value>0.0.0.0:8042</value>
  </property>
<property>
    <name>yarn.nodemanager.address</name>
    <value>${yarn.resourcemanager.hostname}:8041</value>
</property>
    <!-- The value should match the output of the HADOOP_CLASSPATH environment variable (run: hadoop classpath) -->
    <!-- NOTE(review): the value below is truncated in this source — paste the full classpath output, then close the value/property tags -->
    <property>
        <name>yarn.application.classpath</name>
        <value>/home/hadoop/module/hadoop-3.2.2/etc/hadoop:/home/hadoop/module/hadoop-3.2.2/share/hadoop/common/lib

Original: https://blog.csdn.net/weixin_40496191/article/details/128521527
Author: 懒惰の天真热
Title: hadoop基础搭建(hadoop+hive+hbase+zk)(一)

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/817234/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球