- 修改核心配置文件:vi /home/hadoop/module/hadoop-3.2.2/etc/hadoop/core-site.xml 在<configuration>和</configuration>标签中间添加以下配置
<!-- Default filesystem URI. "mycluster" is the HA nameservice ID declared in
     hdfs-site.xml (dfs.nameservices), not a hostname. -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<!-- Base directory for Hadoop's local working data -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/module/hadoop-3.2.2/data</value>
</property>
<!-- Static user the HDFS web UI acts as when browsing files -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>hadoop</value>
</property>
<!-- Proxy-user (impersonation) settings so Hive/HiveServer2 running as user
     "hadoop" may submit work on behalf of other users, from any host/group -->
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
<!-- ZooKeeper ensemble used by the ZKFC for NameNode automatic failover -->
<property>
<name>ha.zookeeper.quorum</name>
<value>node10:2181,node11:2181,node12:2181</value>
</property>
<!-- ZooKeeper session timeout in milliseconds (10 s) -->
<property>
<name>ha.zookeeper.session-timeout.ms</name>
<value>10000</value>
</property>
- 修改HDFS配置文件:vi /home/hadoop/module/hadoop-3.2.2/etc/hadoop/hdfs-site.xml 在<configuration>和</configuration>标签中添加以下配置
<!-- Block replication factor; 3 is already the default, so this entry is optional -->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!-- Local storage directories: NameNode metadata and DataNode block data -->
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop/module/hadoop-3.2.2/namenode/data</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/hadoop/module/hadoop-3.2.2/datanode/data</value>
</property>
<!-- Active/standby (HA) NameNode configuration -->
<!-- Logical nameservice ID for the NameNode cluster; must match the authority
     in fs.defaultFS (hdfs://mycluster) in core-site.xml -->
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<!-- NameNode IDs that make up this nameservice -->
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of NameNode nn1 -->
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>node10:8020</value>
</property>
<!-- RPC address of NameNode nn2 -->
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>node11:8020</value>
</property>
<!-- HTTP (web UI) address of each NameNode.
     NOTE(review): 50070 was the Hadoop 2.x default port; Hadoop 3.x defaults
     to 9870. Setting it explicitly as below still works. -->
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>node10:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>node11:50070</value>
</property>
<!-- JournalNode quorum that stores the shared NameNode edit log -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://node10:8485;node11:8485;node12:8485/mycluster</value>
</property>
<!-- Local directory where each JournalNode keeps its edits -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/hadoop/module/hadoop-3.2.2/journalnode/data</value>
</property>
<!-- Enable automatic NameNode failover (requires the ZKFC and the
     ha.zookeeper.quorum set in core-site.xml) -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Fencing: guarantees only one NameNode serves clients at a time.
     sshfence logs into the old active over SSH and kills it;
     shell(/bin/true) is a fallback so failover still proceeds when the old
     active host is unreachable (e.g. powered off). -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<!-- Client-side proxy provider that discovers which NameNode is active -->
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- SSH private key used by sshfence. FIX: this cluster runs as user "hadoop"
     (all paths are under /home/hadoop and hadoop.http.staticuser.user=hadoop),
     so the key must be the hadoop user's. The previous value /root/.ssh/id_rsa
     is not readable by a ZKFC process running as hadoop, so sshfence would
     always fail. -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<!-- sshfence SSH connect timeout in ms: if the old active cannot be reached
     within 30 s, fencing falls through to shell(/bin/true) and the standby
     becomes active -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<!-- Disable HDFS permission checking (convenient for a test/learning cluster;
     do not use in production) -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- Let DataNodes register by hostname without the NameNode verifying that
     the hostname resolves to the connecting IP (useful with /etc/hosts-based
     virtual-machine setups) -->
<property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
- 修改YARN配置文件:vi /home/hadoop/module/hadoop-3.2.2/etc/hadoop/yarn-site.xml 根据虚拟机内存进行设置,参照:https://blog.csdn.net/u010452388/article/details/98234147 在<configuration>和</configuration>标签中添加以下配置
<!-- Resources each NodeManager offers to YARN (defaults: 8 vcores, 8192 MB);
     size these to the virtual machine's actual capacity -->
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>12</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>10240</value>
</property>
<!-- URL container-log links redirect to once logs are served by the
     MapReduce JobHistory server on node10 -->
<property>
<name>yarn.log.server.url</name>
<value>http://node10:19888/jobhistory/logs</value>
</property>
<!-- Auxiliary service required for the MapReduce shuffle phase -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Enable aggregation of container logs -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- Aggregated-log retention in seconds.
     NOTE(review): the original comment claimed "7 days", but 86400 s is
     1 day; use 604800 if 7 days is actually intended. -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>86400</value>
</property>
<!-- ResourceManager HA (active/standby) configuration -->
<!-- Enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Logical cluster ID used for RM leader election in ZooKeeper -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>my-yarn-cluster</value>
</property>
<!-- IDs of the two ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Host of each ResourceManager -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>node10</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>node11</value>
</property>
<!-- Web UI address of each ResourceManager -->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>node10:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>node11:8088</value>
</property>
<!-- ZooKeeper ensemble used for RM leader election and the state store -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>node10:2181,node11:2181,node12:2181</value>
</property>
<!-- Recover running applications after an RM restart or failover -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Persist ResourceManager state in the ZooKeeper cluster -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- Per-container memory bounds in MB.
     NOTE(review): minimum == maximum (2048) forces every container to exactly
     2 GB; with 10240 MB per NodeManager that allows at most 5 containers per
     node. Confirm this is intentional. -->
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>2048</value>
</property>
<!-- Virtual-memory limit as a multiple of requested physical memory -->
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>2.1</value>
</property>
<!-- JVM heap for MapReduce child tasks.
     NOTE(review): mapred.child.java.opts is a deprecated MapReduce property
     conventionally set in mapred-site.xml (newer replacements:
     mapreduce.map.java.opts / mapreduce.reduce.java.opts); verify it is
     actually picked up when placed in yarn-site.xml. -->
<property>
<name>mapred.child.java.opts</name>
<value>-Xmx1024m</value>
</property>
<!-- Per-RM service addresses for rm1 (node10):
     client RPC (8032), scheduler (8030), resource-tracker for NodeManager
     heartbeats (8031), admin (8033) -->
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>node10:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>node10:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>node10:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm1</name>
<value>node10:8033</value>
</property>
<!-- NOTE(review): yarn.nodemanager.address is a NodeManager property and does
     not take .rm1/.rm2 suffixes; this entry (and the .rm2 one below) is most
     likely ignored by YARN. Confirm and consider removing. -->
<property>
<name>yarn.nodemanager.address.rm1</name>
<value>node10:8041</value>
</property>
<!-- Per-RM service addresses for rm2 (node11), mirroring rm1 -->
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>node11:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>node11:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>node11:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>node11:8033</value>
</property>
<!-- NOTE(review): see the remark on yarn.nodemanager.address.rm1 above -->
<property>
<name>yarn.nodemanager.address.rm2</name>
<value>node11:8041</value>
</property>
<!-- NodeManager localizer RPC address (binds on all interfaces) -->
<property>
<name>yarn.nodemanager.localizer.address</name>
<value>0.0.0.0:8040</value>
</property>
<!-- NodeManager web UI address (binds on all interfaces) -->
<property>
<description>NM Webapp address.</description>
<name>yarn.nodemanager.webapp.address</name>
<value>0.0.0.0:8042</value>
</property>
<!-- NodeManager container-manager bind address.
     NOTE(review): this interpolates the ResourceManager's hostname, yet the
     property configures where each NodeManager itself listens (default is
     ${yarn.nodemanager.hostname}:0). Confirm this is intentional. -->
<property>
<name>yarn.nodemanager.address</name>
<value>${yarn.resourcemanager.hostname}:8041</value>
</property>
<!-- vulue主要看HADOOP_CLASSPATH这个环境变量的输出值 -->
<property>
<name>yarn.application.classpath</name>
<value>/home/hadoop/module/hadoop-3.2.2/etc/hadoop:/home/hadoop/module/hadoop-3.2.2/share/hadoop/common/lib
Original: https://blog.csdn.net/weixin_40496191/article/details/128521527
Author: 懒惰の天真热
Title: hadoop基础搭建(hadoop+hive+hbase+zk)(一)
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/817234/
转载文章受原作者版权保护。转载请注明原作者出处!