Hadoop Cluster Installation and Configuration

#===Master,Slave1,2===
cat /etc/hosts
192.168.131.130 Master
192.168.131.131 Slave1
192.168.131.132 Slave2
 
#===Master===
cd ~/.ssh
rm ./id_rsa*
ssh-keygen -t rsa
cat ./id_rsa.pub >> ./authorized_keys
scp ~/.ssh/id_rsa.pub hadoop@Slave1:/home/hadoop/
scp ~/.ssh/id_rsa.pub hadoop@Slave2:/home/hadoop/
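 
#===Slave1,2===
#The copied public key presumably still has to be appended to authorized_keys on each slave before
#passwordless SSH from Master works; a minimal sketch of that step (not spelled out in the original notes):
mkdir -p ~/.ssh
cat ~/id_rsa.pub >> ~/.ssh/authorized_keys
rm ~/id_rsa.pub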
 
apt-get install openjdk-8-jre openjdk-8-jdk
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
 
wget http://mirror.metrocast.net/apache/hadoop/common/hadoop-2.7.2/hadoop-2.7.2.tar.gz
tar zxvf hadoop-2.7.2.tar.gz -C /usr/local/
cd /usr/local/;
mv hadoop-2.7.2 hadoop;
cd ./hadoop;
./bin/hadoop version
export PATH=$PATH:/usr/local/hadoop/bin:/usr/local/hadoop/sbin
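 
#The two exports above only affect the current shell; to make JAVA_HOME and PATH persistent on every
#node they could also be appended to ~/.bashrc (a sketch, assuming the same install paths as above):
echo 'export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64' >> ~/.bashrc
echo 'export PATH=$PATH:/usr/local/hadoop/bin:/usr/local/hadoop/sbin' >> ~/.bashrc
source ~/.bashrc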
 
vi /usr/local/hadoop/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre

vi /usr/local/hadoop/etc/hadoop/slaves
#List the hostnames of the hosts that will act as DataNodes in this file, one per line; the default content is localhost.
#To use the Master node only as the NameNode, delete localhost and add just two lines, Slave1 and Slave2, as shown below.
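#With the three hosts defined above, the slaves file would then contain just:
Slave1
Slave2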

vi /usr/local/hadoop/etc/hadoop/core-site.xml

<configuration>
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://Master:9000</value>
        </property>
        <property>
                <name>hadoop.tmp.dir</name>
                <value>file:/usr/local/hadoop/tmp</value>
                <description>Abase for other temporary directories.</description>
        </property>
</configuration>

vi /usr/local/hadoop/etc/hadoop/hdfs-site.xml

<configuration>
        <property>
                <name>dfs.namenode.secondary.http-address</name>
                <value>Master:50090</value>
        </property>
        <property>
                <name>dfs.replication</name>
                <value>1</value>
        </property>
        <property>
                <name>dfs.namenode.name.dir</name>
                <value>file:/usr/local/hadoop/tmp/dfs/name</value>
        </property>
        <property>
                <name>dfs.datanode.data.dir</name>
                <value>file:/usr/local/hadoop/tmp/dfs/data</value>
        </property>
</configuration>

vi /usr/local/hadoop/etc/hadoop/mapred-site.xml

<configuration>
        <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
        </property>
        <property>
                <name>mapreduce.jobhistory.address</name>
                <value>Master:10020</value>
        </property>
        <property>
                <name>mapreduce.jobhistory.webapp.address</name>
                <value>Master:19888</value>
        </property>
</configuration>

vi /usr/local/hadoop/etc/hadoop/yarn-site.xml

<configuration>
        <property>
                <name>yarn.resourcemanager.hostname</name>
                <value>Master</value>
        </property>
        <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
        </property>
</configuration>
#===Master===
cd /usr/local
rm -r ./hadoop/tmp
rm -r ./hadoop/logs/*
tar zcvf hadoop.master.tar.gz ./hadoop
scp ./hadoop.master.tar.gz Slave1:/home/hadoop
scp ./hadoop.master.tar.gz Slave2:/home/hadoop
 
#===Slave1,2===
tar -zxf ~/hadoop.master.tar.gz -C /usr/local
chown -R hadoop:hadoop /usr/local/hadoop
#===Master===
#Format the NameNode
hdfs namenode -format
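#Note: format the NameNode only on the first setup. Reformatting assigns a new clusterID,
#which is exactly what causes the DataNode "Incompatible clusterIDs" error recorded at the end of these notes.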
 
#Start Hadoop
start-all.sh
#OR
start-dfs.sh
start-yarn.sh
mr-jobhistory-daemon.sh start historyserver
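 
#A quick extra check (not in the original notes): run jps on each node to confirm the daemons are up.
#On Master expect NameNode, SecondaryNameNode, ResourceManager and JobHistoryServer;
#on each slave expect DataNode and NodeManager.
jps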

#Check the status of the NameNode and DataNodes
http://192.168.131.130:50070/

hdfs dfsadmin -report
 
#Create the HDFS user directory
hdfs dfs -mkdir -p /user/hadoop
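#Relative HDFS paths such as the input directory used below resolve against this home directory,
#i.e. input is really /user/hadoop/input.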
 
#Copy the configuration files from /usr/local/hadoop/etc/hadoop into the distributed file system as input files
hdfs dfs -mkdir input
hdfs dfs -put /usr/local/hadoop/etc/hadoop/*.xml input
 
hdfs dfs -ls
Found 2 items
drwxr-xr-x   - hadoop supergroup          0 2016-03-03 22:48 input
drwxr-xr-x   - hadoop supergroup          0 2016-03-03 22:54 output
 
hdfs dfs -ls /
Found 3 items
drwxr-xr-x   - hadoop supergroup          0 2016-03-04 00:38 /system
drwxrwx---   - hadoop supergroup          0 2016-03-03 22:45 /tmp
drwxr-xr-x   - hadoop supergroup          0 2016-03-03 22:47 /user
 
hdfs dfs -ls /user/hadoop
Found 2 items
drwxr-xr-x   - hadoop supergroup          0 2016-03-03 22:48 /user/hadoop/input
drwxr-xr-x   - hadoop supergroup          0 2016-03-03 22:54 /user/hadoop/output
 
hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar grep input output 'dfs[a-z.]+'
hdfs dfs -cat output/*
1       dfsadmin
1       dfs.replication
1       dfs.namenode.secondary.http
1       dfs.namenode.name.dir
1       dfs.datanode.data.dir
 
hdfs dfs -rmr output
hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar grep input output 'mast[a-z.]+'
hdfs dfs -cat output/*
1       masters
1       master.protocol.acl

#Check job progress in the YARN web UI
http://192.168.131.130:8088/cluster

#===Master===
stop-all.sh
#OR
stop-yarn.sh
stop-dfs.sh
mr-jobhistory-daemon.sh stop historyserver

#Troubleshooting notes

tail /usr/local/hadoop/logs/hadoop-hadoop-datanode-Slave1.log
  FATAL org.apache.hadoop.hdfs.server.datanode.DataNode: Initialization failed for Block pool <registering> (Datanode Uuid unassigned) service to Master/9000. Exiting.
http://stackoverflow.com/questions/30521474/hadoop-hdfs-formatting-gets-error-failed-for-block-pool
 
cat /usr/local/hadoop/logs/hadoop-hadoop-datanode-Slave1.log | grep CID
  2016-03-06 18:56:24,584 WARN org.apache.hadoop.hdfs.server.common.Storage: java.io.IOException: Incompatible clusterIDs in /usr/local/hadoop/tmp/dfs/data: namenode clusterID = CID-f134a9fa-041e-46f8-8e03-6cf78226a9cd; datanode clusterID = CID-4e691c0a-1cb2-46fd-9793-734a9b9047cf
 
Open the /usr/local/hadoop/tmp/dfs/data/current/VERSION file on the affected DataNode (the path set by dfs.datanode.data.dir above) and change the clusterID to match the NameNode's:
  clusterID=CID-f134a9fa-041e-46f8-8e03-6cf78226a9cd
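 
An alternative that is often simpler when the DataNode holds no data yet (an assumption, not part of the original notes): stop HDFS, wipe the DataNode storage directory on the affected slave, and start HDFS again so it re-registers with the current clusterID. This destroys any blocks stored on that node.
stop-dfs.sh
#on the affected slave:
rm -r /usr/local/hadoop/tmp/dfs/data
#on Master:
start-dfs.sh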
