From f4c8ed531f3581db3697fe5d2681e4ae64ebeaf1 Mon Sep 17 00:00:00 2001
From: Decoupes Remy <remy.decoupes@irstea.fr>
Date: Mon, 9 Dec 2019 17:43:12 +0100
Subject: [PATCH] start HDFS cluster : still some issues in configuration

---
 README.md                                     |  25 ++++-
 playbook/install-datanode.yml                 |   4 +-
 playbook/install-mononode.yml                 |   2 +-
 playbook/install-namenode.yml                 |   4 +-
 playbook/roles/hadoop-common/tasks/main.yml   |  74 +++++++------
 .../hadoop-common/templates/core-site.j2      |   2 +-
 .../hadoop-common/templates/hdfs-site.j2      |   2 +-
 .../hadoop-common/templates/mapred-site.j2    |   2 +-
 .../roles/hadoop-common/templates/workers.j2  |   5 +
 playbook/roles/hadoop-common/vars/main.yml    |   3 +-
 playbook/roles/hadoop-mononode/tasks/main.yml | 104 ++++++++++++++++++
 playbook/roles/hadoop-namenode/tasks/main.yml |   7 ++
 playbook/roles/hosts-file/vars/main.yml       |   3 +
 vagrant/cluster/Vagrantfile                   |   8 +-
 14 files changed, 197 insertions(+), 48 deletions(-)
 create mode 100644 playbook/roles/hadoop-common/templates/workers.j2
 create mode 100644 playbook/roles/hadoop-namenode/tasks/main.yml

diff --git a/README.md b/README.md
index 7c451c1..442f641 100644
--- a/README.md
+++ b/README.md
@@ -83,8 +83,27 @@ Then run the script [ansible-launch.sh](ansible-launch.sh) :
 1. Set your nodes' IP address in [VagrantFile](vagrant/cluster/Vagrantfile)
 2. Declare those IP for ansible provision in [vars](playbook/roles/hosts-file/vars/main.yml)
 3. in cli : start your multiple VM from this [directory : vagrant/cluster](vagrant/cluster) :
-```shell
-vagrant up
-```
+   ```shell
+   vagrant up
+   ```
+4. Format HDFS:
+   * ssh on the namenode
+   * in cli: as user hadoop, change directory & format HDFS
+   ```shell
+   sudo su hadoop
+   cd /usr/local/hadoop/bin/
+   hdfs namenode -format
+   ```
+5. Start the HDFS daemon on your cluster:
+   * ssh on the namenode
+   * in cli: as root, start the hadoop service
+   ```shell
+   sudo systemctl start hadoop
+   ```
+   * **WORK IN PROGRESS**: systemd will report that something went wrong, but the cluster is working anyway.
+6. Verify your cluster is up:
+   * on your own device, open a web browser
+   * go to [IP-of-your-namenode]:9870
+     (with the default IPs: http://10.0.0.10:9870)
 
 ## Deploy cluster HDFS on servers
 work in progress
\ No newline at end of file
diff --git a/playbook/install-datanode.yml b/playbook/install-datanode.yml
index d5f94ba..90e32f2 100644
--- a/playbook/install-datanode.yml
+++ b/playbook/install-datanode.yml
@@ -5,4 +5,6 @@
   roles:
     - common
     - hosts-file
-    - hadoop-common
\ No newline at end of file
+    - role: hadoop-common
+      vars_files:
+        - playbook/roles/hosts-file/vars/main.yml
\ No newline at end of file
diff --git a/playbook/install-mononode.yml b/playbook/install-mononode.yml
index 5937a0b..93a5c50 100644
--- a/playbook/install-mononode.yml
+++ b/playbook/install-mononode.yml
@@ -4,4 +4,4 @@

   roles:
     - common
-    - hadoop-common
+    - hadoop-mononode
diff --git a/playbook/install-namenode.yml b/playbook/install-namenode.yml
index 23ac001..c5d3d7b 100644
--- a/playbook/install-namenode.yml
+++ b/playbook/install-namenode.yml
@@ -5,4 +5,6 @@
   roles:
     - common
     - hosts-file
-    - hadoop-common
\ No newline at end of file
+    - role: hadoop-common
+      vars_files:
+        - playbook/roles/hosts-file/vars/main.yml
\ No newline at end of file
diff --git a/playbook/roles/hadoop-common/tasks/main.yml b/playbook/roles/hadoop-common/tasks/main.yml
index fc3d342..c520745 100644
--- a/playbook/roles/hadoop-common/tasks/main.yml
+++ b/playbook/roles/hadoop-common/tasks/main.yml
@@ -102,43 +102,51 @@
     path: "{{ hadoopDir }}/etc/hadoop/hadoop-env.sh"
     block: "export JAVA_HOME={{ javahome }}"

-# - name: configure core-site.xml
-#   become: yes
-#   template:
-#     src: templates/core-site.j2
-#     dest: "{{ hadoopDir }}/etc/hadoop/core-site.xml"
-#     owner: hadoop
-#     group: hadoop
+- name: configure hdfs-site.xml
+  become: yes
+  template:
+    src: templates/hdfs-site.j2
+    dest: "{{ hadoopDir }}/etc/hadoop/hdfs-site.xml"
+    owner: hadoop
+    group: hadoop

-# - name: configure hdfs-site.xml
-#   become: yes
-#   template:
-#     src: templates/hdfs-site.j2
-#     dest: "{{ hadoopDir }}/etc/hadoop/hdfs-site.xml"
-#     owner: hadoop
-#     group: hadoop
+- name: configure core-site.xml
+  become: yes
+  template:
+    src: templates/core-site.j2
+    dest: "{{ hadoopDir }}/etc/hadoop/core-site.xml"
+    owner: hadoop
+    group: hadoop

-# - name: configure mapred-site.xml
-#   become: yes
-#   template:
-#     src: templates/mapred-site.j2
-#     dest: "{{ hadoopDir }}/etc/hadoop/mapred-site.xml"
-#     owner: hadoop
-#     group: hadoop
+- name: configure mapred-site.xml
+  become: yes
+  template:
+    src: templates/mapred-site.j2
+    dest: "{{ hadoopDir }}/etc/hadoop/mapred-site.xml"
+    owner: hadoop
+    group: hadoop

-# - name: copy hadoop service file
-#   become: yes
-#   template:
-#     src: templates/hadoop.service.j2
-#     dest: /etc/systemd/system/hadoop.service
+- name: configure /etc/hadoop/workers
+  become: yes
+  template:
+    src: templates/workers.j2
+    dest: "{{ hadoopDir }}/etc/hadoop/workers"
+    owner: hadoop
+    group: hadoop

-# - name: enable hadoop service
-#   become: yes
-#   service:
-#     daemon_reload: yes
-#     name: hadoop
-#     state: stopped
-#     enabled: yes
+- name: copy hadoop service file
+  become: yes
+  template:
+    src: templates/hadoop.service.j2
+    dest: /etc/systemd/system/hadoop.service
+
+- name: enable hadoop service
+  become: yes
+  service:
+    daemon_reload: yes
+    name: hadoop
+    state: stopped
+    enabled: yes

 # - name: HDFS has been already formatted ?
 #   become: yes
diff --git a/playbook/roles/hadoop-common/templates/core-site.j2 b/playbook/roles/hadoop-common/templates/core-site.j2
index ba76ece..a43b22a 100644
--- a/playbook/roles/hadoop-common/templates/core-site.j2
+++ b/playbook/roles/hadoop-common/templates/core-site.j2
@@ -1,6 +1,6 @@
 <configuration>
     <property>
         <name>fs.default.name</name>
-        <value>hdfs://localhost:9000</value>
+        <value>hdfs://namenode:9000</value>
     </property>
 </configuration>
\ No newline at end of file
diff --git a/playbook/roles/hadoop-common/templates/hdfs-site.j2 b/playbook/roles/hadoop-common/templates/hdfs-site.j2
index 84f3842..9342205 100644
--- a/playbook/roles/hadoop-common/templates/hdfs-site.j2
+++ b/playbook/roles/hadoop-common/templates/hdfs-site.j2
@@ -1,6 +1,6 @@
 <configuration>
     <property>
         <name>dfs.replication</name>
-        <value>1</value>
+        <value>{{nbOfClusterDataNode}}</value>
     </property>
 </configuration>
diff --git a/playbook/roles/hadoop-common/templates/mapred-site.j2 b/playbook/roles/hadoop-common/templates/mapred-site.j2
index 74e723d..89adbd8 100644
--- a/playbook/roles/hadoop-common/templates/mapred-site.j2
+++ b/playbook/roles/hadoop-common/templates/mapred-site.j2
@@ -1,4 +1,4 @@
 <property>
     <name>mapred.job.tracker</name>
-    <value>localhost:9001</value>
+    <value>{{localhost}}:9001</value>
 </property>
\ No newline at end of file
diff --git a/playbook/roles/hadoop-common/templates/workers.j2 b/playbook/roles/hadoop-common/templates/workers.j2
new file mode 100644
index 0000000..4599c1a
--- /dev/null
+++ b/playbook/roles/hadoop-common/templates/workers.j2
@@ -0,0 +1,5 @@
+{% for node in cluster %}
+{% if not 'namenode' in node.role %}
+{{node.hostname}}
+{% endif %}
+{% endfor %}
\ No newline at end of file
diff --git a/playbook/roles/hadoop-common/vars/main.yml b/playbook/roles/hadoop-common/vars/main.yml
index 30c8624..ce8cae9 100644
--- a/playbook/roles/hadoop-common/vars/main.yml
+++ b/playbook/roles/hadoop-common/vars/main.yml
@@ -2,4 +2,5 @@ hadoopVersion: 3.2.1
 hadoopUSRHome: /home/hadoop
 hadoopDir: /usr/local/hadoop/
 javahome: /usr/lib/jvm/java-11-openjdk-amd64
-openjdk8URL : http://security-cdn.debian.org/debian-security/pool/updates/main/o/openjdk-8/openjdk-8-jdk_8u232-b09-1~deb9u1_amd64.deb
\ No newline at end of file
+openjdk8URL : http://security-cdn.debian.org/debian-security/pool/updates/main/o/openjdk-8/openjdk-8-jdk_8u232-b09-1~deb9u1_amd64.deb
+nbOfClusterDataNode: 2
\ No newline at end of file
diff --git a/playbook/roles/hadoop-mononode/tasks/main.yml b/playbook/roles/hadoop-mononode/tasks/main.yml
index 2d66cf0..dfd95a3 100644
--- a/playbook/roles/hadoop-mononode/tasks/main.yml
+++ b/playbook/roles/hadoop-mononode/tasks/main.yml
@@ -1,3 +1,107 @@
+# Playbook based on https://fr.blog.businessdecision.com/tutoriel-cluster-hadoop/
+
+- debug:
+    msg:
+      - "Hadoop only supports java jdk 8; see https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions for when it will support jdk 11"
+      - "Be aware that some things may not work with jdk 11, like browsing HDFS through the web UI on port 9870"
+      - "Default IPv4 address is: {{ ansible_default_ipv4.address }}"
+
+- name: install openjdk-11-jdk
+  become: yes
+  apt:
+    name:
+      - openjdk-11-jdk
+
+- name: create hadoop group
+  become: yes
+  group:
+    name: hadoop
+
+- name: create hadoop user
+  become: yes
+  user:
+    name: hadoop
+    group: hadoop
+    home: "{{ hadoopUSRHome }}"
+    createhome: yes
+    system: yes
+
+- name: Set JAVA_HOME as environment variable
+  become: yes
+  become_user: hadoop
+  blockinfile:
+    insertafter: EOF
+    path: ~/.bashrc
+    block: |
+      export JAVA_HOME={{ javahome }}
+      export HADOOP_HOME=/usr/local/hadoop
+      export PATH=$PATH:$HADOOP_HOME/bin
+
+- name: source .bashrc
+  become: yes
+  become_user: hadoop
+  shell: source ~/.bashrc
+  args:
+    executable: /bin/bash
+
+
+- name: create .ssh directory
+  become: yes
+  file:
+    path: "{{ hadoopUSRHome }}/.ssh/"
+    state: directory
+    owner: hadoop
+    group: hadoop
+    mode: 0700
+
+- name: copy ssh key
+  become: yes
+  copy:
+    src: "{{ item }}"
+    dest: "{{ hadoopUSRHome }}/.ssh/"
+    owner: hadoop
+    group: hadoop
+    mode: 0600
+  with_items:
+    - keys/id_rsa
+    - keys/id_rsa.pub
+
+- name: authorized ssh key for hadoop user
+  become: yes
+  authorized_key:
+    user: hadoop
+    state: present
+    key: "{{ lookup('file', 'keys/id_rsa.pub') }}"
+
+- name: create a temporary directory
+  become: yes
+  file:
+    state: directory
+    path: "{{ hadoopUSRHome }}/tmp"
+
+- name: create a prod directory for hadoop
+  become: yes
+  file:
+    state: directory
+    path: "{{ hadoopDir }}"
+
+- name: "Download and Extract hadoop-{{ hadoopVersion }}"
+  become: yes
+  unarchive:
+    src: "http://apache.mirrors.ovh.net/ftp.apache.org/dist/hadoop/core/hadoop-{{ hadoopVersion }}/hadoop-{{ hadoopVersion }}.tar.gz"
+    remote_src: yes
+    dest: "{{ hadoopDir }}"
+    extra_opts: [--strip-components=1]
+    owner: hadoop
+    group: hadoop
+
+- name: Set JAVA_HOME in hadoop-env.sh
+  become: yes
+  blockinfile:
+    insertafter: EOF
+    path: "{{ hadoopDir }}/etc/hadoop/hadoop-env.sh"
+    block: "export JAVA_HOME={{ javahome }}"
+
 - name: configure core-site.xml
   become: yes
   template:
diff --git a/playbook/roles/hadoop-namenode/tasks/main.yml b/playbook/roles/hadoop-namenode/tasks/main.yml
new file mode 100644
index 0000000..e27fcbf
--- /dev/null
+++ b/playbook/roles/hadoop-namenode/tasks/main.yml
@@ -0,0 +1,7 @@
+- name: configure core-site.xml
+  become: yes
+  template:
+    src: templates/core-site.j2
+    dest: "{{ hadoopDir }}/etc/hadoop/core-site.xml"
+    owner: hadoop
+    group: hadoop
\ No newline at end of file
diff --git a/playbook/roles/hosts-file/vars/main.yml b/playbook/roles/hosts-file/vars/main.yml
index f74023c..5d039a7 100644
--- a/playbook/roles/hosts-file/vars/main.yml
+++ b/playbook/roles/hosts-file/vars/main.yml
@@ -1,7 +1,10 @@
 cluster:
   - hostname: namenode
+    role: namenode
     IP: 10.0.0.10
   - hostname: datanode1
+    role: datanode
     IP: 10.0.0.11
   - hostname: datanode2
+    role: datanode
     IP: 10.0.0.12
diff --git a/vagrant/cluster/Vagrantfile b/vagrant/cluster/Vagrantfile
index 32160eb..54890f4 100644
--- a/vagrant/cluster/Vagrantfile
+++ b/vagrant/cluster/Vagrantfile
@@ -20,7 +20,7 @@ Vagrant.configure("2") do |config|
   # Number of datanode
   N = NUMBER_OF_DATANODE

-  (1..2).each do |machine_id|
+  (1..N).each do |machine_id|
     config.vm.define "datanode#{machine_id}" do |machine|
       machine.vm.box = "generic/debian10"
       machine.vm.network "public_network", bridge:"enp1s0", use_dhcp_assigned_default_route: true
@@ -31,10 +31,8 @@ Vagrant.configure("2") do |config|
     SHELL
       machine.vm.hostname = "datanode#{machine_id}"
       machine.vm.network :private_network, ip: "10.0.0.1#{machine_id}"
-      if machine_id == N
-        machine.vm.provision "ansible" do |ansible|
-          ansible.playbook = "../../playbook/install-datanode.yml"
-        end
+      machine.vm.provision "ansible" do |ansible|
+        ansible.playbook = "../../playbook/install-datanode.yml"
       end
     end
   end
-- 
GitLab
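
Note on the hadoop service: the "copy hadoop service file" task above deploys `templates/hadoop.service.j2`, but that template is not included in this patch. Below is only a minimal sketch of what such a unit could look like, assuming it wraps the stock `start-dfs.sh`/`stop-dfs.sh` scripts shipped in the Hadoop tarball extracted to `/usr/local/hadoop`; it is not the committed template, and every path and option here is an assumption. Because `start-dfs.sh` exits after spawning the NameNode/DataNode JVMs, a `oneshot` unit with `RemainAfterExit` is one way to avoid the systemd failure mentioned in README step 5.

```ini
# Hypothetical sketch only: templates/hadoop.service.j2 is not part of this patch,
# so the paths and options below are assumptions, not the committed template.
[Unit]
Description=Hadoop HDFS daemons
After=network.target

[Service]
# start-dfs.sh spawns the HDFS daemons over ssh and then exits; without
# RemainAfterExit, systemd would report the unit as inactive/failed even
# though the cluster is running (the "WORK IN PROGRESS" symptom above).
Type=oneshot
RemainAfterExit=yes
User=hadoop
ExecStart=/usr/local/hadoop/sbin/start-dfs.sh
ExecStop=/usr/local/hadoop/sbin/stop-dfs.sh

[Install]
WantedBy=multi-user.target
```

With a unit of this shape, `systemctl start hadoop` should leave the unit in the `active (exited)` state rather than reporting a failure, and `systemctl stop hadoop` should shut the HDFS daemons down via `stop-dfs.sh`.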