Commit f4c8ed53 authored by Decoupes Remy

start HDFS cluster: still some issues in configuration

@@ -83,8 +83,27 @@ Then run the script [ansible-launch.sh](ansible-launch.sh):
1. Set your nodes' IP addresses in the [Vagrantfile](vagrant/cluster/Vagrantfile)
2. Declare those IPs for the Ansible provisioning in [vars](playbook/roles/hosts-file/vars/main.yml)
3. In the CLI, start your VMs from the [vagrant/cluster](vagrant/cluster) directory:
   ```shell
   vagrant up
   ```
4. Format HDFS:
   * SSH onto the namenode
   * In the CLI, as the hadoop user, change directory and format HDFS:
   ```shell
   sudo su hadoop
   cd /usr/local/hadoop/bin/
   hdfs namenode -format
   ```
5. Start the HDFS daemon on your cluster:
   * SSH onto the namenode
   * In the CLI, as root, start the hadoop service:
   ```shell
   sudo systemctl start hadoop
   ```
   * **Work in progress**: systemd will report that something went wrong, but the cluster is working anyway.
6. Verify your cluster is up:
   * On your own device, open a web browser
   * Go to [IP-of-your-namenode]:9870 (with the defaults: http://10.0.0.10:9870)
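
To check from the CLI instead, two standard Hadoop commands can be run on the namenode: `jps` lists the running Hadoop JVMs, and `hdfs dfsadmin -report` shows which datanodes have registered:

```shell
sudo su hadoop
# list the running Hadoop JVMs; a NameNode process should appear here
jps
# ask the namenode which datanodes have registered;
# with the defaults of this commit it should report 2 live datanodes
hdfs dfsadmin -report
```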
## Deploy an HDFS cluster on servers
Work in progress.
@@ -5,4 +5,6 @@
  roles:
    - common
    - hosts-file
    - role: hadoop-common
  vars_files:
    - playbook/roles/hosts-file/vars/main.yml
@@ -4,4 +4,4 @@
  roles:
    - common
    - hadoop-mononode
@@ -5,4 +5,6 @@
  roles:
    - common
    - hosts-file
    - role: hadoop-common
  vars_files:
    - playbook/roles/hosts-file/vars/main.yml
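
These playbooks are normally invoked by Vagrant's ansible provisioner (see the Vagrantfile hunk below). To re-run one by hand against an already-running VM, something along these lines should work; the inventory host and SSH user here are assumptions, not part of this commit:

```shell
# hypothetical manual re-provision of a datanode; adjust the IP and user to your setup
ansible-playbook -i "10.0.0.11," -u vagrant playbook/install-datanode.yml
```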
@@ -102,43 +102,51 @@
path: "{{ hadoopDir }}/etc/hadoop/hadoop-env.sh"
block: "export JAVA_HOME={{ javahome }}"
# - name: configure core-site.xml
# become: yes
# template:
# src: templates/core-site.j2
# dest: "{{ hadoopDir }}/etc/hadoop/core-site.xml"
# owner: hadoop
# group: hadoop
- name: configure hdfs-site.xml
become: yes
template:
src: templates/hdfs-site.j2
dest: "{{ hadoopDir }}/etc/hadoop/hdfs-site.xml"
owner: hadoop
group: hadoop
# - name: configure hdfs-site.xml
# become: yes
# template:
# src: templates/hdfs-site.j2
# dest: "{{ hadoopDir }}/etc/hadoop/hdfs-site.xml"
# owner: hadoop
# group: hadoop
- name: configure core-site.xml
become: yes
template:
src: templates/core-site.j2
dest: "{{ hadoopDir }}/etc/hadoop/core-site.xml"
owner: hadoop
group: hadoop
# - name: configure mapred-site.xml
# become: yes
# template:
# src: templates/mapred-site.j2
# dest: "{{ hadoopDir }}/etc/hadoop/mapred-site.xml"
# owner: hadoop
# group: hadoop
- name: configure mapred-site.xml
become: yes
template:
src: templates/mapred-site.j2
dest: "{{ hadoopDir }}/etc/hadoop/mapred-site.xml"
owner: hadoop
group: hadoop
# - name: copy hadoop service file
# become: yes
# template:
# src: templates/hadoop.service.j2
# dest: /etc/systemd/system/hadoop.service
- name: configure /etc/hadoop/workers
become: yes
template:
src: templates/workers.j2
dest: "{{ hadoopDir }}/etc/hadoop/workers"
owner: hadoop
group: hadoop
# - name: enable hadoop service
# become: yes
# service:
# daemon_reload: yes
# name: hadoop
# state: stopped
# enabled: yes
- name: copy hadoop service file
become: yes
template:
src: templates/hadoop.service.j2
dest: /etc/systemd/system/hadoop.service
- name: enable hadoop service
become: yes
service:
daemon_reload: yes
name: hadoop
state: stopped
enabled: yes
# - name: has HDFS already been formatted?
#   become: yes
......
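
The `hadoop.service.j2` template itself does not appear in this diff. A minimal sketch of what such a unit could look like follows; this is purely an assumption, not the repository's actual template. Notably, `start-dfs.sh` spawns the daemons over SSH and exits immediately, so a default `Type=simple` or `Type=forking` unit makes systemd think the service died, which is one plausible explanation for the "systemd says something went wrong but the cluster works" note in the README; `Type=oneshot` with `RemainAfterExit` avoids that.

```ini
# Hypothetical sketch of templates/hadoop.service.j2 -- not the actual file from this commit
[Unit]
Description=Hadoop HDFS daemons
After=network.target

[Service]
# start-dfs.sh forks the daemons over SSH and returns right away,
# so oneshot + RemainAfterExit keeps systemd from flagging a failure
Type=oneshot
RemainAfterExit=yes
User=hadoop
Environment=JAVA_HOME={{ javahome }}
ExecStart={{ hadoopDir }}/sbin/start-dfs.sh
ExecStop={{ hadoopDir }}/sbin/stop-dfs.sh

[Install]
WantedBy=multi-user.target
```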
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://namenode:9000</value>
    </property>
</configuration>
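
`fs.default.name` is the long-deprecated alias for this setting; on Hadoop 3.x the canonical key is `fs.defaultFS`, so an equivalent template without the deprecation warning would be:

```xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://namenode:9000</value>
    </property>
</configuration>
```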
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>{{ nbOfClusterDataNode }}</value>
    </property>
</configuration>
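
With `nbOfClusterDataNode: 2` from the hadoop-common vars below, the template renders so that every block is replicated to both datanodes:

```xml
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
</configuration>
```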
<property>
    <name>mapred.job.tracker</name>
    <value>namenode:9001</value>
</property>
{% for node in cluster %}
{% if 'namenode' not in node.role %}
{{ node.hostname }}
{% endif %}
{% endfor %}
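
Since Ansible's `template` module renders Jinja2 with `trim_blocks` enabled, the `{% %}` lines emit no blank lines, and with the `cluster` variable added in this commit the rendered workers file should contain only the datanode hostnames:

```
datanode1
datanode2
```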
@@ -2,4 +2,5 @@ hadoopVersion: 3.2.1
hadoopUSRHome: /home/hadoop
hadoopDir: /usr/local/hadoop/
javahome: /usr/lib/jvm/java-11-openjdk-amd64
openjdk8URL: http://security-cdn.debian.org/debian-security/pool/updates/main/o/openjdk-8/openjdk-8-jdk_8u232-b09-1~deb9u1_amd64.deb
nbOfClusterDataNode: 2
# Playbook based on https://fr.blog.businessdecision.com/tutoriel-cluster-hadoop/
- debug:
    msg:
      - "Hadoop only supports JDK 8; see https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions for when it will support JDK 11"
      - "Be aware that some things may not work with JDK 11, such as browsing HDFS through the web server on port 9870"
      - "Default IPv4 address is: {{ ansible_default_ipv4.address }}"
- name: install openjdk 11
  become: yes
  apt:
    name:
      - openjdk-11-jdk
- name: create hadoop group
  become: yes
  group:
    name: hadoop
- name: create hadoop user
  become: yes
  user:
    name: hadoop
    group: hadoop
    home: "{{ hadoopUSRHome }}"
    createhome: yes
    system: yes
- name: Set JAVA_HOME as environment variable
  become: yes
  become_user: hadoop
  blockinfile:
    insertafter: EOF
    path: ~/.bashrc
    block: |
      export JAVA_HOME={{ javahome }}
      export HADOOP_HOME=/usr/local/hadoop
      export PATH=$PATH:$HADOOP_HOME/bin
- name: source .bashrc
  become: yes
  become_user: hadoop
  shell: source ~/.bashrc
  args:
    executable: /bin/bash
- name: create .ssh directory
  become: yes
  file:
    path: "{{ hadoopUSRHome }}/.ssh/"
    state: directory
    owner: hadoop
    group: hadoop
    mode: 0700
- name: copy ssh key
  become: yes
  copy:
    src: "{{ item }}"
    dest: "{{ hadoopUSRHome }}/.ssh/"
    owner: hadoop
    group: hadoop
    mode: 0600
  with_items:
    - keys/id_rsa
    - keys/id_rsa.pub
- name: authorized ssh key for hadoop user
  become: yes
  authorized_key:
    user: hadoop
    state: present
    key: "{{ lookup('file', 'keys/id_rsa.pub') }}"
- name: create a temporary directory
  become: yes
  file:
    state: directory
    path: "{{ hadoopUSRHome }}/tmp"
- name: create a prod directory for hadoop
  become: yes
  file:
    state: directory
    path: "{{ hadoopDir }}"
- name: "Download and Extract hadoop-{{ hadoopVersion }}"
  become: yes
  unarchive:
    src: "http://apache.mirrors.ovh.net/ftp.apache.org/dist/hadoop/core/hadoop-{{ hadoopVersion }}/hadoop-{{ hadoopVersion }}.tar.gz"
    remote_src: yes
    dest: "{{ hadoopDir }}"
    extra_opts: [--strip-components=1]
    owner: hadoop
    group: hadoop
- name: Set JAVA_HOME in hadoop-env.sh
  become: yes
  blockinfile:
    insertafter: EOF
    path: "{{ hadoopDir }}/etc/hadoop/hadoop-env.sh"
    block: "export JAVA_HOME={{ javahome }}"
- name: configure core-site.xml
  become: yes
  template:
......
- name: configure core-site.xml
  become: yes
  template:
    src: templates/core-site.j2
    dest: "{{ hadoopDir }}/etc/hadoop/core-site.xml"
    owner: hadoop
    group: hadoop
cluster:
  - hostname: namenode
    role: namenode
    IP: 10.0.0.10
  - hostname: datanode1
    role: datanode
    IP: 10.0.0.11
  - hostname: datanode2
    role: datanode
    IP: 10.0.0.12
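
The hosts-file role presumably renders this list into `/etc/hosts` so the nodes can reach each other by hostname; its actual template is not shown in this diff, but a hypothetical loop over the same variable would look like:

```
{% for node in cluster %}
{{ node.IP }} {{ node.hostname }}
{% endfor %}
```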
@@ -20,7 +20,7 @@ Vagrant.configure("2") do |config|
  # Number of datanodes
  N = NUMBER_OF_DATANODE
  (1..N).each do |machine_id|
    config.vm.define "datanode#{machine_id}" do |machine|
      machine.vm.box = "generic/debian10"
      machine.vm.network "public_network", bridge: "enp1s0", use_dhcp_assigned_default_route: true
@@ -31,10 +31,8 @@ Vagrant.configure("2") do |config|
      SHELL
      machine.vm.hostname = "datanode#{machine_id}"
      machine.vm.network :private_network, ip: "10.0.0.1#{machine_id}"
      machine.vm.provision "ansible" do |ansible|
        ansible.playbook = "../../playbook/install-datanode.yml"
      end
    end
  end
......