diff --git a/README.md b/README.md index 3a3c43b1742d6ff7fc56d408fdcd835daf262598..7c451c1479822768d2bb51ad8d3513f305d30497 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This project aims to deploy a datalake and its ecosystem using **[Ansible](http Acutally, this project does : 1. Deploy a mono-node HDFS on : - * [Your own computer using a virtual machine with Vagrant & ansible provision](#deploy-mono-node-hdfs-on-a-VM) + * [Your own computer using a virtual machine with Vagrant & ansible provision](#deploy-mono-node-hdfs-on-a-vm) * [a server using ansible](#deploy-mono-node-hdfs-on-a-server) 2. Deploy a cluster HDFS on : * [Your own computer using as many virtual machine you want using Vagrant & ansible](#deploy-cluster-hdfs-with-multiple-vms) diff --git a/playbook/roles/hadoop-common/tasks/main.yml b/playbook/roles/hadoop-common/tasks/main.yml index 5fb7ebde23826840a10c6f2316dee506b8236b65..fc3d342716f88b697f73db60f04bcb5601d2ce80 100644 --- a/playbook/roles/hadoop-common/tasks/main.yml +++ b/playbook/roles/hadoop-common/tasks/main.yml @@ -6,101 +6,101 @@ - "Be aware that things may not work when using jdk 11 like explore HDFS using webserver on port 9870" - "Default IPv4 address is : {{ ansible_default_ipv4.address }}" -# - name: Set java home as environment variable -# become: yes -# apt: -# name: -# - openjdk-11-jdk - -# - name: create hadoop group -# become: yes -# group: -# name: hadoop - -# - name: create hadoop user -# become: yes -# user: -# name: hadoop -# group: hadoop -# home: "{{ hadoopUSRHome }}" -# createhome: yes -# system: yes - -# - name: Set JAVA_HOME as environment variable -# become: yes -# become_user : hadoop -# blockinfile: -# insertafter: EOF -# path : ~/.bashrc -# block: | -# export JAVA_HOME={{ javahome }} -# export HADOOP_HOME=/usr/local/hadoop -# export PATH=$PATH:$HADOOP_HOME/bin - -# - name: source .bashrc -# become: yes -# become_user: hadoop -# shell: source ~/.bashrc -# args: -# executable: /bin/bash - - -# - name: 
create .ssh directory -# become: yes -# file: -# path: "{{ hadoopUSRHome }}/.ssh/" -# state: directory -# owner: hadoop -# group: hadoop -# mode: 0700 - -# - name: copy ssh key -# become: yes -# copy: -# src: "{{ item }}" -# dest: "{{ hadoopUSRHome }}/.ssh/" -# owner: hadoop -# group: hadoop -# mode: 0600 -# with_items: -# - keys/id_rsa -# - keys/id_rsa.pub - -# - name: authorized ssh key for hadoop user -# become: yes -# authorized_key: -# user: hadoop -# state: present -# key: "{{ lookup('file', 'keys/id_rsa.pub') }}" +- name: Install OpenJDK 11 + become: yes + apt: + name: + - openjdk-11-jdk + +- name: create hadoop group + become: yes + group: + name: hadoop + +- name: create hadoop user + become: yes + user: + name: hadoop + group: hadoop + home: "{{ hadoopUSRHome }}" + createhome: yes + system: yes + +- name: Set JAVA_HOME as environment variable + become: yes + become_user: hadoop + blockinfile: + insertafter: EOF + path: ~/.bashrc + block: | + export JAVA_HOME={{ javahome }} + export HADOOP_HOME=/usr/local/hadoop + export PATH=$PATH:$HADOOP_HOME/bin + +- name: source .bashrc + become: yes + become_user: hadoop + shell: source ~/.bashrc # NOTE(review): each task runs in its own shell, so this does not export the variables to later tasks + args: + executable: /bin/bash + + +- name: create .ssh directory + become: yes + file: + path: "{{ hadoopUSRHome }}/.ssh/" + state: directory + owner: hadoop + group: hadoop + mode: "0700" + +- name: copy ssh key + become: yes + copy: + src: "{{ item }}" + dest: "{{ hadoopUSRHome }}/.ssh/" + owner: hadoop + group: hadoop + mode: "0600" + with_items: + - keys/id_rsa + - keys/id_rsa.pub + +- name: authorized ssh key for hadoop user + become: yes + authorized_key: + user: hadoop + state: present + key: "{{ lookup('file', 'keys/id_rsa.pub') }}" -# - name: create a tempory directory -# become: yes -# file: -# state: directory -# path: "{{ hadoopUSRHome }}/tmp" - -# - name: create a prod directory for hadoop -# become: yes -# file: -# state: directory -# path: "{{ hadoopDir }}" - -# - name: "Download and Extract 
hadoop-{{ hadoopVersion }}" -# become: yes -# unarchive: -# src: "http://apache.mirrors.ovh.net/ftp.apache.org/dist/hadoop/core/hadoop-{{ hadoopVersion }}/hadoop-{{ hadoopVersion }}.tar.gz" -# remote_src: yes -# dest: "{{ hadoopDir }}" -# extra_opts: [--strip-components=1] -# owner: hadoop -# group: hadoop - -# - name : Set JAVA_HOME in hadoop-env.sh -# become: yes -# blockinfile: -# insertafter: EOF -# path: "{{ hadoopDir }}/etc/hadoop/hadoop-env.sh" -# block: "export JAVA_HOME={{ javahome }}" +- name: create a temporary directory + become: yes + file: + state: directory + path: "{{ hadoopUSRHome }}/tmp" + +- name: create a prod directory for hadoop + become: yes + file: + state: directory + path: "{{ hadoopDir }}" + +- name: "Download and Extract hadoop-{{ hadoopVersion }}" + become: yes + unarchive: + src: "http://apache.mirrors.ovh.net/ftp.apache.org/dist/hadoop/core/hadoop-{{ hadoopVersion }}/hadoop-{{ hadoopVersion }}.tar.gz" + remote_src: yes + dest: "{{ hadoopDir }}" + extra_opts: [--strip-components=1] + owner: hadoop + group: hadoop + +- name: Set JAVA_HOME in hadoop-env.sh + become: yes + blockinfile: + insertafter: EOF + path: "{{ hadoopDir }}/etc/hadoop/hadoop-env.sh" + block: "export JAVA_HOME={{ javahome }}" # - name: configure core-site.xml # become: yes diff --git a/playbook/roles/hadoop-mononode/tasks/main.yml b/playbook/roles/hadoop-mononode/tasks/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..2d66cf0fb20f6fc3615725f761c4032531a91d92 --- /dev/null +++ b/playbook/roles/hadoop-mononode/tasks/main.yml @@ -0,0 +1,68 @@ +# Single-node HDFS: render the Hadoop site files, install the systemd unit, +# format the NameNode only when no VERSION file exists yet, then stop and start the service. +- name: configure core-site.xml + become: yes + template: + src: templates/core-site.j2 + dest: "{{ hadoopDir }}/etc/hadoop/core-site.xml" + owner: hadoop + group: hadoop + +- name: configure hdfs-site.xml + become: yes + template: + src: templates/hdfs-site.j2 + dest: "{{ hadoopDir }}/etc/hadoop/hdfs-site.xml" + owner: hadoop + group: hadoop + +- name: configure mapred-site.xml + become: yes + template: 
+ src: templates/mapred-site.j2 + dest: "{{ hadoopDir }}/etc/hadoop/mapred-site.xml" + owner: hadoop + group: hadoop + +- name: copy hadoop service file + become: yes + template: + src: templates/hadoop.service.j2 + dest: /etc/systemd/system/hadoop.service + +- name: enable hadoop service + become: yes + service: + daemon_reload: yes + name: hadoop + state: stopped + enabled: yes + +- name: HDFS has been already formatted ? + become: yes + stat: + path: /tmp/hadoop-hadoop/dfs/name/current/VERSION + register: file_exist + +- debug: + msg: "/tmp/hadoop-hadoop/dfs/name/current/VERSION exists ? : {{ file_exist.stat.exists}}" + +- name: format HDFS + become: yes + become_user: hadoop + shell: "{{ hadoopDir }}/bin/hdfs namenode -format" + args: + executable: /bin/bash + when: not file_exist.stat.exists + +- name: stopped hadoop service + become: yes + service: + name: hadoop + state: stopped + +- name: start hadoop service + become: yes + service: + name: hadoop + state: started \ No newline at end of file diff --git a/playbook/roles/hadoop-mononode/templates/core-site.j2 b/playbook/roles/hadoop-mononode/templates/core-site.j2 new file mode 100644 index 0000000000000000000000000000000000000000..ba76ece8d8122832cb88e2631c43e0f11b62bc9a --- /dev/null +++ b/playbook/roles/hadoop-mononode/templates/core-site.j2 @@ -0,0 +1,6 @@ +<configuration> + <property> + <name>fs.defaultFS</name> + <value>hdfs://localhost:9000</value> + </property> +</configuration> \ No newline at end of file diff --git a/playbook/roles/hadoop-mononode/templates/hadoop.service.j2 b/playbook/roles/hadoop-mononode/templates/hadoop.service.j2 new file mode 100644 index 0000000000000000000000000000000000000000..a48a42b301586c971cc56946136f616114a7a300 --- /dev/null +++ b/playbook/roles/hadoop-mononode/templates/hadoop.service.j2 @@ -0,0 +1,20 @@ +[Unit] +Description=Hadoop DFS namenode and datanode +After=syslog.target network.target remote-fs.target nss-lookup.target network-online.target 
+Requires=network-online.target + +[Service] +User=hadoop +Group=hadoop +Type=forking +ExecStart={{ hadoopDir }}/sbin/start-dfs.sh +ExecStop={{ hadoopDir }}/sbin/stop-dfs.sh +WorkingDirectory={{ hadoopDir }} +Environment=JAVA_HOME={{ javahome }} +Environment=HADOOP_HOME={{ hadoopDir }} +TimeoutStartSec=2min +# Restart=on-failure +PIDFile=/tmp/hadoop-hadoop-namenode.pid + +[Install] +WantedBy=multi-user.target \ No newline at end of file diff --git a/playbook/roles/hadoop-mononode/templates/hdfs-site.j2 b/playbook/roles/hadoop-mononode/templates/hdfs-site.j2 new file mode 100644 index 0000000000000000000000000000000000000000..84f3842e9e3f7d21ac1707c44a7933359a092d01 --- /dev/null +++ b/playbook/roles/hadoop-mononode/templates/hdfs-site.j2 @@ -0,0 +1,6 @@ +<configuration> + <property> + <name>dfs.replication</name> + <value>1</value> + </property> +</configuration> diff --git a/playbook/roles/hadoop-mononode/templates/mapred-site.j2 b/playbook/roles/hadoop-mononode/templates/mapred-site.j2 new file mode 100644 index 0000000000000000000000000000000000000000..74e723d7f92e578a00419bbf0f956972229e5c6a --- /dev/null +++ b/playbook/roles/hadoop-mononode/templates/mapred-site.j2 @@ -0,0 +1,6 @@ +<configuration> + <property> + <name>mapred.job.tracker</name> + <value>localhost:9001</value> + </property> +</configuration> \ No newline at end of file