Commit c2731bc2 authored by Vincent Delbar's avatar Vincent Delbar
Browse files

ENH: Docker - last comments + version bumps

Showing with 125 additions and 99 deletions
+125 -99
...@@ -23,7 +23,7 @@ RUN if $GUI; then \ ...@@ -23,7 +23,7 @@ RUN if $GUI; then \
### Python3 links and pip packages ### Python3 links and pip packages
RUN ln -s /usr/bin/python3 /usr/local/bin/python && ln -s /usr/bin/pip3 /usr/local/bin/pip RUN ln -s /usr/bin/python3 /usr/local/bin/python && ln -s /usr/bin/pip3 /usr/local/bin/pip
# NumPy version is conflicting with system's gdal dep, may require venv # NumPy version is conflicting with system's gdal dep and may require venv
ARG NUMPY_SPEC="~=1.19" ARG NUMPY_SPEC="~=1.19"
RUN pip install --no-cache-dir -U pip wheel mock six future "numpy$NUMPY_SPEC" \ RUN pip install --no-cache-dir -U pip wheel mock six future "numpy$NUMPY_SPEC" \
&& pip install --no-cache-dir --no-deps keras_applications keras_preprocessing && pip install --no-cache-dir --no-deps keras_applications keras_preprocessing
...@@ -31,23 +31,25 @@ RUN pip install --no-cache-dir -U pip wheel mock six future "numpy$NUMPY_SPEC" \ ...@@ -31,23 +31,25 @@ RUN pip install --no-cache-dir -U pip wheel mock six future "numpy$NUMPY_SPEC" \
# ---------------------------------------------------------------------------- # ----------------------------------------------------------------------------
# Tmp builder stage - dangling cache should persist until "docker builder prune" # Tmp builder stage - dangling cache should persist until "docker builder prune"
FROM otbtf-base AS builder FROM otbtf-base AS builder
# 0.75 may be required to avoid OOM errors (especially for OTB GUI) # A smaller value may be required to avoid OOM errors when building OTB GUI
ARG CPU_RATIO=1 ARG CPU_RATIO=1
RUN mkdir -p /src/tf /opt/otbtf/bin /opt/otbtf/include /opt/otbtf/lib RUN mkdir -p /src/tf /opt/otbtf/bin /opt/otbtf/include /opt/otbtf/lib
WORKDIR /src/tf WORKDIR /src/tf
RUN git config --global advice.detachedHead false
### TF ### TF
ARG TF=v2.4.0 ARG TF=v2.4.1
# Install bazelisk (will read .bazelrc and download the right bazel version - latest by default) # Install bazelisk (will read .bazelversion and download the right bazel binary - latest by default)
RUN wget -O /opt/otbtf/bin/bazelisk https://github.com/bazelbuild/bazelisk/releases/latest/download/bazelisk-linux-amd64 \ RUN wget -qO /opt/otbtf/bin/bazelisk https://github.com/bazelbuild/bazelisk/releases/latest/download/bazelisk-linux-amd64 \
&& chmod +x /opt/otbtf/bin/bazelisk-linux-amd64 \ && chmod +x /opt/otbtf/bin/bazelisk \
&& ln -s /opt/otbtf/bin/bazelisk-linux-amd64 /opt/otbtf/bin/bazel && ln -s /opt/otbtf/bin/bazelisk /opt/otbtf/bin/bazel
ARG BZL_TARGETS="//tensorflow:libtensorflow_cc.so //tensorflow/tools/pip_package:build_pip_package" ARG BZL_TARGETS="//tensorflow:libtensorflow_cc.so //tensorflow/tools/pip_package:build_pip_package"
# --config=opt with bazel's default optimizations (otherwise edit CC_OPT_FLAGS in build-env-tf.sh) # "--config=opt" will enable 'march=native' (otherwise edit CC_OPT_FLAGS in build-env-tf.sh)
ARG BZL_CONFIGS="--config=nogcp --config=noaws --config=nohdfs --config=opt" ARG BZL_CONFIGS="--config=nogcp --config=noaws --config=nohdfs --config=opt"
# --compilation_mode opt is already enabled by default (see tf repo /.bazelrc and /configure.py) # "--compilation_mode opt" is already enabled by default (see tf repo .bazelrc and configure.py)
ARG BZL_OPTIONS="--verbose_failures --remote_cache=http://localhost:9090" ARG BZL_OPTIONS="--verbose_failures --remote_cache=http://localhost:9090"
# Build # Build
...@@ -63,7 +65,7 @@ RUN git clone --single-branch -b $TF https://github.com/tensorflow/tensorflow.gi ...@@ -63,7 +65,7 @@ RUN git clone --single-branch -b $TF https://github.com/tensorflow/tensorflow.gi
&& export TMP=/tmp/bazel \ && export TMP=/tmp/bazel \
&& BZL_CMD="build $BZL_TARGETS $BZL_CONFIGS $BZL_OPTIONS" \ && BZL_CMD="build $BZL_TARGETS $BZL_CONFIGS $BZL_OPTIONS" \
&& bazel $BZL_CMD --jobs="HOST_CPUS*$CPU_RATIO" ' \ && bazel $BZL_CMD --jobs="HOST_CPUS*$CPU_RATIO" ' \
# Installation - split here if you need to debug ^ # Installation - split here if you want to check files ^
#RUN cd tensorflow \ #RUN cd tensorflow \
&& ./bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg \ && ./bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg \
&& pip3 install --no-cache-dir --prefix=/opt/otbtf /tmp/tensorflow_pkg/tensorflow*.whl \ && pip3 install --no-cache-dir --prefix=/opt/otbtf /tmp/tensorflow_pkg/tensorflow*.whl \
...@@ -75,7 +77,7 @@ RUN git clone --single-branch -b $TF https://github.com/tensorflow/tensorflow.gi ...@@ -75,7 +77,7 @@ RUN git clone --single-branch -b $TF https://github.com/tensorflow/tensorflow.gi
# Symlink external libs (required for MKL - libiomp5) # Symlink external libs (required for MKL - libiomp5)
&& for f in $(find -L /opt/otbtf/include/tf -wholename "*/external/*/*.so"); do ln -s $f /opt/otbtf/lib/; done \ && for f in $(find -L /opt/otbtf/include/tf -wholename "*/external/*/*.so"); do ln -s $f /opt/otbtf/lib/; done \
# Cleaning # Cleaning
&& mv /root/.cache/bazel* /src/tf/ \ && rm -rf bazel-* \
&& ( $KEEP_SRC_TF || rm -rf /src/tf ) \ && ( $KEEP_SRC_TF || rm -rf /src/tf ) \
&& rm -rf /root/.cache/ /tmp/* && rm -rf /root/.cache/ /tmp/*
...@@ -94,11 +96,13 @@ RUN git clone --single-branch -b $OTB https://gitlab.orfeo-toolbox.org/orfeotool ...@@ -94,11 +96,13 @@ RUN git clone --single-branch -b $OTB https://gitlab.orfeo-toolbox.org/orfeotool
# Set GL/Qt build flags # Set GL/Qt build flags
&& if $GUI; then \ && if $GUI; then \
sed -i -r "s/-DOTB_USE_(QT|OPENGL|GL[UFE][WT])=OFF/-DOTB_USE_\1=ON/" ../build-flags-otb.txt; fi \ sed -i -r "s/-DOTB_USE_(QT|OPENGL|GL[UFE][WT])=OFF/-DOTB_USE_\1=ON/" ../build-flags-otb.txt; fi \
# Possible ENH: superbuild-all-dependencies switch, with separated build-deps-minimal.txt and build-deps-otbcli.txt
#&& if $OTB_SUPERBUILD_ALL; then sed -i -r "s/-DOTB_USE_SYSTEM_([A-Z0-9]*)=ON/-DOTB_USE_SYSTEM_\1=OFF/" ../build-flags-otb.txt; fi \
&& OTB_FLAGS=$(cat "../build-flags-otb.txt") \ && OTB_FLAGS=$(cat "../build-flags-otb.txt") \
&& cmake ../otb/SuperBuild -DCMAKE_INSTALL_PREFIX=/opt/otbtf $OTB_FLAGS \ && cmake ../otb/SuperBuild -DCMAKE_INSTALL_PREFIX=/opt/otbtf $OTB_FLAGS \
&& make -j $(python -c "import os; print(round( os.cpu_count() * $CPU_RATIO ))") && make -j $(python -c "import os; print(round( os.cpu_count() * $CPU_RATIO ))")
### OTBTF - copy (without .git) or clone repo ### OTBTF - copy (without .git/) or clone repository
COPY . /src/otbtf COPY . /src/otbtf
#RUN git clone https://github.com/remicres/otbtf.git /src/otbtf #RUN git clone https://github.com/remicres/otbtf.git /src/otbtf
RUN ln -s /src/otbtf /src/otb/otb/Modules/Remote/otbtf RUN ln -s /src/otbtf /src/otb/otb/Modules/Remote/otbtf
...@@ -113,6 +117,7 @@ RUN cd /src/otb/build/OTB/build \ ...@@ -113,6 +117,7 @@ RUN cd /src/otb/build/OTB/build \
-DOTB_WRAP_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3 \ -DOTB_WRAP_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3 \
-DOTB_USE_TENSORFLOW=ON -DModule_OTBTensorflow=ON \ -DOTB_USE_TENSORFLOW=ON -DModule_OTBTensorflow=ON \
-Dtensorflow_include_dir=/opt/otbtf/include/tf \ -Dtensorflow_include_dir=/opt/otbtf/include/tf \
# Forcing TF>=2, this Dockerfile hasn't been tested with v1 + missing link for libtensorflow_framework.so in the wheel
-DTENSORFLOW_CC_LIB=/opt/otbtf/lib/libtensorflow_cc.so.2 \ -DTENSORFLOW_CC_LIB=/opt/otbtf/lib/libtensorflow_cc.so.2 \
-DTENSORFLOW_FRAMEWORK_LIB=/opt/otbtf/lib/python3/site-packages/tensorflow/libtensorflow_framework.so.2 \ -DTENSORFLOW_FRAMEWORK_LIB=/opt/otbtf/lib/python3/site-packages/tensorflow/libtensorflow_framework.so.2 \
&& make install -j $(python -c "import os; print(round( os.cpu_count() * $CPU_RATIO ))") \ && make install -j $(python -c "import os; print(round( os.cpu_count() * $CPU_RATIO ))") \
...@@ -129,11 +134,9 @@ RUN for f in /src/otbtf/python/*.py; do if [ -x $f ]; then ln -s $f /opt/otbtf/b ...@@ -129,11 +134,9 @@ RUN for f in /src/otbtf/python/*.py; do if [ -x $f ]; then ln -s $f /opt/otbtf/b
FROM otbtf-base FROM otbtf-base
MAINTAINER Remi Cresson <remi.cresson[at]inrae[dot]fr> MAINTAINER Remi Cresson <remi.cresson[at]inrae[dot]fr>
COPY --from=builder /opt/otbtf /opt/ # Copy files from intermediate stage
COPY --from=builder /src / COPY --from=builder /opt/otbtf /opt/otbtf
# Relocate ~/.cache/bazel and ~/.cache/bazelisk COPY --from=builder /src /src
RUN if [ -d /src/tf/bazel ]; then \
mkdir -p /root/.cache && mv /src/tf/bazel* /root/.cache/
# System-wide ENV # System-wide ENV
ENV PATH="/opt/otbtf/bin:$PATH" ENV PATH="/opt/otbtf/bin:$PATH"
...@@ -141,7 +144,7 @@ ENV LD_LIBRARY_PATH="/opt/otbtf/lib:$LD_LIBRARY_PATH" ...@@ -141,7 +144,7 @@ ENV LD_LIBRARY_PATH="/opt/otbtf/lib:$LD_LIBRARY_PATH"
ENV PYTHONPATH="/opt/otbtf/lib/python3/site-packages:/opt/otbtf/lib/otb/python:/src/otbtf/python" ENV PYTHONPATH="/opt/otbtf/lib/python3/site-packages:/opt/otbtf/lib/otb/python:/src/otbtf/python"
ENV OTB_APPLICATION_PATH="/opt/otbtf/lib/otb/applications" ENV OTB_APPLICATION_PATH="/opt/otbtf/lib/otb/applications"
# Default user, directory and command (bash = 'docker create' entrypoint) # Default user, directory and command (bash is the entrypoint when using 'docker create')
RUN useradd -s /bin/bash -m otbuser RUN useradd -s /bin/bash -m otbuser
WORKDIR /home/otbuser WORKDIR /home/otbuser
...@@ -151,7 +154,7 @@ RUN if $SUDO; then \ ...@@ -151,7 +154,7 @@ RUN if $SUDO; then \
usermod -a -G sudo otbuser \ usermod -a -G sudo otbuser \
&& echo "otbuser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers; fi && echo "otbuser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers; fi
# Set /src/otbtf ownership to otbuser (but you will need 'sudo -i' in order to rebuild TF or OTB) # Set /src/otbtf ownership to otbuser (but you still need 'sudo -i' in order to rebuild TF or OTB)
RUN chown -R otbuser:otbuser /src/otbtf RUN chown -R otbuser:otbuser /src/otbtf
# This won't prevent ownership problems with volumes if you're not UID 1000 # This won't prevent ownership problems with volumes if you're not UID 1000
...@@ -159,4 +162,6 @@ USER otbuser ...@@ -159,4 +162,6 @@ USER otbuser
# User-only ENV # User-only ENV
# Test python imports # Test python imports
RUN python -c "import numpy, tensorflow, otbtf, tricks, otbApplication" RUN python -c "import tensorflow"
RUN python -c "import otbtf, tricks"
RUN python -c "import otbApplication as otb; otb.Registry.CreateApplication('ImageClassifierFromDeepFeatures')"
# Docker multi-stage build with external bazel cache # Build with Docker
Docker build has to be called from the root of the repository (i.e. `docker build .` or `bash tools/docker/multibuild.sh`). Docker build has to be called from the root of the repository (i.e. `docker build .` or `bash tools/docker/multibuild.sh`).
You may build a custom docker image using `--build-arg` and the config files in this directory. You can build a custom image using `--build-arg` and several config files :
For TensorFlow, see the `TF` arg for the git branch/tag, [build-env-tf.sh](build-env-tf.sh) and BZL_* arguments for the build configuration. - Ubuntu : `BASE_IMG` should accept any version, for additional packages see [build-deps-cli.txt](build-deps-cli.txt) and [build-deps-gui.txt](build-deps-gui.txt)
Regarding OTB, you can edit cmake flags in [build-flags-otb.txt](build-flags-otb.txt) and the `OTB` argument for the git branch/tag to clone. - TensorFlow : `TF` arg for the git branch or tag + [build-env-tf.sh](build-env-tf.sh) and BZL_* arguments for the build configuration
If you need additional Ubuntu packages see [build-deps-cli.txt](build-deps-cli.txt) and [build-deps-gui.txt](build-deps-gui.txt) for GUI related packages - it is disabled by default in order to save space, and because docker xvfb isn't working properly with opengl. - OrfeoToolBox : `OTB` arg for the git branch or tag + [build-flags-otb.txt](build-flags-otb.txt) to edit cmake flags
## Default arguments ### Base images
```bash
UBUNTU=20.04 # or 16.04, 18.04
CUDA=11.0.3 # or 10.1, 10.2
CUDNN=8 # or 7
IMG=ubuntu:$UBUNTU
GPU_IMG=nvidia/cuda:$CUDA-cudnn$CUDNN-devel-ubuntu$UBUNTU
``` ```
BASE_IMG (mandatory)
### Default arguments
```bash
BASE_IMG # mandatory
CPU_RATIO=0.95 CPU_RATIO=0.95
GUI=false GUI=false
NUMPY_SPEC="~=1.19" NUMPY_SPEC="~=1.19"
TF=v2.4.0 TF=v2.4.1
OTB=7.2.0 OTB=7.2.0
BZL_TARGETS="//tensorflow:libtensorflow_cc.so //tensorflow:libtensorflow_framework.so //tensorflow/tools/pip_package:build_pip_package" BZL_TARGETS="//tensorflow:libtensorflow_cc.so //tensorflow/tools/pip_package:build_pip_package"
BZL_CONFIGS="--config=nogcp --config=noaws --config=nohdfs --config=opt" BZL_CONFIGS="--config=nogcp --config=noaws --config=nohdfs --config=opt"
BZL_OPTIONS="--verbose_failures --remote_cache=http://localhost:9090" BZL_OPTIONS="--verbose_failures --remote_cache=http://localhost:9090"
KEEP_SRC_TF=false KEEP_SRC_TF=false
KEEP_SRC_OTB=false KEEP_SRC_OTB=false
SUDO=true SUDO=true
# NumPy version requirement :
# TF < 2.4 : "numpy<1.19.0,>=1.16.0"
# TF >= 2.4 : "numpy~=1.19"
``` ```
## Bazel remote cache daemon ### Bazel remote cache daemon
There is no way to make a common build of OTB shared between docker builds, since we're using different BASE_IMG and because of the multi-stage Dockerfile. If you just need to rebuild with different GUI or KEEP_SRC arguments, or maybe a different branch of OTB, bazel cache will help you to rebuild everything except TF, even if the docker cache was purged (after `docker [system|builder] prune`).
But if you just need to rebuild with different GUI or KEEP_SRC arguments, or maybe a different branch of OTB, bazel cache may help you to rebuild everything except TF, even if docker cache was purged (after `docker system prune`). In order to recycle the cache, bazel config and TF git tag should be exactly the same, any change in [build-env-tf.sh](build-env-tf.sh) and `--build-arg` (if related to bazel env, cuda, mkl, xla...) may result in a fresh new build.
In order to recycle the cache, bazel config and TF git tag has to be exactly the same, any change in [build-env-tf.sh](build-env-tf.sh) and `--build-arg` (if related to bazel env, cuda, mkl, xla...) may result in a complete new build.
Start a cache daemon - here with max 20GB but 12GB should be enough to save 2 TF builds (GPU and CPU): Start a cache daemon - here with max 20GB but 12GB should be enough to save 2 TF builds (GPU and CPU):
```bash ```bash
mkdir -p $HOME/.cache/bazel-remote mkdir -p $HOME/.cache/bazel-remote
docker run --detach -u 1000:1000 -v $HOME/.cache/bazel-remote:/data -p 9090:8080 buchgr/bazel-remote-cache --max_size=20 docker run --detach -u 1000:1000 -v $HOME/.cache/bazel-remote:/data -p 9090:8080 buchgr/bazel-remote-cache --max_size=20
``` ```
Then just add ` --network='host'` to the docker build command, or connect bazel to another address (see the 'BZL_OPTIONS' build argument - the other way of docker is a virtual bridge, you'll need to edit the IP address). Then just add ` --network='host'` to the docker build command, or connect bazel to a remote server - see 'BZL_OPTIONS'.
The alternative is docker's virtual bridge network, but then you'll need to edit the IP address in 'BZL_OPTIONS'.
## Build examples ## Build examples
```bash ```bash
# Build for CPU using default Dockerfiles args (without AWS, HDFS and GCP support) # Build for CPU using default Dockerfiles args (without AWS, HDFS or GCP support)
docker build --network='host' -t otbtf:cpu --build-arg BASE_IMG=ubuntu:18.04 . docker build --network='host' -t otbtf:cpu --build-arg BASE_IMG=ubuntu:20.04 .
# Clear bazel config var (deactivate compilation optimizations and unset noaws/nogcp/nohdfs) # Clear bazel config var (deactivate default optimizations and unset noaws/nogcp/nohdfs)
docker build --network='host' -t otbtf:cpu --build-arg BASE_IMG=ubuntu:18.04 --build-arg BZL_CONFIGS= . docker build --network='host' -t otbtf:cpu --build-arg BASE_IMG=ubuntu:20.04 --build-arg BZL_CONFIGS= .
# Enable MKL # Enable MKL
MKL_CONFIG="--config=nogcp --config=noaws --config=nohdfs --config=opt --config=mkl --copt='-mfpmath=both'" MKL_CONFIG="--config=nogcp --config=noaws --config=nohdfs --config=opt --config=mkl"
docker build --network='host' -t otbtf:cpu-mkl --build-arg BZL_CONFIGS="$MKL_CONFIG" --build-arg BASE_IMG=ubuntu:18.04 . docker build --network='host' -t otbtf:cpu-mkl --build-arg BZL_CONFIGS="$MKL_CONFIG" --build-arg BASE_IMG=ubuntu:20.04 .
# Build for GPU (if you're building for your system only you should change the CUDA_COMPUTE_CAPABILTIES in build-env-tf.sh) # Build for GPU (if you're building for your system only you should edit CUDA_COMPUTE_CAPABILITIES in build-env-tf.sh)
docker build --network='host' -t otbtf:gpu --build-arg BASE_IMG=nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 . docker build --network='host' -t otbtf:gpu --build-arg BASE_IMG=nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04 .
# Build dev with TF and OTB sources (huge image) + set git branch/tags to clone # Build dev with TF and OTB sources (huge image) + set git branches/tags to clone
docker build --network='host' -t otbtf:gpu-dev-full --build-arg BASE_IMG=nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 \ docker build --network='host' -t otbtf:gpu-dev-full --build-arg BASE_IMG=nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04 \
--build-arg KEEP_SRC_OTB=true --build-arg KEEP_SRC_TF=true --build-arg TF=r2.4 --build-arg OTB=develop . --build-arg KEEP_SRC_OTB=true --build-arg KEEP_SRC_TF=true --build-arg TF=nightly --build-arg OTB=develop .
# Build old release # Build old release
docker build --network='host' -t otbtf:oldstable-gpu --build-arg BASE_IMG=nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 \ docker build --network='host' -t otbtf:oldstable-gpu --build-arg BASE_IMG=nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 \
--build-arg TF=r2.1 --build-arg NUMPY_SPEC="<1.19" --build-arg OTB=release-7.2 \ --build-arg TF=r2.1 --build-arg NUMPY_SPEC="<1.19" \
--build-arg BZL_OPTIONS="--noincompatible_do_not_split_linking_cmdline --verbose_failures --remote_cache=http://localhost:9090" . --build-arg BZL_OPTIONS="--noincompatible_do_not_split_linking_cmdline --verbose_failures --remote_cache=http://localhost:9090" .
# You may edit the Dockerfile to clone an old branch of the repo instead of copying files from the build context # You could edit the Dockerfile in order to clone an old branch of the repo instead of copying files from the build context
# Numpy version requirement :
# TF < 2.4 ==> numpy<1.19.0,>=1.16.0
# TF >= 2.4 ==> numpy~=1.19
``` ```
### Debug build ### Debug build
If you fail to build, you can log into the last layer and check CMake logs. If you fail to build, you can log into the last layer and check CMake logs. Run `docker images`, find the latest layer ID and run a tmp container (`docker run -it d60496d9612e bash`).
Run `docker images`, find the latest layer ID and run a tmp container (`docker run -it d60496d9612e bash`).
You may also need to split some multi-command layers in the Dockerfile. You may also need to split some multi-command layers in the Dockerfile.
If you see OOM errors during SuperBuild you should decrease CPU_RATIO (e.g. 0.75). If you see OOM errors during SuperBuild you should decrease CPU_RATIO (e.g. 0.75).
## Container examples ## Container examples
```bash ```bash
# Pull GPU image and create container with your home directory as volume # Pull GPU image and create a new container with your home directory as volume (requires apt package nvidia-docker2 and CUDA>=11.0)
# (requires apt package nvidia-docker2 and CUDA>=11.0)
docker create --gpus=all --volume $HOME:/home/otbuser/volume -it --name otbtf-gpu mdl4eo/otbtf2.1:gpu docker create --gpus=all --volume $HOME:/home/otbuser/volume -it --name otbtf-gpu mdl4eo/otbtf2.1:gpu
# Run interactive # Run interactive
...@@ -83,13 +88,15 @@ docker start -i otbtf-gpu ...@@ -83,13 +88,15 @@ docker start -i otbtf-gpu
# Run in background # Run in background
docker start otbtf-gpu docker start otbtf-gpu
docker exec otbtf-gpu python -c 'import tensorflow as tf; print(tf.test.is_gpu_available())' docker exec otbtf-gpu python -c 'import tensorflow as tf; print(tf.test.is_gpu_available())'
```
# Rebuild OTB with more modules (e.g. otbSelectiveHaralickTextures) ### Rebuild OTB with more modules
```bash
docker create --gpus=all -it --name otbtf-gpu-dev mdl4eo/otbtf2.1:gpu-dev docker create --gpus=all -it --name otbtf-gpu-dev mdl4eo/otbtf2.1:gpu-dev
docker start -i otbtf-gpu-dev docker start -i otbtf-gpu-dev
``` ```
In the container shell:
```bash ```bash
# From the container shell:
sudo -i sudo -i
cd /src/otb/otb/Modules/Remote cd /src/otb/otb/Modules/Remote
git clone https://gitlab.irstea.fr/raffaele.gaetano/otbSelectiveHaralickTextures.git git clone https://gitlab.irstea.fr/raffaele.gaetano/otbSelectiveHaralickTextures.git
...@@ -97,12 +104,21 @@ cd /src/otb/build/OTB/build ...@@ -97,12 +104,21 @@ cd /src/otb/build/OTB/build
cmake -DModule_OTBAppSelectiveHaralickTextures=ON /src/otb/otb && make install -j cmake -DModule_OTBAppSelectiveHaralickTextures=ON /src/otb/otb && make install -j
``` ```
### Container with GUI
## GUI
```bash ```bash
# With GUI (disabled by default): otbgui seems ok but monteverdi (OpenGL) isn't working # GUI is disabled by default in order to save space, and because docker xvfb isn't working properly with OpenGL.
docker build --network='host' -t otbtf:cpu-gui --build-arg BASE_IMG=ubuntu:18.04 --build-arg GUI=true . # => otbgui seems OK but monteverdi isn't working
docker build --network='host' -t otbtf:cpu-gui --build-arg BASE_IMG=ubuntu:20.04 --build-arg GUI=true .
docker create -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=$DISPLAY -it --name otbtf-gui otbtf:cpu-gui docker create -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=$DISPLAY -it --name otbtf-gui otbtf:cpu-gui
docker start -i otbtf-gui docker start -i otbtf-gui
$ mapla $ mapla
``` ```
### Common errors
Build :
`Error response from daemon: manifest for nvidia/cuda:11.0-cudnn8-devel-ubuntu20.04 not found: manifest unknown: manifest unknown`
=> Image is missing from dockerhub
Run :
`failed call to cuInit: UNKNOWN ERROR (303) / no NVIDIA GPU device is present: /dev/nvidia0 does not exist`
=> Nvidia driver is missing or disabled, make sure to add ` --gpus=all` to your docker run or create command
# TF - bazel build env variables ### TF - bazel build env variables
# Optimization is controlled with bazel --config=opt, will set AVX and SSE flags on linux, without '-march=native' (disabled in TF2.4) # As in official TF wheels, you'll need to remove "-march=native" for compatibility with old CPUs (no AVX2)
# Just uncomment CC_OPT_FLAGS or append --copt='-march=native' to the BZL_CONFIGS arg in case you need it export CC_OPT_FLAGS="-march=native -Wno-sign-compare"
#export CC_OPT_FLAGS="-march=native -Wno-sign-compare"
export GCC_HOST_COMPILER_PATH=$(which gcc) export GCC_HOST_COMPILER_PATH=$(which gcc)
export PYTHON_BIN_PATH=$(which python) export PYTHON_BIN_PATH=$(which python)
export PYTHON_LIB_PATH="$($PYTHON_BIN_PATH -c 'import site; print(site.getsitepackages()[0])')" export PYTHON_LIB_PATH="$($PYTHON_BIN_PATH -c 'import site; print(site.getsitepackages()[0])')"
export TF_DOWNLOAD_CLANG=0
export TF_ENABLE_XLA=1 export TF_ENABLE_XLA=1
export TF_NEED_MPI=0 export TF_NEED_COMPUTECPP=0
export TF_NEED_GDR=0 export TF_NEED_GDR=0
export TF_NEED_JEMALLOC=1
export TF_NEED_KAFKA=0 export TF_NEED_KAFKA=0
export TF_NEED_MPI=0
export TF_NEED_OPENCL=0 export TF_NEED_OPENCL=0
export TF_NEED_JEMALLOC=1
export TF_NEED_VERBS=0
export TF_NEED_OPENCL_SYCL=0 export TF_NEED_OPENCL_SYCL=0
export TF_NEED_COMPUTECPP=0 export TF_NEED_VERBS=0
export TF_NEED_ROCM=0
export TF_SET_ANDROID_WORKSPACE=0 export TF_SET_ANDROID_WORKSPACE=0
# We need to set BZL_CONFIGS=" --config=nogcp --config=noaws --config=nohdfs" # For MKL support BZL_CONFIGS+=" --config=mkl"
#export TF_DOWNLOAD_MKL=1
#export TF_NEED_MKL=0
# Needed BZL_CONFIGS=" --config=nogcp --config=noaws --config=nohdfs"
#export TF_NEED_S3=0 #export TF_NEED_S3=0
#export TF_NEED_AWS=0 #export TF_NEED_AWS=0
#export TF_NEED_GCP=0 #export TF_NEED_GCP=0
#export TF_NEED_HDFS=0 #export TF_NEED_HDFS=0
# For MKL support BZL_CONFIGS+=" --config=mkl --config=opt --copt='-mfpmath=both'"
#export TF_DOWNLOAD_MKL=1
#export TF_NEED_MKL=0
# CUDA ## GPU
export TF_NEED_ROCM=0
export TF_NEED_CUDA=0 export TF_NEED_CUDA=0
export CUDA_TOOLKIT_PATH=$(find /usr/local -maxdepth 1 -type d -name 'cuda-*') export CUDA_TOOLKIT_PATH=$(find /usr/local -maxdepth 1 -type d -name 'cuda-*')
if [ ! -z $CUDA_TOOLKIT_PATH ] ; then if [ ! -z $CUDA_TOOLKIT_PATH ] ; then
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_TOOLKIT_PATH/lib64:$CUDA_TOOLKIT_PATH/lib64/stubs" export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_TOOLKIT_PATH/lib64:$CUDA_TOOLKIT_PATH/lib64/stubs"
export TF_CUDA_VERSION=$(echo $CUDA_TOOLKIT_PATH | sed -r 's/.*\/cuda-(.*)/\1/') export TF_CUDA_VERSION=$(echo $CUDA_TOOLKIT_PATH | sed -r 's/.*\/cuda-(.*)/\1/')
export TF_CUDA_CLANG=0
export TF_CUDA_COMPUTE_CAPABILITIES="5.2,6.1,7.0,7.5" export TF_CUDA_COMPUTE_CAPABILITIES="5.2,6.1,7.0,7.5"
export TF_NEED_CUDA=1 export TF_NEED_CUDA=1
export TF_CUDA_CLANG=0
export TF_NEED_TENSORRT=0 export TF_NEED_TENSORRT=0
export CUDNN_INSTALL_PATH="/usr/" export CUDNN_INSTALL_PATH="/usr/"
export TF_CUDNN_VERSION=$(sed -n 's/^#define CUDNN_MAJOR\s*\(.*\).*/\1/p' $CUDNN_INSTALL_PATH/include/cudnn.h) export TF_CUDNN_VERSION=$(sed -n 's/^#define CUDNN_MAJOR\s*\(.*\).*/\1/p' $CUDNN_INSTALL_PATH/include/cudnn.h)
......
#!/bin/bash #!/bin/bash
# Batch several docker ### Docker multibuild and push, see default args and more examples in tools/docker/README.md
# See default args and more examples in tools/docker/README.md
RELEASE=2.1 RELEASE=2.1
UBUNTU=18.04 UBUNTU=20.04
CUDA=11.0 CUDA=11.0.3
CUDNN=8 CUDNN=8
IMG=ubuntu:$UBUNTU IMG=ubuntu:$UBUNTU
GPU_IMG=nvidia/cuda:$CUDA-cudnn$CUDNN-devel-ubuntu$UBUNTU GPU_IMG=nvidia/cuda:$CUDA-cudnn$CUDNN-devel-ubuntu$UBUNTU
# ubuntu20.04 (python3.8) should work but 11.0-cudnn8-devel-ubuntu20.04 is missing from nvidia's docker repo
# Bazel remote cache daemon ## Bazel remote cache daemon
mkdir -p $HOME/.cache/bazel-remote mkdir -p $HOME/.cache/bazel-remote
docker run -d -u 1000:1000 -v $HOME/.cache/bazel-remote:/data -p 9090:8080 buchgr/bazel-remote-cache --max_size=20 docker run -d -u 1000:1000 -v $HOME/.cache/bazel-remote:/data -p 9090:8080 buchgr/bazel-remote-cache --max_size=20
# CPU (no MKL) ### CPU (no MKL)
docker build --network='host' -t mdl4eo/otbtf$RELEASE:cpu --build-arg BASE_IMG=$IMG . #docker build --network='host' -t mdl4eo/otbtf$RELEASE:-cpu-dev-all --build-arg BASE_IMG=$IMG --build-arg KEEP_SRC_OTB=true --build-arg KEEP_SRC_TF=true .
docker build --network='host' -t mdl4eo/otbtf$RELEASE:cpu-dev --build-arg BASE_IMG=$IMG --build-arg KEEP_SRC_OTB=true . docker build --network='host' -t mdl4eo/otbtf$RELEASE:cpu-dev --build-arg BASE_IMG=$IMG --build-arg KEEP_SRC_OTB=true .
# Enable MKL with bazel config flag (tested on CNN : actually slower than a normal CPU build) docker build --network='host' -t mdl4eo/otbtf$RELEASE:cpu --build-arg BASE_IMG=$IMG .
#MKL_CONF="--config=nogcp --config=noaws --config=nohdfs --config=mkl --config=opt --copt='-mfpmath=both' --copt='-march=native'" #docker build --network='host' -t mdl4eo/otbtf$RELEASE:-cpu-gui --build-arg BASE_IMG=$IMG --build-arg GUI=true .
#docker build --network='host' -t mdl4eo/otbtf$RELEASE:cpu --build-arg BASE_IMG=$IMG --build-arg BZL_CONFIG="$MKL_CONF" .
# Keep OTB src and build files in order to rebuild with other modules
#docker build --network='host' -t mdl4eo/otbtf$RELEASE:cpu-dev --build-arg BASE_IMG=$IMG --build-arg BZL_CONFIG="$MKL_CONF" --build-arg KEEP_SRC_OTB=true .
# GPU support is enabled if CUDA is found in /usr/local ### MKL is enabled with bazel config flag
docker build --network='host' -t mdl4eo/otbtf$RELEASE:gpu --build-arg BASE_IMG=$GPU_IMG . #MKL_CONF="--config=nogcp --config=noaws --config=nohdfs --config=mkl --config=opt"
#docker build --network='host' -t mdl4eo/otbtf$RELEASE:-cpu-mkl --build-arg BASE_IMG=$IMG --build-arg BZL_CONFIGS="$MKL_CONF" .
#docker build --network='host' -t mdl4eo/otbtf$RELEASE:-cpu-mkl-dev --build-arg BASE_IMG=$IMG --build-arg BZL_CONFIGS="$MKL_CONF" --build-arg KEEP_SRC_OTB=true .
### GPU support is enabled if CUDA is found in /usr/local
#docker build --network='host' -t mdl4eo/otbtf$RELEASE:-gpu-dev-all --build-arg BASE_IMG=$GPU_IMG --build-arg KEEP_SRC_OTB=true --build-arg KEEP_SRC_TF=true .
docker build --network='host' -t mdl4eo/otbtf$RELEASE:gpu-dev --build-arg BASE_IMG=$GPU_IMG --build-arg KEEP_SRC_OTB=true . docker build --network='host' -t mdl4eo/otbtf$RELEASE:gpu-dev --build-arg BASE_IMG=$GPU_IMG --build-arg KEEP_SRC_OTB=true .
docker build --network='host' -t mdl4eo/otbtf$RELEASE:gpu --build-arg BASE_IMG=$GPU_IMG .
#docker build --network='host' -t mdl4eo/otbtf$RELEASE:-gpu-gui --build-arg BASE_IMG=$GPU_IMG --build-arg GUI=true .
#docker login #docker login
docker push mdl4eo/otbtf$RELEASE:cpu #docker push mdl4eo/otbtf$RELEASE:-cpu-dev-all
docker push mdl4eo/otbtf$RELEASE:cpu-dev docker push mdl4eo/otbtf$RELEASE:cpu-dev
#docker push mdl4eo/otbtf$RELEASE:cpu-gui docker push mdl4eo/otbtf$RELEASE:cpu
#docker push mdl4eo/otbtf$RELEASE:-cpu-gui
#docker push mdl4eo/otbtf$RELEASE:-cpu-mkl
docker push mdl4eo/otbtf$RELEASE:gpu #docker push mdl4eo/otbtf$RELEASE:-gpu-dev-all
docker push mdl4eo/otbtf$RELEASE:gpu-dev docker push mdl4eo/otbtf$RELEASE:gpu-dev
#docker push mdl4eo/otbtf$RELEASE:gpu-gui docker push mdl4eo/otbtf$RELEASE:gpu
#docker push mdl4eo/otbtf$RELEASE:-gpu-gui
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment