Skip to content
GitLab
Projects
Groups
Topics
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
hpc
docker-cluster
Compare revisions
effa81106aab2e60d081ab03718f16515148f5a0 to a5dcc6001f51710dfe4cf04b8243a19265ae00c9
Commits on Source (2)
Add `docker-compose.yml`
· ff15e1e9
Alberto Miranda
authored
Nov 09, 2023
ff15e1e9
Add Dockerfiles
· a5dcc600
Alberto Miranda
authored
Nov 09, 2023
a5dcc600
Hide whitespace changes
Inline
Side-by-side
docker-compose.yml
0 → 100644
View file @
a5dcc600
# Compose file format version; quoted so that it is read as a string.
version: "3.6"
# Template shared by the compute-node-like services: they pull it in with
# `<<: *compute-node` and override only the keys that differ.
# NOTE(review): several bind mounts below use absolute paths under
# /home/amiranda on the host, so they only resolve on that machine.
x-compute-node: &compute-node
  image: slurm-docker-cluster-node:${IMAGE_TAG:-21.08.6}
  build:
    context: slurm-docker-cluster-node
    args:
      SLURM_TAG: ${SLURM_TAG:-slurm-21-08-6-1}
      SHARED_USER_NAME: ${SHARED_USER_NAME:-user}
      SHARED_USER_UID: ${SHARED_USER_UID:-1000}
      SHARED_GROUP_NAME: ${SHARED_GROUP_NAME:-user}
      SHARED_GROUP_GID: ${SHARED_GROUP_GID:-1000}
  command:
    - slurmd
  # --- systemd: temporary file systems ---
  tmpfs:
    - /tmp
    - /run
  volumes:
    # --- systemd: cgroups (read-only) ---
    - /sys/fs/cgroup:/sys/fs/cgroup:ro
    # --- munge: configuration files ---
    - etc_munge:/etc/munge
    # --- munge: key file used for authentication ---
    - ./volumes/etc_munge/munge.key:/etc/munge/munge.key
    # --- slurm: configuration files ---
    - ./volumes/etc_slurm:/etc/slurm
    # --- slurm: source code (for debugging) ---
    - /home/amiranda/var/projects/slurm:/slurm
    # --- slurm: job directory ---
    - slurm_jobdir:/data
    # --- scord: install prefix ---
    - ./volumes/scord_prefix:/scord_prefix
    # --- scord: ld.so.conf.d entry ---
    - ./volumes/ld.so.conf.d/libscord.conf:/etc/ld.so.conf.d/libscord.conf
    # --- cargo: install prefix and ld.so.conf.d entry ---
    - ./volumes/cargo_prefix:/cargo_prefix
    - ./volumes/ld.so.conf.d/libcargo.conf:/etc/ld.so.conf.d/libcargo.conf
    # --- container entrypoint script ---
    - ./volumes/docker-entrypoint.sh:/usr/local/bin/docker-entrypoint.sh
    # --- user home and project checkouts ---
    - ./volumes/user_home:/home/amiranda
    - /home/amiranda/var/projects/scord/repo:/home/amiranda/repo
    - /home/amiranda/var/projects/cargo/repo:/home/amiranda/cargo/repo
  networks:
    - slurm_cluster
  expose:
    - '6818'
  depends_on:
    - slurmctld
  # Lets the container reach the host machine through the
  # `host.docker.internal` DNS name
  extra_hosts:
    - 'host.docker.internal:host-gateway'
  cap_add:
    - SYS_PTRACE
services:
  # # DNS proxy server to allow containers to update the host's DNS configuration
  # # This allows the host to resolve the containers' hostnames
  # dns:
  #   image: defreitas/dns-proxy-server:latest
  #   container_name: dns
  #   volumes:
  #     - /var/run/docker.sock:/var/run/docker.sock
  #     - /etc/resolv.conf:/etc/resolv.conf
  #   environment:
  #     - MG_LOG_LEVEL=ERROR
  #   networks:
  #     - slurm_cluster

  # Database server where Slurm will keep accounting information
  mysql:
    image: mariadb:10.10
    hostname: mysql
    container_name: mysql
    environment:
      MYSQL_RANDOM_ROOT_PASSWORD: "yes"
      MYSQL_DATABASE: slurm_acct_db
      MYSQL_USER: slurm
      MYSQL_PASSWORD: password
    volumes:
      - var_lib_mysql:/var/lib/mysql
    networks:
      - slurm_cluster

  # Slurm DBD daemon
  slurmdbd:
    image: slurm-docker-cluster:${IMAGE_TAG:-21.08.6}
    build:
      context: slurm-docker-cluster
      args:
        SLURM_TAG: ${SLURM_TAG:-slurm-21-08-6-1}
    command: ["slurmdbd"]
    container_name: slurmdbd
    hostname: slurmdbd
    volumes:
      # munge configuration files
      - etc_munge:/etc/munge
      # munge key file (for authentication)
      - ./volumes/etc_munge/munge.key:/etc/munge/munge.key
      # slurm config files
      - ./volumes/etc_slurm:/etc/slurm
      # slurm source code (for debugging)
      - /home/amiranda/var/projects/slurm:/slurm
      # slurm job directory
      - slurm_jobdir:/data
    networks:
      - slurm_cluster
    expose:
      - "6819"
    depends_on:
      - mysql

  # Slurm controller daemon.
  # It has no `build` section of its own; it uses the same image name that
  # the slurmdbd service builds.
  slurmctld:
    image: slurm-docker-cluster:${IMAGE_TAG:-21.08.6}
    command: ["slurmctld"]
    container_name: slurmctld
    hostname: slurmctld
    volumes:
      # munge configuration files
      - etc_munge:/etc/munge
      # munge key file (for authentication)
      - ./volumes/etc_munge/munge.key:/etc/munge/munge.key
      # slurm config files
      - ./volumes/etc_slurm:/etc/slurm
      # slurm source code (for debugging)
      - /home/amiranda/var/projects/slurm:/slurm
      # slurm job directory
      - slurm_jobdir:/data
    networks:
      slurm_cluster:
        ipv4_address: 192.18.0.129
    expose:
      - "6817"
    depends_on:
      - "slurmdbd"

  # Login node.
  # Starts from the compute-node template; the keys written out below replace
  # the template's values wholesale (YAML merge keys do not deep-merge lists),
  # which is why the full volume list is repeated here with the sshd
  # configuration added.
  login:
    <<: *compute-node
    hostname: login.docker.cluster
    container_name: login
    command: ["sshd"]
    tmpfs:
      #################### systemd #############################################
      # temporary file systems
      - /tmp
      - /run
    volumes:
      #################### systemd #############################################
      # cgroups
      - /sys/fs/cgroup:/sys/fs/cgroup:ro
      #################### MUNGE ###############################################
      # munge configuration files
      - etc_munge:/etc/munge
      # munge key file (for authentication)
      - ./volumes/etc_munge/munge.key:/etc/munge/munge.key
      #################### SLURM ###############################################
      # slurm config files
      - ./volumes/etc_slurm:/etc/slurm
      # slurm source code (for debugging)
      - /home/amiranda/var/projects/slurm:/slurm
      # slurm job directory
      - slurm_jobdir:/data
      #################### SSHD ################################################
      # sshd configuration files
      - ./volumes/etc_ssh:/etc/ssh
      #################### SCORD ###############################################
      # scord install prefix
      - ./volumes/scord_prefix:/scord_prefix
      # scord ld.so.conf.d
      - ./volumes/ld.so.conf.d/libscord.conf:/etc/ld.so.conf.d/libscord.conf
      #################### CARGO ###############################################
      - ./volumes/cargo_prefix:/cargo_prefix
      - ./volumes/ld.so.conf.d/libcargo.conf:/etc/ld.so.conf.d/libcargo.conf
      #################### ENTRYPOINT ##########################################
      # same host-side script that the compute-node template mounts
      - ./volumes/docker-entrypoint.sh:/usr/local/bin/docker-entrypoint.sh
      #################### USER ################################################
      - ./volumes/user_home:/home/amiranda
      - /home/amiranda/var/projects/scord/repo:/home/amiranda/repo
      - /home/amiranda/var/projects/cargo/repo:/home/amiranda/cargo/repo
    networks:
      slurm_cluster:
        ipv4_address: 192.18.0.128

  # Compute nodes: each one is the compute-node template plus its own name
  # and a static address on the cluster network.
  c1:
    <<: *compute-node
    hostname: c1
    container_name: c1
    networks:
      slurm_cluster:
        ipv4_address: 192.18.0.10

  c2:
    <<: *compute-node
    hostname: c2
    container_name: c2
    networks:
      slurm_cluster:
        ipv4_address: 192.18.0.11

  c3:
    <<: *compute-node
    hostname: c3
    container_name: c3
    networks:
      slurm_cluster:
        ipv4_address: 192.18.0.12

  c4:
    <<: *compute-node
    hostname: c4
    container_name: c4
    networks:
      slurm_cluster:
        ipv4_address: 192.18.0.13
# Named volumes used to persist data (all with the default configuration)
volumes:
  etc_munge: {}
  # etc_slurm:
  slurm_jobdir: {}
  var_lib_mysql: {}
# Bridge network with a fixed subnet; the services that set `ipv4_address`
# take their static addresses from this range.
# NOTE(review): 192.18.0.0/24 is not one of the RFC 1918 private ranges --
# confirm that this subnet is intentional.
networks:
  slurm_cluster:
    name: slurm_cluster
    driver: bridge
    ipam:
      driver: default
      config:
        - subnet: '192.18.0.0/24'
slurm-docker-cluster-node/Dockerfile
0 → 100644
View file @
a5dcc600
# Dockerfile for building scord in rockylinux:8
# NOTE(review): a later step enables the `crb` repository, which looks like
# the EL9 repository id (EL8 names it `powertools`) -- confirm which release
# the base image is actually built on.
FROM slurm-docker-cluster:21.08.6

# DEPS_SOURCES:      directory the dependency sources are cloned into
# DEPS_INSTALL_PATH: prefix the dependencies are installed under
# (written as key=value; the whitespace-separated `ENV key value` form is a
# legacy syntax that Docker discourages)
ENV DEPS_SOURCES=/deps
ENV DEPS_INSTALL_PATH=/usr
# Install everything that is available as a distribution package:
# enable the extra repositories first, then the build tools and the
# dependency libraries, and finally drop the dnf caches.
RUN set -ex \
    && dnf makecache \
    && dnf update -y \
    && dnf install -y dnf-plugins-core \
    && dnf config-manager --set-enabled crb \
    && dnf install -y epel-release \
    && dnf install -y \
        # build tools and utilities
        automake \
        bind-utils \
        cmake \
        diffutils \
        file \
        gcc \
        gcc-c++ \
        git \
        libstdc++ \
        libstdc++-static \
        libtool \
        make \
        ninja-build \
        # dependencies
        boost-devel \
        boost-openmpi-devel \
        hiredis-devel \
        json-c-devel \
        libconfig-devel \
        libfabric \
        libfabric-devel \
        openmpi \
        openmpi-devel \
    && dnf clean all \
    && rm -rf /var/cache/dnf
### # Install openmpi
### RUN set -ex \
### && cd \
### && mkdir deps \
### && cd deps \
### && wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.6.tar.bz2 \
### && tar xvfj openmpi-4.1.6.tar.bz2 \
### && cd openmpi-4.1.6 \
### && mkdir build \
### && cd build \
### && ../configure \
### --prefix=${DEPS_INSTALL_PATH} \
### --with-slurm \
### --with-pmix=external \
### --disable-silent-rules \
### --enable-builtin-atomics \
### --enable-mpi-cxx \
### --without-ucx \
### --without-hcol \
### && make -j8 all \
### && make install \
### && dnf install -y boost-openmpi-devel
### # TODO: remove openmpi-4 dir
# Download and install dependencies.
# All sources are cloned into ${DEPS_SOURCES}; each project is then built in a
# throw-away `build` directory and installed under ${DEPS_INSTALL_PATH}.
# cargo is only cloned in this step, it is not built here.
RUN set -ex \
    && export LD_LIBRARY_PATH=${DEPS_INSTALL_PATH}/lib:${DEPS_INSTALL_PATH}/lib64 \
    && export PKG_CONFIG_PATH=${DEPS_INSTALL_PATH}/lib/pkgconfig:${DEPS_INSTALL_PATH}/lib64/pkgconfig \
    && mkdir -p ${DEPS_SOURCES} \
    # ---- fetch sources ----
    && pushd ${DEPS_SOURCES} \
    # && git clone https://github.com/ofiwg/libfabric --recurse-submodules \
    && git clone https://github.com/pmodels/argobots --recurse-submodules \
    && git clone https://github.com/mercury-hpc/mercury --recurse-submodules \
    && git clone https://github.com/mochi-hpc/mochi-margo --recurse-submodules \
    && git clone https://github.com/sewenew/redis-plus-plus --recurse-submodules \
    && git clone https://github.com/francielizanon/agios --recurse-submodules \
    && git clone https://github.com/USCiLab/cereal --recurse-submodules \
    && git clone https://github.com/mochi-hpc/mochi-thallium --recurse-submodules \
    && git clone https://storage.bsc.es/gitlab/hpc/cargo.git \
    && popd \
    # ---- argobots (autotools, debug flags) ----
    && pushd ${DEPS_SOURCES}/argobots \
    && ./autogen.sh \
    && mkdir build \
    && pushd build \
    && CFLAGS="-ggdb3 -O0" ../configure --prefix=${DEPS_INSTALL_PATH} \
    && make -j install \
    && popd \
    && rm -rf build \
    && popd \
    # ---- libfabric (disabled: the distribution package is installed instead) ----
    # && pushd ${DEPS_SOURCES}/libfabric \
    # && git checkout v1.14.0rc3 \
    # && ./autogen.sh \
    # && mkdir build \
    # && pushd build \
    # && CFLAGS="-ggdb3 -O0" \
    # ../configure \
    # --prefix=${DEPS_INSTALL_PATH} \
    # --disable-psm3 \
    # && make install -j \
    # && popd \
    # && rm -rf build \
    # && cd \
    # ---- mercury (cmake, Debug) ----
    && pushd ${DEPS_SOURCES}/mercury \
    && mkdir build \
    && pushd build \
    && cmake \
        -DMERCURY_USE_SELF_FORWARD:BOOL=ON \
        -DBUILD_TESTING:BOOL=ON \
        -DMERCURY_USE_BOOST_PP:BOOL=ON \
        -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \
        -DBUILD_SHARED_LIBS:BOOL=ON \
        -DNA_USE_OFI:BOOL=ON \
        -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
        -DCMAKE_BUILD_TYPE:STRING=Debug \
        .. \
    && make -j install \
    && popd \
    && rm -rf build \
    && popd \
    # ---- mochi-margo (autotools, debug flags) ----
    && pushd ${DEPS_SOURCES}/mochi-margo \
    && ./prepare.sh \
    && mkdir build \
    && pushd build \
    && CFLAGS="-ggdb3 -O0" ../configure --prefix=${DEPS_INSTALL_PATH} \
    && make -j install \
    && popd \
    && rm -rf build \
    && popd \
    # ---- redis-plus-plus (cmake, Debug) ----
    && pushd ${DEPS_SOURCES}/redis-plus-plus \
    && mkdir build \
    && pushd build \
    && cmake \
        -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \
        -DCMAKE_BUILD_TYPE:STRING=Debug \
        .. \
    && make -j install \
    && popd \
    && rm -rf build \
    && popd \
    # ---- agios (cmake, Debug; built from the `development` branch) ----
    && pushd ${DEPS_SOURCES}/agios \
    && git checkout development \
    && mkdir build \
    && pushd build \
    && cmake \
        -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \
        -DCMAKE_BUILD_TYPE:STRING=Debug \
        .. \
    && make -j install \
    && popd \
    && rm -rf build \
    && popd \
    # ---- cereal (cmake, Debug; docs, sandbox and tests switched off) ----
    && pushd ${DEPS_SOURCES}/cereal \
    && mkdir build \
    && pushd build \
    && cmake \
        -DCMAKE_BUILD_TYPE:STRING=Debug \
        -DBUILD_DOC:BOOL=OFF \
        -DBUILD_SANDBOX:BOOL=OFF \
        -DBUILD_TESTS:BOOL=OFF \
        -DSKIP_PERFORMANCE_COMPARISON:BOOL=ON \
        -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \
        .. \
    && make -j install \
    && popd \
    && rm -rf build \
    && popd \
    # ---- mochi-thallium (cmake, Debug) ----
    && pushd ${DEPS_SOURCES}/mochi-thallium \
    && mkdir build \
    && pushd build \
    && cmake \
        -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \
        -DCMAKE_BUILD_TYPE:STRING=Debug \
        .. \
    && make -j install \
    && popd \
    && rm -rf build \
    && popd \
    # the sources are kept on purpose so that they are available for debugging
    && chmod a+rwx ${DEPS_SOURCES}