Automatic SLURM Build and Installation Script

Building and installing SLURM is a partially manual process. We have compiled a script that automatically builds and installs SLURM on Red Hat/CentOS 7.x and 8.x.

In case you want to install SLURM on Ubuntu, you can follow our guide on installing SLURM on Ubuntu (in WSL). See below for a container-based setup of a SLURM cluster.

You can execute the script all at once or step by step to see what happens. We recommend running it as root, but it also works when executed by a standard user who is able to sudo.
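For example, a minimal sketch (assuming you have saved the script as SLURM_installation.sh, as offered for download below): run it in one go with command tracing enabled so that every step is echoed before it is executed, and keep a log for later inspection.

# run the whole script at once; -x echoes every command before it is executed
sudo bash -x SLURM_installation.sh 2>&1 | tee slurm_install.log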

Here is the automatic SLURM build and installation script (it is also available for download below):

################################################################################
# Copyright (C) 2019-2020 NI SP GmbH
# All Rights Reserved
#
# info@ni-sp.com / www.ni-sp.com
#
# We provide the information on an as is basis.
# We provide no warranties, express or implied, related to the
# accuracy, completeness, timeliness, useability, and/or merchantability
# of the data and are not liable for any loss, damage, claim, liability,
# expense, or penalty, or for any direct, indirect, special, secondary,
# incidental, consequential, or exemplary damages or lost profit
# deriving from the use or misuse of this information.
################################################################################
# Version v1.1
#
# SLURM 20.02.3 Build and Installation script for Redhat/CentOS EL7 and EL8
#
# See also https://www.slothparadise.com/how-to-install-slurm-on-centos-7-cluster/
# https://slurm.schedmd.com/quickstart_admin.html
# https://wiki.fysik.dtu.dk/niflheim/Slurm_installation
# https://slurm.schedmd.com/faq.html

# For version 7, the "Compute Node" software selection was the base for the installation
# For version 8, the "Server" software selection was the base for the installation

# SLURM accounting support
sudo yum install mariadb-server mariadb-devel -y
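# Note (a sketch, not performed by this script): installing MariaDB alone does not
# enable accounting; for real accounting you would additionally start the database
# and configure slurmdbd, along the lines of:
#   sudo systemctl enable mariadb
#   sudo systemctl start mariadb
#   # plus an /etc/slurm/slurmdbd.conf and AccountingStorageType=accounting_storage/slurmdbd in slurm.conf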

# For all the nodes, before you install Slurm or Munge:

# check for RH/CentOS Version
OSVERSION="7"
if [ "`hostnamectl | grep Kernel | grep el8`" != "" ] ; then
  OSVERSION="8"
fi

# sudo su -
export MUNGEUSER=966
sudo groupadd -g $MUNGEUSER munge
sudo useradd  -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u $MUNGEUSER -g munge  -s /sbin/nologin munge
export SLURMUSER=967
sudo groupadd -g $SLURMUSER slurm
sudo useradd  -m -c "SLURM workload manager" -d /var/lib/slurm -u $SLURMUSER -g slurm  -s /bin/bash slurm
# exit

# Get the latest EPEL repository (provides munge and some build dependencies).
sudo yum install epel-release -y
if [ "$OSVERSION" == "7" ] ; then
    sudo yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm -y
fi
if [ "$OSVERSION" == "8" ] ; then
    sudo yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm -y
fi

# install munge
if [ "$OSVERSION" == "7" ] ; then
    sudo yum install munge munge-libs munge-devel -y
fi
if [ "$OSVERSION" == "8" ] ; then
    sudo yum install munge munge-libs  -y
    sudo dnf --enablerepo=PowerTools install munge-devel -y
fi
sudo yum install rng-tools -y
sudo rngd -r /dev/urandom

sudo /usr/sbin/create-munge-key -r -f

sudo sh -c  "dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key"
sudo chown munge: /etc/munge/munge.key
sudo chmod 400 /etc/munge/munge.key

sudo systemctl enable munge
sudo systemctl start munge
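# optional sanity check (assumes munge started cleanly): encode and decode a credential locally
# munge -n | unmunge
# for a multi-node cluster, copy the identical /etc/munge/munge.key to every node
# and restart munge there before starting the SLURM daemons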

# build and install SLURM
sudo yum install python3 gcc openssl openssl-devel pam-devel numactl numactl-devel hwloc lua readline-devel ncurses-devel man2html libibmad libibumad rpm-build  perl-ExtUtils-MakeMaker.noarch -y
if [ "$OSVERSION" == "7" ] ; then
    sudo yum install rrdtool-devel lua-devel hwloc-devel -y
fi
if [ "$OSVERSION" == "8" ] ; then
    sudo yum install rpm-build make -y
    sudo dnf --enablerepo=PowerTools install rrdtool-devel lua-devel hwloc-devel -y
    # dnf group install "Development Tools"
fi

mkdir slurm-tmp
cd slurm-tmp
export VER=20.02.3
# https://download.schedmd.com/slurm/slurm-20.02.3.tar.bz2
wget https://download.schedmd.com/slurm/slurm-$VER.tar.bz2

rpmbuild -ta slurm-$VER.tar.bz2    # and wait a few minutes until SLURM has been compiled
# if [ "$OSVERSION" == "7" ] ; then
# fi
# if [ "$OSVERSION" == "8" ] ; then
#     rpm-build -ta slurm-$VER.tar.bz2    # and wait a few minutes until SLURM has been compiled
# fi

rm slurm-$VER.tar.bz2
cd ..
rmdir slurm-tmp

# get perl-Switch
# sudo yum install cpan -y

cd ~/rpmbuild/RPMS/x86_64/

# skipping slurm-openlava and slurm-torque because of missing perl-Switch
sudo yum --nogpgcheck localinstall slurm-[0-9]*.el?.x86_64.rpm slurm-contribs-*.el?.x86_64.rpm slurm-devel-*.el?.x86_64.rpm \
slurm-example-configs-*.el?.x86_64.rpm slurm-libpmi-*.el?.x86_64.rpm  \
slurm-pam_slurm-*.el?.x86_64.rpm slurm-perlapi-*.el?.x86_64.rpm slurm-slurmctld-*.el?.x86_64.rpm \
slurm-slurmd-*.el?.x86_64.rpm slurm-slurmdbd-*.el?.x86_64.rpm -y
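
# optional check that the packages were installed (a quick sketch)
# slurmd -V
# slurmctld -V
# rpm -qa | grep slurm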

# create the SLURM default configuration with
# compute nodes called "NodeName=linux[1-32]"
# in a cluster called "cluster"
# and a partition name called "test"
# Feel free to adapt to your needs
HOST=`hostname`

sudo sh -c "cat > /etc/slurm/slurm.conf" << EOF

# slurm.conf file generated by configurator easy.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
SlurmctldHost=localhost
#
#MailProg=/bin/mail
MpiDefault=none
#MpiParams=ports=#-#
ProctrackType=proctrack/cgroup
ReturnToService=1
SlurmctldPidFile=/var/run/slurmctld.pid
#SlurmctldPort=6817
SlurmdPidFile=/var/run/slurmd.pid
#SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurmd
SlurmUser=slurm
#SlurmdUser=root
StateSaveLocation=/var/spool
SwitchType=switch/none
TaskPlugin=task/affinity
#
#
# TIMERS
#KillWait=30
#MinJobAge=300
#SlurmctldTimeout=120
#SlurmdTimeout=300
#
#
# SCHEDULING
# Obsolete: FastSchedule=1
SchedulerType=sched/backfill
SelectType=select/cons_res
SelectTypeParameters=CR_Core
#
#
# LOGGING AND ACCOUNTING
AccountingStorageType=accounting_storage/none
ClusterName=cluster
#JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
#SlurmctldDebug=info
#SlurmctldLogFile=
#SlurmdDebug=info
#SlurmdLogFile=
#
#
# COMPUTE NODES
NodeName=$HOST State=idle Feature=dcv2,other
# NodeName=linux[1-32] CPUs=1 State=UNKNOWN
# NodeName=linux1 NodeAddr=128.197.115.158 CPUs=4 State=UNKNOWN
# NodeName=linux2 NodeAddr=128.197.115.7 CPUs=4 State=UNKNOWN

PartitionName=test Nodes=$HOST Default=YES MaxTime=INFINITE State=UP
# PartitionName=test Nodes=$HOST,linux[1-32] Default=YES MaxTime=INFINITE State=UP

# DefMemPerNode=1000
# MaxMemPerNode=1000
# DefMemPerCPU=4000
# MaxMemPerCPU=4096

EOF

sudo sh -c "cat > /etc/slurm/cgroup.conf" << EOF
###
#
# Slurm cgroup support configuration file
#
# See man slurm.conf and man cgroup.conf for further
# information on cgroup configuration parameters
#--
CgroupAutomount=yes

ConstrainCores=no
ConstrainRAMSpace=no

EOF


sudo mkdir /var/spool/slurmctld
sudo chown slurm:slurm /var/spool/slurmctld
sudo chmod 755 /var/spool/slurmctld
sudo mkdir -p /var/spool/slurm/cluster_state
sudo chown slurm:slurm /var/spool/slurm/cluster_state
sudo touch /var/log/slurmctld.log
sudo chown slurm:slurm /var/log/slurmctld.log
sudo touch /var/log/slurm_jobacct.log /var/log/slurm_jobcomp.log
sudo chown slurm: /var/log/slurm_jobacct.log /var/log/slurm_jobcomp.log

# firewall will block connections between nodes so in case of cluster
# with multiple nodes adapt the firewall on the compute nodes
#
# sudo systemctl stop firewalld
# sudo systemctl disable firewalld

# on the master node
#sudo firewall-cmd --permanent --zone=public --add-port=6817/udp
#sudo firewall-cmd --permanent --zone=public --add-port=6817/tcp
#sudo firewall-cmd --permanent --zone=public --add-port=6818/tcp
#sudo firewall-cmd --permanent --zone=public --add-port=7321/tcp
#sudo firewall-cmd --reload

# sync clock on master and every compute node
#sudo yum install ntp -y
#sudo chkconfig ntpd on
#sudo ntpdate pool.ntp.org
#sudo systemctl start ntpd

sudo systemctl enable slurmctld
sudo systemctl enable slurmdbd

# on compute nodes
sudo systemctl enable slurmd.service
sudo systemctl start slurmd.service

echo Sleep for a few seconds for slurmd to come up ...
sleep 3

# on master
sudo chmod 777 /var/spool   # hack for now as otherwise slurmctld is complaining
sudo systemctl start slurmctld.service

echo Sleep for a few seconds for slurmctld to come up ...
sleep 3

# checking
# sudo systemctl status slurmd.service
# sudo journalctl -xe

# if you experience an error with starting up slurmd.service
# like "fatal: Incorrect permissions on state save loc: /var/spool"
# then you might want to adapt with chmod 777 /var/spool

# more checking
# sudo slurmd -Dvvv -N YOUR_HOSTNAME
# sudo slurmctld -Dvvvvvvvv
# or tracing with sudo strace slurmctld -Dvvvvvvvv

# echo Compute node bugs: tail /var/log/slurmd.log
# echo Server node bugs: tail /var/log/slurmctld.log

# show cluster
echo
echo Output from: \"sinfo\"
sinfo

# sinfo -Nle
echo
echo Output from: \"scontrol show partition\"
scontrol show partition

# show host info as slurm sees it
echo
echo Output from: \"slurmd -C\"
slurmd -C
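
# typical "slurmd -C" output (values depend on your hardware, shown here only for illustration):
# NodeName=yourhost CPUs=4 Boards=1 SocketsPerBoard=1 CoresPerSocket=4 ThreadsPerCore=1 RealMemory=7821
# this line can be used as the basis for the NodeName= entry in slurm.conf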

# in case host is in drain status
# scontrol update nodename=$HOST state=idle

echo
echo Output from: \"scontrol show nodes\"
scontrol show nodes

# If jobs are running on the node:
# scontrol update nodename=$HOST state=resume

# let's run our first job
echo
echo Output from: \"srun hostname\"
srun hostname
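
# optionally also submit a first batch job (a small sketch, file name is arbitrary):
# cat > /tmp/hello.job << 'EOS'
# #!/bin/bash
# #SBATCH --job-name=hello
# #SBATCH --output=/tmp/hello-%j.out
# srun hostname
# EOS
# sbatch /tmp/hello.job
# squeue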

# if there are issues in scheduling
# turn on debugging
#    sudo scontrol setdebug 6   # or up to 9
# check the journal
#    journalctl -xe
# turn off debugging
#    sudo scontrol setdebug 3

# scontrol
# scontrol: show node $HOST

# scontrol show jobs
# scontrol update NodeName=ip-172-31-23-216 State=RESUME
# scancel JOB_ID
# srun -N5 /bin/hostname
# after changing the configuration:
#   scontrol reconfigure
#
# more resources
# https://slurm.schedmd.com/quickstart.html
# https://slurm.schedmd.com/quickstart_admin.html
#

The SLURM build and installation script can be downloaded here as well: SLURM_installation.sh.

You can download pre-compiled RPMs for EL7 and EL8 here (after extracting the tarball and setting up mariadb and munge, you can basically resume the script above at “cd ~/rpmbuild/RPMS/x86_64/”):
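A possible way to resume, as a sketch (the archive name below is only a placeholder for whatever file you downloaded):

tar xzf slurm-rpms-el8.tar.gz                 # placeholder name for the downloaded tarball
mkdir -p ~/rpmbuild/RPMS/x86_64/
cp slurm-*.rpm ~/rpmbuild/RPMS/x86_64/        # put the RPMs where the script expects them
cd ~/rpmbuild/RPMS/x86_64/
# ...then continue with the "sudo yum --nogpgcheck localinstall ..." step from the script above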

SLURM Cluster in Docker Containers

SciDAS has created a nice SLURM-in-containers setup. The automated container build creates two SLURM workers with OpenMPI integration as well as a controller and a database container, as shown in the architecture diagram on the GitHub page.

Here is an overview of how the straightforward installation looks, based on the instructions from the GitHub page:

> git clone https://github.com/SciDAS/slurm-in-docker
Cloning into 'slurm-in-docker'...
remote: Enumerating objects: 549, done.
remote: Total 549 (delta 0), reused 0 (delta 0), pack-reused 549
Receiving objects: 100% (549/549), 144.72 KiB | 682.00 KiB/s, done.
Resolving deltas: 100% (310/310), done.
# BEGIN - install docker in case not yet done - in our case for Ubuntu
> sudo apt-get install -y apt-transport-https \
    ca-certificates curl gnupg-agent \
    software-properties-common 
> curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
> sudo add-apt-repository \
   "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
   $(lsb_release -cs) \
   stable"
> sudo apt-get update
> sudo apt-get install -y docker-ce docker-ce-cli containerd.io 
> sudo apt-get install -y docker-compose
> sudo groupadd docker
> sudo usermod -aG docker $USER
# END of Docker installation
# You might need to log out and log in again to activate the docker group membership
#
# Create the SLURM 19.05.1 containers (SLURM version can be adapted) 
#
> cd slurm-in-docker/
> make                  # building will take some minutes 
# ... lots of output ;) .............................
> docker images
REPOSITORY                      TAG                 IMAGE ID            CREATED             SIZE
scidas/slurm.database           19.05.1             035a7fb27574        3 days ago          828MB
scidas/slurm.worker             19.05.1             6faf0d7804f7        3 days ago          1.31GB
scidas/slurm.controller         19.05.1             e2445edbad54        3 days ago          1.31GB
scidas/slurm.base               19.05.1             668e97c1fb7b        3 days ago          805MB
scidas/slurm.rpms               19.05.1             8b5682048fee        3 days ago          885MB
centos                          7                   7e6257c9f8d8        6 weeks ago         203MB
krallin/centos-tini             7                   748636d1c058        16 months ago       226MB
> docker-compose up -d  # start the environment 
Creating network "slurmindocker_slurm" with the default driver
Creating controller ...
Creating controller ... done
Creating worker01 ...
Creating database ...
Creating worker02 ...
Creating worker01
Creating database
Creating worker02 ... done
> docker exec -ti controller sinfo -lN
NODELIST   NODES PARTITION       STATE CPUS    S:C:T MEMORY TMP_DISK WEIGHT AVAIL_FE REASON
worker01       1   docker*        idle    1    1:1:1   1800        0      1   (null) none
worker02       1   docker*        idle    1    1:1:1   1800        0      1   (null) none
> docker exec -ti controller srun -N 2 hostname
worker02
worker01
> docker exec -ti controller srun --mpi=list
srun: MPI types are...
srun: pmi2
srun: openmpi
srun: none
> docker exec -ti controller ompi_info 
# ......... OpenMPI info output .......
# Test OpenMPI
> cat > home/worker/mpi_hello.c << EOF
/******************************************************************************
 * * FILE: mpi_hello.c
 * * DESCRIPTION: MPI tutorial example code: Simple hello world program
 * * AUTHOR: Blaise Barney
 * * LAST REVISED: 03/05/10
 * ******************************************************************************/
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#define  MASTER 0

int main (int argc, char *argv[]) {
   int   numtasks, taskid, len;
   char hostname[MPI_MAX_PROCESSOR_NAME];

   MPI_Init(&argc, &argv);
   MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
   MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
   MPI_Get_processor_name(hostname, &len);

   printf ("Hello from task %d on %s!\n", taskid, hostname);

   if (taskid == MASTER)
      printf("MASTER: Number of MPI tasks is: %d\n",numtasks);

   //while(1) {}

   MPI_Finalize();
}
EOF
> docker exec -ti worker01 mpicc mpi_hello.c -o mpi_hello.out
> docker exec -ti worker01 srun -N 2 --mpi=openmpi mpi_hello.out
Hello from task 1 on worker02!
Hello from task 0 on worker01!
MASTER: Number of MPI tasks is: 2
# to suppress the warning about the missing openib component, if it appears, use the following setting
# docker exec -ti worker01 bash -c "export \
# OMPI_MCA_btl_base_warn_component_unused=0; srun -N 2 --mpi=openmpi mpi_hello.out"
# login to a worker container
> docker exec -ti worker01 bash
# and finally shutdown the SLURM container environment
> sh teardown.sh
# docker-compose stop
# docker-compose rm -f
# docker volume rm slurmindocker_home slurmindocker_secret
# docker network rm slurmindocker_slurm

In case the controller constantly restarts with messages like

sacctmgr: error: Malformed RPC of type PERSIST_RC(1433) received
sacctmgr: error: slurm_persist_conn_open: Failed to unpack persistent connection init resp message from database:6819 :

tear down the environment, remove the generated secrets, and start it again:

sh teardown.sh
rm -rf home/worker/.ssh/*
sudo rm -rf secret/*
docker-compose up -d

Have a look at our other technical guides related to NICE DCV and the EnginFrame HPC and session management portal. If you have any questions, please let us know.