Automatic SLURM Build Script for RH/CentOS 7, 8 and 9
Building and installing SLURM is a partially manual process. We have compiled a script which automatically builds and installs SLURM on Red Hat/CentOS 7.x and 8.x (the downloadable version below additionally covers 9.x).
In case you want to install SLURM on Ubuntu, please head over to our Automatic SLURM Build Script for Ubuntu 18.04, 20.04 and 22.04, or follow our guide on installing SLURM on Ubuntu (in WSL). Please see below for a container-based setup of a SLURM cluster as well.
You can execute the script all at once or step by step to see what happens. We recommend running the script as root, but it should also work when executed as a standard user who is able to sudo.
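As a minimal sketch, both ways of running it could look like the following, assuming the script has been saved as SLURM_installation.sh (see the download section below):
# run the whole script in one go as root
sudo bash SLURM_installation.sh
# or execute it as a sudo-capable user with shell tracing enabled
# to follow every command as it runs
bash -x SLURM_installation.sh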
Here is the automatic SLURM build and installation script (the complete script, which also supports RH9, is available for download below):
################################################################################
# Copyright (C) 2019-2022 NI SP GmbH
# All Rights Reserved
#
# info@ni-sp.com / www.ni-sp.com
#
# We provide the information on an as is basis.
# We provide no warranties, express or implied, related to the
# accuracy, completeness, timeliness, useability, and/or merchantability
# of the data and are not liable for any loss, damage, claim, liability,
# expense, or penalty, or for any direct, indirect, special, secondary,
# incidental, consequential, or exemplary damages or lost profit
# deriving from the use or misuse of this information.
################################################################################
# Version v1.1
#
# SLURM Build and Installation script for Redhat/CentOS EL7 and EL8
#
# See also https://www.slothparadise.com/how-to-install-slurm-on-centos-7-cluster/
# https://slurm.schedmd.com/quickstart_admin.html
# https://wiki.fysik.dtu.dk/niflheim/Slurm_installation
# https://slurm.schedmd.com/faq.html
# In case of version 7 "Compute Node" was the base for the installation
# In case of version 8 "Server" was the base for the installation
# check for RH/CentOS Version
OSVERSION="7"
. /etc/os-release
if [[ $VERSION =~ ^8 ]] ; then
OSVERSION="8"
# in case of repo access issues uncomment the following lines
# sudo sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-*
# sudo sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
fi
# SLURM accounting support
sudo yum install mariadb-server mariadb-devel -y
# For all the nodes, before you install Slurm or Munge:
# sudo su -
export MUNGEUSER=966
sudo groupadd -g $MUNGEUSER munge
sudo useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u $MUNGEUSER -g munge -s /sbin/nologin munge
export SLURMUSER=967
sudo groupadd -g $SLURMUSER slurm
sudo useradd -m -c "SLURM workload manager" -d /var/lib/slurm -u $SLURMUSER -g slurm -s /bin/bash slurm
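# optional sanity check: verify that the munge and slurm accounts
# exist with the expected UIDs/GIDs
# id munge
# id slurm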
# exit
# For CentOS 7: need to get the latest EPEL repository.
sudo yum install epel-release -y
if [ "$OSVERSION" == "7" ] ; then
sudo yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm -y
fi
if [ "$OSVERSION" == "8" ] ; then
sudo yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm -y
fi
# install munge
if [ "$OSVERSION" == "7" ] ; then
sudo yum install munge munge-libs munge-devel -y
fi
if [ "$OSVERSION" == "8" ] ; then
sudo yum install munge munge-libs -y
# dnf --enablerepo=PowerTools install munge-devel -y
sudo dnf --enablerepo=powertools install munge-devel -y
fi
sudo yum install rng-tools -y
sudo rngd -r /dev/urandom
sudo /usr/sbin/create-munge-key -r -f
sudo sh -c "dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key"
sudo chown munge: /etc/munge/munge.key
sudo chmod 400 /etc/munge/munge.key
sudo systemctl enable munge
sudo systemctl start munge
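# optional: verify that munge authentication works locally
# munge -n | unmunge
# and, once more nodes exist, against another node (host name is an example):
# munge -n | ssh node01 unmunge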
# build and install SLURM
sudo yum install python3 gcc openssl openssl-devel pam-devel numactl numactl-devel hwloc lua readline-devel ncurses-devel man2html libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch -y
if [ "$OSVERSION" == "7" ] ; then
sudo yum install rrdtool-devel lua-devel hwloc-devel -y
fi
if [ "$OSVERSION" == "8" ] ; then
sudo yum install rpm-build make -y
# dnf --enablerepo=PowerTools install rrdtool-devel lua-devel hwloc-devel -y
sudo dnf --enablerepo=powertools install rrdtool-devel lua-devel hwloc-devel -y
# dnf group install "Development Tools"
fi
mkdir slurm-tmp
cd slurm-tmp
# export VER=20.11.9 # latest 20.11.
# export VER=21.08.5
export VER=22.05.9
# export VER=23.02.2
# https://download.schedmd.com/slurm/slurm-20.02.3.tar.bz2
wget https://download.schedmd.com/slurm/slurm-$VER.tar.bz2
rpmbuild -ta slurm-$VER.tar.bz2 # and wait a few minutes until SLURM has been compiled
rm slurm-$VER.tar.bz2
cd ..
rmdir slurm-tmp
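# optional: list the RPMs that rpmbuild just created
# ls -al ~/rpmbuild/RPMS/x86_64/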
# get perl-Switch
# sudo yum install cpan -y
cd ~/rpmbuild/RPMS/x86_64/
# skipping slurm-openlava and slurm-torque because of missing perl-Switch
sudo yum --nogpgcheck localinstall slurm-[0-9]*.el*.x86_64.rpm slurm-contribs-*.el*.x86_64.rpm slurm-devel-*.el*.x86_64.rpm \
slurm-example-configs-*.el*.x86_64.rpm slurm-libpmi-*.el*.x86_64.rpm \
slurm-pam_slurm-*.el*.x86_64.rpm slurm-perlapi-*.el*.x86_64.rpm slurm-slurmctld-*.el*.x86_64.rpm \
slurm-slurmd-*.el*.x86_64.rpm slurm-slurmdbd-*.el*.x86_64.rpm -y
# create the SLURM default configuration with
# compute nodes called "NodeName=linux[1-32]"
# in a cluster called "cluster"
# and a partition name called "test"
# Feel free to adapt to your needs
HOST=`hostname`
cat << EOF | sudo tee /etc/slurm/slurm.conf
# slurm.conf file generated by configurator easy.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
SlurmctldHost=localhost
#
#MailProg=/bin/mail
MpiDefault=none
#MpiParams=ports=#-#
ProctrackType=proctrack/cgroup
ReturnToService=1
SlurmctldPidFile=/var/run/slurmctld.pid
#SlurmctldPort=6817
SlurmdPidFile=/var/run/slurmd.pid
#SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurm/slurmd
SlurmUser=slurm
#SlurmdUser=root
StateSaveLocation=/var/spool/slurm
SwitchType=switch/none
TaskPlugin=task/affinity
#
#
# TIMERS
#KillWait=30
#MinJobAge=300
#SlurmctldTimeout=120
#SlurmdTimeout=300
#
#
# SCHEDULING
SchedulerType=sched/backfill
SelectType=select/cons_res
SelectTypeParameters=CR_Core
#
#
# LOGGING AND ACCOUNTING
AccountingStorageType=accounting_storage/none
ClusterName=cluster
#JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
#SlurmctldDebug=info
#SlurmctldLogFile=
#SlurmdDebug=info
#SlurmdLogFile=
#
#
# COMPUTE NODES
NodeName=$HOST State=idle Feature=dcv2,other
# NodeName=linux[1-32] CPUs=1 State=UNKNOWN
# NodeName=linux1 NodeAddr=128.197.115.158 CPUs=4 State=UNKNOWN
# NodeName=linux2 NodeAddr=128.197.115.7 CPUs=4 State=UNKNOWN
PartitionName=test Nodes=$HOST Default=YES MaxTime=INFINITE State=UP
# PartitionName=test Nodes=$HOST,linux[1-32] Default=YES MaxTime=INFINITE State=UP
# DefMemPerNode=1000
# MaxMemPerNode=1000
# DefMemPerCPU=4000
# MaxMemPerCPU=4096
EOF
cat << EOF | sudo tee /etc/slurm/cgroup.conf
###
#
# Slurm cgroup support configuration file
#
# See man slurm.conf and man cgroup.conf for further
# information on cgroup configuration parameters
#--
CgroupPlugin=cgroup/v1
CgroupAutomount=yes
ConstrainCores=no
ConstrainRAMSpace=no
EOF
sudo mkdir /var/spool/slurm
sudo chown slurm:slurm /var/spool/slurm
sudo chmod 755 /var/spool/slurm
sudo mkdir /var/spool/slurm/slurmctld
sudo chown slurm:slurm /var/spool/slurm/slurmctld
sudo chmod 755 /var/spool/slurm/slurmctld
sudo mkdir -p /var/spool/slurm/cluster_state
sudo chown slurm:slurm /var/spool/slurm/cluster_state
sudo touch /var/log/slurmctld.log
sudo chown slurm:slurm /var/log/slurmctld.log
sudo touch /var/log/slurm_jobacct.log /var/log/slurm_jobcomp.log
sudo chown slurm: /var/log/slurm_jobacct.log /var/log/slurm_jobcomp.log
# firewall will block connections between nodes so in case of cluster
# with multiple nodes adapt the firewall on the compute nodes
#
# sudo systemctl stop firewalld
# sudo systemctl disable firewalld
# on the master node
#sudo firewall-cmd --permanent --zone=public --add-port=6817/udp
#sudo firewall-cmd --permanent --zone=public --add-port=6817/tcp
#sudo firewall-cmd --permanent --zone=public --add-port=6818/tcp
#sudo firewall-cmd --permanent --zone=public --add-port=7321/tcp
#sudo firewall-cmd --reload
# sync clock on master and every compute node
#sudo yum install ntp -y
#sudo chkconfig ntpd on
#sudo ntpdate pool.ntp.org
#sudo systemctl start ntpd
sudo systemctl enable slurmctld
sudo systemctl enable slurmdbd
# on compute nodes
sudo systemctl enable slurmd.service
sudo systemctl start slurmd.service
echo Sleep for a few seconds for slurmd to come up ...
sleep 2
# on master
sudo systemctl start slurmctld.service
echo Sleep for a few seconds for slurmctld to come up ...
sleep 3
# checking
# sudo systemctl status slurmd.service
# sudo journalctl -xe
# if you experience an error with starting up slurmd.service
# like "fatal: Incorrect permissions on state save loc: /var/spool"
# then you might want to adapt with chmod 777 /var/spool
# more checking
# sudo slurmd -Dvvv -N YOUR_HOSTNAME
# sudo slurmctld -Dvvvvvvvv
# or tracing with sudo strace slurmctld -Dvvvvvvvv
# echo Compute node bugs: tail /var/log/slurmd.log
# echo Server node bugs: tail /var/log/slurmctld.log
# show cluster
echo
echo Output from: \"sinfo\"
sinfo
# sinfo -Nle
echo
echo Output from: \"scontrol show partition\"
scontrol show partition
# show host info as slurm sees it
echo
echo Output from: \"slurmd -C\"
slurmd -C
# in case host is in drain status
# scontrol update nodename=$HOST state=idle
echo
echo Output from: \"scontrol show nodes\"
scontrol show nodes
# If jobs are running on the node:
# scontrol update nodename=$HOST state=resume
# lets run our first job
echo
echo Output from: \"srun hostname\"
srun hostname
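# optional: submit a small batch job as a further test
# (test_job.sh is just an example file name)
# echo -e '#!/bin/bash\nsrun hostname' > test_job.sh
# sbatch test_job.sh
# squeue
# cat slurm-<jobid>.out   # default output file, check after the job has finished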
# if there are issues in scheduling
# turn on debugging
# sudo scontrol setdebug 6 # or up to 9
# check the journal
# journalctl -xe
# turn off debugging
# sudo scontrol setdebug 3
# scontrol
# scontrol: show node $HOST
# scontrol show jobs
# scontrol update NodeName=ip-172-31-23-216 State=RESUME
# scancel JOB_ID
# srun -N5 /bin/hostname
# after changing the configuration ("v" for debug level):
# scontrol reconfigure -vvvv
# to show resources in a nice comprehensive way
# sinfo -o "%30N %10c %10m %25f %10G "
#
# more resources
# https://slurm.schedmd.com/quickstart.html
# https://slurm.schedmd.com/quickstart_admin.html
#
The automatic SLURM build and installation script for EL7, EL8 and EL9 and CentOS/Rocky derivatives can be downloaded here: SLURM_installation.sh. You can simply run the following steps on your RH/CentOS/Rocky master:
#
# Automatic SLURM build and installation script for EL7, EL8 and EL9 and derivatives
#
sudo yum install wget -y
wget --no-check-certificate https://www.ni-sp.com/wp-content/uploads/2019/10/SLURM_installation.sh
# set the desired version in case
export VER=20.11.9 # latest 20.11
# export VER=21.08.5
# export VER=22.05.02
bash SLURM_installation.sh
# wait a couple of minutes
# and test your SLURM installation yourself
sinfo
# see above for more SLURM commands and their output
You can download pre-compiled RPMs for EL7 and EL8 here (you can basically start the script above at “cd ~/rpmbuild/RPMS/x86_64/” after extracting the tarball and setting up mariadb and munge; see the sketch after the list below):
- SLURM Version 20.02.3
- SLURM Version 20.11.0
- SLURM Version 21.08.6
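As a minimal sketch, assuming the tarball unpacks the RPMs into the current directory (the file name below is only an example, adapt it to the version you downloaded), the remaining steps could look like this:
tar xjf slurm-21.08.6-el8-rpms.tar.bz2   # example file name
sudo yum --nogpgcheck localinstall slurm-[0-9]*.el*.x86_64.rpm \
     slurm-slurmctld-*.el*.x86_64.rpm slurm-slurmd-*.el*.x86_64.rpm \
     slurm-slurmdbd-*.el*.x86_64.rpm slurm-perlapi-*.el*.x86_64.rpm -y
# then continue with the slurm.conf creation and the systemctl steps from the script above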
In case you are interested in HPC in the Cloud head over to our overview article HPC in the Cloud – Pros and Cons.
Build RPMs only for RH/CentOS
In case you want to build only the RPMs, here is the script for EL7:
sudo yum install epel-release -y
sudo yum install python3 gcc openssl openssl-devel pam-devel numactl \
numactl-devel hwloc lua readline-devel ncurses-devel man2html \
libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch \
rrdtool-devel lua-devel hwloc-devel munge munge-libs munge-devel \
mariadb-server mariadb-devel -y
mkdir slurm-tmp
cd slurm-tmp
# export VER=20.11.8 # latest 20.11
# export VER=21.08.6
export VER=22.05.9
# export VER=23.02.2
wget https://download.schedmd.com/slurm/slurm-$VER.tar.bz2
rpmbuild -ta slurm-$VER.tar.bz2
echo Your RPMs are at $HOME/rpmbuild/RPMS/x86_64:
ls -al $HOME/rpmbuild/RPMS/x86_64
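Once the RPMs are built, you will typically copy them to the other nodes of the cluster; a minimal sketch, assuming a compute node called node01 (an example host name) that only needs the slurmd side:
# copy the freshly built RPMs to a compute node and install the slurmd parts there
scp $HOME/rpmbuild/RPMS/x86_64/slurm-*.rpm node01:
ssh -t node01 'sudo yum --nogpgcheck localinstall slurm-[0-9]*.el*.x86_64.rpm \
    slurm-slurmd-*.el*.x86_64.rpm slurm-perlapi-*.el*.x86_64.rpm -y'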
And here is the automatic RPM builder for EL8:
sudo yum install epel-release -y
sudo yum install dnf-plugins-core -y
sudo yum config-manager --set-enabled powertools
# in case of repo access issues
# sudo sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-*
# sudo sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
sudo yum install --enablerepo=powertools python3 gcc openssl \
openssl-devel pam-devel numactl wget make numactl-devel \
hwloc lua readline-devel ncurses-devel man2html \
libibmad libibumad rpm-build perl-ExtUtils-MakeMaker.noarch \
rrdtool-devel lua-devel hwloc-devel munge munge-libs munge-devel \
mariadb-server mariadb-devel -y
mkdir slurm-tmp
cd slurm-tmp
# export VER=20.11.8 # latest 20.11
# export VER=21.08.6
export VER=22.05.9
# export VER=23.02.2
wget https://download.schedmd.com/slurm/slurm-$VER.tar.bz2
rpmbuild -ta slurm-$VER.tar.bz2
ls -al $HOME/rpmbuild/RPMS/x86_64
echo Your RPMs are at $HOME/rpmbuild/RPMS/x86_64
Automatic SLURM Build Script for Ubuntu 18.04, 20.04 and 22.04
Similar to the build and install script for the latest version of SLURM shown above, we have created an automatic SLURM Build and Install Script for Ubuntu. On your Ubuntu master you can simply run the following steps:
#
# Automatic SLURM Build Script for Ubuntu 18.04, 20.04 and 22.04
#
wget https://www.ni-sp.com/wp-content/uploads/2019/10/SLURM_Ubuntu_installation.sh
# set the desired version in case
# export VER=20.11.8 # latest 20.11
# export VER=21.08.6
# export VER=20.11.9
export VER=22.05.9
# export VER=23.02.2
bash SLURM_Ubuntu_installation.sh
# wait a couple of minutes
# and test your SLURM installation yourself
sinfo
# see above for more SLURM commands and their output
SLURM Cluster in Docker Containers
SciDAS has created an easy-to-use container-based SLURM setup to jump-start a small SLURM cluster. The automatic container build creates two SLURM compute workers with OpenMPI integration as well as a controller and a database container, as shown in the architecture diagram on the GitHub page.

Here is an overview of what the straightforward installation on Ubuntu looks like, with input from the GitHub page:
> git clone https://github.com/SciDAS/slurm-in-docker
Cloning into 'slurm-in-docker'...
remote: Enumerating objects: 549, done.
remote: Total 549 (delta 0), reused 0 (delta 0), pack-reused 549
Receiving objects: 100% (549/549), 144.72 KiB | 682.00 KiB/s, done.
Resolving deltas: 100% (310/310), done.
# BEGIN - install docker in case not yet done - in our case for Ubuntu
> sudo apt-get install -y apt-transport-https \
ca-certificates curl gnupg-agent \
software-properties-common make
> curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
> sudo add-apt-repository \
"deb [arch=amd64] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) \
stable"
> sudo apt-get update
> sudo apt-get install -y docker-ce docker-ce-cli containerd.io
> sudo apt-get install -y docker-compose
> sudo groupadd docker
> sudo usermod -aG docker $USER
# END of Docker installation
# You might need to log out and log in again to activate the docker group access rights
#
# Create the SLURM 19.05.1 containers (SLURM version can be adapted)
#
> cd slurm-in-docker/
> make # building will take some minutes
# ... lots of output ;) .............................
> docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
scidas/slurm.database 19.05.1 035a7fb27574 3 days ago 828MB
scidas/slurm.worker 19.05.1 6faf0d7804f7 3 days ago 1.31GB
scidas/slurm.controller 19.05.1 e2445edbad54 3 days ago 1.31GB
scidas/slurm.base 19.05.1 668e97c1fb7b 3 days ago 805MB
scidas/slurm.rpms 19.05.1 8b5682048fee 3 days ago 885MB
centos 7 7e6257c9f8d8 6 weeks ago 203MB
krallin/centos-tini 7 748636d1c058 16 months ago 226MB
> docker-compose up -d # start the environment
Creating network "slurmindocker_slurm" with the default driver
Creating controller ...
Creating controller ... done
Creating worker01 ...
Creating database ...
Creating worker02 ...
Creating worker01
Creating database
Creating worker02 ... done
> docker exec -ti controller sinfo -lN
NODELIST NODES PARTITION STATE CPUS S:C:T MEMORY TMP_DISK WEIGHT AVAIL_FE REASON
worker01 1 docker* idle 1 1:1:1 1800 0 1 (null) none
worker02 1 docker* idle 1 1:1:1 1800 0 1 (null) none
> docker exec -ti controller srun -N 2 hostname
worker02
worker01
> docker exec -ti controller srun --mpi=list
srun: MPI types are...
srun: pmi2
srun: openmpi
srun: none
> docker exec -ti controller ompi_info
# ......... OpenMPI info output .......
# Test OpenMPI
> cat > home/worker/mpi_hello.c << EOF
/******************************************************************************
* * FILE: mpi_hello.c
* * DESCRIPTION: MPI tutorial example code: Simple hello world program
* * AUTHOR: Blaise Barney
* * LAST REVISED: 03/05/10
* ******************************************************************************/
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#define MASTER 0
int main (int argc, char *argv[]) {
int numtasks, taskid, len;
char hostname[MPI_MAX_PROCESSOR_NAME];
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
MPI_Get_processor_name(hostname, &len);
printf ("Hello from task %d on %s!\n", taskid, hostname);
if (taskid == MASTER)
printf("MASTER: Number of MPI tasks is: %d\n",numtasks);
//while(1) {}
MPI_Finalize();
}
EOF
> docker exec -ti worker01 mpicc mpi_hello.c -o mpi_hello.out
> docker exec -ti worker01 srun -N 2 --mpi=openmpi mpi_hello.out
Hello from task 1 on worker02!
Hello from task 0 on worker01!
MASTER: Number of MPI tasks is: 2
# disable message about missing openib in case with the following setting
# docker exec -ti worker01 bash -c "export \
# OMPI_MCA_btl_base_warn_component_unused=0; srun -N 2 --mpi=openmpi mpi_hello.out"
# login to a worker container
> docker exec -ti worker01 bash
# and finally shutdown the SLURM container environment
> sh teardown.sh
# docker-compose stop
# docker-compose rm -f
# docker volume rm slurmindocker_home slurmindocker_secret
# docker network rm slurmindocker_slurm
In case the controller constantly restarts with messages like
sacctmgr: error: Malformed RPC of type PERSIST_RC(1433) received
sacctmgr: error: slurm_persist_conn_open: Failed to unpack persistent connection init resp message from database:6819 :
you can tear down the environment, remove the generated keys and secrets and bring it up again:
sh teardown.sh
rm -rf home/worker/.ssh/*
sudo rm -rf secret/*
docker-compose up -d
Have a look at our other technical guides related to the high-end remote desktop software NICE DCV and the EnginFrame HPC and session management portal. If there are any questions, let us know.
Commercial Support for SLURM
Our experienced technical team offers professional SLURM support for commercial and academic customers. We help you solve issues with your SLURM installation via email, phone and web conference. In case you are interested, let us know at info@ni-sp.com or via our contact form.