
Slurm

deploy

Configure the hostname properly on all nodes, prepare /etc/hosts on all nodes, and be careful about the firewall configuration.
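A minimal sketch of that preparation (hostnames and IP addresses here are placeholders, adjust them to the actual cluster):

# run the matching hostnamectl command on each node
hostnamectl set-hostname slurm-control-01

# append the same entries to /etc/hosts on every node
cat >>/etc/hosts <<EOF
172.19.30.198  slurm-control-01
172.19.30.201  slurm-compute-01
172.19.30.202  slurm-compute-02
EOF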

cat >/etc/yum.repos.d/MariaDB.repo <<EOF
[mariadb]
name=MariaDB
baseurl=https://ftp.ubuntu-tw.org/mirror/mariadb/yum/10.5/centos7-amd64
gpgkey=https://ftp.ubuntu-tw.org/mirror/mariadb/yum/RPM-GPG-KEY-MariaDB
gpgcheck=1
EOF
dnf makecache
dnf install -y epel-release
dnf makecache
dnf config-manager --set-enabled powertools

groupadd -g 901 munge
useradd -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -g munge -m -s /sbin/nologin -u 901 munge
groupadd -g 902 slurm
useradd -c "Slurm Workload Manager" -d /var/lib/slurm -g slurm -m -s /bin/bash -u 902 slurm

#https://forums.rockylinux.org/t/slurm-does-not-install-from-the-epel-repo/2832
dnf install -y munge slurm slurm-contribs slurm-perlapi

# login node
dnf install -y slurm-devel slurm-pmi slurm-pmi-devel
# control node
dnf install -y slurm-slurmctld
# database node
dnf install -y mariadb-server slurm-slurmdbd
# compute node
dnf install -y slurm-slurmd slurm-pmi
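The packages above pull in munge, but the shared key still has to be created and distributed by hand; a typical sequence (EL default paths, the compute hostname is a placeholder):

# generate the key once on the control node
dd if=/dev/urandom of=/etc/munge/munge.key bs=1024 count=1
chown munge:munge /etc/munge/munge.key
chmod 0400 /etc/munge/munge.key
# copy the same key to every node (fix ownership/permissions on the receiving side too),
# then enable and start munge everywhere
scp -p /etc/munge/munge.key root@slurm-compute-01:/etc/munge/munge.key
systemctl enable --now munge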

firewall if using both sbatch and srun

# both control node and compute node
systemctl stop firewalld.service

firewall if only using sbatch

Slurm Workload Manager - Network Configuration Guide: "The machines running srun also use a range of ports to be able to communicate with slurmstepd. By default these ports are chosen at random from the ephemeral port range, but you can use SrunPortRange to specify a range of ports from which they can be chosen. This is necessary for login nodes that are behind a firewall."
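If srun from the login nodes does have to pass through a firewall, the SrunPortRange approach from the quoted guide looks roughly like this (the port range is only an example):

# slurm.conf on the login/submit nodes
SrunPortRange=60001-63000

# open the same range on the login-node firewall
firewall-cmd --permanent --zone=public --add-port=60001-63000/tcp
firewall-cmd --reload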

# firewall on slurmctld
# slurmctld for compute node access
firewall-cmd --permanent --zone=public --add-port=6817/tcp
# mariadb access from outer client like workbench
firewall-cmd --permanent --zone=public --add-port=3306/tcp

# firewall on slurmd
# slurmd for control node access
firewall-cmd --permanent --zone=public --add-port=6818/tcp
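The permanent rules above only take effect after a reload; apply and verify with:

firewall-cmd --reload
firewall-cmd --zone=public --list-ports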

slurmrestd

apt install -y libhttp-parser-dev libjson-c-dev libjwt-dev

update the systemd unit file

sudo useradd -M -r -s /usr/sbin/nologin -U slurmrestd

/lib/systemd/system/slurmrestd.service

[Unit]
Description=Slurm REST daemon
After=network-online.target slurmctld.service
Wants=network-online.target
ConditionPathExists=/etc/slurm/slurm.conf

[Service]
Type=simple
EnvironmentFile=-/etc/sysconfig/slurmrestd
EnvironmentFile=-/etc/default/slurmrestd
# slurmrestd should not run as root or the slurm user.
# Please either use the -u and -g options in /etc/sysconfig/slurmrestd or
# /etc/default/slurmrestd, or explicitly set the User and Group in this file
# to an unprivileged user to run as.
User=slurmrestd
Group=slurmrestd
# Default to listen on both socket and slurmrestd port
#ExecStart=/usr/local/sbin/slurmrestd $SLURMRESTD_OPTIONS unix:/usr/local/com/slurmrestd.socket 0.0.0.0:6820
ExecStart=/usr/local/sbin/slurmrestd -a rest_auth/jwt -s openapi/v0.0.39  0.0.0.0:6820
# Enable auth/jwt by default; comment out the line to disable it for slurmrestd
Environment="SLURM_JWT=daemon" "SLURMRESTD_DEBUG=debug"
ExecReload=/bin/kill -HUP $MAINPID

[Install]
WantedBy=multi-user.target
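The unit references an optional environment file; if you prefer the -u/-g route mentioned in the comments above, a possible /etc/sysconfig/slurmrestd (contents are an assumption, only needed if you go that route) would be:

# /etc/sysconfig/slurmrestd
SLURMRESTD_OPTIONS="-u slurmrestd -g slurmrestd"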

prepare the jwt key on the control node (slurmctld) and the database node (slurmdbd) - Slurm Workload Manager - JSON Web Tokens (JWT) Authentication

dd if=/dev/random of=/var/spool/slurm/ctld/jwt_hs256.key bs=32 count=1
chown slurm:slurm /var/spool/slurm/ctld/jwt_hs256.key
chmod 0600 /var/spool/slurm/ctld/jwt_hs256.key

slurm.conf

...
AuthAltTypes=auth/jwt
AuthAltParameters=jwt_key=/var/spool/slurm/ctld/jwt_hs256.key
...

slurmdbd.conf

...
AuthAltTypes=auth/jwt
AuthAltParameters=jwt_key=/var/spool/slurm/ctld/jwt_hs256.key
...

slurmrestd -a list
slurmrestd -s list
unset SLURM_JWT; export $(scontrol token)
curl -H "X-SLURM-USER-TOKEN:$SLURM_JWT" http://172.19.30.198:6820/slurm/v0.0.39/ping

curl -H "X-SLURM-USER-TOKEN:$SLURM_JWT" http://172.19.30.198:6820/slurm/v0.0.39/diag
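Beyond ping/diag, jobs can also be submitted through the REST API; the payload below is only a sketch (the exact field names depend on the API version, so verify them against the spec generated by slurmrestd -s openapi/v0.0.39):

curl -H "X-SLURM-USER-TOKEN:$SLURM_JWT" -H "Content-Type: application/json" \
     -X POST http://172.19.30.198:6820/slurm/v0.0.39/job/submit \
     -d '{"job": {"name": "rest-test", "partition": "debug",
                  "current_working_directory": "/tmp",
                  "environment": ["PATH=/bin:/usr/bin"]},
          "script": "#!/bin/bash\nsrun hostname"}'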

Test Slurm (nodes, jobs)

Submit the first Job - HackMD
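A minimal test.sh for the sbatch example below (the debug partition matches the commands below; the other directives are assumptions):

#!/bin/bash
#SBATCH --job-name=test
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --output=%x-%j.out

srun hostname
srun date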

# test
sinfo
sinfo -N
sinfo -N -o "%20N  %10c  %10m  %25f  %10G "
sinfo -o '%11P %5D %22N %4c %21G %7m %11l'
scontrol show node
scontrol show node slurm_compute_1
scontrol show config
scontrol ping
sacctmgr list cluster

# if test.sh is in the "foo" directory, then "foo" must exist on every compute node
sbatch -p debug test.sh
squeue
scontrol show job <job_id>
sacct
# https://ask.cyberinfrastructure.org/t/how-do-i-get-the-list-of-features-and-resources-of-each-node-in-slurm/201
sacct --jobs={{ job_id }} --format JobID,JobName,Partition,Account,AllocCPUS,State,ExitCode,NodeList%40
scancel <jobid>

# interactive
salloc -N 1 -n 2 -p debug
srun date
srun hostname


# after rebooting a compute node (restart nodes in DOWN state)
# https://slurm.schedmd.com/prolog_epilog.html#failure_handling (drain status)
sinfo -R
scontrol update nodename=slurm-compute-01 state=resume

account and user

references: Slurm Workload Manager - sacctmgr; [slurm-users] SlurmdSpoolDir (/var/spool/slurm/d/ => 755, /var/spool/slurm => 775)

create a user with a password on the login nodes and on the control node where slurmctld runs, for example ben; project_a below is a virtual concept (a Slurm account, not an OS user)

useradd ben
passwd ben
sacctmgr list associations
sacctmgr show assoc
sacctmgr show tres

sacctmgr create account name=project_a fairshare=50
sacctmgr create user name=jimmy cluster=cluster account=project_a partition=debug
sacctmgr delete user name=jimmy cluster=cluster account=project_a
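To tie the OS user ben created above to the same account (the cluster and partition names follow the example commands above):

sacctmgr create user name=ben cluster=cluster account=project_a partition=debug
sacctmgr show assoc where user=ben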

GPU

nvml

dnf config-manager --add-repo=https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
rpm --import https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/D42D0685.pub
dnf module install nvidia-driver:525
systemctl reboot
nvidia-smi

method 1: build rpm - [slurm] rpmbuild with gpu :: HPE CRAY material sharing

dnf install cuda-toolkit
dnf module install nvidia-driver:525
rpmbuild -ta /tmp/slurm-20.11.9.tar.bz2 --with mysql --define "_with_nvml --with-nvml=/usr/local/cuda"
rpm -qlp /root/rpmbuild/RPMS/x86_64/slurm-20.11.9-1.el8.x86_64.rpm | grep nvml
find /usr -type f -name '*nvml*'

method 2: build and install from source (reference: deepops/roles/slurm/tasks/build.yml)

dnf install cuda-toolkit
dnf module install nvidia-driver:525
./configure --prefix=/usr/local --disable-dependency-tracking --disable-debug --disable-x11 --enable-really-no-cray --enable-salloc-kill-cmd --with-hdf5=no --sysconfdir=/etc/slurm --enable-pam --with-pam_dir=/lib64/security --with-shared-libslurm --without-rpath --with-nvml=/usr/local/cuda
make -j$(nproc)
make -j$(nproc) contrib
make -j$(nproc) install
find /usr -type f -name '*nvml*'

nvidia-smi
[root@slurm-compute-01 ~]# nvidia-smi --list-gpus
GPU 0: Tesla T4 (UUID: GPU-2640e36d-125d-89fa-ed7c-ec40de7a44b4)
[root@slurm-compute-01 ~]# 
[root@slurm-compute-01 ~]# nvidia-smi --query-gpu=gpu_name --format=csv,noheader
Tesla T4

[foo@slurm-compute-02 ~]$ nvidia-smi --list-gpus
GPU 0: NVIDIA A100 80GB PCIe (UUID: GPU-a211d1d2-a220-6e1b-890d-c0b25fedcbda)
GPU 1: NVIDIA A100 80GB PCIe (UUID: GPU-9d34191d-1da9-a613-2aad-935778eb6683)
[foo@slurm-compute-02 ~]$ nvidia-smi --query-gpu=gpu_name --format=csv,noheader
NVIDIA A100 80GB PCIe
NVIDIA A100 80GB PCIe
[foo@slurm-compute-02 ~]$
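With NVML support built in, the GPUs above still have to be declared to Slurm; a hedged sketch (node names and GPU counts are taken from the nvidia-smi output above, everything else is an assumption):

# slurm.conf (all nodes)
GresTypes=gpu
NodeName=slurm-compute-01 Gres=gpu:1 State=UNKNOWN
NodeName=slurm-compute-02 Gres=gpu:2 State=UNKNOWN

# gres.conf on each compute node; AutoDetect=nvml uses the NVML library found during the build
AutoDetect=nvml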

run

  • single-node multi-GPU
  • distributed training on multiple GPUs over multiple nodes using Slurm
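A hedged sketch of the multi-node case (torchrun, train.py, the GPU counts, and the partition name are all assumptions, not something this document prescribes):

#!/bin/bash
#SBATCH --job-name=ddp-train
#SBATCH --partition=debug
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=1
#SBATCH --gres=gpu:2
#SBATCH --output=%x-%j.out

# one launcher task per node; torchrun spawns one process per GPU
MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n1)
srun torchrun \
    --nnodes="$SLURM_NNODES" \
    --nproc_per_node=2 \
    --rdzv_backend=c10d \
    --rdzv_endpoint="$MASTER_ADDR:29500" \
    train.py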

DeepOps

use release-23.08 for RHEL 8.8

https://github.com/NVIDIA/deepops/tree/master/docs/slurm-cluster

./scripts/setup.sh

  • environment variable $ID
  • python
    • selinux
      • sudo pip3.8 install selinux
    • virtualenv
      • sudo pip3.8 install virtualenv

ansible-playbook -l slurm-cluster playbooks/slurm-cluster.yml

  • pmix
    • roles/slurm/tasks/pmix.yml
- name: Enable repo
  shell: subscription-manager repos --enable "codeready-builder-for-rhel-8-x86_64-rpms"
  when: ansible_os_family == 'RedHat' and ansible_distribution_major_version == "8"
- name: install pmix 'pandoc' build dependency for RHEL 8
  dnf:
    name: pandoc
    state: present
    enablerepo: powertools
  when: ansible_os_family == 'RedHat' and ansible_distribution_major_version == "8"
  • pyxis
    • roles/galaxy/nvidia.enroot/tasks/redhat.yml
- name: enroot rpm packages
  yum:
    name: "{{ item }}"
    state: "{{ enroot_package_state }}"
    disable_gpg_check: True
  with_items: "{{ enroot_rpm_packages }}"
  environment: "{{proxy_env if proxy_env is defined else {}}}"
  • firewall
    • systemctl status firewalld.service

FAQ