#!/bin/bash
#
# Create SLURM_HOSTFILE, with one extra task on the head node.
#
# This file is meant to be sourced (it modifies the caller's environment).
#
# 2022-07-05: sbatch --distribution=arbitrary is no longer allowed as of
# slurm >= 21, so the arbitrary distribution is requested through the
# environment (SLURM_HOSTFILE + SLURM_DISTRIBUTION) instead.
#
# The hostfile is only generated when ALL of the following hold; otherwise
# SLURM_HOSTFILE is unset and nothing else is touched:
#   * nodes have been allocated          (SLURM_JOB_NODELIST is non-empty)
#   * the nodes have GPUs                (SLURM_JOB_GPUS is non-empty)
#   * SLURM_NTASKS_PER_NODE is an odd integer >= 2
#
# Resulting layout, with N = SLURM_NTASKS_PER_NODE:
#   head node   : N   entries (one extra entry plus N-1 like every node)
#   other nodes : N-1 entries each
# A single-node allocation therefore simply gets N entries for that node.

#######################################
# Build the hostfile and export the arbitrary-distribution settings.
# Globals:
#   SLURM_JOB_NODELIST    (read)
#   SLURM_JOB_GPUS        (read)
#   SLURM_NTASKS_PER_NODE (read; unset on success)
#   SLURM_HOSTFILE        (unset on skip/failure; exported on success)
#   SLURM_DISTRIBUTION    (exported as "arbitrary" on success)
# Arguments:
#   None
# Outputs:
#   One informational line on stdout on success; warnings on stderr.
# Returns:
#   0 when the hostfile was created or the conditions were not met (skip),
#   1 when scontrol/mktemp/the file write failed.
#######################################
__slurm_hostfile_setup() {
  local ntasks hostlist host hostfile j
  local -a hosts=()

  # A hostfile left over from a previous invocation never applies here:
  # either we skip (and it must be gone) or we replace it below.
  unset SLURM_HOSTFILE

  # Don't bother unless nodes have been allocated.
  [[ -n ${SLURM_JOB_NODELIST:-} ]] || return 0

  # Don't bother unless nodes have GPUs.
  [[ -n ${SLURM_JOB_GPUS:-} ]] || return 0

  # Don't bother unless multiple tasks have been allocated and the number
  # of ntasks-per-node is odd. Anything that is not a plain non-negative
  # integer is treated like "not set" rather than fed into arithmetic.
  [[ ${SLURM_NTASKS_PER_NODE:-} =~ ^[0-9]+$ ]] || return 0
  ntasks=$((10#${SLURM_NTASKS_PER_NODE})) # 10# => "08" is not parsed as octal
  ((ntasks >= 2 && ntasks % 2 == 1)) || return 0

  # Expand the compact node list (e.g. "node[01-03]") into one host per line.
  if ! hostlist=$(scontrol show hostname "${SLURM_JOB_NODELIST}"); then
    printf 'warning: scontrol could not expand SLURM_JOB_NODELIST=%s; SLURM_HOSTFILE not set\n' \
      "${SLURM_JOB_NODELIST}" >&2
    return 1
  fi
  while IFS= read -r host; do
    [[ -n ${host} ]] && hosts+=("${host}")
  done <<<"${hostlist}"
  if ((${#hosts[@]} == 0)); then
    printf 'warning: scontrol returned no hosts for SLURM_JOB_NODELIST=%s; SLURM_HOSTFILE not set\n' \
      "${SLURM_JOB_NODELIST}" >&2
    return 1
  fi

  if ! hostfile=$(mktemp --suffix .SLURM_JOB_NODELIST); then
    printf 'warning: mktemp failed; SLURM_HOSTFILE not set\n' >&2
    return 1
  fi

  # One extra entry for the head node, then N-1 entries for every node
  # (head node included). The loop is driven by the hosts scontrol actually
  # returned, not by SLURM_JOB_NUM_NODES, so the file can never contain
  # blank or missing entries if the two disagree.
  if ! {
    printf '%s\n' "${hosts[0]}"
    for host in "${hosts[@]}"; do
      for ((j = 0; j < ntasks - 1; j++)); do
        printf '%s\n' "${host}"
      done
    done
  } >"${hostfile}"; then
    printf 'warning: could not write %s; SLURM_HOSTFILE not set\n' "${hostfile}" >&2
    rm -f -- "${hostfile}"
    return 1
  fi

  # All conditions met, set hostfile and distribution, unset ntasks per node.
  export SLURM_HOSTFILE="${hostfile}"
  export SLURM_DISTRIBUTION=arbitrary
  echo "unsetting SLURM_NTASKS_PER_NODE (was $SLURM_NTASKS_PER_NODE) and setting SLURM_DISTRIBUTION=arbitrary"
  unset SLURM_NTASKS_PER_NODE
  return 0
}

__slurm_hostfile_setup