#!/bin/bash

# Create SLURM_HOSTFILE, with one extra task on the head node

# 2022-07-05: sbatch --distribution=arbitrary is no longer allowed as of slurm >= 21

# This file is meant to be sourced: it exports/unsets variables in the calling
# shell. All work happens inside a short-lived helper function so that scratch
# variables are local and nothing but the SLURM_* changes reach the caller.
#
# Reads:   SLURM_JOB_NODELIST, SLURM_JOB_GPUS, SLURM_NTASKS_PER_NODE
# Writes:  SLURM_HOSTFILE (exported, or unset when the rewrite is skipped/fails),
#          SLURM_DISTRIBUTION=arbitrary (exported),
#          SLURM_NTASKS_PER_NODE (unset on success)
# Outputs: one status line on stdout on success; diagnostics on stderr
# Returns: 0 when the hostfile was set up or the rewrite was skipped,
#          1 when the host list or the temporary file could not be produced
_slurm_hostfile_setup() {
  # Drop the helper's definition immediately so it never lingers in the
  # caller's shell; the body that is already running is unaffected.
  unset -f _slurm_hostfile_setup

  local tasks_per_node=${SLURM_NTASKS_PER_NODE:-}
  local skip=0

  # Don't bother unless nodes have been allocated and they have GPUs, and
  # unless ntasks-per-node is an odd integer of at least 2. The 10# prefix
  # forces base 10 so a zero-padded value such as "08" is not read as octal.
  if [[ -z ${SLURM_JOB_NODELIST:-} || -z ${SLURM_JOB_GPUS:-} ]]; then
    skip=1
  elif [[ ! $tasks_per_node =~ ^[0-9]+$ ]]; then
    skip=1
  elif (( 10#$tasks_per_node < 2 || 10#$tasks_per_node % 2 == 0 )); then
    skip=1
  fi
  if (( skip )); then
    # Make sure no stale hostfile setting survives; unset is a no-op (and
    # succeeds) when the variable is not set.
    unset SLURM_HOSTFILE
    return 0
  fi
  tasks_per_node=$(( 10#$tasks_per_node ))

  # Expand the node list into individual host names.
  local host_output
  if ! host_output=$(scontrol show hostname "$SLURM_JOB_NODELIST"); then
    printf 'SLURM hostfile: cannot expand node list "%s"\n' \
      "$SLURM_JOB_NODELIST" >&2
    unset SLURM_HOSTFILE
    return 1
  fi

  local -a hosts=()
  local host
  while read -r host; do
    [[ -n $host ]] && hosts+=("$host")
  done <<<"$host_output"

  if (( ${#hosts[@]} == 0 )); then
    printf 'SLURM hostfile: node list "%s" expanded to no hosts\n' \
      "$SLURM_JOB_NODELIST" >&2
    unset SLURM_HOSTFILE
    return 1
  fi

  local hostfile
  if ! hostfile=$(mktemp --suffix .SLURM_JOB_NODELIST); then
    printf 'SLURM hostfile: mktemp failed\n' >&2
    unset SLURM_HOSTFILE
    return 1
  fi

  # Single node: list it ntasks-per-node times.
  # Several nodes: list the head node once up front, then every node
  # (head included) ntasks-per-node minus one times, so the head node ends up
  # with exactly one more entry than each of the others.
  # The loop is driven by the hosts actually returned above rather than by
  # SLURM_JOB_NUM_NODES, so the file can never contain blank entries.
  local repeat=$tasks_per_node
  local i
  if ! {
    if (( ${#hosts[@]} > 1 )); then
      printf '%s\n' "${hosts[0]}"
      repeat=$(( tasks_per_node - 1 ))
    fi
    for host in "${hosts[@]}"; do
      for (( i = 0; i < repeat; i++ )); do
        printf '%s\n' "$host"
      done
    done
  } > "$hostfile"; then
    printf 'SLURM hostfile: cannot write %s\n' "$hostfile" >&2
    rm -f -- "$hostfile"
    unset SLURM_HOSTFILE
    return 1
  fi

  # All conditions met, set hostfile and distribution, unset ntasks per node
  export SLURM_HOSTFILE=$hostfile
  export SLURM_DISTRIBUTION=arbitrary
  echo "unsetting SLURM_NTASKS_PER_NODE (was $SLURM_NTASKS_PER_NODE) and setting SLURM_DISTRIBUTION=arbitrary"
  unset SLURM_NTASKS_PER_NODE
}

_slurm_hostfile_setup