pbs2slurm

pbs2slurm carries out some simple transformations on PBS batch scripts. It is meant as a tool to simplify the process of migrating to Slurm. However, transformed scripts should always be inspected manually to ensure correctness.

Basic usage:
usage: pbs2slurm [-h] [--shell SHELL] [--version] [pbs_script]

Translates PBS batch script to Slurm.

The PBS script is split into
- a shebang line
- a header containing #PBS directives, comments, and empty lines
- the body of the script

pbs2slurm carries out 3 transformation steps
- if no shebang line was present in the PBS script, a new one is added. By 
  default this is #! /bin/bash, but this can be changed (see below)
- #PBS directives in the header are translated, where possible, to #SBATCH 
  directives.
- common PBS environment variables in the body are translated to their SLURM 
  equivalents

Please be sure to manually go over translated scripts to ensure their 
correctness.

If no input file is specified, pbs2slurm reads from stdin. The translated script 
is written to stdout.

Examples:
    pbs2slurm < pbs_script > slurm_script
    pbs2slurm pbs_script > slurm_script
    pbs2slurm -s /bin/zsh pbs_script > slurm_script

See also https://hpc.cit.nih.gov/docs/pbs2slurm.html.

positional arguments:
  pbs_script

optional arguments:
  -h, --help            show this help message and exit
  --shell SHELL, -s SHELL
                        Shell to insert if shebang line (#! ...) is missing.
                        Defaults to '/bin/bash'
  --version, -v
Test cases

The following table shows the various transformations that the tool currently does with a short explanation.

PBS script SLURM script
Plain bash scripts remain unchanged
#!/bin/bash
set -e
set -o pipefail

module load fastqc
cd /data/$USER/test_data
fastqc -d /scratch -f fastq --noextract some.fastq.gz
#!/bin/bash
set -e
set -o pipefail

module load fastqc
cd /data/$USER/test_data
fastqc -d /scratch -f fastq --noextract some.fastq.gz
Change PBS_O_WORKDIR to SLURM_SUBMIT_DIR
#!/bin/bash
set -e
set -o pipefail

cd $PBS_O_WORKDIR
echo ${PBS_O_WORKDIR}
module load fastqc
cd /data/$USER/test_data
fastqc -d /scratch -f fastq --noextract some.fastq.gz
#!/bin/bash
set -e
set -o pipefail

cd $SLURM_SUBMIT_DIR
echo ${SLURM_SUBMIT_DIR}
module load fastqc
cd /data/$USER/test_data
fastqc -d /scratch -f fastq --noextract some.fastq.gz
Change PBS_JOBID to SLURM_JOB_ID
#!/bin/bash
set -e
set -o pipefail

module load fastxtoolkit
cd /data/$USER/test_data
echo "Job $PBS_JOBID starting" > logfile
zcat some.fq.gz \
  | tr '.' 'N' \
  | fastx_artifacts_filter  \
  | fastx_clipper -a AGATCGGAAGAGC  \
  | fastq_quality_trimmer -t 20 -l 10 -z  \
  > some.clean.fq.gz
echo "Job $PBS_JOBID done" >> logfile
#!/bin/bash
set -e
set -o pipefail

module load fastxtoolkit
cd /data/$USER/test_data
echo "Job $SLURM_JOB_ID starting" > logfile
zcat some.fq.gz \
  | tr '.' 'N' \
  | fastx_artifacts_filter  \
  | fastx_clipper -a AGATCGGAAGAGC  \
  | fastq_quality_trimmer -t 20 -l 10 -z  \
  > some.clean.fq.gz
echo "Job $SLURM_JOB_ID done" >> logfile
Change PBS_ARRAY_INDEX to SLURM_ARRAY_TASK_ID
#! /bin/bash
set -e
set -o pipefail

module load fastqc
cd /data/$USER/test_data
module load bowtie/1.1.1 samtools/1.2
gunzip -c sample${PBS_ARRAY_INDEX}.fastq.gz \
   | bowtie --sam --best --strata --all -m1 -n2 \
       --threads=10 /path/to/genome/index -  \
   | samtools view -Sb -F4 - \
   > sample${PBS_ARRAY_INDEX}.bam
#! /bin/bash
set -e
set -o pipefail

module load fastqc
cd /data/$USER/test_data
module load bowtie/1.1.1 samtools/1.2
gunzip -c sample${SLURM_ARRAY_TASK_ID}.fastq.gz \
   | bowtie --sam --best --strata --all -m1 -n2 \
       --threads=10 /path/to/genome/index -  \
   | samtools view -Sb -F4 - \
   > sample${SLURM_ARRAY_TASK_ID}.bam
If there is no shebang line, insert one (bash by default, can be changed)
set -e
set -o pipefail

module load bowtie/1.1.1 samtools/1.2
cd /data/$USER/test_data
#! /bin/bash
set -e
set -o pipefail

module load bowtie/1.1.1 samtools/1.2
cd /data/$USER/test_data
PBS directives in the header are identified and transformed. PBS directives in the body are left unchanged
#! /bin/bash
#some other comment
#PBS -N fastqc_job

#PBS -N other_name
set -e
set -o pipefail

#PBS -N other
module load fastqc
cd /data/$USER/test_data
#! /bin/bash
#some other comment
#SBATCH --job-name="fastqc_job"

#SBATCH --job-name="other_name"
set -e
set -o pipefail

#PBS -N other
module load fastqc
cd /data/$USER/test_data
Change #PBS -N to #SBATCH --job-name
#! /bin/bash
#PBS -N fastqc_job
set -e
set -o pipefail

module load fastqc
#! /bin/bash
#SBATCH --job-name="fastqc_job"
set -e
set -o pipefail

module load fastqc
#PBS -N is dropped if job name is missing
#! /bin/bash
#PBS -N 
set -e
set -o pipefail

module load fastqc
#! /bin/bash

set -e
set -o pipefail

module load fastqc
Change #PBS -M to #SBATCH --mail-user
#! /bin/bash
#PBS -M user@helix.nih.gov
set -e
set -o pipefail

module load fastqc
#! /bin/bash
#SBATCH --mail-user="user@helix.nih.gov"
set -e
set -o pipefail

module load fastqc
#PBS -M is dropped if email address is missing
#! /bin/bash
#PBS -M 
set -e
set -o pipefail

module load fastqc
#! /bin/bash

set -e
set -o pipefail

module load fastqc
If #PBS -M has a list of valid email addresses, pick first one
#! /bin/bash
#PBS -M user@helix.nih.gov,teacher@helix.nih.gov
set -e
set -o pipefail

module load fastqc
#! /bin/bash
#SBATCH --mail-user="user@helix.nih.gov"
set -e
set -o pipefail

module load fastqc
If #PBS -M has a list of email addresses, pick first valid one
#! /bin/bash
#PBS -M user,teacher@helix.nih.gov
set -e
set -o pipefail

module load fastqc
#! /bin/bash
#SBATCH --mail-user="teacher@helix.nih.gov"
set -e
set -o pipefail

module load fastqc
Drop #PBS -m n email modes since this is the default behaviour for Slurm
#! /bin/bash
#PBS -m n

module load bowtie
#! /bin/bash


module load bowtie
Change #PBS -m a to #SBATCH --mail-type=FAIL
#! /bin/bash
#PBS -m a

module load bowtie
#! /bin/bash
#SBATCH --mail-type=FAIL

module load bowtie
Change #PBS -m b to #SBATCH --mail-type=BEGIN
#! /bin/bash
#PBS -m b

module load bowtie
#! /bin/bash
#SBATCH --mail-type=BEGIN

module load bowtie
Change #PBS -m e to #SBATCH --mail-type=END
#! /bin/bash
#PBS -m e

module load bowtie
#! /bin/bash
#SBATCH --mail-type=END

module load bowtie
Change #PBS -m be|eb to #SBATCH --mail-type=BEGIN,END
#! /bin/bash
#PBS -m be

module load bowtie
#! /bin/bash
#SBATCH --mail-type=BEGIN,END

module load bowtie
Change #PBS -m email modes: abe
#! /bin/bash
#PBS -m abe

module load bowtie
#! /bin/bash
#SBATCH --mail-type=BEGIN,END,FAIL

module load bowtie
If #PBS -m contains n in addition to other options, n has precedence since it's Slurm's default
#! /bin/bash
#PBS -m aben

module load bowtie
#! /bin/bash


module load bowtie
#PBS -m is dropped if email mode is missing
#! /bin/bash
#PBS -m 

module load bowtie
#! /bin/bash


module load bowtie
#PBS -k is dropped since it is not necessary for Slurm
#! /bin/bash
#PBS -k oe

set -e
set -o pipefail

module load fastqc
#! /bin/bash


set -e
set -o pipefail

module load fastqc
#PBS -j is dropped since joining stdout and stderr is the default in Slurm
#! /bin/bash
#PBS -j eo

set -e
set -o pipefail

module load fastqc
#! /bin/bash


set -e
set -o pipefail

module load fastqc
Change #PBS -o to #SBATCH --output
#! /bin/bash
#PBS -o /path/to/some/file

set -e
set -o pipefail

module load fastqc
#! /bin/bash
#SBATCH --output=/path/to/some/file

set -e
set -o pipefail

module load fastqc
#PBS -o is dropped if output path is missing
#! /bin/bash
#PBS -o 

set -e
set -o pipefail

module load fastqc
#! /bin/bash


set -e
set -o pipefail

module load fastqc
Change #PBS -e to #SBATCH --error
#! /bin/bash
#PBS -e /path/to/some/file

set -e
set -o pipefail

module load fastqc
#! /bin/bash
#SBATCH --error=/path/to/some/file

set -e
set -o pipefail

module load fastqc
#PBS -e is dropped if error path is missing
#! /bin/bash
#PBS -e

set -e
set -o pipefail

module load fastqc
#! /bin/bash


set -e
set -o pipefail

module load fastqc
Change #PBS -r y to #SBATCH --requeue
#! /bin/bash
#PBS -r y 

set -e
set -o pipefail

module load fastqc
#! /bin/bash
#SBATCH --requeue

set -e
set -o pipefail

module load fastqc
Change #PBS -r n to #SBATCH --no-requeue
#! /bin/bash
#PBS -r n 

set -e
set -o pipefail

module load fastqc
#! /bin/bash
#SBATCH --no-requeue

set -e
set -o pipefail

module load fastqc
#PBS -r is dropped if any other argument is detected or the argument is missing
#! /bin/bash
#PBS -r fnord 

set -e
set -o pipefail

module load fastqc
#! /bin/bash


set -e
set -o pipefail

module load fastqc
#PBS -S is dropped since Slurm uses shebang lines to determine the interpreter
#! /bin/bash
#PBS -S /bin/bash

set -e
set -o pipefail

module load fastqc
#! /bin/bash


set -e
set -o pipefail

module load fastqc
Change #PBS -V to #SBATCH --export=ALL (even though this is the Slurm default)
#! /bin/bash
#PBS -V

set -e
set -o pipefail

module load fastqc
#! /bin/bash
#SBATCH --export=ALL

set -e
set -o pipefail

module load fastqc
Change #PBS -v to #SBATCH --export=
#! /bin/bash
#PBS -v np=300

set -e
set -o pipefail

module load fastqc
#! /bin/bash
#SBATCH --export=np=300

set -e
set -o pipefail

module load fastqc
Change #PBS -v to #SBATCH --export=; remove spaces
#! /bin/bash
#PBS -V
#PBS -v np=300, fnord=1
#PBS -v foo=2

set -e
set -o pipefail

module load fastqc
#! /bin/bash
#SBATCH --export=ALL
#SBATCH --export=np=300,fnord=1
#SBATCH --export=foo=2

set -e
set -o pipefail

module load fastqc
Change #PBS -J to #SBATCH --array
#! /bin/bash
#PBS -J 1-20

set -e
set -o pipefail

module load fastqc
#! /bin/bash
#SBATCH --array=1-20

set -e
set -o pipefail

module load fastqc
#PBS -J is dropped if argument is missing
#! /bin/bash
#PBS -J  

set -e
set -o pipefail

module load fastqc
#! /bin/bash


set -e
set -o pipefail

module load fastqc
The only thing parsed out of PBS -l resource lists is walltime
#! /bin/bash
#PBS -l jobfs=500MB
#PBS -l ncpus=1
#PBS -l nice=19
#PBS -l nodes=1
#PBS -l nodes=150:gige
#PBS -l nodes=1:htown:gige:ppn=8,walltime=60:00:00
#PBS -l nodes=1:ppn=1
#PBS -l nodes=1:ppn=2
#PBS -l nodes=1:ppn=4
#PBS -l software=amber
#PBS -l vmem=500MB
#PBS -l walltime=00:05:0
#PBS -l walltime=00:15:0
#PBS -l walltime=00:30:0
#PBS -l walltime=12:00:00,mem=1000mb
#PBS -l walltime=20:00:00
#PBS -l walltime=24:00:00
#PBS -l walltime=400:00:00,nodes=1:ppn=4,pmem=800mb

set -e
set -o pipefail

module load fastqc
#! /bin/bash





#SBATCH --time=60:00:00





#SBATCH --time=00:05:00
#SBATCH --time=00:15:00
#SBATCH --time=00:30:00
#SBATCH --time=12:00:00
#SBATCH --time=20:00:00
#SBATCH --time=24:00:00
#SBATCH --time=400:00:00

set -e
set -o pipefail

module load fastqc
Drop #PBS -q since there is no reliable, straightforward translation. Please provide the partition on the command line
#! /bin/bash
#PBS -q serial
#PBS -q batch
#PBS -q tracking
#PBS -q normal

set -e
set -o pipefail

module load fastqc
#! /bin/bash





set -e
set -o pipefail

module load fastqc
Complete example 1
#!/bin/csh -v
#PBS -N germline
#PBS -m be
#PBS -k oe

cd $PBS_O_WORKDIR
germline -bits 50 -min_m 1 -err_hom 2  <<EOF
1
CEU.22.map
CEU.22.ped
generated
EOF
    
#!/bin/csh -v
#SBATCH --job-name="germline"
#SBATCH --mail-type=BEGIN,END


cd $SLURM_SUBMIT_DIR
germline -bits 50 -min_m 1 -err_hom 2  <<EOF
1
CEU.22.map
CEU.22.ped
generated
EOF
    
Complete example 2
#! /bin/bash
#PBS -N H3K27me3
#PBS -m e
#PBS -k n
cd ${PBS_O_WORKDIR}

# non redundant
slopBed -i K27me3.bed -g mm9.genome -r 126 -s -l 0  \
    | intersectBed -a stdin -b refseqTss.bed -wa -wb\
    | awk '$2 != c {print; c = $2}'      \
    | cut -f10     \
    | sort -S1G    \
    | uniq -c     \
    | sed -r 's/^ +//;s/ /|/'     \
    > count_data/K27me3.nr.ncount
    
#! /bin/bash
#SBATCH --job-name="H3K27me3"
#SBATCH --mail-type=END

cd ${SLURM_SUBMIT_DIR}

# non redundant
slopBed -i K27me3.bed -g mm9.genome -r 126 -s -l 0  \
    | intersectBed -a stdin -b refseqTss.bed -wa -wb\
    | awk '$2 != c {print; c = $2}'      \
    | cut -f10     \
    | sort -S1G    \
    | uniq -c     \
    | sed -r 's/^ +//;s/ /|/'     \
    > count_data/K27me3.nr.ncount
    
Complete example 3
# This is a sample PBS script. It will 
# request 1 processor on 1 node
# for 4 hours.
#PBS -l nodes=1:ppn=1
#   Request 4 hours of walltime
#PBS -l walltime=4:00:00
#PBS -l pmem=1gb
#   Request that stdout and stderr go
#   to the same file
#PBS -j oe
#
# ======== BODY ========
cd $PBS_O_WORKDIR
echo "Job started on `hostname` at `date`"
./hello
echo "Job Ended at `date`"
    
#! /bin/bash
# This is a sample PBS script. It will 
# request 1 processor on 1 node
# for 4 hours.

#   Request 4 hours of walltime
#SBATCH --time=4:00:00

#   Request that stdout and stderr go
#   to the same file

#
# ======== BODY ========
cd $SLURM_SUBMIT_DIR
echo "Job started on `hostname` at `date`"
./hello
echo "Job Ended at `date`"
    
Complete example 4
#!/bin/bash -l
#PBS -l walltime=8:00:00,nodes=3:ppn=8,pmem=1000mb
#PBS -m abe
#PBS -M sample_email@floyd.edu

cd ~/program_directory
module load intel
module load ompi/intel
mpirun -np 24 program_name < inputfile > outputfile
    
#!/bin/bash -l
#SBATCH --time=8:00:00
#SBATCH --mail-type=BEGIN,END,FAIL
#SBATCH --mail-user="sample_email@floyd.edu"

cd ~/program_directory
module load intel
module load ompi/intel
mpirun -np 24 program_name < inputfile > outputfile