Modified the NAMDjob script to use Slurm's --exclusive flag when the number of cores requested is a multiple of four, so as not to lose the SMP-related performance.
New version of NAMDjob
#!/bin/tcsh -f
#
# NAMDjob <number of cores> <filename of namd script> <log filename>
#
# Lower the scheduling priority of this submission script itself (PID $$);
# renice's output and any error are discarded.
#
renice +19 -p $$ >& /dev/null
#
# Check command line arguments ...
#
# Exactly three arguments are required. On misuse, print the usage text and
# leave with a non-zero status so that a calling script can detect the error
# (a bare 'exit' would return the status of the last echo, i.e. 0).
#
if ( $#argv != 3 ) then
  echo " "
  echo "Usage : NAMDjob <number of cores> <filename of namd script> <log filename>"
  echo " "
  exit 1
endif
#
# ... presence of script
#
# The combined tcsh file inquiry -es is true only when the file exists (-e)
# and has non-zero size (-s). The name is quoted so that a filename with
# blanks is tested as a single word instead of breaking the expression.
# Failure is reported with a non-zero exit status.
#
if ( ! -es "$2" ) then
  echo "Missing file ($2) containing NAMD script ? Abort."
  exit 1
endif
#
# ... write access
#
# Create a probe file named after this shell's PID in the current directory.
# If it does not exist afterwards, the directory is not writable and the
# script stops with a non-zero exit status. The probe is a plain file, so it
# is removed with 'rm -f' (no recursive delete needed).
#
set probe = .test_$$
touch $probe
if ( ! -e $probe ) then
  echo "No write access in current directory ? Abort."
  exit 1
endif
/bin/rm -f $probe
#
# ... number of cores
#
# Step 1: the argument must be an integer. expr returns 2 (or more) when its
# operand is not an integer; status 1 only means that the sum was zero
# (argument -1), which is a valid integer and is rejected by the range test
# below instead.
#
expr "$1" + 1 >& /dev/null
if ( $status > 1 ) then
  echo "Number of cores requested ($1) does not make sense"
  exit 1
endif
#
# Step 2: refuse leading zeros. expr reads such values as decimal, but tcsh
# expressions treat numbers starting with 0 as octal, so "010" would pass
# the range test as 8 and "08" is not a well-formed number at all.
#
if ( "$1" =~ 0?* || "$1" =~ -0?* ) then
  echo "Number of cores requested ($1) does not make sense"
  exit 1
endif
#
# Step 3: accepted range is 1 to 32 cores inclusive.
#
if ( $1 < 1 || $1 > 32 ) then
  echo "Number of cores ($1) outside limits"
  exit 1
endif
#
# If NAMD_THINGS is not defined, set it. Add ++local
# if we run on a single node (fewer than five cores requested).
#
# NAMD_THINGS carries extra arguments that are later placed on the NAMD
# command line. The append is done inside double quotes: an existing value
# made of several words would otherwise be split, and the extra words would
# be taken by 'set' as further variable names.
#
if ( ! $?NAMD_THINGS ) then
  set NAMD_THINGS = ' '
endif
if ( $1 < 5 ) then
  set NAMD_THINGS = "$NAMD_THINGS ++local"
endif
#
# If SLURM_THINGS is not defined, set it. Add --exclusive and -N if number
# of cores multiple of 4 (to make good use of SMP capabilities)
#
# SLURM_THINGS carries extra arguments that are later passed to sbatch.
# When the core count is a multiple of four, one node is requested per four
# cores and the nodes are requested exclusively. The append is done inside
# double quotes so that an existing multi-word value is kept intact instead
# of being split into extra arguments of 'set'.
#
if ( ! $?SLURM_THINGS ) then
  set SLURM_THINGS = ' '
endif
if ( $1 % 4 == 0 ) then
  @ nodes = $1 / 4
  set SLURM_THINGS = "$SLURM_THINGS --exclusive -N$nodes"
endif
#
# Now, prepare the slurm script. The initial fuss is about preparing the
# nodelist file. Take a good look at the sed line (don't you love unix ?-)
# The reason for the sed line is to prepare a nodelist file containing the IP
# addresses of both interfaces for each node: starting at line 3, every
# second line has its 'host 10.0.0' prefix rewritten to 'host 10.0.1'
# (the first~step address form is a GNU sed extension).
#
# The 'if' clause is for the unlikely event where the hosts appear up, but
# they do not respond to ssh. The nodelist must hold the 'group main' line
# plus one host line per requested core; otherwise the generated job stops
# with a non-zero status so that the failure is visible to slurm.
#
# The env variable $NAMD_THINGS is for passing additional arguments to NAMD
#
# The here-document delimiter is unquoted, so $$, $1, $2 and $NAMD_THINGS are
# substituted now, while the file is written. The escaped \$1 and the escaped
# backquotes are written out literally and are only evaluated when the job
# script itself runs.
#
cat > ./.NAMD_$$.slurm << EOF
#!/bin/tcsh -f
echo group main > ./.nodelist_$$
srun hostname -i | sort - | awk '{print "host " \$1}' >> ./.nodelist_$$
sed -i -s '3~2s/host 10\.0\.0/host 10\.0\.1/g' ./.nodelist_$$
if ( \`wc -l < ./.nodelist_$$\` != ($1 + 1) ) then
echo "Something went seriously wrong. Get help."
echo "No job will be submitted."
exit 1
endif
/usr/local/namdtest/charmrun ++nodelist .nodelist_$$ /usr/local/namdtest/namd2 +p$1 $NAMD_THINGS +giga $2
exit
EOF
#
# Submit it
#
# The log filename is quoted so that a name containing blanks reaches sbatch
# as one argument. $SLURM_THINGS is deliberately left unquoted: it holds
# several options in one string and must be split into separate words.
# The script finishes with sbatch's own exit status.
#
sbatch -o "$3" -n$1 $SLURM_THINGS .NAMD_$$.slurm
exit $status