Skip to content

Commit

Permalink
Merge pull request #8 from TACC/process-binding
Browse files Browse the repository at this point in the history
Process binding
  • Loading branch information
Lucas A. Wilson authored Jan 19, 2017
2 parents cea045e + 7aa5ce7 commit b0a8e4d
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 11 deletions.
29 changes: 24 additions & 5 deletions init_launcher
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,30 @@
# The University of Texas at Austin
#------------------------------------------------

# Spawn one background launcher per process slot on this host.
# Each task gets a job-wide unique id: (processes per node * host index) + slot.
last_slot=$(($LAUNCHER_PPN - 1))
for i in $(seq 0 $last_slot)
do
  task_id=$(($LAUNCHER_PPN * $LAUNCHER_HOST_ID + $i))
  # pass_env output is left unquoted so every VAR=value word becomes its own argument to env
  env $($LAUNCHER_DIR/pass_env) LAUNCHER_TSK_ID=$task_id $LAUNCHER_DIR/launcher &
done
# Start LAUNCHER_PPN launcher processes on this host, each with a unique
# LAUNCHER_TSK_ID. When LAUNCHER_BIND is 1 every task also receives a
# LAUNCHER_HWLOC_CMD prefix ("hwloc-bind <obj>:<first>-<last>") that pins it to
# its own contiguous range of LAUNCHER_PUPT objects: PUs when LAUNCHER_BIND_HT
# is 1, cores otherwise.
#
# Unset/empty LAUNCHER_BIND and LAUNCHER_BIND_HT are treated as 0 so the
# integer tests below do not print "[: integer expression expected" errors.
bind_obj=""
if [ "${LAUNCHER_BIND:-0}" -eq 1 ]
then
  if [ "${LAUNCHER_BIND_HT:-0}" -eq 1 ]
  then
    bind_obj="pu"
  else
    bind_obj="core"
  fi
fi

# Objects per task. Fall back to 1 when LAUNCHER_PUPT is missing so the range
# arithmetic below cannot hit a syntax error on an empty operand.
pupt=${LAUNCHER_PUPT:-1}

for i in $(seq 0 $((LAUNCHER_PPN - 1)))
do
  #Start the launcher script for every process to be spawned on this host and give it a unique task id
  tsk_id=$((LAUNCHER_PPN * LAUNCHER_HOST_ID + i))
  if [ -n "$bind_obj" ]
  then
    # First and last object index owned by slot $i
    spu=$((i * pupt))
    epu=$((spu + pupt - 1))
    # pass_env output is left unquoted so each VAR=value word is a separate argument to env
    env $($LAUNCHER_DIR/pass_env) LAUNCHER_HWLOC_CMD="hwloc-bind ${bind_obj}:$spu-$epu" LAUNCHER_TSK_ID=$tsk_id $LAUNCHER_DIR/launcher &
  else
    env $($LAUNCHER_DIR/pass_env) LAUNCHER_TSK_ID=$tsk_id $LAUNCHER_DIR/launcher &
  fi
done


if [ ! "x$LAUNCHER_NPHI" == "x" ]
then
Expand Down
2 changes: 1 addition & 1 deletion launcher
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ while [ $COMPLETE == "false" ]; do

# Run one job command and report how long it took.
# Timestamp (seconds since the epoch) taken just before the command starts.
START_TIME=`date +"%s"`
echo "Launcher: Task $LAUNCHER_TSK_ID running job $LAUNCHER_JID on `hostname` ($CMD)"
# NOTE(review): the next two eval lines look like the pre-change and post-change
# forms of the same statement shown together by the diff view -- confirm that
# only one of them exists in the real file.
# NOTE(review): $CMD is expanded unquoted, so it is word-split and glob-expanded
# before eval re-joins the words -- confirm that is intended.
eval $CMD
# Evaluate the command prefixed with $LAUNCHER_HWLOC_CMD; when that variable is
# empty the unquoted expansion vanishes and only $CMD is evaluated.
eval $LAUNCHER_HWLOC_CMD $CMD
# Timestamp after the command returns; the difference is the elapsed whole seconds.
END_TIME=`date +"%s"`
EXEC_TIME=`expr $END_TIME - $START_TIME`
echo "Launcher: Job $LAUNCHER_JID completed in $EXEC_TIME seconds."
Expand Down
45 changes: 44 additions & 1 deletion paramrun
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,33 @@ if [ "$LAUNCHER_SCHED" == "dynamic" ]; then
fi
fi

# Print how many hwloc objects of type $1 (socket, core or pu) this host has,
# counted as the number of lines printed by `lstopo-no-graphics --only <type>`.
launcher_count_hw_objs() {
  lstopo-no-graphics --only "$1" | wc -l | awk '{print $1}'
}

# Decide how tasks are bound to hardware.
#   Reads:   LAUNCHER_PPN, LAUNCHER_NHOSTS
#   Sets:    num_socks, num_cores, num_threads (left global; the launch summary prints them)
#   Exports: LAUNCHER_BIND_HT (1 = bind to hardware threads, 0 = bind to cores),
#            LAUNCHER_PUPT (objects per task), and, when more processes were
#            requested than there are hardware threads, a reduced LAUNCHER_PPN
#            plus the matching LAUNCHER_NPROCS.
# If the topology cannot be determined, or LAUNCHER_PPN is not positive, binding
# is switched off (LAUNCHER_BIND=0) instead of dividing by zero below.
# NOTE(review): the topology is probed only on the host running this script;
# presumably all hosts of the job are identical -- confirm.
launcher_setup_binding() {
  if ! command -v lstopo-no-graphics >/dev/null 2>&1
  then
    echo "WARNING: lstopo-no-graphics (hwloc) not found. Disabling process binding."
    export LAUNCHER_BIND=0
    return 0
  fi

  num_socks=$(launcher_count_hw_objs socket)
  if [ "$num_socks" -eq 0 ]
  then
    # No socket objects reported: count the host as a single socket
    num_socks=1
  fi
  num_cores=$(launcher_count_hw_objs core)
  num_threads=$(launcher_count_hw_objs pu)

  if [ "$num_cores" -lt 1 ] || [ "$num_threads" -lt 1 ]
  then
    echo "WARNING: Could not determine host topology ($num_cores cores, $num_threads threads). Disabling process binding."
    export LAUNCHER_BIND=0
    return 0
  fi

  if [ "${LAUNCHER_PPN:-0}" -lt 1 ]
  then
    echo "WARNING: Processes per Node (${LAUNCHER_PPN:-unset}) is not a positive number. Disabling process binding."
    export LAUNCHER_BIND=0
    return 0
  fi

  if [ "$LAUNCHER_PPN" -gt "$num_cores" ]
  then
    # More tasks than cores: hand out hardware threads instead
    export LAUNCHER_BIND_HT=1
    if [ "$LAUNCHER_PPN" -gt "$num_threads" ]
    then
      echo "WARNING: Requested Processes per Node ($LAUNCHER_PPN) exceeds number of available threads ($num_threads). Resetting..."
      export LAUNCHER_PPN=$num_threads
      export LAUNCHER_NPROCS=$((LAUNCHER_NHOSTS * LAUNCHER_PPN))
    fi
    pu_per_task=$((num_threads / LAUNCHER_PPN))
  else
    export LAUNCHER_BIND_HT=0
    pu_per_task=$((num_cores / LAUNCHER_PPN))
  fi

  export LAUNCHER_PUPT=$pu_per_task
}

# Unset/empty LAUNCHER_BIND counts as "binding disabled".
if [ "${LAUNCHER_BIND:-0}" -eq 1 ]
then
  launcher_setup_binding
fi

#------------------------------
# Let's finally launch the job
#------------------------------
Expand All @@ -181,13 +208,29 @@ echo " Processes per host: $LAUNCHER_PPN"
# Remainder of the launch summary: job totals, then the optional process-binding
# and Xeon Phi sections. Unset/empty feature flags are treated as 0 so the
# integer tests do not fail with "[: -eq: unary operator expected".
echo " Total processes: $LAUNCHER_NPROCS"
echo " Total jobs: $LAUNCHER_NJOBS"
echo " Scheduling method: $LAUNCHER_SCHED"
if [ "${LAUNCHER_BIND:-0}" -eq 1 ]
then
  echo
  echo "------ Process Binding Enabled ------"
  echo " Sockets per host: $num_socks"
  echo " Cores per host: $num_cores"
  echo " Threads per host: $num_threads"
  # Pick the unit label first so the line is printed with one plain echo
  # (avoids the non-portable `echo -n`).
  if [ "${LAUNCHER_BIND_HT:-0}" -eq 1 ]
  then
    bind_unit="threads (Hyperthreads in use)"
  else
    bind_unit="cores (Hyperthreads ignored)"
  fi
  echo " Binding each task to $LAUNCHER_PUPT $bind_unit"
fi


if [ "${LAUNCHER_USE_PHI:-0}" -eq 1 ]
then
  echo
  echo "--- Intel Xeon Phi Support Enabled ---"
  echo " Cards per host: $LAUNCHER_NPHI"
  echo " Processes per card: $LAUNCHER_PHI_PPN"
  echo
fi
echo
echo "-------------------------------------"
Expand Down
8 changes: 4 additions & 4 deletions pass_env
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#!/bin/bash

# Prints a filtered view of the current environment on a single line: the
# selected lines of `env` output are joined with spaces (tr turns each newline
# into a space).
# NOTE(review): this view appears to interleave the pre-change (PASS_PREFIX
# allow-list) and post-change (EXCLUDE deny-list) versions of the script --
# confirm which lines exist in the real file before editing.

#PASS_PREFIX is a list of variable prefixes that should be passed to launcher tasks on remote hosts
PASS_PREFIX="LAUNCHER TACC ICC GCC LMOD MV2 IMPI PATH LD_LIBRARY_PATH OMP KMP MIC PYTHON"
#EXCLUDE is a list of patterns that should be removed from the environment before passing to launcher tasks on remote hosts
EXCLUDE="BASH_FUNC ModuleTable LS_ LESS SSH_ PE_MPICH MINICOM SLURM_NODELIST"

# Build one "-e ^<entry>" grep argument per list entry; the ^ anchors each
# pattern at the start of an `env` output line.
GREP_ARGS=`
for prefix in $PASS_PREFIX
for prefix in $EXCLUDE
do
echo -n "-e ^$prefix "
done
`

# Allow-list form: keep only the lines that match one of the $GREP_ARGS patterns.
env | grep $GREP_ARGS | tr '\n' ' '
# Deny-list form: drop lines that end in "}", start with a space, end in "="
# (empty value), start with "_", contain "(", or match one of the $GREP_ARGS
# patterns; then drop any remaining line that contains a space.
env | grep -v -e "}$" -e "^ " -e "=$" -e "^_" -e "(" $GREP_ARGS | grep -v -e " " | tr '\n' ' '

0 comments on commit b0a8e4d

Please sign in to comment.