Skip to content

Commit

Permalink
Cleanup create_trajectories.sh with shellcheck (#177)
Browse files Browse the repository at this point in the history
* Remove molname, s/submit/submit_command/
* Don't require r.abin
* Submit commands in background,
optionally prepend with `nohup` if running locally with bash
  • Loading branch information
danielhollas authored May 7, 2024
1 parent b5383db commit 675ae72
Showing 1 changed file with 113 additions and 94 deletions.
207 changes: 113 additions & 94 deletions utils/create_trajectories.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# ---------------------------------------------------------------------------------
# create_trajectories.sh - Generate and execute a set of ABIN simulations
#
# Initial geometries (and optionally velocities) are taken sequentially from XYZ movie files.
# Initial geometries (and optionally velocities) are taken sequentially from a XYZ trajectory file.

# The trajectories are executed and stored in $folder.

Expand All @@ -12,47 +12,72 @@
# abin-randomint PRNG program for generating random seeds, should be in your $PATH.
#---------------------------------------------------------------------------------

#######-----SETUP---#############
irandom0=156863189 # random seed, set negative for random seed based on time
movie=coords.xyz # PATH TO a XYZ movie with initial geometries
veloc=vels.xyz # PATH to XYZ initial velocities (optional)
isample=1 # initial number of traj
nsample=100 # number of trajectories
folder=MP2-NH4 # Name of the folder with trajectories
inputdir=TEMPLATE-$folder # Directory with input files for ABIN
abin_input=$inputdir/input.in # main input file for ABIN
launch_script=$inputdir/r.abin # this is the file that is submitted by qsub
submit="qsub -q nq -cwd " # comment this line if you don't want to submit to queue yet
rewrite=0 # if =1 -> rewrite trajectories that already exist
jobs=20 # number of batch jobs to submit. Trajectories will be distributed accordingly.

molname=$folder # Name of the job in the queue
# Exit if undefined variable is used
set -u

#### SETUP ####
# Path to a XYZ file with initial geometries
movie=coords.xyz
# Path to XYZ file with initial velocities (optional)
# veloc=vels.xyz

# Starting index for the initial geometries
isample=1
# End index
nsample=100

# Folder name where the trajectories will be created
folder=MY_MOLECULE_TRAJS

# Directory with the input files for ABIN (input.in et al)
inputdir=TEMPLATE-$folder

# File with ABIN input parameters, we need this path
# so we can inject random number seed into it.
abin_input=$inputdir/input.in

# Random seed to generate random seeds for individual trajectories
# Set to a negative number for a time-based random seed.
irandom0=156863189

# Specify path to launch script that is normally submitted to the queuing system.
# Comment out this line if you're running locally.
launch_script=$inputdir/r.abin
# If you don't provide a launch script,
# we need a (preferably absolute) path to abin executable
# abin_exe=/path/to/abin

# Comment out this line if you don't want to run calculations yet
# submit_command="qsub -cwd -V -q nq"
# If you don't use a queuing system (like SLURM), use the following line
# submit_command=bash

# Number of batch jobs to submit to queue
# set only if you have more trajectories than jobs
# jobs=20
########## END OF SETUP ##########


# Verify that every given path is an existing regular file.
# Arguments: zero or more file paths. Empty arguments are skipped, so optional
#            settings can be passed as "${var-}" even when they are unset.
# Outputs:   an error message on stdout for the first missing file.
# Exits:     terminates the script with status 1 on the first missing file.
function files_exist {
  # Keep the loop variable local so it does not leak into the caller's scope.
  local f
  for f in "$@"; do
    if [[ -n $f && ! -f $f ]]; then
      echo "ERROR: File '$f' does not exist!"
      exit 1
    fi
  done
}

# Verify that every given path is an existing directory.
# Arguments: zero or more directory paths. Empty arguments are skipped, so
#            optional settings can be passed as "${var-}" even when unset.
# Outputs:   an error message on stdout for the first missing directory.
# Exits:     terminates the script with status 1 on the first missing directory.
function folders_exist {
  local d
  for d in "$@"; do
    # The emptiness guard must test the loop variable itself; testing any
    # other variable would silently disable the directory check.
    if [[ -n $d && ! -d $d ]]; then
      echo "ERROR: Directory '$d' does not exist!"
      exit 1
    fi
  done
}

folders_exist "$inputdir"
files_exist "$movie" "$abin_input" "$launch_script"
if [[ -n "$veloc" ]];then
files_exist "$veloc"
fi
files_exist "$movie" "${veloc-}" "$abin_input" "${launch_script-}"

natom=$(head -1 $movie)
if [[ $natom -lt 1 ]];then
Expand All @@ -62,145 +87,139 @@ fi
echo "Number of atoms = $natom"

# TODO: Verify number of atoms and lines in the velocity file
let natom2=natom+2
lines=$(cat $movie | wc -l)
geoms=$(expr $lines / $natom2)
(( natom2=natom+2 ))
lines=$(wc -l < $movie)
(( geoms=lines/natom2 ))
if [[ $nsample -gt $geoms ]];then
echo "ERROR: Number of geometries ($geoms) is smaller than number of samples($nsample)."
echo "Change parameter \"nsample\"."
exit 1
fi

# determine number of ABIN simulations per job
let nsimul=nsample-isample+1
(( nsimul=nsample-isample+1 ))
if [[ -z ${jobs-} ]]; then
jobs=$nsimul
fi

if [[ $nsimul -le $jobs ]];then
remainder=0
injob=1
jobs=$nsimul
else
let injob=nsimul/jobs #number of simulations per job
(( injob=nsimul/jobs )) #number of simulations per job
# determine the remainder and distribute it evenly between jobs
let remainder=nsimul-injob*jobs
(( remainder=nsimul-injob*jobs ))
fi


j=1
i=$isample
w=0 #current number of simulations in current j-th job

#--------------------generation of random numbers--------------------------------
# Generate random number generator seeds for individual trajectories
echo "Generating $nsample random integers for random seeds"
echo "abin-randomint --seed $irandom0 --num $nsample > iran.dat"
abin-randomint --seed $irandom0 --num $nsample > iran.dat
if [[ $? -ne "0" ]];then
if ! abin-randomint --seed $irandom0 --num $nsample > iran.dat
then
echo "ERROR: Could not generate random numbers"
exit 1
fi

mkdir -p $folder
cp iseed0 "$abin_input" $folder

let offset=natom2*isample-natom2

if [[ "$rewrite" -eq "1" ]];then
rm -f $folder/$molname.$isample.*.sh
fi

(( offset=natom2*isample-natom2 ))

while [[ $i -le "$nsample" ]];do

let offset=offset+natom2
(( offset=offset+natom2 ))

if [[ -d "$folder/TRAJ.$i" ]];then
if [[ "$rewrite" -eq "1" ]];then

rm -r $folder/TRAJ.$i ; mkdir $folder/TRAJ.$i

else

echo "Trajectory number $i already exists!"
echo "Exiting..."
echo "If you want to overwrite it, first remove it:"
echo "'rm -r $folder/TRAJ.$i'"
exit 1

fi

else

mkdir $folder/TRAJ.$i

fi

# Copy all the files from the template directory
cp -r $inputdir/* $folder/TRAJ.$i


# Prepare input geometry and velocities

head -$offset $movie | tail -$natom2 > geom
if [[ ! -z "$veloc" ]];then
head -$offset $veloc | tail -$natom2 > veloc.in
fi

mv geom $folder/TRAJ.$i/mini.dat

if [[ ! -z "$veloc" ]];then
mv veloc.in $folder/TRAJ.$i/
head -$offset $movie | tail -$natom2 > $folder/TRAJ.$i/initial.xyz
if [[ -n "${veloc-}" ]];then
head -$offset "$veloc" | tail -$natom2 > $folder/TRAJ.$i/veloc.in
fi


## Now prepare input.in and r.abin
irandom=`head -$i iran.dat |tail -1`
## Now prepare input.in and r.abin
irandom=$(head -$i iran.dat |tail -1)

# TODO: Validate this step
sed -r "s/irandom *= *[0-9]+/irandom=$irandom/" $abin_input > $folder/TRAJ.$i/input.in

cat > $folder/TRAJ.$i/r.$molname.$i << EOF
cat > $folder/TRAJ.$i/r.$folder.$i << EOF
#!/bin/bash
JOBNAME=ABIN.$molname.${i}_$$_\${JOB_ID}
INPUTPARAM=input.in
INPUTGEOM=mini.dat
OUTPUT=output
INPUTGEOM=initial.xyz
EOF

if [[ ! -z $veloc ]];then
echo "INPUTVELOC=veloc.in" >> $folder/TRAJ.$i/r.$molname.$i
if [[ -n ${veloc-} ]];then
echo "INPUTVELOC=veloc.in" >> $folder/TRAJ.$i/r.$folder.$i
fi

grep -v -e '/bin/bash' -e "JOBNAME=" -e "INPUTPARAM=" -e "INPUTGEOM=" -e "INPUTVELOC=" $launch_script >> $folder/TRAJ.$i/r.$molname.$i

chmod 755 $folder/TRAJ.$i/r.$molname.$i

if [[ -n ${launch_script-} ]];then
grep -v -e '/bin/bash' -e "INPUTPARAM=" -e "INPUTGEOM=" -e "INPUTVELOC=" $launch_script >> $folder/TRAJ.$i/r.$folder.$i
else
if [[ -n ${veloc-} ]]; then
echo "$abin_exe -i input.in -x initial.xyz -v veloc.in > abin.out 2>&1" > $folder/TRAJ.$i/r.$folder.$i
else
echo "$abin_exe -i input.in -x initial.xyz > abin.out 2>&1" > $folder/TRAJ.$i/r.$folder.$i
fi
fi

echo "cd TRAJ.$i" >> $folder/$molname.$isample.$j.sh
echo "./r.$molname.$i" >> $folder/$molname.$isample.$j.sh
echo "cd $PWD/$folder" >> $folder/$molname.$isample.$j.sh
echo "(cd TRAJ.$i && bash r.$folder.$i)" >> $folder/$folder.$isample.$j.sh

#--Distribute calculations evenly between jobs for queue
# Distribute calculations evenly between jobs for queue
if [[ $remainder -le 0 ]];then
let ncalc=injob
ncalc=injob
else
let ncalc=injob+1
(( ncalc=injob+1 ))
fi
let w++
(( w++ ))
if [[ $w -eq $ncalc ]] && [[ $j -lt $jobs ]]; then
let j++
let remainder--
let w=0
w=0
(( j++ ))
(( remainder-- ))
fi
#---------------------------------------------------------------------------

let i++
(( i++ ))

done

# Submit jobs
#
# Runs every generated batch script $folder.$isample.<k>.sh (k = 1..$j)
# through $submit_command. Nothing is launched when submit_command is unset
# or empty.
#  - submit_command=bash: scripts are started locally under nohup and left
#    running in the background; the script does not wait for them.
#  - anything else (e.g. qsub): the submit commands run in the background and
#    the script waits for the submit commands themselves to return.
k=1
if [[ -n "${submit_command-}" ]]; then
  cd "$folder" || exit 1

  # Split the command string into words (no glob expansion).
  read -r -a submit_argv <<< "$submit_command"

  # Decide local vs. queue BEFORE prepending nohup: once the command has been
  # changed to "nohup bash", comparing it against "bash" can never match.
  run_locally=0
  if [[ $submit_command = "bash" ]]; then
    echo "Launching $j calculations locally"
    run_locally=1
    submit_argv=(nohup "${submit_argv[@]}")
  else
    echo "Submitting $j calculations with: $submit_command"
  fi

  while [[ $k -le $j ]]; do
    if [[ -f $folder.$isample.$k.sh ]]; then
      "${submit_argv[@]}" "$folder.$isample.$k.sh" &
    fi
    (( k++ ))
  done

  # Wait for submit commands to finish (they should be fast!)
  # Local runs are long-lived calculations, so they are left in the background.
  if (( run_locally == 0 )); then
    wait
  fi
fi

0 comments on commit 675ae72

Please sign in to comment.