From 675ae72c2667f03c172c5f7b539b8380c187b81f Mon Sep 17 00:00:00 2001 From: Daniel Hollas Date: Tue, 7 May 2024 11:10:59 +0100 Subject: [PATCH] Cleanup create_trajectories.sh with shellcheck (#177) * Remove molname, s/submit/submit_command/ * Don't require r.abin * Submit commands in background, optionally prepend with `nohup` if running locally with bash --- utils/create_trajectories.sh | 207 +++++++++++++++++++---------------- 1 file changed, 113 insertions(+), 94 deletions(-) diff --git a/utils/create_trajectories.sh b/utils/create_trajectories.sh index b7edc3da..c39353b9 100755 --- a/utils/create_trajectories.sh +++ b/utils/create_trajectories.sh @@ -3,7 +3,7 @@ # --------------------------------------------------------------------------------- # create_trajectories.sh - Generate and execute a set ABIN simulations # -# Initial geometries (and optionally velocities) are taken sequentially from XYZ movie files. +# Initial geometries (and optionally velocities) are taken sequentially from an XYZ trajectory file. # The trajectories are executed and stored in $folder. @@ -12,27 +12,55 @@ # abin-randomint PRNG program for generating random seeds, should be in your $PATH. 
#--------------------------------------------------------------------------------- -#######-----SETUP---############# -irandom0=156863189 # random seed, set negative for random seed based on time -movie=coords.xyz # PATH TO a XYZ movie with initial geometries -veloc=vels.xyz # PATH to XYZ initial velocities (optional) -isample=1 # initial number of traj -nsample=100 # number of trajectories -folder=MP2-NH4 # Name of the folder with trajectories -inputdir=TEMPLATE-$folder # Directory with input files for ABIN -abin_input=$inputdir/input.in # main input file for ABIN -launch_script=$inputdir/r.abin # this is the file that is submitted by qsub -submit="qsub -q nq -cwd " # comment this line if you don't want to submit to queue yet -rewrite=0 # if =1 -> rewrite trajectories that already exist -jobs=20 # number of batch jobs to submit. Trajectories will be distributed accordingly. - -molname=$folder # Name of the job in the queue +# Exit if an undefined variable is used +set -u + +#### SETUP #### +# Path to an XYZ file with initial geometries +movie=coords.xyz +# Path to an XYZ file with initial velocities (optional) +# veloc=vels.xyz + +# Starting index for the initial geometries +isample=1 +# End index +nsample=100 + +# Folder name where the trajectories will be created +folder=MY_MOLECULE_TRAJS + +# Directory with the input files for ABIN (input.in et al) +inputdir=TEMPLATE-$folder + +# File with ABIN input parameters, we need this path +# so we can inject random number seed into it. +abin_input=$inputdir/input.in + +# Random seed to generate random seeds for individual trajectories +# Set to a negative number for a time-based random seed. +irandom0=156863189 + +# Specify path to launch script that is normally submitted to the queuing system. +# Comment out this line if you're running locally. 
+launch_script=$inputdir/r.abin +# If you don't provide a launch script, +# we need a (preferably absolute) path to the abin executable +# abin_exe=/path/to/abin + +# Comment out this line if you don't want to run calculations yet +# submit_command="qsub -cwd -V -q nq" +# If you don't use a queuing system (like SLURM), use the following line +# submit_command=bash + +# Number of batch jobs to submit to queue +# set only if you have more trajectories than jobs +# jobs=20 ########## END OF SETUP ########## function files_exist { - for f in $*;do - if [[ ! -f $f ]];then + for f in "$@";do + if [[ -n ${f-} && ! -f $f ]]; then echo "ERROR: File '$f' does not exist!" exit 1 fi @@ -40,8 +68,8 @@ function files_exist { } function folders_exist { - for d in $*;do - if [[ ! -d $d ]];then + for d in "$@";do + if [[ -n ${d-} && ! -d $d ]]; then echo "ERROR: Directory '$d' does not exist!" exit 1 fi @@ -49,10 +77,7 @@ function folders_exist { } folders_exist "$inputdir" -files_exist "$movie" "$abin_input" "$launch_script" -if [[ -n "$veloc" ]];then - files_exist "$veloc" -fi +files_exist "$movie" "${veloc-}" "$abin_input" "${launch_script-}" natom=$(head -1 $movie) if [[ $natom -lt 1 ]];then @@ -62,9 +87,9 @@ fi echo "Number of atoms = $natom" # TODO: Verify number of atoms and lines in the velocity file -let natom2=natom+2 -lines=$(cat $movie | wc -l) -geoms=$(expr $lines / $natom2) +(( natom2=natom+2 )) +lines=$(wc -l < $movie) +(( geoms=lines/natom2 )) if [[ $nsample -gt $geoms ]];then echo "ERROR: Number of geometries ($geoms) is smaller than number of samples($nsample)." echo "Change parameter \"nsample\"." 
@@ -72,27 +97,30 @@ if [[ $nsample -gt $geoms ]];then fi # determine number of ABIN simulations per job -let nsimul=nsample-isample+1 +(( nsimul=nsample-isample+1 )) +if [[ -z ${jobs-} ]]; then + jobs=$nsimul +fi + if [[ $nsimul -le $jobs ]];then remainder=0 injob=1 jobs=$nsimul else - let injob=nsimul/jobs #number of simulations per job + (( injob=nsimul/jobs )) #number of simulations per job # determine the remainder and distribute it evenly between jobs - let remainder=nsimul-injob*jobs + (( remainder=nsimul-injob*jobs )) fi - j=1 i=$isample w=0 #current number of simulations in current j-th job -#--------------------generation of random numbers-------------------------------- +# Generate random number generator seeds for individual trajectories echo "Generating $nsample random integers for random seeds" echo "abin-randomint --seed $irandom0 --num $nsample > iran.dat" -abin-randomint --seed $irandom0 --num $nsample > iran.dat -if [[ $? -ne "0" ]];then +if ! abin-randomint --seed $irandom0 --num $nsample > iran.dat +then echo "ERROR: Could not generate random numbers" exit 1 fi @@ -100,107 +128,98 @@ fi mkdir -p $folder cp iseed0 "$abin_input" $folder -let offset=natom2*isample-natom2 - -if [[ "$rewrite" -eq "1" ]];then - rm -f $folder/$molname.$isample.*.sh -fi - +(( offset=natom2*isample-natom2 )) while [[ $i -le "$nsample" ]];do - let offset=offset+natom2 + (( offset=offset+natom2 )) if [[ -d "$folder/TRAJ.$i" ]];then - if [[ "$rewrite" -eq "1" ]];then - - rm -r $folder/TRAJ.$i ; mkdir $folder/TRAJ.$i - - else echo "Trajectory number $i already exists!" - echo "Exiting..." + echo "If you want to overwrite it, first remove it:" + echo "'rm -r $folder/TRAJ.$i'" exit 1 - fi - else mkdir $folder/TRAJ.$i fi + # Copy all the files from the template directory cp -r $inputdir/* $folder/TRAJ.$i - # Prepare input geometry and velocities - - head -$offset $movie | tail -$natom2 > geom - if [[ ! 
-z "$veloc" ]];then - head -$offset $veloc | tail -$natom2 > veloc.in - fi - - mv geom $folder/TRAJ.$i/mini.dat - - if [[ ! -z "$veloc" ]];then - mv veloc.in $folder/TRAJ.$i/ + head -$offset $movie | tail -$natom2 > $folder/TRAJ.$i/initial.xyz + if [[ -n "${veloc-}" ]];then + head -$offset "$veloc" | tail -$natom2 > $folder/TRAJ.$i/veloc.in fi - -## Now prepare input.in and r.abin - irandom=`head -$i iran.dat |tail -1` + ## Now prepare input.in and r.abin + irandom=$(head -$i iran.dat |tail -1) # TODO: Validate this step sed -r "s/irandom *= *[0-9]+/irandom=$irandom/" $abin_input > $folder/TRAJ.$i/input.in - cat > $folder/TRAJ.$i/r.$molname.$i << EOF + cat > $folder/TRAJ.$i/r.$folder.$i << EOF #!/bin/bash -JOBNAME=ABIN.$molname.${i}_$$_\${JOB_ID} INPUTPARAM=input.in -INPUTGEOM=mini.dat -OUTPUT=output +INPUTGEOM=initial.xyz EOF - if [[ ! -z $veloc ]];then - echo "INPUTVELOC=veloc.in" >> $folder/TRAJ.$i/r.$molname.$i + if [[ -n ${veloc-} ]];then + echo "INPUTVELOC=veloc.in" >> $folder/TRAJ.$i/r.$folder.$i fi - grep -v -e '/bin/bash' -e "JOBNAME=" -e "INPUTPARAM=" -e "INPUTGEOM=" -e "INPUTVELOC=" $launch_script >> $folder/TRAJ.$i/r.$molname.$i - - chmod 755 $folder/TRAJ.$i/r.$molname.$i - + if [[ -n ${launch_script-} ]];then + grep -v -e '/bin/bash' -e "INPUTPARAM=" -e "INPUTGEOM=" -e "INPUTVELOC=" $launch_script >> $folder/TRAJ.$i/r.$folder.$i + else + if [[ -n ${veloc-} ]]; then + echo "$abin_exe -i input.in -x initial.xyz -v veloc.in > abin.out 2>&1" > $folder/TRAJ.$i/r.$folder.$i + else + echo "$abin_exe -i input.in -x initial.xyz > abin.out 2>&1" > $folder/TRAJ.$i/r.$folder.$i + fi + fi - echo "cd TRAJ.$i" >> $folder/$molname.$isample.$j.sh - echo "./r.$molname.$i" >> $folder/$molname.$isample.$j.sh - echo "cd $PWD/$folder" >> $folder/$molname.$isample.$j.sh + echo "(cd TRAJ.$i && bash r.$folder.$i)" >> $folder/$folder.$isample.$j.sh -#--Distribute calculations evenly between jobs for queue + # Distribute calculations evenly between jobs for queue if [[ 
$remainder -le 0 ]];then - let ncalc=injob + ncalc=$injob else - let ncalc=injob+1 + (( ncalc=injob+1 )) fi - let w++ + (( w++ )) if [[ $w -eq $ncalc ]] && [[ $j -lt $jobs ]]; then - let j++ - let remainder-- - let w=0 + w=0 + (( j++ )) + (( remainder-- )) fi -#--------------------------------------------------------------------------- - let i++ + (( i++ )) done # Submit jobs k=1 -if [[ ! -z "$submit" ]];then - cd $folder - while [[ $k -le $j ]] - do - if [[ -f $molname.$isample.$k.sh ]];then - $submit -V -cwd $molname.$isample.$k.sh - fi - let k++ - done +if [[ -n "${submit_command-}" ]];then + cd $folder || exit 1 + if [[ $submit_command = "bash" ]];then + echo "Launching $j calculations locally" + submit_command="nohup $submit_command" + else + echo "Submitting $j calculations with: $submit_command" + fi + while [[ $k -le $j ]] + do + if [[ -f $folder.$isample.$k.sh ]];then + $submit_command $folder.$isample.$k.sh & + fi + (( k++ )) + done + # Wait only for queue submit commands (they should be fast!); local "nohup bash" runs stay in the background + if [[ $submit_command != "nohup bash" ]]; then + wait + fi fi