farmoutAnalysisJobs

#!/bin/bash
#
# Setup:

#
# Usage:
#       farmoutAnalysisJobs <jobName> <CMSSW Version> <config file>
#
# The config file should refer to the following macros, which are automatically
# inserted by this script:
#
# $inputFileNames     ==>  Will be replaced by list of input files
# $outputFileName     ==>  Will be replaced by the $inputFileName-output.root
#
# Job parameters
#

realpath() {
  readlink -f "$1"
}

# Initialize default settings:
FARMOUT_HOME=${FARMOUT_HOME:-$(dirname $(realpath $0))}

# for storing output
FARMOUT_USER=${FARMOUT_USER:-${USER}}
SRM_SERVER=srm://cmssrm2.hep.wisc.edu:8443/srm/v2/server?SFN=/hdfs
PNFS_HOME=/store/user/${FARMOUT_USER}
CMS_DASHBOARD_LOCAL_CE=cmsgrid02.hep.wisc.edu

LOCAL_POSIX_PREFIX=/hdfs

# for getting input
DCAP_SERVER=root://cmsxrootd.hep.wisc.edu/

# path to directory containing lfns: /store/...
PNFS_STORE=

SRM_HOME=${SRM_SERVER}${PNFS_HOME}
DCAP_HOME=${DCAP_SERVER}${PNFS_HOME}

# We need wisc AFS for accessing user CMSSW release area unless in no-shared-fs mode
SITE_REQUIREMENTS='TARGET.Arch == "X86_64" && (MY.RequiresSharedFS=!=true || TARGET.HasAFS_OSG) && (TARGET.OSG_major =!= undefined || TARGET.IS_GLIDEIN=?=true) && IsSlowSlot=!=true'

##
## cms.cern.ch depends on the cms.hep.wisc.edu CVMFS respository for NOW (02-19-2013)
## SITECONF is the reason for the dependency

if echo "$PATH" | egrep -q "(/cvmfs/cms.hep.wisc.edu)|(/cvmfs/cms.cern.ch)" ; then
  # require cvmfs to be available and at least as up to date as the local cvmfs

  SITE_REQUIREMENTS="${SITE_REQUIREMENTS} && (TARGET.HasParrotCVMFS=?=true || (TARGET.UWCMS_CVMFS_Exists  && TARGET.CMS_CVMFS_Exists"

  local_cvmfs_revision=`attr -q -g revision /cvmfs/cms.hep.wisc.edu`
  if [ "$local_cvmfs_revision" != "" ]; then
    SITE_REQUIREMENTS="${SITE_REQUIREMENTS} && TARGET.UWCMS_CVMFS_Revision >= ${local_cvmfs_revision}"
  fi

  local_cvmfs_revision=`attr -q -g revision /cvmfs/cms.cern.ch`
  if [ "$local_cvmfs_revision" != "" ]; then
    SITE_REQUIREMENTS="${SITE_REQUIREMENTS} && TARGET.CMS_CVMFS_Revision >= ${local_cvmfs_revision}"
  fi

  SITE_REQUIREMENTS="${SITE_REQUIREMENTS} ))"
fi

DISK_REQUIREMENTS=2000
MEMORY_REQUIREMENTS=900
VSIZE_LIMIT=3000
HOLD_IMAGE_SIZE_FACTOR=4.0
HOLD_DISK_USAGE_FACTOR=10.0
MIN_PROXY_HOURS=24
OUTPUT_FILES_PER_SUBDIR=0

# We want explicit references to "app_rw" to be replaced with "app"
# so the interactive osg app volume is not pounded by the jobs.
SITE_ENV_MATCH_REPLACE_1='s|/afs/hep.wisc.edu/osg/app_[^/]*/|/afs/hep.wisc.edu/osg/app/|g'

# special exit status to force job to leave the queue
FAIL_JOB=42

for scratch_dir in /data /scratch /tmp; do
  if [ -d $scratch_dir ] && [ -w $scratch_dir ]; then
    break
  fi
done
SUBMIT_HOME=${scratch_dir}/${FARMOUT_USER}

SHARED_LOGS=${scratch_dir}/farmout_logs
mkdir -p -m a+rwx $SHARED_LOGS
SHARED_LOGS=$SHARED_LOGS/${FARMOUT_USER}
mkdir -p $SHARED_LOGS

basename() {
  # This shell function is faster than calling /bin/basename
  path=$1
  suffix=$2
  path=${path##*/}  # get everything after the final '/'
  if [ ! -z $suffix ]; then
    path=${path%$suffix}
  fi
  echo $path
}

logerror() {
  echo 2>&1 "$@"
}

die() {
  if [ $# -gt 0 ]; then
    logerror
    logerror "$@"
  fi
  exit 1
}

incrfname () {
    fname="$1"
    if [ -f "$fname" ]; then
        num=1
        while [ -f "$fname.$num" ]; do
            let num+=1
        done
        fname="$fname.$num"
    fi
    echo "$fname"
}

outputFileExists() {
  local fname=$1

  #Strip off srm://hostname:8443 to get raw path.
  local local_fname=${fname#srm://*:8443}

  #Strip off '/blah/blah?SFN='
  local_fname=${local_fname#*SFN=}

  if [ -f "$local_fname" ]; then
    return 0
  fi
  return 1
}

outputDirExists() {
  local fname=$1

  #Strip off srm://hostname:8443 to get raw path.
  local local_fname=${fname#srm://*:8443}

  #Strip off '/blah/blah?SFN='
  local_fname=${local_fname#*SFN=}

  if [ -d "$local_fname" ]; then
    return 0
  fi
  return 1
}

jobFailed() {
  ulog=$1

  $FARMOUT_HOME/CondorUserLog.py --job-failed $ulog >/dev/null
}

find_dag_inputs() {
    find "$LAST_SUBMIT_DIR" -type f -name submit -print0 | xargs -r0 \
        sed -nre "s|^# DAG_OUTPUT_FILENAME (.*)|${DCAP_SERVER}\1|p"
}

checkSharedFS() {
  local d=$1
  if echo "$d" | grep -q ^/afs/; then
    if ! (fs la "$d" | grep -q 'condor-hosts rl' || \
          fs la "$d" | grep -q 'system:anyuser rl'); then
       echo
       echo "WARNING: You are using --shared-fs, but $d"
       echo "may not be readable from the worker nodes.  Ensure that this directory"
       echo "and all sub-directories are readable without an AFS token.  Also ensure"
       echo "that parent directories are listable without an AFS token.  We recommend"
       echo "using the condor-hosts group to achieve this.  Example:"
       echo
       d=$(realpath $CMSSW_HOME/..)
       h=$(realpath ~)
       while [ "$d" != "/" ] && [ "$d" != "" ] && [ "$d" != "$h" ]; do
           if ! (fs la "$d" | grep -q 'condor-hosts r*l' || \
                fs la "$d" | grep -q 'system:anyuser r*l'); then
               echo fs setacl -dir $d -acl condor-hosts l
           fi
           d=$(realpath $d/..)
       done
       echo "find $CMSSW_HOME -type d -exec fs setacl -dir '{}' -acl condor-hosts rl \;"
       echo
    fi
  fi
}

check_proxy() {
  hours=$1
  proxy=$2
  if ! [ -f "$proxy" ]; then
    logerror
    logerror "NOTE: No grid proxy found.  (Expecting to find it here: $proxy.)"
    return 1
  fi

  #Issue a warning if less than this many seconds remain:
  min_proxy_lifetime=$((3600*$hours))

  seconds_left="`voms-proxy-info --timeleft --file=$proxy 2>/dev/null`"

  if [ "$seconds_left" = "" ]; then
    echo "WARNING: cannot find time remaining for grid proxy."
    voms-proxy-info -all -path $proxy
    return 0
  fi
  if [ "$seconds_left" -lt "$min_proxy_lifetime" ]; then
    logerror
    logerror "NOTE: grid proxy is about to expire:"
    logerror "voms-proxy-info"
    voms-proxy-info --file=$proxy
    return 1
  fi

}

PrintUsage() {
  echo "USAGE: farmoutAnalysisJobs [options] <jobName> <CMSSW Path> <config file> [extra-cmsRun-args]"
  echo ""
  echo "OPTIONS:"
  echo "  --output-dir=${SRM_HOME}/<jobName>-<runName>"
  echo "               (--output-dir=. copies files back to submit dir)"
  echo "  --input-dir=${DCAP_HOME}/<jobName>"
  echo "  --input-dbs-path=/dbs/dataset/path   (in place of --input-dir)"
  echo "  --input-runs=Run1,Run2,...           (must be used with --input-dbs-path)"
  echo "  --submit-dir=${SUBMIT_HOME}/<jobName>-<runName>"
  echo "  --no-submit"
  echo "  --job-count=N            (limit the number of jobs that are created)"
  echo "  --input-files-per-job=1"
  echo "  --resubmit-failed-jobs"
  echo "  --skip-existing-output   (do not create jobs if output file exists)"
  echo "  --skip-existing-jobs     (do not create jobs if job already created)"
  echo "  --match-input-files='*.root'"
  echo "  --clean-crab-dupes       (filter duplicate output files from CRAB)"
  echo "  --exclude-input-files=pattern"
  echo "  --input-file-list=file"
  echo "  --memory-requirements=$MEMORY_REQUIREMENTS (megabytes)"
  echo "  --vsize-limit=$VSIZE_LIMIT (megabytes)"
  echo "  --disk-requirements=$DISK_REQUIREMENTS   (megabytes)"
  echo "  --site-requirements=$SITE_REQUIREMENTS"
  echo "  --save-failed-datafiles  (save root file from failed cmsRun job)"
  echo "                           (in <output-dir>-cmsRun-failed)"
  echo "  --save-missing-input-file-list=file"
  echo "  --assume-input-files-exist (do not check file existence at submit time)"
  echo "  --express-queue-only     (require to run in the express queue)"
  echo "  --express-queue          (allow to run in the express queue)"
  echo "  --extra-inputs=file1,file2,...  (e.g. parameter files)"
  echo "  --accounting-group=cms_name  (username to use for fair-sharing)"
  echo "  --requires-whole-machine (job should reserve all slots on machine)"
  echo "  --fwklite                (in this case, provide script instead of config)"
  echo "  --merge                     (merge input files; no cfg file expected)"
  echo "  --use-hadd                  (use hadd to merge files instead of mergeFiles.C)"
  echo "  --infer-cmssw-path          (infer CMSSW path from environment)"
  echo "  --lumi-mask=X               (apply lumi mask json file)"
  echo "  --output-files-per-subdir=N (0 for infinite; $OUTPUT_FILES_PER_SUBDIR default)"
  echo "  --job-generates-output-name (don't try to control output file name)"
  echo "  --dbs-service-url=X         (global DBS by default)"
  echo "  --rescue-dag-file=X"
  echo "  --output-dag-file=X         (can be used by multiple jobs to indicate dependencies)"
  echo "  --last-submit-dir=X         (Use output from previous job as inputs to this job.)"
  echo "                              (In conjunction with using the same --output-dag-file,)"
  echo "                              (this will build the job dependency DAG.)"
  echo "  --no-shared-fs              (the default: send analysis binaries to execute machine)"
  echo "  --shared-fs                 (rely on CMSSW project area being on a shared fs (e.g. AFS))"
  echo "  --use-osg                   (allow jobs to run opportunistically on OSG)"
  echo "  --use-only-osg              (only run jobs opportunistically on OSG)"
  echo "  --use-hdfs                  (read files from /hdfs rather than going through xrootd)"
  echo "  --debug"
  echo ""
  echo "Note that <runName> is taken from the name of the config file."
  exit 2
}

OPTS=`getopt -o "h" -l "help,output-dir:,input-dir:,submit-dir:,no-submit,job-count:,skip-existing-output,skip-existing-jobs,match-input-files:,exclude-input-files:,clean-crab-dupes,input-files-per-job:,disk-requirements:,memory-requirements:,input-file-list:,input-dbs-path:,input-runs:,save-failed-datafiles,save-missing-input-file-list:,assume-input-files-exist,site-requirements:,quick-test,extra-inputs:,accounting-group:,requires-whole-machine,fwklite,output-files-per-subdir:,job-generates-output-name,dbs-service-url:,infer-cmssw-path,lumi-mask:,express-queue,express-queue-only,vsize-limit:,merge,use-hadd,rescue-dag-file:,output-dag-file:,last-submit-dir:,no-shared-fs,shared-fs,use-osg,use-only-osg,use-hdfs,debug,resubmit-failed-jobs" -- "$@"`
if [ $? -ne 0 ]; then PrintUsage; fi

eval set -- "$OPTS"

DBS_URL_ARG=
ACCOUNTING_GROUP=
NO_SUBMIT=
JOB_LIMIT=
SKIP_EXISTING_OUTPUT=
SKIP_EXISTING_JOBS=
RESUBMIT_FAILED_JOBS=
OUTPUT_DIR=
INPUT_DIR=
SUBMIT_DIR=
MATCH_INPUT_FILES='*.root'
CLEAN_CRAB_DUPES=0
EXCLUDE_INPUT_FILES=
INPUT_FILES_PER_JOB=1
INPUT_FILE_LIST=
INPUT_DBS_PATH=
INPUT_RUNS=
SAVE_FAILED_DATAFILES=
SAVE_MISSING_INPUT_FILE_LIST=
ASSUME_INPUT_FILES_EXIST=
CFG_EXTENSION=
QUICK_TEST=
EXTRA_INPUTS=
REQUIRES_WHOLE_MACHINE=
FWKLITE=
APPLICATION=cmsRun
JOB_GENERATES_OUTPUT_NAME=0
EXPRESS_QUEUE=0
MERGE=
USE_HADD=
INFER_CMSSW_PATH=0
LUMI_MASK=
RESCUE_DAG_FILE=
OUTPUT_DAG_FILE=
LAST_SUBMIT_DIR=
NO_SHARED_FS=1
USE_OSG=0
USE_HDFS=0
DEBUG=

while [ ! -z "$1" ]
do
  case "$1" in
    -h) PrintUsage;;
    --help) PrintUsage;;
    --no-submit) NO_SUBMIT=1;;
    --job-count) shift; JOB_LIMIT=$1;;
    --skip-existing-output) SKIP_EXISTING_OUTPUT=1;;
    --skip-existing-jobs) SKIP_EXISTING_JOBS=1;;
    --resubmit-failed-jobs) RESUBMIT_FAILED_JOBS=1;;
    --output-dir) shift; OUTPUT_DIR=$1;;
    --input-dir) shift; INPUT_DIR=$1;;
    --submit-dir) shift; SUBMIT_DIR=$1;;
    --match-input-files) shift; MATCH_INPUT_FILES=$1;;
    --exclude-input-files) shift; EXCLUDE_INPUT_FILES=$1;;
    --clean-crab-dupes) CLEAN_CRAB_DUPES=1;;
    --input-files-per-job) shift; INPUT_FILES_PER_JOB=$1;;
    --disk-requirements) shift; DISK_REQUIREMENTS=$1;;
    --memory-requirements) shift; MEMORY_REQUIREMENTS=$1;;
    --vsize-limit) shift; VSIZE_LIMIT=$1;;
    --input-file-list) shift; INPUT_FILE_LIST=$1;;
    --input-dbs-path) shift; INPUT_DBS_PATH=$1;;
    --input-runs) shift; INPUT_RUNS=$1;;
    --save-failed-datafiles) SAVE_FAILED_DATAFILES=1;;
    --save-missing-input-file-list) shift; SAVE_MISSING_INPUT_FILE_LIST=$1;;
    --assume-input-files-exist) ASSUME_INPUT_FILES_EXIST=0;;
    --site-requirements) shift; SITE_REQUIREMENTS="$1";;
    --quick-test) QUICK_TEST=1;;
    --express-queue-only) QUICK_TEST=1;;
    --extra-inputs) shift; EXTRA_INPUTS="$1";;
    --accounting-group) shift; ACCOUNTING_GROUP="+AccountingGroup=\"$1\"";;
    --requires-whole-machine) REQUIRES_WHOLE_MACHINE=1;;
    --fwklite) FWKLITE=1; APPLICATION=fwklite;;
    --output-files-per-subdir) shift; OUTPUT_FILES_PER_SUBDIR=$1;;
    --job-generates-output-name) JOB_GENERATES_OUTPUT_NAME=1;;
    --dbs-service-url) shift; DBS_URL_ARG="--url=$1";;
    --express-queue) EXPRESS_QUEUE=1;;
    --merge) MERGE=1;;
    --use-hadd) USE_HADD=1;;
    --infer-cmssw-path) INFER_CMSSW_PATH=1;;
    --lumi-mask) shift; LUMI_MASK="$1";;
    --rescue-dag-file) shift; RESCUE_DAG_FILE="$1";;
    --output-dag-file) shift; OUTPUT_DAG_FILE="$1";;
    --last-submit-dir) shift; LAST_SUBMIT_DIR="$1";;
    --no-shared-fs) NO_SHARED_FS=1;;
    --shared-fs) NO_SHARED_FS=0;;
    --use-osg) NO_SHARED_FS=1; USE_OSG=1;;
    --use-only-osg) NO_SHARED_FS=1; USE_OSG=1; SITE_REQUIREMENTS="${SITE_REQUIREMENTS} && TARGET.IS_GLIDEIN";;
    --debug) DEBUG=1;;
    --use-hdfs)
        USE_HDFS=1
        DCAP_SERVER=file:/hdfs/
	SITE_REQUIREMENTS="${SITE_REQUIREMENTS} && TARGET.HAS_CMS_HDFS"
        ;;
    --) shift; break;;
    *) die "Unexpected option $1";;
  esac
  shift
done

if [ -n "${DEBUG}" ]; then
    echo "Enabling debug mode"
    set -x
fi

if [ "${MERGE}" = 1 ] && [ "${VSIZE_LIMIT}" -lt 6000 ]; then
    echo "Increasing vsize limit from ${VSIZE_LIMIT} MB to 6000 MB"
    VSIZE_LIMIT=6000
fi

if [ "$SKIP_EXISTING_OUTPUT" = "1" ] && [ "$JOB_GENERATES_OUTPUT_NAME" = "1" ];
then
  die "The option --job-generates-output-name conflicts with --skip-existing-output"
fi

if [ "$SKIP_EXISTING_OUTPUT" = "1" ] && [ "$OUTPUT_DAG_FILE" != "" ]; then
  die "The option --output-dag-file conflicts with --skip-existing-output"
fi

if [ "$RESUBMIT_FAILED_JOBS" = "1" ] && [ "$OUTPUT_DAG_FILE" != "" ]; then
  die "The option --output-dag-file conflicts with --resubmit-failed-jobs.  Perhaps you want to use --rescue-dag-file instead?"
fi

if [ "$SKIP_EXISTING_OUTPUT" = "1" ] && [ "$RESUBMIT_FAILED_JOBS" = "1" ]; then
  echo "NOTE: both --skip-existing-output and --resubmit-failed-jobs were specified, so only jobs that have no existing output AND which exited with non-zero status will be submitted."
fi

if [ "$SKIP_EXISTING_JOBS" = "1" ] && [ "$RESUBMIT_FAILED_JOBS" = "1" ]; then
  die "--skip-existing-jobs conflicts with --resubmit-failed-jobs"
fi

if [ "$MERGE" != "1" ] && [ "$USE_HADD" = "1" ]; then
  die "The option --use-hadd requires --merge"
fi

if [ "$USE_OSG" = "1" ]; then
  WANT_GLIDEIN="+WantGlidein = true"
fi

# Check for some required utilities
for exe in scramv1 condor_submit condor_submit_dag cmsRun.sh readlink voms-proxy-info ; do
  if ! which $exe >& /dev/null; then
    die "Cannot find $exe in PATH.  Your environment is not correctly set up."
  fi
done

submitDagCommand="condor_submit_dag -notification never"
submitCommand=condor_submit

if [ -n "$RESCUE_DAG_FILE" ]; then
    if [ "$#" -gt 1 ]; then
        die "You must not specify other arguments with --rescue-dag-file."
    fi
fi

nargs=3
if [ "$INFER_CMSSW_PATH" -eq 1 ]; then
  nargs=$(($nargs - 1))
fi

if [ "$MERGE" = "1" ]; then
  if [ "$#" -lt $(($nargs - 1)) ]; then PrintUsage; fi
  if [ "$INPUT_FILES_PER_JOB" -lt 2 ]; then
    die "You must specify --input-files-per-job=N with --merge."
  fi
elif [ -n "$RESCUE_DAG_FILE" ]; then
  dag=$(echo $RESCUE_DAG_FILE | sed -re 's/\.(rescue[0-9]{3}|wrapper).*//')
  wrapper="${dag}.wrapper"
  if [ ! -r "$wrapper" ]; then
      die "Failed to find wrapper for DAG $dag"
  elif [ ! -r "${dag}.rescue001" ]; then
      die "Failed to rescue DAG $dag"
  fi
  echo "Submitting rescue for $dag"
  $submitDagCommand "$wrapper"
  exit $?
else
  if [ "$#" -lt $nargs ]; then PrintUsage; fi
fi


# Additional command-line arguments

jobName=$1
shift
if [ ${INFER_CMSSW_PATH} -eq 0 ]; then
  CMSSW_HOME=`realpath $1`
  shift
else
  fail=
  if [ -z "${CMSSW_BASE}" ]; then
    die "--infer-cmssw-path is enabled but \$CMSSW_BASE is not set in the environment."
  fi
  CMSSW_HOME=`realpath $CMSSW_BASE`
fi

if [ "$MERGE" = "1" ]; then
  # In merge mode, we set a bunch of other options automatically,
  # in order to run the merge job.

  if [ "$#" -gt 0 ]; then
    die "Unexpected extra arguments in --merge mode: $@"
  fi

  FWKLITE=1
  APPLICATION=merge

  configTemplate="${FARMOUT_HOME}/mergeFilesJob"

  # if using mergeFiles.C to do the merge, add that to the extra inputs list
  if [ "$USE_HADD" != 1 ]; then
    if ! [ -z "$EXTRA_INPUTS" ]; then
      EXTRA_INPUTS="${EXTRA_INPUTS},"
    fi
    EXTRA_INPUTS="${EXTRA_INPUTS}${FARMOUT_HOME}/mergeFiles.C"
  fi

else
  if [ ! -f $1 ]; then
      die "Can not find config template/script at $1"
  fi
  configTemplate=`realpath $1`
  if [ ! -f $configTemplate ]; then
      die "Could not expand the real path of config template/script at $1"
  fi
  shift

  # get extra arguments in condor arg syntax
  EXTRA_CMSRUN_ARGS=""
  while [ "$1" != "" ]; do
    escaped_arg=$(echo $1 | sed "s|'|''|g; s|\"|\"\"|g")
    shift
    EXTRA_CMSRUN_ARGS="$EXTRA_CMSRUN_ARGS '$escaped_arg'"
  done

fi

# Now we have all the user's input.

if [ "$EXTRA_INPUTS" != "" ]; then
  for file in ${EXTRA_INPUTS//,/ }; do
    if ! [ -a $file ]; then
      logerror "ERROR: Cannot find file specified in --extra-inputs: $file"
      die
    fi
  done
fi

# Check the config template

if [ "$configTemplate" = "" ]; then
  die "You must specify a cfg template."
fi

CFG_EXTENSION="${configTemplate/*./}"

if [ "$FWKLITE" != 1 ]; then
  if [ "$CFG_EXTENSION" != "py" ] && [ "$CFG_EXTENSION" != "cfg" ]; then
    die "cfg template must end in .py or .cfg"
  fi

  macro='$outputFileName'
  if [ "$JOB_GENERATES_OUTPUT_NAME" != "1" ] && ! grep -F -q $macro $configTemplate && ! ( echo "$EXTRA_CMSRUN_ARGS" | grep -F -q $macro ); then

    die "$macro must appear in the configuration template or in the extra cmsRun arguments.  Either specify --job-generates-output-name or put $macro in $configTemplate or the extra cmsRun arguments to let the output file name be configured"

  fi

  for macro in \$inputFileNames; do
    if ! grep -F -q $macro $configTemplate && ! ( echo "$EXTRA_CMSRUN_ARGS" | grep -F -q $macro ); then
      die "$macro must appear in the configuration template or in the extra cmsRun arguments.  I can't find it in $configTemplate or in the extra cmsRun arguments."
    fi
  done
fi


runName=`basename $configTemplate .${CFG_EXTENSION}`

if [ "$INPUT_DIR" = "" ]; then
    if [ "$INPUT_FILE_LIST" != "" ] || [ "$INPUT_DBS_PATH" != "" ]; then
        INPUT_DIR=${DCAP_SERVER}${PNFS_STORE}
    fi
fi

OUTPUT_DIR=${OUTPUT_DIR:-${SRM_HOME}/$jobName-$runName}
INPUT_DIR=${INPUT_DIR:-${DCAP_HOME}/$jobName}
SUBMIT_DIR=${SUBMIT_DIR:-${SUBMIT_HOME}/$jobName-$runName}

if [ "$USE_HDFS" = 1 ]; then
    if [ "${INPUT_DIR#file:/hdfs/}" = "${INPUT_DIR}" ]; then
        die "You specified --use-hdfs.  In this case, the input directory must begin with file:/hdfs/"
    fi
    if [ "$USE_OSG" = 1 ]; then
        die "You cannot use --use-hdfs and --use-osg together."
    fi
fi

#Strip off server part of input path
LOCAL_INPUT_DIR=${INPUT_DIR#$DCAP_SERVER}

#Get the part of INPUT_DIR that was stripped off and make sure INPUT_BASE ends
#with a trailing slash. If we're building a DAG, we don't need INPUT_BASE at
#all.
INPUT_BASE=${INPUT_DIR%$LOCAL_INPUT_DIR}
# if INPUT_BASE is empty, then INPUT_DIR and LOCAL_INPUT_DIR are the same,
# which could mean that INPUT_DIR points to some "foreign" server.
# In that case, just leave INPUT_BASE empty.
if [ "$INPUT_BASE" != "" ]; then
  INPUT_BASE="${INPUT_BASE%/}/"
fi
if [ "$LAST_SUBMIT_DIR" != "" ]; then
    INPUT_BASE=
fi

if [ -d "${LOCAL_POSIX_PREFIX}${LOCAL_INPUT_DIR}" ]; then
    LOCAL_INPUT_DIR="${LOCAL_POSIX_PREFIX}${LOCAL_INPUT_DIR}"
elif [ -d "${LOCAL_POSIX_PREFIX}/${LOCAL_INPUT_DIR}" ]; then
    LOCAL_INPUT_DIR="${LOCAL_POSIX_PREFIX}/${LOCAL_INPUT_DIR}"
fi

if [ "$INPUT_FILE_LIST" != "" ]; then
    if ! [ -f "$INPUT_FILE_LIST" ]; then
        die "Error: No such file: $INPUT_FILE_LIST"
    fi
    INPUT_FILE_LIST=`realpath $INPUT_FILE_LIST`
elif [ "$LAST_SUBMIT_DIR" != "" ]; then
    if ! [ -d "$LAST_SUBMIT_DIR" ]; then
        die "Error: No such directory: $LAST_SUBMIT_DIR"
    fi
    LAST_SUBMIT_DIR=`realpath $LAST_SUBMIT_DIR`
elif ! [ -d "${LOCAL_INPUT_DIR}" ]; then
  die "Error: No such input directory: ${LOCAL_INPUT_DIR}"
fi

if [ "$INPUT_DBS_PATH" != "" ]; then
    if [ "$DBSCMD_HOME" = "" ] || ! [ -d "$DBSCMD_HOME" ]; then
      die "DBS client is not correctly set up: DBSCMD_HOME is invalid"
    fi
    if [ "$INPUT_FILE_LIST" != "" ]; then
      die "--input-dbs-path cannot be used with --input-file-list"
    fi
fi

if [ "$INPUT_RUNS" != "" ]; then
  if [ "$INPUT_DBS_PATH" = "" ]; then
    die "--input-runs requires --input-dbs-path to be specified"
  fi
fi

if ! [ -d "$CMSSW_HOME" ]; then
  die "Error: No such CMSSW directory: $CMSSW_HOME"
fi

if [ -d "$SUBMIT_DIR" ] && [ "$SKIP_EXISTING_JOBS" != "1" ] && [ "$RESUBMIT_FAILED_JOBS" != "1" ] && [ "$SKIP_EXISTING_OUTPUT" != "1" ]; then
  logerror
  logerror "Error: Submit directory already exists: $SUBMIT_DIR"
  logerror
  logerror "You must either remove it, or specify one of the following options:"
  logerror "  --resubmit-failed-jobs"
  logerror "  --skip-existing-jobs"
  logerror "  --skip-existing-output"
  logerror "or specify a different job name or "
  logerror "submission directory with --submit-dir"
  die
fi

proxy=${X509_USER_PROXY:-/tmp/x509up_u$UID}

if [ "$NO_SUBMIT" != 1 ] && ! check_proxy $MIN_PROXY_HOURS $proxy; then
  logerror
  logerror "Either rerun this command with --no-submit or create a new grid proxy"
  logerror "and rerun this command.  Example of how to create a grid proxy:"
  logerror
  logerror "voms-proxy-init --voms=cms --valid=48:00"
  die
fi


#
# CMSSW environment setup
#
originalDir=`pwd`
PATH=$PATH:$originalDir
export PATH
cd $CMSSW_HOME || die "Failed to cd to $CMSSW_HOME."
eval `scramv1 runtime -sh`

if [ "$?" != "0" ]; then
  die "Failed to initialize CMSSW environment with scram in $CMSSW_HOME."
fi

for mr_name in ${!SITE_ENV_MATCH_REPLACE*}; do
    mr="${!mr_name}"
    # apply search-replace to the environment
    eval `env | sed "$mr" | sed 's/"/\\\\"/g' | sed 's/\\([^=]*\\)=\\(.*\\)/export \\1="\\2"/'`
done

runDir=$SUBMIT_DIR
submitFile=$runDir/submit
userCodeTgz=$runDir/user_code.tgz
farmoutLogFile=$runDir/farmoutAnalysisJobs.log

# Make sure submitFile name is unique in case we are continuing a previous
# submission.
submitFile=$(incrfname "$submitFile")

# If building a DAG, zero out the DAG input file if last-submit-dir is not
# specified. Also check to make sure the dag file directory exists.
if [ "$OUTPUT_DAG_FILE" != "" ]; then
    DAGDIR=$(dirname ${OUTPUT_DAG_FILE})
    mkdir -p $DAGDIR
    OUTPUT_DAG_FILE=$(realpath "${OUTPUT_DAG_FILE}")
    if [ "$LAST_SUBMIT_DIR" == "" ]; then
        echo -n >| "$OUTPUT_DAG_FILE"
    fi
fi

mkdir -p $runDir
cd $runDir || die "Failed to create directory $runDir"

SHARED_LOGS=$SHARED_LOGS/$(basename $runDir)
mkdir -p $SHARED_LOGS

#
# Job specification
#
Executable=`which cmsRun.sh`

if [ "$EXPRESS_QUEUE" = "1" ]; then
  QUEUE_ATTRIBUTE='+IsExpressQueueJob      = True'
else
  QUEUE_ATTRIBUTE='+IsFastQueueJob      = True'
fi

if [ "$QUICK_TEST" != "" ]; then
  if [ "$EXPRESS_QUEUE" = "1" ]; then

    # Require express _or_ fast slot here in order to fall back to
    # fast slot in case user is banned from express queue.  The
    # machine policy will prevent the job from running in the fast
    # queue if it is allowed to run in the express queue.

    SITE_REQUIREMENTS="${SITE_REQUIREMENTS} && (IsExpressSlot =?= True || IsFastSlot =?= True)"
  else
    SITE_REQUIREMENTS="${SITE_REQUIREMENTS} && IsFastSlot =?= True"
  fi
fi

#
# CMS Dashboard parameters
#
CMS_DASHBOARD_REPORTER_TGZ="${FARMOUT_HOME}/cmsdashboard_reporter.tgz"
FARMOUT_DASHBOARD_REPORTER="${FARMOUT_HOME}/farmout_dashboard.sh"
if ! [ -x "$FARMOUT_DASHBOARD_REPORTER" ] || ! [ -a "$CMS_DASHBOARD_REPORTER_TGZ" ]; then
   echo "No farmout_dashboard.sh or cmsdashboard_reporter.tgz found, so no reporting to the CMS dashboard."
   FARMOUT_DASHBOARD_REPORTER=""
   CMS_DASHBOARD_REPORTER_TGZ=""
fi
if [ "$INPUT_DBS_PATH" != "" ]; then
    dboard_datasetFull="dboard_datasetFull=${INPUT_DBS_PATH}"
fi
dboard="
dboard_taskId=${FARMOUT_USER}-`hostname -f`-\$(Cluster)
dboard_jobId=\$(Process)
dboard_sid=${FARMOUT_USER}-`hostname -f`-\$(Cluster).\$(Process)
${dboard_datasetFull}
dboard_application=`basename ${CMSSW_HOME}`
dboard_exe=${APPLICATION}
dboard_tool=farmout
dboard_scheduler=local-condor
dboard_taskType=analysis
dboard_broker=local-condor-`hostname -f`
dboard_user=${FARMOUT_USER}
dboard_SyncCE=${CMS_DASHBOARD_LOCAL_CE}
"
# convert newlines to spaces
dboard="`echo $dboard`"

if [ "$SAVE_FAILED_DATAFILES" != "" ]; then
  #cmsRun.sh checks for this in the environment
  save_failed_datafiles_env="SAVE_FAILED_DATAFILES=1"
fi

if [ "$JOB_GENERATES_OUTPUT_NAME" = "1" ]; then
  #cmsRun.sh checks for this in the environment
  job_generates_output_name_env="JOB_GENERATES_OUTPUT_NAME=1"
fi

if [ "$FWKLITE" = 1 ]; then
  # The "configTemplate" in this case is actually the fwklite script.

  # This causes it to run the fwklite script rather than running
  # cmsRun.
  fwklite_env="FWKLITE_SCRIPT=$(basename ${configTemplate})"

  # add the fwklite script to the input files to be transferred to the job
  if [ "$EXTRA_INPUTS" != "" ]; then
    EXTRA_INPUTS="${EXTRA_INPUTS},"
  fi
  EXTRA_INPUTS="${EXTRA_INPUTS}${configTemplate}"
fi

if [ "$VSIZE_LIMIT" != "" ]; then
  vsize_env="FARMOUT_VSIZE_LIMIT=${VSIZE_LIMIT}"
  if [ "$VSIZE_LIMIT" -lt "$MEMORY_REQUIREMENTS" ]; then
    echo
    echo "WARNING: --vsize-limit=$VSIZE_LIMIT is smaller than --memory-requirements=$MEMORY_REQUIREMENTS"
    echo
  fi
fi

if [ "$REQUIRES_WHOLE_MACHINE" == "1" ]; then
  REQUIRES_WHOLE_MACHINE_ATTR="+RequiresWholeMachine=true"

  #Note: the reference to per-slot Memory and Disk below are to prevent
  #condor from inserting the default requirements for these.  Instead,
  #we want to look at whole machine attributes.
  SITE_REQUIREMENTS="${SITE_REQUIREMENTS} && TARGET.CAN_RUN_WHOLE_MACHINE && TARGET.TotalMemory*1024 >= MY.ImageSize && TARGET.Memory > 0 && TARGET.TotalDisk >= MY.DiskUsage && TARGET.Disk > 0"
fi

if [ "$NO_SHARED_FS" != 1 ]; then
  do_getenv="true"
  requires_shared_fs="true"
  dboard="${dboard} CMS_DASHBOARD_REPORTER_TGZ=${CMS_DASHBOARD_REPORTER_TGZ}"
  dboard="${dboard} FARMOUT_DASHBOARD_REPORTER=${FARMOUT_DASHBOARD_REPORTER}"
  checkSharedFS $CMSSW_HOME
else
  do_getenv="false"
  requires_shared_fs="false"

  cmssw_env="DO_RUNTIME_CMSSW_SETUP=1"

  if [ "$VO_CMS_SW_DIR" = "" ]; then
    die "VO_CMS_SW_DIR is not defined"
  fi
  cmssw_env="$cmssw_env VO_CMS_SW_DIR=${VO_CMS_SW_DIR}"

  if [ "$CMSSW_VERSION" = "" ]; then
    die "CMSSW_VERSION is not defined"
  fi
  cmssw_env="$cmssw_env CMSSW_VERSION=${CMSSW_VERSION}"

  cmssw_env="$cmssw_env SCRAM_ARCH=$(scramv1 arch)"

  if [ "$EXTRA_INPUTS" != "" ]; then
    EXTRA_INPUTS="${EXTRA_INPUTS},"
  fi

  cmssw_env="$cmssw_env CMSSW_USER_CODE_TGZ=$(basename $userCodeTgz)"
  ${FARMOUT_HOME}/packUserCode "${CMSSW_HOME}" "$userCodeTgz" || die \
    "Farmout couldn't pack user code area.  Sorry."
  EXTRA_INPUTS="${EXTRA_INPUTS}${userCodeTgz}"

  if ! [ -z "${FARMOUT_DASHBOARD_REPORTER}" ]; then
    dboard="${dboard} CMS_DASHBOARD_REPORTER_TGZ=$(basename ${CMS_DASHBOARD_REPORTER_TGZ})"
    dboard="${dboard} FARMOUT_DASHBOARD_REPORTER=$(basename ${FARMOUT_DASHBOARD_REPORTER})"
    EXTRA_INPUTS="${EXTRA_INPUTS},${FARMOUT_DASHBOARD_REPORTER},${CMS_DASHBOARD_REPORTER_TGZ}"
  fi

fi

# First put all the submit file commands that are the same for all jobs.
    cat <<EOF > $submitFile
X509UserProxy        = ${proxy}
Universe             = vanilla
Executable           = $Executable
GetEnv               = ${do_getenv}
# tell glideins to run job with access to cvmfs (via parrot)
+RequiresCVMFS       = True
# for reference by our own requirements expression
+RequiresSharedFS    = ${requires_shared_fs}
${WANT_GLIDEIN}
Environment          = "${dboard} ${save_failed_datafiles_env} ${job_generates_output_name_env} ${fwklite_env} ${vsize_env} ${cmssw_env}"
Copy_To_Spool        = false
Notification         = never
WhenToTransferOutput = On_Exit
ShouldTransferFiles  = yes
on_exit_remove       = (ExitBySignal == FALSE && (ExitCode == 0 || ExitCode == ${FAIL_JOB} || NumJobStarts>3))
${QUEUE_ATTRIBUTE}
request_memory       = ${MEMORY_REQUIREMENTS}
request_disk         = $(($DISK_REQUIREMENTS*1024))
Requirements         = ${SITE_REQUIREMENTS}
# stop jobs from running if they blow up
periodic_hold        = DiskUsage/1024 > ${HOLD_DISK_USAGE_FACTOR}*${DISK_REQUIREMENTS}
job_ad_information_attrs = MachineAttrGLIDEIN_Site0,MachineAttrName0
${ACCOUNTING_GROUP}
${REQUIRES_WHOLE_MACHINE_ATTR}
EOF

# The following periodic hold expression caused jobs to get put on hold
# when condor happened to monitor the image size when the srm client
# was running, because the new version of the 64-bit jvm has a default
# 4GB heap size.
#ImageSize/1024 > ${HOLD_IMAGE_SIZE_FACTOR}*${MEMORY_REQUIREMENTS}

echo "Generating submit files in $runDir..."

#
# Loop over input files
#

EXCLUDE_ARG=
if [ "$EXCLUDE_INPUT_FILES" != "" ]; then
  EXCLUDE_ARG="-not -name $EXCLUDE_INPUT_FILES"
fi

find_command="find ${LOCAL_INPUT_DIR} -size +0 -name $MATCH_INPUT_FILES $EXCLUDE_ARG"

# This filters nothing.
filter_command="cat"

# Pipe through the filter script
if [ "$CLEAN_CRAB_DUPES" -eq 1 ]; then
  filter_command="${FARMOUT_HOME}/clean_crab_duplicates.py"
fi

dbs_query() {
    query="find file where"
    for arg in $*; do
      query="$query $arg"
    done
    python $DBSCMD_HOME/dbsCommandLine.py -c search ${DBS_URL_ARG} --query="$query" | grep /store/
}
if [ "$INPUT_DBS_PATH" != "" ]; then
  query="dataset=$INPUT_DBS_PATH"
  EXPANDED_INPUT_RUNS=$INPUT_RUNS
  if [ "$EXPANDED_INPUT_RUNS" != "" ]; then
    # Expand any ranges manually, DBS can't parse them
    EXPANDED_INPUT_RUNS=`${FARMOUT_HOME}/expandRunRange.py $EXPANDED_INPUT_RUNS`
    query="$query and run in ($EXPANDED_INPUT_RUNS)"
  fi
  find_command="dbs_query $query"

  # If we are applying a lumi mask, use a separate find command
  if [ "$LUMI_MASK" != "" ]; then
    find_command="${FARMOUT_HOME}/dbsMaskFiles.py -v $INPUT_DBS_PATH $LUMI_MASK"
    if [ "$INPUT_RUNS" != "" ]; then
      find_command="$find_command --run-range=$INPUT_RUNS"
    fi
  fi

  check_input_file_existence=${ASSUME_INPUT_FILES_EXIST:-1}
  prepend_local_input_dir=1
fi

if [ "$INPUT_FILE_LIST" != "" ]; then
  find_command="cat $INPUT_FILE_LIST"
  check_input_file_existence=${ASSUME_INPUT_FILES_EXIST:-1}
  prepend_local_input_dir=1
elif [ "$LAST_SUBMIT_DIR" != "" ]; then
  find_command="find_dag_inputs"
  check_input_file_existence=0
fi

job_output_search_dirs="$OUTPUT_DIR"
if [ "$OUTPUT_DIR" != "." ] && [ "$SKIP_EXISTING_OUTPUT" = 1 ]; then
    subdir=1
    while outputDirExists "$OUTPUT_DIR/$subdir"; do
        job_output_search_dirs="$job_output_search_dirs $OUTPUT_DIR/$subdir"
        subdir=$(($subdir+1))
    done
fi

count=0
output_file_count=0
$find_command | $filter_command | 
while read nextInputFile
do
    inputFileNames=""
    parentJobTags=""
    i=$INPUT_FILES_PER_JOB
    while [ $i -gt 0 ]; do
        nextInputFileOrigName="${nextInputFile}"
        if [ "$prepend_local_input_dir" = 1 ]; then
            nextInputFile="${LOCAL_INPUT_DIR}${nextInputFile}"
        fi
        nextInputFilePosix="${nextInputFile}"
        nextInputFile="${nextInputFile#$LOCAL_POSIX_PREFIX}"

        if [ "$check_input_file_existence" = "1" ]; then
            if ! [ -f "$nextInputFilePosix" ]; then
                echo "$nextInputFilePosix does not exist, skipping"

                if [ "$SAVE_MISSING_INPUT_FILE_LIST" != "" ]; then
                    echo "$nextInputFileOrigName" >> "$SAVE_MISSING_INPUT_FILE_LIST"
                fi

		read nextInputFile || break
                continue
            fi
        fi

        parentJobTag="$(basename "${nextInputFileOrigName}" .root)"

        if [ "$inputFileNames" = "" ]; then
            inputFileNames="\"${INPUT_BASE}$nextInputFile\""
            firstInputFile="${nextInputFile}"
            parentJobTags="${parentJobTag}"
        else
            inputFileNames="${inputFileNames},\"${INPUT_BASE}$nextInputFile\""
            parentJobTags="${parentJobTags} ${parentJobTag}"
        fi

        i=$(($i-1))
        if [ $i -gt 0 ]; then
            read nextInputFile || break
        fi
    done
    [ "$inputFileNames" = "" ] && break

#
# Name the files
#
    job_output_dir="$OUTPUT_DIR"
    if [ "$OUTPUT_DIR" != "." ]; then
        if [ "$OUTPUT_FILES_PER_SUBDIR" != "0" ]; then
            job_output_dir="$job_output_dir/"$(($output_file_count/$OUTPUT_FILES_PER_SUBDIR+1))
        fi
    fi
    output_file_count=$(($output_file_count+1))

    rootname=`basename $firstInputFile .root`
    jobtag=$runName-$rootname
    if [ ${#jobtag} -gt 245 ]; then
        # Condor (as of 6.9.4) cannot handle file names longer than 256
        jobtag=$rootname
    fi

    consub=$jobtag.sub
    conlog=$jobtag.log
    stdout=$jobtag.out
    stderr=$jobtag.err
    if [ "$FWKLITE" = 1 ]; then
      jobcfg=$jobtag.inputs
    else
      jobcfg=$jobtag.$CFG_EXTENSION
    fi
    outputFileName=$jobtag.root
    #  Keep track of the output files we will create
    echo $job_output_dir/$outputFileName >> $runDir/outputfiles.txt
    inputfiles="$jobcfg"
    if [ "$EXTRA_INPUTS" != "" ]; then
      inputfiles="$jobcfg,$EXTRA_INPUTS"
    fi

#
# Create and set to the job subdirectory
#

    cd $runDir || die "Failed to cd to $runDir"


    if [ "$SKIP_EXISTING_JOBS" = "1" ]; then
      if [ -d $jobtag ]; then
        continue
      fi
      echo "Job $jobtag did not exist."
    fi

    if [ "$SKIP_EXISTING_OUTPUT" = "1" ]; then
      # Check for existing output file
      found=0
      for search_dir in $job_output_search_dirs; do
        if outputFileExists $search_dir/$outputFileName; then
          found=1
          break
        fi
      done
      if [ "$found" = 1 ]; then
        continue
      fi
      echo "Output file $outputFileName did not exist."
    fi

    if [ "$RESUBMIT_FAILED_JOBS" = "1" ]; then
      # Check for existing failed job
      if [ -a $jobtag/$conlog ] && jobFailed $jobtag/$conlog; then
        echo "Resubmitting failed job $jobtag"
      else
        continue
      fi
    fi

    count=$(($count+1))
    if [ ! -z $JOB_LIMIT ] && [ $count -gt $JOB_LIMIT ]; then
        echo "Job limit $JOB_LIMIT reached.  Halting creation of jobs."

        # eat up rest of input to avoid broken pipe message
        while read junk; do junk=""; done

        break
    fi
    echo -n "."

    mkdir -p $jobtag
    cd $jobtag || die "Failed to cd to $jobtag"

    jobSubmitFile=$submitFile
    if [ -n "$OUTPUT_DAG_FILE" ]; then
        jobSubmitFile="$(incrfname "$PWD/submit")"
        cp "$submitFile" "$jobSubmitFile"
        echo "JOB $jobtag $jobSubmitFile" >> "$OUTPUT_DAG_FILE"
        if [ "$LAST_SUBMIT_DIR" != "" ]; then
            for parent in $parentJobTags; do
                echo "PARENT $parent CHILD $jobtag" >> "$OUTPUT_DAG_FILE"
            done
        fi
    fi

#
# Prepare job configuration file
#

    if [ "$FWKLITE" != 1 ]; then
      sed < $configTemplate \
          "s|\\\$inputFileNames|$inputFileNames|g;
           s|\\\$outputFileName|$outputFileName|g;
           s|\\\$jobNumber|$output_file_count|g" > $jobcfg
    else
      # convert quoted comma-separated inputfile list to newline-separated
      echo "$inputFileNames" | sed \
        "s|\"||g;
         s|,|\n|g" > $jobcfg
    fi

    job_extra_args=""
    if [ "$EXTRA_CMSRUN_ARGS" != "" ]; then
      # Note that we just strip double-quotes out of inputFileNames,
      # because cmsRun does not expect the file names to be quoted
      # on the commandline.
      escaped_inputFileNames=$(echo "$inputFileNames" | sed "s|'|''|g; s|\"||g")
      escaped_outputFileName=$(echo "$outputFileName" | sed "s|'|''|g; s|\"|\"\"|g")
      job_extra_args=$(echo "$EXTRA_CMSRUN_ARGS" | sed \
          "s|\\\$inputFileNames|$escaped_inputFileNames|g;
           s|\\\$outputFileName|$escaped_outputFileName|g;
           s|\\\$jobNumber|$output_file_count|g" )
    fi

#
# Prepare condor submit file for the job
#
    cat >> $jobSubmitFile <<EOF

InitialDir           = $PWD
Arguments            = "$jobcfg `basename $outputFileName` $job_output_dir$job_extra_args"
Transfer_Input_Files = $inputfiles
output               = $stdout
error                = $stderr
Log                  = $conlog
Queue
EOF

    # Explicitly track output files in case later invocations require them as
    # inputs.
    if [ "$OUTPUT_DAG_FILE" != "" ]; then
        echo "# DAG_OUTPUT_FILENAME ${job_output_dir#${SRM_SERVER}}/$outputFileName" >> $jobSubmitFile
    fi

    touch $stdout $stderr $conlog
    ln -f $stdout $SHARED_LOGS/$stdout
    ln -f $stderr $SHARED_LOGS/$stderr
    ln -f $conlog $SHARED_LOGS/$conlog

done || die

echo ""

cd $runDir

#
# Submit the job
#

# Wrap the real DAG with a thin outer DAG. The real DAG becomes a node with a
# SCRIPT POST that will inform the user about the outcome of their workflow.
wrapDag () {
    cat <<EOF > ${OUTPUT_DAG_FILE}.config
DAGMAN_MAX_SUBMIT_ATTEMPTS = 10
EOF
    cat <<EOF > ${OUTPUT_DAG_FILE}.wrapper
SUBDAG EXTERNAL farmout $OUTPUT_DAG_FILE
SCRIPT POST farmout ${FARMOUT_HOME}/dag-wrapper.sh \$JOBID \$RETURN \
    OUTPUT_DAG_FILE=$OUTPUT_DAG_FILE \
    FARMOUT_USER=$FARMOUT_USER \
    USER=$USER \
    OUTPUT_DIR=${OUTPUT_DIR}
EOF
  cat <<EOF >> ${OUTPUT_DAG_FILE}
NODE_STATUS_FILE ${OUTPUT_DAG_FILE}.status 60
CONFIG ${OUTPUT_DAG_FILE}.config
EOF
}

if [ -z "$OUTPUT_DAG_FILE" ] && ! grep -q ^Queue $submitFile; then
  echo "No jobs were created, so there is nothing to do."
  rm $submitFile
  exit 0
elif [ -z "$NO_SUBMIT" ]; then
  # The job is messed up if X509_USER_PROXY is defined, because then
  # Condor doesn't override this value to point to the actual proxy
  # location on the execution node.
  unset X509_USER_PROXY

  if [ -n "$OUTPUT_DAG_FILE" ]; then
    wrapDag
    submitCommand="$submitDagCommand"
    submitFile="${OUTPUT_DAG_FILE}.wrapper"
  fi

  # Convoluted way of sending a message to the screen
  # and to a log file about the sucess of submitting
  # jobs and then "die"ing if needed.
  $submitCommand $submitFile >> $farmoutLogFile
  if [ $? -eq 1 ]; then
    echo "Failed to submit $submitFile" >> $farmoutLogFile
    die 
  else 
    cat $farmoutLogFile
    echo "Date: "`date` >> $farmoutLogFile
  fi

  if [ -n "$OUTPUT_DAG_FILE" ]; then
    echo "You can monitor your workflow's progess by watching ${OUTPUT_DAG_FILE}.dagman.out"
    echo "A summary of the status of all of the jobs in your workflow will be"
    echo "written to the following file every 60 seconds:"
    echo "  ${OUTPUT_DAG_FILE}.status"
  fi

  echo ""
  echo "Your jobs should show up in ~6 minutes"
  echo "at the NEW job monitoring web page :"
  echo "    http://www.hep.wisc.edu/cms/comp/jobs/"

elif [ -n "$OUTPUT_DAG_FILE" ]; then
  echo "Submit DAG file $OUTPUT_DAG_FILE has been created but not submitted."
else
  echo "Submit file $submitFile has been created but not submitted."
fi

echo -n "Jobs for $jobName are created in "
pwd
cd $originalDir