diff --git a/bin/jobsub_submit b/bin/jobsub_submit
index 41f74ea9..3b68217e 100755
--- a/bin/jobsub_submit
+++ b/bin/jobsub_submit
@@ -58,7 +58,7 @@ from submit_support import (
 )
 from tarfiles import do_tarballs
 from token_mods import get_job_scopes, use_token_copy
-from tracing import as_span, log_host_time
+from tracing import as_span, log_host_time, get_propagator_carrier
 from utils import (
     set_extras_n_fix_units,
     cleanup,
@@ -70,17 +70,9 @@ from version import print_version, print_support_email
 verbose = 0  # pylint: disable=invalid-name
 
 
-# pylint: disable=too-many-branches,too-many-statements
-@as_span("jobsub_submit", is_main=True)
+@as_span("jobsub", is_main=True)
 def main():
-    """script mainline:
-    - parse args
-    - get credentials
-    - handle tarfile options
-    - set added values from environment, etc.
-    - convert/render template files to submission files
-    - launch
-    """
+    """main entrypoint, handles basic arg parsing and creates parent span for submission"""
     global verbose  # pylint: disable=global-statement,invalid-name
 
     parser = get_parser()
@@ -104,8 +96,6 @@ def main():
 
     verbose = args.verbose
 
-    log_host_time(verbose)
-
     # if they were trying to pass LD_LIBRARY_PATH to the job, get it from HIDE_LD_LIBRARY_PATH
     if "LD_LIBRARY_PATH" in args.environment and os.environ.get(
         "HIDE_LD_LIBRARY_PATH", ""
@@ -115,6 +105,33 @@ def main():
             for x in args.environment
         ]
 
+    # get tracing propagator traceparent id so we can use it in templates, etc.
+    if args.traceparent is None:
+        carrier = get_propagator_carrier()
+        if carrier and "traceparent" in carrier:
+            args.traceparent = carrier["traceparent"]
+        else:
+            args.traceparent = ""
+
+    if verbose > 0:
+        sys.stderr.write(f"Setting traceparent: {args.traceparent}\n")
+
+    submit(args)
+
+
+# pylint: disable=too-many-branches,too-many-statements
+@as_span("jobsub_submit")
+def submit(args):
+    """script mainline:
+    - get credentials
+    - handle tarfile options
+    - set added values from environment, etc.
+    - convert/render template files to submission files
+    - launch
+    """
+
+    log_host_time(args.verbose)
+
     if args.version:
         print_version()
 
diff --git a/lib/get_parser.py b/lib/get_parser.py
index d372ba84..a6bb9207 100644
--- a/lib/get_parser.py
+++ b/lib/get_parser.py
@@ -240,6 +240,13 @@ def get_base_parser(add_condor_epilog: bool = False) -> argparse.ArgumentParser:
         action=CheckIfValidSchedd,
         help=argparse.SUPPRESS,
     )
+    group.add_argument(
+        "--traceparent",
+        help="Trace context",
+        # an empty TRACEPARENT counts as unset (None), so that main() in
+        # jobsub_submit still falls back to get_propagator_carrier()
+        default=os.environ.get("TRACEPARENT") or None,
+    )
 
     return parser
 
diff --git a/lib/utils.py b/lib/utils.py
index c945d74d..9b94c474 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -30,7 +30,6 @@ import uuid
 
 import classad  # type: ignore # pylint: disable=import-error
 
-from tracing import get_propagator_carrier
 import token_mods
 from creds import CredentialSet
 
@@ -199,18 +198,6 @@ def set_extras_n_fix_units(
 
     set_some_extras(args, schedd_name, cred_set)
 
-    #
-    # get tracing propagator traceparent id so we can use it in templates, etc.
-    #
-    carrier = get_propagator_carrier()
-    if carrier and "traceparent" in carrier:
-        args["traceparent"] = carrier["traceparent"]
-    else:
-        args["traceparent"] = ""
-
-    if args["verbose"] > 0:
-        sys.stderr.write(f"Setting traceparent: {args['traceparent']}\n")
-
     # Read in credentials
     for cred_type, cred_path in vars(cred_set).items():
         args[cred_type] = cred_path
diff --git a/templates/simple/simple.sh b/templates/simple/simple.sh
index c74e0c28..f6a32c78 100644
--- a/templates/simple/simple.sh
+++ b/templates/simple/simple.sh
@@ -135,8 +135,57 @@ redirect_output_finish(){
 {%endif%}
 }
 
+# trace_start: if a W3C traceparent was handed to the job, open a child
+# "job" span: remember the incoming trace id and parent span id, generate
+# a new span id and start time, and re-export TRACEPARENT with the new
+# span id.
+trace_start(){
+    if ( is_set $TRACEPARENT ); then
+        version="00"
+        # traceparent layout: version-traceid-parentid-flags
+        export JSB_TRACE_ID=`echo "$TRACEPARENT" | cut -d'-' -f 2`
+        export JSB_PARENT_SPAN_ID=`echo "$TRACEPARENT" | cut -d'-' -f 3`
+        sample=`echo "$TRACEPARENT" | cut -d'-' -f 4`
+        # 8-byte random span ID e.g. adce7acee6441ec7
+        # ("/1" makes hexdump format one byte at a time; the default 4-byte
+        # unit drops leading zeros and yields ids shorter than 16 digits)
+        export JSB_SPAN_ID=`head -c8 /dev/urandom | hexdump -v -e '/1 "%02x"'`
+        # epoch nanoseconds e.g. 1544712660000000000
+        export JSB_SPAN_START=`date +%s%N`
+        # create child span
+        export TRACEPARENT="$version-$JSB_TRACE_ID-$JSB_SPAN_ID-$sample"
+    fi
+}
+
+# trace_finish: close the span opened by trace_start and POST it as
+# OTLP/JSON to the traces endpoint, if one is configured.
+trace_finish(){
+    # TODO check some other env vars
+    # a signal-specific traces endpoint takes precedence; only derive it
+    # from the generic endpoint when it was not given explicitly
+    if ( is_set $OTEL_EXPORTER_OTLP_ENDPOINT ) && ! ( is_set $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT ); then
+        export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_ENDPOINT%/}/v1/traces"
+    fi
+    if ( is_set $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT ) && ( is_set $TRACEPARENT ); then
+        export JSB_SPAN_END=`date +%s%N`
+        # example trace JSON https://github.com/open-telemetry/opentelemetry-proto/blob/v1.3.2/examples/trace.json
+        payload=`cat << __HEREDOC__
+{"resourceSpans":[{"resource":{"attributes":[{"key":"service.name","value":{"stringValue":"fife"}}]},
+"scopeSpans":[{"scope":{"name":"jobsub.wrapper","version":"1.0.0","attributes":[]},
+"spans":[{"traceId":"$JSB_TRACE_ID","spanId":"$JSB_SPAN_ID","parentSpanId":"$JSB_PARENT_SPAN_ID","name":"job","startTimeUnixNano":"$JSB_SPAN_START","endTimeUnixNano":"$JSB_SPAN_END","kind":1,
+ "attributes":[{"key":"hostname","value":{"stringValue":"$HOSTNAME"}},
+ {"key":"job.id","value":{"stringValue":"$JOBSUBJOBID"}}]}]}]}]}
+__HEREDOC__`
+        echo "$payload" 1>&2
+        # NOTE(review): -k disables TLS certificate verification; confirm it is needed
+        # --max-time keeps an unreachable collector from stalling job exit
+        echo "$payload" | curl -s -k --max-time 30 --data-binary @- -XPOST -H 'Content-Type: application/json' -H 'Accept: application/json' "$OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"
+    fi
+}
+
 
 normal_exit(){
+    trace_finish
     redirect_output_finish
 
     # maybe don't cleanup so we can transfer files back...
@@ -202,6 +251,7 @@ touch .empty_file
 args="$@"
 set - ""
 [[ "$JOBSUB_DEBUG" ]] && set_jobsub_debug
+trace_start
 
 export JSB_TMP=$_CONDOR_SCRATCH_DIR/jsb_tmp
 mkdir -p $JSB_TMP