From be297c485a28e2a4ea98e07a4f129bcdc851ade6 Mon Sep 17 00:00:00 2001 From: Kevin Retzke Date: Wed, 11 Dec 2024 09:42:15 -0600 Subject: [PATCH 1/3] break jobsub_submit main into two functions The first (main()) does basic arg parsing, and creates an overarching tracing span for the submission. The second (submit()) does the rest. The primary motivation is to create a container tracing span for the submit steps, so they do not clutter the trace graph and provide a clear separation between submit and execution stages. --- bin/jobsub_submit | 43 ++++++++++++++++++++++++++++++------------- lib/get_parser.py | 5 +++++ lib/utils.py | 13 ------------- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/bin/jobsub_submit b/bin/jobsub_submit index 41f74ea9..3b68217e 100755 --- a/bin/jobsub_submit +++ b/bin/jobsub_submit @@ -58,7 +58,7 @@ from submit_support import ( ) from tarfiles import do_tarballs from token_mods import get_job_scopes, use_token_copy -from tracing import as_span, log_host_time +from tracing import as_span, log_host_time, get_propagator_carrier from utils import ( set_extras_n_fix_units, cleanup, @@ -70,17 +70,9 @@ from version import print_version, print_support_email verbose = 0 # pylint: disable=invalid-name -# pylint: disable=too-many-branches,too-many-statements -@as_span("jobsub_submit", is_main=True) +@as_span("jobsub", is_main=True) def main(): - """script mainline: - - parse args - - get credentials - - handle tarfile options - - set added values from environment, etc. 
- - convert/render template files to submission files - - launch - """ + """main entrypoint, handles basic arg parsing and creates parent span for submission""" global verbose # pylint: disable=global-statement,invalid-name parser = get_parser() @@ -104,8 +96,6 @@ def main(): verbose = args.verbose - log_host_time(verbose) - # if they were trying to pass LD_LIBRARY_PATH to the job, get it from HIDE_LD_LIBRARY_PATH if "LD_LIBRARY_PATH" in args.environment and os.environ.get( "HIDE_LD_LIBRARY_PATH", "" @@ -115,6 +105,33 @@ def main(): for x in args.environment ] + # get tracing propagator traceparent id so we can use it in templates, etc. + if args.traceparent is None: + carrier = get_propagator_carrier() + if carrier and "traceparent" in carrier: + args.traceparent = carrier["traceparent"] + else: + args.traceparent = "" + + if verbose > 0: + sys.stderr.write(f"Setting traceparent: {args.traceparent}\n") + + submit(args) + + +# pylint: disable=too-many-branches,too-many-statements +@as_span("jobsub_submit") +def submit(args): + """script mainline: + - get credentials + - handle tarfile options + - set added values from environment, etc. 
+ - convert/render template files to submission files + - launch + """ + + log_host_time(args.verbose) + if args.version: print_version() diff --git a/lib/get_parser.py b/lib/get_parser.py index d372ba84..a6bb9207 100644 --- a/lib/get_parser.py +++ b/lib/get_parser.py @@ -240,6 +240,11 @@ def get_base_parser(add_condor_epilog: bool = False) -> argparse.ArgumentParser: action=CheckIfValidSchedd, help=argparse.SUPPRESS, ) + group.add_argument( + "--traceparent", + help="Trace context", + default=os.environ.get("TRACEPARENT", None), + ) return parser diff --git a/lib/utils.py b/lib/utils.py index c945d74d..9b94c474 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -30,7 +30,6 @@ import uuid import classad # type: ignore # pylint: disable=import-error -from tracing import get_propagator_carrier import token_mods from creds import CredentialSet @@ -199,18 +198,6 @@ def set_extras_n_fix_units( set_some_extras(args, schedd_name, cred_set) - # - # get tracing propagator traceparent id so we can use it in templates, etc. 
- # - carrier = get_propagator_carrier() - if carrier and "traceparent" in carrier: - args["traceparent"] = carrier["traceparent"] - else: - args["traceparent"] = "" - - if args["verbose"] > 0: - sys.stderr.write(f"Setting traceparent: {args['traceparent']}\n") - # Read in credentials for cred_type, cred_path in vars(cred_set).items(): args[cred_type] = cred_path From 8cf6d93e295969e9b0b46873f2bc04cddb78e14e Mon Sep 17 00:00:00 2001 From: Kevin Retzke Date: Wed, 11 Dec 2024 11:13:49 -0600 Subject: [PATCH 2/3] add trace span to simple wrapper --- templates/simple/simple.sh | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/templates/simple/simple.sh b/templates/simple/simple.sh index c74e0c28..039ed4e9 100644 --- a/templates/simple/simple.sh +++ b/templates/simple/simple.sh @@ -135,8 +135,43 @@ redirect_output_finish(){ {%endif%} } +trace_start(){ + if ( is_set $TRACEPARENT ); then + version="00" + export JSB_TRACE_ID=`echo $TRACEPARENT | cut -d'-' -f 2` + export JSB_PARENT_SPAN_ID=`echo $TRACEPARENT | cut -d'-' -f 3` + sample=`echo $TRACEPARENT | cut -d'-' -f 4` + # 8-byte random span ID, always 16 hex chars (format one byte at a time so every byte is zero-padded) e.g. adce7acee6441ec7 + export JSB_SPAN_ID=`head -c8 /dev/urandom | hexdump -v -e '1/1 "%02x"'` + # epoch nanoseconds e.g. 
1544712660000000000 + export JSB_SPAN_START=`date +%s%N` + # create child span + export TRACEPARENT="$version-$JSB_TRACE_ID-$JSB_SPAN_ID-$sample" + fi +} + +trace_finish(){ + # TODO check some other env vars; an explicitly set traces endpoint takes precedence over the generic OTLP endpoint + if ( is_set $OTEL_EXPORTER_OTLP_ENDPOINT ) && ! ( is_set $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT ); then + export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="$OTEL_EXPORTER_OTLP_ENDPOINT/v1/traces" + fi + if (( is_set $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT ) && ( is_set $TRACEPARENT )); then + export JSB_SPAN_END=`date +%s%N` + payload=`cat << __HEREDOC__ +{"resourceSpans":[{"resource":{"attributes":[{"key":"service.name","value":{"stringValue":"fife"}}]}, +"scopeSpans":[{"scope":{"name":"jobsub.wrapper","version":"1.0.0","attributes":[]}, +"spans":[{"traceId":"$JSB_TRACE_ID","spanId":"$JSB_SPAN_ID","parentSpanId":"$JSB_PARENT_SPAN_ID","name":"job","startTimeUnixNano":"$JSB_SPAN_START","endTimeUnixNano":"$JSB_SPAN_END","kind":1, + "attributes":[{"key":"hostname","value":{"stringValue":"$HOSTNAME"}}, + {"key":"job.id","value":{"stringValue":"$JOBSUBJOBID"}}]}]}]}]} +__HEREDOC__` + echo $payload 1>&2 + echo $payload | curl -s -k --data-binary @- -XPOST -H 'Content-Type: application/json' -H 'Accept: application/json' $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT + fi +} + normal_exit(){ + trace_finish redirect_output_finish # maybe don't cleanup so we can transfer files back... 
@@ -202,6 +237,7 @@ touch .empty_file args="$@" set - "" [[ "$JOBSUB_DEBUG" ]] && set_jobsub_debug +trace_start export JSB_TMP=$_CONDOR_SCRATCH_DIR/jsb_tmp mkdir -p $JSB_TMP From 8d7570533bdb93f46955f08082d31ae80538ad96 Mon Sep 17 00:00:00 2001 From: Kevin Retzke Date: Wed, 11 Dec 2024 13:13:25 -0600 Subject: [PATCH 3/3] add reference to example OTLP trace JSON --- templates/simple/simple.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/templates/simple/simple.sh b/templates/simple/simple.sh index 039ed4e9..f6a32c78 100644 --- a/templates/simple/simple.sh +++ b/templates/simple/simple.sh @@ -157,6 +157,7 @@ trace_finish(){ fi if (( is_set $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT ) && ( is_set $TRACEPARENT )); then export JSB_SPAN_END=`date +%s%N` + # example trace JSON https://github.com/open-telemetry/opentelemetry-proto/blob/v1.3.2/examples/trace.json payload=`cat << __HEREDOC__ {"resourceSpans":[{"resource":{"attributes":[{"key":"service.name","value":{"stringValue":"fife"}}]}, "scopeSpans":[{"scope":{"name":"jobsub.wrapper","version":"1.0.0","attributes":[]},