From 43db09299714662a6857818c3c080e702ce8f74f Mon Sep 17 00:00:00 2001 From: "marius.baseten" Date: Wed, 28 Aug 2024 16:25:35 -0700 Subject: [PATCH] Debug why streaming hangs --- pyproject.toml | 2 +- truss/templates/server/common/truss_server.py | 6 ++++-- truss/templates/server/model_wrapper.py | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6fc7b9263..918def06e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "truss" -version = "0.9.30rc667" +version = "0.9.30rc668" description = "A seamless bridge from model development to model delivery" license = "MIT" readme = "README.md" diff --git a/truss/templates/server/common/truss_server.py b/truss/templates/server/common/truss_server.py index fe502a7ce..ad997e30e 100644 --- a/truss/templates/server/common/truss_server.py +++ b/truss/templates/server/common/truss_server.py @@ -135,8 +135,10 @@ async def predict( model: ModelWrapper = self._safe_lookup_model(model_name) self.check_healthy(model) - # TODO: warn (or skip tracing), if no parent info was found? - trace_ctx = otel_propagate.extract(request.headers) + + print(f"############\n{request.headers}") + + trace_ctx = otel_propagate.extract(request.headers) or None # This is the top-level span in the truss-server, so we set the context here. # Nested spans "inherit" context automatically. 
with self._tracer.start_as_current_span( diff --git a/truss/templates/server/model_wrapper.py b/truss/templates/server/model_wrapper.py index cd309adf2..1f62d364e 100644 --- a/truss/templates/server/model_wrapper.py +++ b/truss/templates/server/model_wrapper.py @@ -203,7 +203,7 @@ def try_load(self): retry( self._model.load, NUM_LOAD_RETRIES, - self._logger.warn, + self._logger.warning, "Failed to load model.", gap_seconds=1.0, ) @@ -345,7 +345,7 @@ async def _stream_with_background_task( self.write_response_to_queue(response_queue, async_generator, span) ) # Defer the release of the semaphore until the write_response_to_queue task. - gen_task.add_done_callback(lambda _: release_and_end) + gen_task.add_done_callback(lambda _: release_and_end()) # The gap between responses in a stream must be < streaming_read_timeout async def _response_generator():