diff --git a/pyproject.toml b/pyproject.toml
index 5f4039731..6fc7b9263 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "truss"
-version = "0.9.30rc5"
+version = "0.9.30rc667"
 description = "A seamless bridge from model development to model delivery"
 license = "MIT"
 readme = "README.md"
diff --git a/truss/contexts/image_builder/serving_image_builder.py b/truss/contexts/image_builder/serving_image_builder.py
index e11a6d92d..4442ca8d4 100644
--- a/truss/contexts/image_builder/serving_image_builder.py
+++ b/truss/contexts/image_builder/serving_image_builder.py
@@ -459,11 +459,13 @@ def copy_into_build_dir(from_path: Path, path_in_build_dir: str):
     # are detected and cause a build failure. If there are no
     # requirements provided, we just pass an empty string,
     # as there's no need to install anything.
-    # TODO: above reasoning leads to inconsistencies. Needs revisit.
+    # TODO: the above reasoning leads to inconsistencies. To get consistent
+    # images, tentatively always add the server requirements. This needs more
+    # thought and potentially a re-design.
     user_provided_python_requirements = (
         base_server_requirements + spec.requirements_txt
         if spec.requirements
-        else ""
+        else base_server_requirements
    )
    if spec.requirements_file is not None:
        copy_into_build_dir(
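Note on the change above: the base server requirements are now installed unconditionally, with user requirements appended when present, instead of falling back to an empty string. A minimal sketch of the merge rule; the function and variable names here are illustrative, not the actual truss API:

```python
# Hedged sketch of the new requirements merge behavior (names assumed).
from typing import Optional


def merged_requirements(base: str, user: Optional[str]) -> str:
    # Previously this returned "" when no user requirements were given;
    # now the base server requirements are always part of the image.
    return base + user if user else base


base = "fastapi\nmsgpack\n"
assert merged_requirements(base, None) == base
assert merged_requirements(base, "numpy\n") == base + "numpy\n"
```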
""" + global _truss_tracer + if _truss_tracer: + return _truss_tracer + span_processors: List[sdk_trace.SpanProcessor] = [] - if otlp_endpoint := os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT"): + if otlp_endpoint := os.getenv(OTEL_EXPORTER_OTLP_ENDPOINT): + logger.info(f"Exporting trace data to {OTEL_EXPORTER_OTLP_ENDPOINT}.") otlp_exporter = oltp_exporter.OTLPSpanExporter(endpoint=otlp_endpoint) otlp_processor = sdk_trace.export.BatchSpanProcessor(otlp_exporter) span_processors.append(otlp_processor) - if tracing_log_file := os.getenv("OTEL_TRACING_NDJSON_FILE"): + if tracing_log_file := os.getenv(OTEL_TRACING_NDJSON_FILE): + logger.info("Exporting trace data to `tracing_log_file`.") json_file_exporter = JSONFileExporter(pathlib.Path(tracing_log_file)) file_processor = sdk_trace.export.SimpleSpanProcessor(json_file_exporter) span_processors.append(file_processor) + if honeycomb_dataset := os.getenv(HONEYCOMB_DATASET): + if HONEYCOMB_API_KEY in secrets: + honeycomb_api_key = secrets[HONEYCOMB_API_KEY] + logger.info("Exporting trace data to honeycomb.") + honeycomb_exporter = oltp_exporter.OTLPSpanExporter( + endpoint="https://api.honeycomb.io/v1/traces", + headers={ + "x-honeycomb-team": honeycomb_api_key, + "x-honeycomb-dataset": honeycomb_dataset, + }, + ) + honeycomb_processor = sdk_trace.export.BatchSpanProcessor( + honeycomb_exporter + ) + span_processors.append(honeycomb_processor) + if span_processors: logger.info("Instantiating truss tracer.") resource = resources.Resource.create({resources.SERVICE_NAME: "TrussServer"}) @@ -67,7 +97,8 @@ def get_truss_tracer() -> trace.Tracer: logger.info("Using no-op tracing.") tracer = sdk_trace.NoOpTracer() - return tracer + _truss_tracer = tracer + return _truss_tracer @contextlib.contextmanager @@ -82,14 +113,9 @@ def detach_context() -> Iterator[None]: be wrapped in this context for isolation. """ current_context = context.get_current() - # Set the current context to an invalid span context, effectively clearing it. - # This makes sure inside the context a new root is context is created. - transient_token = context.attach( - trace.set_span_in_context( - trace.INVALID_SPAN, - trace.INVALID_SPAN_CONTEXT, # type: ignore[arg-type] - ) - ) + # Create an invalid tracing context. This forces that tracing code inside this + # context manager creates a new root tracing context. + transient_token = context.attach(trace.set_span_in_context(trace.INVALID_SPAN)) try: yield finally: @@ -105,9 +131,11 @@ def section_as_event(span: sdk_trace.Span, section_name: str) -> Iterator[None]: Note that events are much cheaper to create than dedicated spans. 
""" t0 = time.time() - span.add_event(f"start-{section_name}") + span.add_event(f"start: {section_name}") try: yield finally: t1 = time.time() - span.add_event(f"done-{section_name}", attributes={"duration_sec": t1 - t0}) + span.add_event( + f"done: {section_name}", attributes={ATTR_NAME_DURATION: t1 - t0} + ) diff --git a/truss/templates/server/common/truss_server.py b/truss/templates/server/common/truss_server.py index 5ae611e10..fe502a7ce 100644 --- a/truss/templates/server/common/truss_server.py +++ b/truss/templates/server/common/truss_server.py @@ -23,6 +23,7 @@ from opentelemetry import propagate as otel_propagate from opentelemetry.sdk import trace as sdk_trace from shared.logging import setup_logging +from shared.secrets_resolver import SecretsResolver from shared.serialization import ( DeepNumpyEncoder, truss_msgpack_deserialize, @@ -173,16 +174,19 @@ async def predict( response_headers = {} if self.is_binary(request): - response_headers["Content-Type"] = "application/octet-stream" - return Response( - content=truss_msgpack_serialize(response), headers=response_headers - ) + with tracing.section_as_event(span, "binary-serialize"): + response_headers["Content-Type"] = "application/octet-stream" + return Response( + content=truss_msgpack_serialize(response), + headers=response_headers, + ) else: - response_headers["Content-Type"] = "application/json" - return Response( - content=json.dumps(response, cls=DeepNumpyEncoder), - headers=response_headers, - ) + with tracing.section_as_event(span, "json-serialize"): + response_headers["Content-Type"] = "application/json" + return Response( + content=json.dumps(response, cls=DeepNumpyEncoder), + headers=response_headers, + ) async def schema(self, model_name: str) -> Dict: model: ModelWrapper = self._safe_lookup_model(model_name) @@ -223,7 +227,8 @@ def __init__( config: Dict, setup_json_logger: bool = True, ): - tracer = tracing.get_truss_tracer() + secrets = SecretsResolver.get_secrets(config) + tracer = tracing.get_truss_tracer(secrets) self.http_port = http_port self._config = config self._model = ModelWrapper(self._config, tracer) diff --git a/truss/templates/server/model_wrapper.py b/truss/templates/server/model_wrapper.py index 2569b6894..cd309adf2 100644 --- a/truss/templates/server/model_wrapper.py +++ b/truss/templates/server/model_wrapper.py @@ -20,7 +20,6 @@ Mapping, NoReturn, Optional, - Set, TypeVar, Union, ) @@ -50,38 +49,26 @@ TRT_LLM_EXTENSION_NAME = "trt_llm" -def aprint(msg: str): - task_id = str(hash(id(asyncio.current_task())))[:3] - print(f"Task[ {task_id} ]: {msg}") - - @asynccontextmanager async def deferred_semaphore_and_span( - semaphore: Semaphore, span: sdk_trace.Span + semaphore: Semaphore, span: trace.Span ) -> AsyncGenerator[Callable[[], Callable[[], None]], None]: """ Context manager that allows deferring the release of a semaphore and the ending of a trace span. - Yields a function that, when called, releases the semaphore and ends the span. If - that function is not called, the resources are cleand up when exiting the context. + Yields a function that, when called, releases the semaphore and ends the span. + If that function is not called, the resources are cleand up when exiting. """ - val_before = semaphore.value - aprint("requesting semaphore") await semaphore.acquire() - val_after = semaphore.value - aprint(f"acquired semaphore. 
{val_before} -> {val_after}") trace.use_span(span, end_on_exit=False) deferred = False def release_and_end() -> None: - aprint("called release.") semaphore.release() span.end() - aprint("releases semaphore.") def defer() -> Callable[[], None]: - aprint("called defer.") nonlocal deferred deferred = True return release_and_end @@ -89,12 +76,8 @@ def defer() -> Callable[[], None]: try: yield defer finally: - aprint("ending context.") if not deferred: - aprint("ending context - release.") release_and_end() - else: - aprint("ending context - keep.") class ModelWrapper: @@ -119,7 +102,6 @@ def __init__(self, config: Dict, tracer: sdk_trace.Tracer): "predict_concurrency", DEFAULT_PREDICT_CONCURRENCY ) ) - self._background_tasks: Set[asyncio.Task] = set() self.truss_schema: TrussSchema = None def load(self) -> bool: @@ -305,12 +287,13 @@ async def postprocess( ) async def write_response_to_queue( - self, queue: asyncio.Queue, generator: AsyncGenerator, span: sdk_trace.Span + self, queue: asyncio.Queue, generator: AsyncGenerator, span: trace.Span ): with tracing.section_as_event(span, "write_response_to_queue"): - aprint("start-write_response_to_queue") try: async for chunk in generator: + # TODO: consider checking `request.is_disconnected()` for + # client-side cancellations and freeing resources. await queue.put(ResponseChunk(chunk)) except Exception as e: self._logger.exception( @@ -318,12 +301,12 @@ async def write_response_to_queue( ) finally: await queue.put(None) - aprint("end-write_response_to_queue") - async def _gather_generator(self, response: Any, span: sdk_trace.Span) -> str: - # In the case of gathering, it might make more sense to apply the post-process + async def _gather_generator(self, response: Any, span: trace.Span) -> str: + # In the case of gathering, it might make more sense to apply the postprocess # to the gathered result, but that would be inconsistent with streaming. - # In general it might even be better to forbid postprocessing completely. + # In general, it might even be better to strictly forbid postprocessing + # for generators. if hasattr(self._model, "postprocess"): logging.warning( "Predict returned a streaming response, while a postprocess is defined." @@ -341,7 +324,7 @@ async def _gather_generator(self, response: Any, span: sdk_trace.Span) -> str: async def _stream_with_background_task( self, response: Any, - span: sdk_trace.Span, + span: trace.Span, release_and_end: Callable[[], None], ): # The streaming read timeout is the amount of time in between streamed chunk @@ -358,28 +341,23 @@ async def _stream_with_background_task( response_queue: asyncio.Queue = asyncio.Queue() # `write_response_to_queue` keeps running the background until completion. - task = asyncio.create_task( + gen_task = asyncio.create_task( self.write_response_to_queue(response_queue, async_generator, span) ) - # We add the task to the ModelWrapper instance to ensure it does - # not get garbage collected after the predict method completes, - # and continues running. - self._background_tasks.add(task) # Defer the release of the semaphore until the write_response_to_queue task. 
@@ -119,7 +102,6 @@ def __init__(self, config: Dict, tracer: sdk_trace.Tracer):
                 "predict_concurrency", DEFAULT_PREDICT_CONCURRENCY
             )
         )
-        self._background_tasks: Set[asyncio.Task] = set()
         self.truss_schema: TrussSchema = None

     def load(self) -> bool:
@@ -305,12 +287,13 @@ async def postprocess(
         )

     async def write_response_to_queue(
-        self, queue: asyncio.Queue, generator: AsyncGenerator, span: sdk_trace.Span
+        self, queue: asyncio.Queue, generator: AsyncGenerator, span: trace.Span
     ):
         with tracing.section_as_event(span, "write_response_to_queue"):
-            aprint("start-write_response_to_queue")
             try:
                 async for chunk in generator:
+                    # TODO: consider checking `request.is_disconnected()` for
+                    # client-side cancellations and freeing resources.
                     await queue.put(ResponseChunk(chunk))
             except Exception as e:
                 self._logger.exception(
@@ -318,12 +301,12 @@ async def write_response_to_queue(
                 )
             finally:
                 await queue.put(None)
-                aprint("end-write_response_to_queue")

-    async def _gather_generator(self, response: Any, span: sdk_trace.Span) -> str:
-        # In the case of gathering, it might make more sense to apply the post-process
+    async def _gather_generator(self, response: Any, span: trace.Span) -> str:
+        # In the case of gathering, it might make more sense to apply the postprocess
         # to the gathered result, but that would be inconsistent with streaming.
-        # In general it might even be better to forbid postprocessing completely.
+        # In general, it might even be better to strictly forbid postprocessing
+        # for generators.
         if hasattr(self._model, "postprocess"):
             logging.warning(
                 "Predict returned a streaming response, while a postprocess is defined."
@@ -341,7 +324,7 @@ async def _gather_generator(self, response: Any, span: sdk_trace.Span) -> str:
     async def _stream_with_background_task(
         self,
         response: Any,
-        span: sdk_trace.Span,
+        span: trace.Span,
         release_and_end: Callable[[], None],
     ):
         # The streaming read timeout is the amount of time in between streamed chunk
@@ -358,28 +341,23 @@ async def _stream_with_background_task(
         response_queue: asyncio.Queue = asyncio.Queue()

         # `write_response_to_queue` keeps running in the background until completion.
-        task = asyncio.create_task(
+        gen_task = asyncio.create_task(
             self.write_response_to_queue(response_queue, async_generator, span)
         )
-        # We add the task to the ModelWrapper instance to ensure it does
-        # not get garbage collected after the predict method completes,
-        # and continues running.
-        self._background_tasks.add(task)
         # Defer the release of the semaphore until the `write_response_to_queue` task completes.
-        task.add_done_callback(lambda _: release_and_end())
-        task.add_done_callback(self._background_tasks.discard)
+        gen_task.add_done_callback(lambda _: release_and_end())

         # The gap between responses in a stream must be < streaming_read_timeout
         async def _response_generator():
-            with tracing.section_as_event(span, "response_generator"):
-                aprint("start-response_generator")
+            # `span` is tied to the "producer" `gen_task`, which might complete before
+            # the "consumer" part here finishes; therefore a dedicated span is required.
+            with self._tracer.start_as_current_span("response_generator"):
                 while True:
                     chunk = await asyncio.wait_for(
                         response_queue.get(),
                         timeout=streaming_read_timeout,
                     )
                     if chunk is None:
                         return
                     yield chunk.value
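Note: the original `+` line here read `gen_task.add_done_callback(lambda _: release_and_end)`, which passes the function without calling it, so the semaphore and span would never be released; the call parentheses are restored above. The streaming path itself boils down to a producer task feeding a queue, a `None` sentinel for end-of-stream, and a per-chunk read timeout. A self-contained sketch of that pattern (the timeout value is an assumption):

```python
# Hedged sketch of the producer/consumer streaming pattern used above.
import asyncio


async def producer(queue: asyncio.Queue, gen):
    try:
        async for chunk in gen:
            await queue.put(chunk)
    finally:
        await queue.put(None)  # sentinel: stream finished (also on errors)


async def consume(queue: asyncio.Queue, read_timeout: float = 60.0):
    while True:
        # The gap between chunks must stay below the read timeout.
        chunk = await asyncio.wait_for(queue.get(), timeout=read_timeout)
        if chunk is None:
            return
        yield chunk


async def main():
    async def gen():
        for i in range(3):
            yield str(i)

    queue: asyncio.Queue = asyncio.Queue()
    task = asyncio.create_task(producer(queue, gen()))
    task.add_done_callback(lambda _: print("producer done"))  # cleanup hook
    async for chunk in consume(queue):
        print(chunk)


asyncio.run(main())
```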
@@ -399,24 +377,28 @@ async def __call__(
             Generator: In case of streaming response
             String: in case of non-streamed generator (the string is the JSON result).
         """
-        with self._tracer.start_as_current_span("predict-call-pre") as span:
+        with self._tracer.start_as_current_span("call-pre") as span_pre:
             if self.truss_schema is not None:
                 try:
-                    with tracing.section_as_event(span, "parse-pydantic"):
+                    with tracing.section_as_event(span_pre, "parse-pydantic"):
                         body = self.truss_schema.input_type(**body)
                 except pydantic.ValidationError as e:
                     self._logger.info("Request Validation Error")
                     raise HTTPException(
                         status_code=400, detail=f"Request Validation Error, {str(e)}"
                     ) from e
-            with tracing.section_as_event(span, "preprocess"), tracing.detach_context():
+            with tracing.section_as_event(
+                span_pre, "preprocess"
+            ), tracing.detach_context():
                 payload = await self.preprocess(body)

-        span = self._tracer.start_span("predict-call-predict")
+        span_predict = self._tracer.start_span("call-predict")
         async with deferred_semaphore_and_span(
-            self._predict_semaphore, span
+            self._predict_semaphore, span_predict
         ) as get_defer_fn:
-            with tracing.section_as_event(span, "predict"), tracing.detach_context():
+            with tracing.section_as_event(
+                span_predict, "predict"
+            ), tracing.detach_context():
                 # To prevent span pollution, we need to make sure spans created by user
                 # code don't inherit context from our spans (which happens even if
                 # different tracer instances are used).
@@ -430,29 +412,27 @@
                 # exactly handle that case we would need to apply `detach_context`
                 # around each `next`-invocation that consumes the generator, which is
                 # prohibitive.
-                aprint("start-predict")
                 response = await self.predict(payload)
-                aprint("done-predict")

             if inspect.isgenerator(response) or inspect.isasyncgen(response):
                 if headers and headers.get("accept") == "application/json":
                     # In the case of a streaming response, consume the stream
                     # if the http accept header is set, and json is requested.
-                    return await self._gather_generator(response, span)
+                    return await self._gather_generator(response, span_predict)
                 else:
                     return await self._stream_with_background_task(
-                        response, span, release_and_end=get_defer_fn()
+                        response, span_predict, release_and_end=get_defer_fn()
                     )

-        with self._tracer.start_as_current_span("predict-call-post") as span:
+        with self._tracer.start_as_current_span("call-post") as span_post:
             with tracing.section_as_event(
-                span, "postprocess"
+                span_post, "postprocess"
             ), tracing.detach_context():
                 processed_response = await self.postprocess(response)
             if isinstance(processed_response, BaseModel):
                 # If we return a pydantic object, convert it back to a dict
-                with tracing.section_as_event(span, "dump-pydantic"):
+                with tracing.section_as_event(span_post, "dump-pydantic"):
                     processed_response = processed_response.dict()
             return processed_response
diff --git a/truss/test_data/server.Dockerfile b/truss/test_data/server.Dockerfile
index 7be57e609..7d97c435a 100644
--- a/truss/test_data/server.Dockerfile
+++ b/truss/test_data/server.Dockerfile
@@ -28,6 +28,10 @@ RUN apt update && \
 COPY ./base_server_requirements.txt base_server_requirements.txt
 RUN pip install -r base_server_requirements.txt --no-cache-dir && rm -rf /root/.cache/pip

+COPY ./requirements.txt requirements.txt
+RUN cat requirements.txt
+RUN pip install -r requirements.txt --no-cache-dir && rm -rf /root/.cache/pip
+
 ENV APP_HOME /app
 WORKDIR $APP_HOME
diff --git a/truss/test_data/test_streaming_truss_with_tracing/model/model.py b/truss/test_data/test_streaming_truss_with_tracing/model/model.py
index 5c541159b..249a0603f 100644
--- a/truss/test_data/test_streaming_truss_with_tracing/model/model.py
+++ b/truss/test_data/test_streaming_truss_with_tracing/model/model.py
@@ -57,7 +57,7 @@ def predict(self, model_input: Any) -> Generator[str, None, None]:
         with tracer.start_as_current_span("start-predict") as span:

             def inner():
-                time.sleep(2)
+                time.sleep(0.02)
                 for i in range(5):
                     span.add_event("yield")
                     yield str(i)
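For manually exercising the streaming test truss above, a client can consume the chunks over HTTP. The port and endpoint path below follow common truss server defaults and are assumptions here, not confirmed by this diff:

```python
# Hedged sketch: stream the test model's output ("0".."4") over HTTP.
import requests

with requests.post(
    "http://localhost:8080/v1/models/model:predict",  # assumed endpoint/port
    json={},
    stream=True,
) as resp:
    for chunk in resp.iter_content(chunk_size=None):
        print(chunk.decode())
```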