Adding OpenTelemetry OTLP exporter support #54

Open · wants to merge 19 commits into base `main`
4 changes: 3 additions & 1 deletion .devcontainer/devcontainer.json
@@ -27,7 +27,8 @@
}
},
"forwardPorts": [
5000 // test-client-web port
5000, // test-client-web port
3000 // grafana UI port
],
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "bash -c .devcontainer/post-create.sh",
@@ -37,4 +38,5 @@

// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
"mounts": []
}
2 changes: 1 addition & 1 deletion .gitignore
@@ -8,7 +8,7 @@ __pycache__/

# Distribution / packaging
.Python
build/
*/build/
develop-eggs/
dist/
downloads/
3 changes: 3 additions & 0 deletions Makefile
@@ -95,3 +95,6 @@ docker-build-load-test: ## Build the AOAI Simulated API Load Test as a docker image
erase-recording: ## Erase all *.recording files
rm -rf "${makefile_dir}.recording"

start-telemetry:
-docker-compose -f build/telemetry-docker-compose.yaml down
docker-compose -f ./build/telemetry-docker-compose.yaml up
7 changes: 7 additions & 0 deletions README.md
@@ -17,6 +17,7 @@ WARNING: This is a work in progress!
- [How to Get Started with the Azure OpenAI API Simulator](#how-to-get-started-with-the-azure-openai-api-simulator)
- [Running and Deploying the Azure OpenAI API Simulator](#running-and-deploying-the-azure-openai-api-simulator)
- [Configuring the Azure OpenAI API Simulator](#configuring-the-azure-openai-api-simulator)
- [Monitoring the Azure OpenAI API Simulator](#monitoring-the-azure-openai-api-simulator)
- [Extending the Azure OpenAI API Simulator](#extending-the-azure-openai-api-simulator)
- [Contributing to the Azure OpenAI API Simulator](#contributing-to-the-azure-openai-api-simulator)
- [Changelog](#changelog)
@@ -91,6 +92,12 @@ The document [Running and Deploying the Azure OpenAI API Simulator](./docs/runni

The behaviour of the Azure OpenAI API Simulator is controlled via a range of [Azure OpenAI API Simulator Configuration Options](./docs/config.md).

### Monitoring the Azure OpenAI API Simulator

The Azure OpenAI API Simulator is instrumented using OpenTelemetry and supports exporting telemetry to Azure Monitor or an OTLP endpoint.

See the [telemetry documentation](./docs/telemetry.md) for details on configuring the application to export telemetry and on the types of metrics captured.

### Extending the Azure OpenAI API Simulator

There are also a number of [Azure OpenAI API Simulator Extension points](./docs/extending.md) that allow you to customise the behaviour of the Azure OpenAI API Simulator. Extensions can be used to modify the request/response, add latency, or even generate responses.
8 changes: 8 additions & 0 deletions build/telemetry-docker-compose.yaml
@@ -0,0 +1,8 @@
services:
grafana-all-in-one: # https://grafana.com/blog/2024/03/13/an-opentelemetry-backend-in-a-docker-image-introducing-grafana/otel-lgtm/
image: grafana/otel-lgtm
container_name: otel-lgtm
ports:
- "3000:3000" # Grafana Web UI
- "4317:4317" # OTLP gRPC receiver
- "4318:4318" # OTLP http receiver
2 changes: 2 additions & 0 deletions docs/config.md
@@ -116,6 +116,8 @@ The simulator supports a set of basic Open Telemetry configuration options. Thes
| ----------------------------- | ----------------------------------------------------------------------------------------------- |
| `OTEL_SERVICE_NAME` | Sets the value of the service name reported to Open Telemetry. Defaults to `aoai-api-simulator` |
| `OTEL_METRIC_EXPORT_INTERVAL` | The time interval (in milliseconds) between the start of two export attempts. |
| `APPLICATIONINSIGHTS_CONNECTION_STRING` | Sets the Application Insights connection string used to export telemetry to Azure Monitor |
| `OTEL_EXPORTER_OTLP_ENDPOINT` | Sets the OpenTelemetry OTLP exporter endpoint. This can be further customised using the environment variables described [here](https://opentelemetry.io/docs/specs/otel/protocol/exporter/), e.g. `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`, `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`, or `OTEL_EXPORTER_OTLP_LOGS_ENDPOINT` |
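
For illustration, a minimal sketch of how the stock exporters consume these variables (assuming the `opentelemetry-exporter-otlp` package listed in this PR):

```python
# Sketch: the OTLP gRPC exporters resolve their endpoint from
# OTEL_EXPORTER_OTLP_ENDPOINT (or the per-signal variants such as
# OTEL_EXPORTER_OTLP_METRICS_ENDPOINT) when none is passed explicitly.
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter

exporter = OTLPMetricExporter()  # endpoint taken from the environment
```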

## Config API Endpoint

42 changes: 28 additions & 14 deletions docs/metrics.md → docs/telemetry.md
@@ -1,16 +1,24 @@
# Azure OpenAI API Simulator Metrics
# Azure OpenAI API Simulator Telemetry

This solution is instrumented using OpenTelemetry. The [Azure OpenTelemetry distribution](https://learn.microsoft.com/en-us/python/api/overview/azure/monitor-opentelemetry-readme?view=azure-python) library is used to instrument the application and export telemetry to the Azure Monitor instance identified by the `APPLICATIONINSIGHTS_CONNECTION_STRING` environment variable.

In addition, the solution supports exporting to an [OTLP receiver](https://github.com/open-telemetry/opentelemetry-collector/blob/main/receiver/otlpreceiver/README.md) (e.g. the OpenTelemetry Collector) via the `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable.
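
As a rough sketch of how the two export paths could be selected (illustrative only; this `setup_telemetry` is an assumption based on the naming in this PR, not the actual module code):

```python
# Illustrative sketch: choose Azure Monitor when a connection string is set,
# otherwise fall back to OTLP when an endpoint is configured.
import os

from azure.monitor.opentelemetry import configure_azure_monitor
from opentelemetry import metrics
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader


def setup_telemetry() -> bool:
    """Returns True when Azure Monitor export is configured."""
    if os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING"):
        # Reads the connection string from the environment and registers
        # tracer/meter/logger providers wired to Azure Monitor.
        configure_azure_monitor()
        return True
    if os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT"):
        # The exporter resolves its endpoint from the environment; traces
        # and logs would be wired up similarly.
        reader = PeriodicExportingMetricReader(OTLPMetricExporter())
        metrics.set_meter_provider(MeterProvider(metric_readers=[reader]))
    return False
```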

## Metrics

To help you understand how the API Simulator is performing, we provide a number of metrics that you can use to monitor the simulator.

- [Azure OpenAI API Simulator Metrics](#azure-openai-api-simulator-metrics)
- [aoai-api-simulator.latency.base](#aoai-api-simulatorlatencybase)
- [aoai-api-simulator.latency.full](#aoai-api-simulatorlatencyfull)
- [aoai-api-simulator.tokens.used](#aoai-api-simulatortokensused)
- [aoai-api-simulator.tokens.requested](#aoai-api-simulatortokensrequested)
- [aoai-api-simulator.tokens.rate-limit](#aoai-api-simulatortokensrate-limit)
- [aoai-api-simulator.limits](#aoai-api-simulatorlimits)
- [Azure OpenAI API Simulator Telemetry](#azure-openai-api-simulator-telemetry)
- [Metrics](#metrics)
- [aoai-api-simulator.latency.base](#aoai-api-simulatorlatencybase)
- [aoai-api-simulator.latency.full](#aoai-api-simulatorlatencyfull)
- [aoai-api-simulator.tokens.used](#aoai-api-simulatortokensused)
- [aoai-api-simulator.tokens.requested](#aoai-api-simulatortokensrequested)
- [aoai-api-simulator.tokens.rate-limit](#aoai-api-simulatortokensrate-limit)
- [aoai-api-simulator.limits](#aoai-api-simulatorlimits)
- [Running Locally](#running-locally)

## aoai-api-simulator.latency.base
### aoai-api-simulator.latency.base

Units: `seconds`

@@ -21,7 +29,7 @@ Dimensions:
- `deployment`: The name of the deployment the metric relates to.
- `status_code`: The HTTP status code of the response.

## aoai-api-simulator.latency.full
### aoai-api-simulator.latency.full

Units: `seconds`

@@ -32,7 +40,7 @@ Dimensions:
- `deployment`: The name of the deployment the metric relates to.
- `status_code`: The HTTP status code of the response.

## aoai-api-simulator.tokens.used
### aoai-api-simulator.tokens.used

Units: `tokens`

@@ -43,7 +51,7 @@ Dimensions:
- `deployment`: The name of the deployment the metric relates to.
- `token_type`: The type of token, e.g. `prompt` or `completion`.

## aoai-api-simulator.tokens.requested
### aoai-api-simulator.tokens.requested

Units: `tokens`

@@ -54,7 +62,7 @@ Dimensions:
- `deployment`: The name of the deployment the metric relates to.
- `token_type`: The type of token, e.g. `prompt` or `completion`.

## aoai-api-simulator.tokens.rate-limit
### aoai-api-simulator.tokens.rate-limit

Units: `tokens`

@@ -64,7 +72,7 @@ Dimensions:

- `deployment`: The name of the deployment the metric relates to.

## aoai-api-simulator.limits
### aoai-api-simulator.limits

Units: `requests`

@@ -74,3 +82,9 @@ Dimensions:

- `deployment`: The name of the deployment the metric relates to.
- `limit_type`: The type of limit that was hit, e.g. `requests` or `tokens`.
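
As a hedged illustration (not the simulator's actual code), a metric with these units and dimensions can be recorded via the OpenTelemetry metrics API like so:

```python
# Illustrative only: recording aoai-api-simulator.tokens.used with the
# OpenTelemetry metrics API, mirroring the dimensions documented above.
from opentelemetry import metrics

meter = metrics.get_meter(__name__)
tokens_used = meter.create_counter(
    name="aoai-api-simulator.tokens.used",
    unit="tokens",
    description="Number of tokens used in simulated requests",
)

# The documented dimensions map to metric attributes; values are examples.
tokens_used.add(42, {"deployment": "gpt-35-turbo", "token_type": "completion"})
```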

## Running Locally

The `make start-telemetry` command starts the `grafana/otel-lgtm` container. This is an [all-in-one container](https://grafana.com/blog/2024/03/13/an-opentelemetry-backend-in-a-docker-image-introducing-grafana/otel-lgtm/) that captures traces, metrics, and logs.

It exposes the Grafana UI on port `3000`.
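
To verify the stack end-to-end, the following sketch (assuming the OTLP gRPC receiver is reachable on `localhost:4317`) pushes a single counter value that should then be visible in Grafana:

```python
# Minimal smoke test for the local otel-lgtm stack (assumes localhost:4317).
from opentelemetry import metrics
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader

exporter = OTLPMetricExporter(endpoint="http://localhost:4317", insecure=True)
provider = MeterProvider(metric_readers=[PeriodicExportingMetricReader(exporter)])
metrics.set_meter_provider(provider)

metrics.get_meter("smoke-test").create_counter("smoke.requests").add(1)
provider.force_flush()  # flush the data point before the script exits
```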
8 changes: 4 additions & 4 deletions infra/bicep/main.bicep
@@ -170,7 +170,7 @@ resource azureOpenAIKeySecret 'Microsoft.KeyVault/vaults/secrets@2023-07-01' = {
}
resource appInsightsConnectionStringSecret 'Microsoft.KeyVault/vaults/secrets@2023-07-01' = {
parent: vault
name: 'app-insights-connection-string'
name: 'applicationinsights-connection-string'
properties: {
value: appInsights.properties.ConnectionString
}
@@ -208,8 +208,8 @@ resource apiSim 'Microsoft.App/containerApps@2023-05-01' = {
identity: managedIdentity.id
}
{
name: 'app-insights-connection-string'
keyVaultUrl: '${keyVaultUri}secrets/app-insights-connection-string'
name: 'applicationinsights-connection-string'
keyVaultUrl: '${keyVaultUri}secrets/applicationinsights-connection-string'
identity: managedIdentity.id
}
{
@@ -243,7 +243,7 @@ resource apiSim 'Microsoft.App/containerApps@2023-05-01' = {
{ name: 'AZURE_OPENAI_KEY', secretRef: 'azure-openai-key' }
{ name: 'OPENAI_DEPLOYMENT_CONFIG_PATH', value: '/mnt/deployment-config/simulator_deployment_config.json' }
{ name: 'LOG_LEVEL', value: logLevel }
{ name: 'APPLICATIONINSIGHTS_CONNECTION_STRING', secretRef: 'app-insights-connection-string' }
{ name: 'APPLICATIONINSIGHTS_CONNECTION_STRING', secretRef: 'applicationinsights-connection-string' }
// Ensure cloudRoleName is set in telemetry
// https://opentelemetry-python.readthedocs.io/en/latest/sdk/environment_variables.html#opentelemetry.sdk.environment_variables.OTEL_SERVICE_NAME
{ name: 'OTEL_SERVICE_NAME', value: apiSimulatorName }
3 changes: 2 additions & 1 deletion loadtest/common/config.py
@@ -1,7 +1,8 @@
import os

api_key = os.getenv("API_KEY", os.getenv("SIMULATOR_API_KEY"))
app_insights_connection_string = os.getenv("APP_INSIGHTS_CONNECTION_STRING")
opentelemetry_exporter_otlp_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
applicationinsights_connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
log_analytics_workspace_id = os.getenv("LOG_ANALYTICS_WORKSPACE_ID")
log_analytics_workspace_name = os.getenv("LOG_ANALYTICS_WORKSPACE_NAME")
tenant_id = os.getenv("TENANT_ID")
10 changes: 5 additions & 5 deletions loadtest/common/locust_app_insights.py
@@ -1,18 +1,18 @@
import logging
from opentelemetry import metrics

from azure.monitor.opentelemetry import configure_azure_monitor
from opentelemetry import metrics

from .config import (
app_insights_connection_string,
applicationinsights_connection_string,
)


histogram_request_latency: metrics.Histogram

if app_insights_connection_string:
if applicationinsights_connection_string:
# Options: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/monitor/azure-monitor-opentelemetry#usage
logging.getLogger("azure").setLevel(logging.WARNING)
configure_azure_monitor(connection_string=app_insights_connection_string)
configure_azure_monitor(connection_string=applicationinsights_connection_string)
histogram_request_latency = metrics.get_meter(__name__).create_histogram(
"locust.request_latency", "Request latency", "s"
)
4 changes: 2 additions & 2 deletions loadtest/loadtest_chat_completions_1s_latency.py
@@ -2,7 +2,7 @@
import os

import requests
from common.config import api_key, app_insights_connection_string
from common.config import api_key, applicationinsights_connection_string
from common.latency import set_simulator_chat_completions_latency
from common.locust_app_insights import (
report_request_metric,
@@ -22,7 +22,7 @@ def on_locust_init(environment: Environment, **_):
"""
Configure test
"""
if app_insights_connection_string:
if applicationinsights_connection_string:
logging.info("App Insights connection string found - enabling request metrics")
environment.events.request.add_listener(report_request_metric)
else:
4 changes: 2 additions & 2 deletions loadtest/loadtest_chat_completions_no_added_latency.py
@@ -2,7 +2,7 @@
import os

import requests
from common.config import api_key, app_insights_connection_string
from common.config import api_key, applicationinsights_connection_string
from common.latency import set_simulator_chat_completions_latency
from common.locust_app_insights import (
report_request_metric,
@@ -26,7 +26,7 @@ def on_locust_init(environment: Environment, **_):
"""
Configure test
"""
if app_insights_connection_string:
if applicationinsights_connection_string:
logging.info("App Insights connection string found - enabling request metrics")
environment.events.request.add_listener(report_request_metric)
else:
3 changes: 3 additions & 0 deletions sample.env
@@ -29,6 +29,9 @@ AZURE_FORM_RECOGNIZER_KEY=
# Open Telemetry Config (used within the simulator)
OTEL_SERVICE_NAME=aoai-api-simulator-local-dev
OTEL_METRIC_EXPORT_INTERVAL=10000
# OTEL_EXPORTER_OTLP_ENDPOINT=http://host.docker.internal:4317 # use host.docker.internal when the simulator runs inside Docker and the OTLP receiver runs on the host
OTEL_EXPORTER_OTLP_ENDPOINT=
APPLICATIONINSIGHTS_CONNECTION_STRING=


# Test Client Config (used to direct the tests and test clients)
6 changes: 3 additions & 3 deletions scripts/_run-load-test-aca.sh
@@ -86,8 +86,8 @@ if [[ -z "${key_vault_name}" ]]; then
echo "Key Vault Name not found in output.json"
exit 1
fi
app_insights_connection_string=$(az keyvault secret show --vault-name "$key_vault_name" --name app-insights-connection-string --query value --output tsv)
if [[ -z "${app_insights_connection_string}" ]]; then
applicationinsights_connection_string=$(az keyvault secret show --vault-name "$key_vault_name" --name applicationinsights-connection-string --query value --output tsv)
if [[ -z "${applicationinsights_connection_string}" ]]; then
echo "App Insights Connection String not found in Key Vault"
exit 1
fi
@@ -133,7 +133,7 @@ az containerapp job create \
--cpu "1" \
--memory "2Gi" \
--command "locust" \
--env-vars "LOCUST_LOCUSTFILE=$TEST_FILE" "LOCUST_HOST=https://${api_fqdn}/" "LOCUST_USERS=$LOCUST_USERS" "LOCUST_SPAWN_RATE=$LOCUST_SPAWN_RATE" "LOCUST_AUTOSTART=true" "LOCUST_RUN_TIME=$LOCUST_RUN_TIME" "LOCUST_AUTOQUIT=10" "SIMULATOR_API_KEY=${SIMULATOR_API_KEY}" "APP_INSIGHTS_CONNECTION_STRING=${app_insights_connection_string}" "MAX_TOKENS=${MAX_TOKENS}" "DEPLOYMENT_NAME=${DEPLOYMENT_NAME}" ALLOW_429_RESPONSES=${ALLOW_429_RESPONSES} 1>&2
--env-vars "LOCUST_LOCUSTFILE=$TEST_FILE" "LOCUST_HOST=https://${api_fqdn}/" "LOCUST_USERS=$LOCUST_USERS" "LOCUST_SPAWN_RATE=$LOCUST_SPAWN_RATE" "LOCUST_AUTOSTART=true" "LOCUST_RUN_TIME=$LOCUST_RUN_TIME" "LOCUST_AUTOQUIT=10" "SIMULATOR_API_KEY=${SIMULATOR_API_KEY}" "APP_INSIGHTS_CONNECTION_STRING=${applicationinsights_connection_string}" "MAX_TOKENS=${MAX_TOKENS}" "DEPLOYMENT_NAME=${DEPLOYMENT_NAME}" ALLOW_429_RESPONSES=${ALLOW_429_RESPONSES} 1>&2


start_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
4 changes: 4 additions & 0 deletions src/aoai-api-simulator/requirements.txt
@@ -2,10 +2,14 @@ fastapi==0.109.2
uvicorn[standard]==0.27.0.post1
gunicorn==22.0.0
requests==2.32.0
opentelemetry-instrumentation-requests==0.48b0
PyYAML==6.0.1
tiktoken==0.6.0
nanoid==2.0.0
limits==3.8.0
opentelemetry-api==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-exporter-otlp==1.27.0
azure-monitor-opentelemetry==1.3.0
pydantic-settings==2.2.1
python-multipart==0.0.9
src/aoai-api-simulator/src/aoai_api_simulator/config_loader.py
@@ -11,10 +11,11 @@
from aoai_api_simulator.record_replay.handler import get_default_forwarders


def get_config_from_env_vars(logger: logging.Logger) -> Config:
def get_config_from_env_vars() -> Config:
"""
Load configuration from environment variables
"""
logger = logging.getLogger()
config = Config(generators=get_default_generators())
config.recording.forwarders = get_default_forwarders()
config.openai_deployments = _load_openai_deployments(logger)
2 changes: 1 addition & 1 deletion src/aoai-api-simulator/src/aoai_api_simulator/latency.py
@@ -2,7 +2,7 @@
import time

from aoai_api_simulator import constants
from aoai_api_simulator.metrics import simulator_metrics
from aoai_api_simulator.telemetry import simulator_metrics
from aoai_api_simulator.models import RequestContext
from fastapi import Response

2 changes: 1 addition & 1 deletion src/aoai-api-simulator/src/aoai_api_simulator/limiters.py
@@ -7,12 +7,12 @@
from typing import Awaitable, Callable

from aoai_api_simulator import constants
from aoai_api_simulator.metrics import simulator_metrics
from aoai_api_simulator.models import (
Config,
OpenAIDeployment,
RequestContext,
)
from aoai_api_simulator.telemetry import simulator_metrics
from fastapi import Response

logger = logging.getLogger(__name__)
32 changes: 7 additions & 25 deletions src/aoai-api-simulator/src/aoai_api_simulator/main.py
@@ -1,33 +1,15 @@
import logging
import os

from azure.monitor.opentelemetry import configure_azure_monitor
from aoai_api_simulator.app_builder import app as builder_app
from aoai_api_simulator.app_builder import apply_config

# from opentelemetry import trace

from aoai_api_simulator.config_loader import get_config_from_env_vars, set_config
from aoai_api_simulator.app_builder import app as builder_app, apply_config

log_level = os.getenv("LOG_LEVEL") or "INFO"

logger = logging.getLogger(__name__)
logging.basicConfig(level=log_level)
logging.getLogger("azure").setLevel(logging.WARNING)

application_insights_connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
if application_insights_connection_string:
logger.info("🚀 Configuring Azure Monitor telemetry")
from aoai_api_simulator.telemetry import setup_auto_instrumentation, setup_telemetry

# Options: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/monitor/azure-monitor-opentelemetry#usage
configure_azure_monitor(connection_string=application_insights_connection_string)
else:
logger.info("🚀 Azure Monitor telemetry not configured (set APPLICATIONINSIGHTS_CONNECTION_STRING)")
using_azure_monitor: bool = setup_telemetry()

# tracer = trace.get_tracer(__name__)

config = get_config_from_env_vars(logger)
config = get_config_from_env_vars()
set_config(config)


apply_config()

app = builder_app # expose to gunicorn
setup_auto_instrumentation(app, using_azure_monitor)
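
For context, a hypothetical sketch of what `setup_auto_instrumentation` might do (assumed from this diff; it also assumes `opentelemetry-instrumentation-fastapi` is available alongside the `opentelemetry-instrumentation-requests` dependency added in this PR):

```python
# Hypothetical sketch, not the actual module: when Azure Monitor is in use,
# configure_azure_monitor() already instruments FastAPI and requests, so
# manual instrumentation is only needed on the OTLP-only path.
from fastapi import FastAPI
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.requests import RequestsInstrumentor


def setup_auto_instrumentation(app: FastAPI, using_azure_monitor: bool) -> None:
    if using_azure_monitor:
        return  # the Azure Monitor distro has already instrumented these
    FastAPIInstrumentor.instrument_app(app)
    RequestsInstrumentor().instrument()
```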