Adding OpenTelemetry OTLP exporter support #54

Open · wants to merge 19 commits into main
Changes from 8 commits
2 changes: 1 addition & 1 deletion .gitignore
@@ -8,7 +8,7 @@ __pycache__/

# Distribution / packaging
.Python
-build/
+*/build/
develop-eggs/
dist/
downloads/
3 changes: 3 additions & 0 deletions Makefile
@@ -91,3 +91,6 @@ docker-build-load-test: ## Build the AOAI Simulated API Load Test as a docker image
erase-recording: ## Erase all *.recording files
rm -rf "${makefile_dir}.recording"

+start-telemetry:
+	-docker-compose -f build/telemetry-docker-compose.yaml down
+	docker-compose -f ./build/telemetry-docker-compose.yaml up
8 changes: 8 additions & 0 deletions build/telemetry-docker-compose.yaml
@@ -0,0 +1,8 @@
+services:
+  grafana-all-in-one: # https://grafana.com/blog/2024/03/13/an-opentelemetry-backend-in-a-docker-image-introducing-grafana/otel-lgtm/
+    image: grafana/otel-lgtm
+    container_name: otel-lgtm
+    ports:
+      - "3000:3000" # Grafana Web UI
+      - "4317:4317" # OTLP gRPC receiver
+      - "4318:4318" # OTLP HTTP receiver
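
With this stack running (e.g. via `make start-telemetry`), a quick way to confirm the OTLP receiver is accepting data is to send a single test span from Python. This is a minimal sketch, assuming the opentelemetry-sdk and opentelemetry-exporter-otlp packages are installed; the service name is arbitrary:

```python
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# point the exporter at the OTLP gRPC receiver published on port 4317
provider = TracerProvider(resource=Resource(attributes={"service.name": "otlp-smoke-test"}))
provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True)))
trace.set_tracer_provider(provider)

with trace.get_tracer(__name__).start_as_current_span("smoke-test"):
    pass  # the span should appear in the Grafana UI on http://localhost:3000

provider.shutdown()  # flush pending spans before exiting
```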
2 changes: 2 additions & 0 deletions docs/config.md
@@ -107,6 +107,8 @@ The simulator supports a set of basic Open Telemetry configuration options. Thes
| ----------------------------- | ----------------------------------------------------------------------------------------------- |
| `OTEL_SERVICE_NAME` | Sets the value of the service name reported to Open Telemetry. Defaults to `aoai-api-simulator` |
| `OTEL_METRIC_EXPORT_INTERVAL` | The time interval (in milliseconds) between the start of two export attempts. |
+| `APPLICATIONINSIGHTS_CONNECTION_STRING` | Sets the Application Insights connection string used to export telemetry to Azure Monitor |
+| `OTEL_EXPORTER_OTLP_ENDPOINT` | Sets the OpenTelemetry OTLP exporter endpoint. This can be customised further using the environment variables described [here](https://opentelemetry.io/docs/specs/otel/protocol/exporter/), e.g. `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`, `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` or `OTEL_EXPORTER_OTLP_LOGS_ENDPOINT` |
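
As a rough sketch of how the OTLP endpoint setting is consumed (the endpoint URL below is illustrative, and the standard OpenTelemetry Python SDK is assumed): when no endpoint argument is passed, the OTLP exporters fall back to the environment.

```python
import os

from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter

# illustrative value; in practice this is set in the simulator's environment
os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "http://localhost:4317"

# with no endpoint argument, the exporter reads OTEL_EXPORTER_OTLP_ENDPOINT
# (or a signal-specific variant such as OTEL_EXPORTER_OTLP_TRACES_ENDPOINT)
span_exporter = OTLPSpanExporter()
```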

## Config API Endpoint

8 changes: 4 additions & 4 deletions infra/main.bicep
@@ -170,7 +170,7 @@ resource azureOpenAIKeySecret 'Microsoft.KeyVault/vaults/secrets@2023-07-01' = {
}
resource appInsightsConnectionStringSecret 'Microsoft.KeyVault/vaults/secrets@2023-07-01' = {
parent: vault
-name: 'app-insights-connection-string'
+name: 'applicationinsights-connection-string'
properties: {
value: appInsights.properties.ConnectionString
}
@@ -208,8 +208,8 @@ resource apiSim 'Microsoft.App/containerApps@2023-05-01' = {
identity: managedIdentity.id
}
{
-name: 'app-insights-connection-string'
-keyVaultUrl: '${keyVaultUri}secrets/app-insights-connection-string'
+name: 'applicationinsights-connection-string'
+keyVaultUrl: '${keyVaultUri}secrets/applicationinsights-connection-string'
identity: managedIdentity.id
}
{
@@ -243,7 +243,7 @@
{ name: 'AZURE_OPENAI_KEY', secretRef: 'azure-openai-key' }
{ name: 'OPENAI_DEPLOYMENT_CONFIG_PATH', value: '/mnt/deployment-config/simulator_deployment_config.json' }
{ name: 'LOG_LEVEL', value: logLevel }
-{ name: 'APPLICATIONINSIGHTS_CONNECTION_STRING', secretRef: 'app-insights-connection-string' }
+{ name: 'APPLICATIONINSIGHTS_CONNECTION_STRING', secretRef: 'applicationinsights-connection-string' }
// Ensure cloudRoleName is set in telemetry
// https://opentelemetry-python.readthedocs.io/en/latest/sdk/environment_variables.html#opentelemetry.sdk.environment_variables.OTEL_SERVICE_NAME
{ name: 'OTEL_SERVICE_NAME', value: apiSimulatorName }
3 changes: 2 additions & 1 deletion loadtest/common/config.py
@@ -1,7 +1,8 @@
import os

api_key = os.getenv("API_KEY", os.getenv("SIMULATOR_API_KEY"))
-app_insights_connection_string = os.getenv("APP_INSIGHTS_CONNECTION_STRING")
+opentelemetry_exporter_otlp_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
+applicationinsights_connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
log_analytics_workspace_id = os.getenv("LOG_ANALYTICS_WORKSPACE_ID")
log_analytics_workspace_name = os.getenv("LOG_ANALYTICS_WORKSPACE_NAME")
tenant_id = os.getenv("TENANT_ID")
10 changes: 5 additions & 5 deletions loadtest/common/locust_app_insights.py
@@ -1,18 +1,18 @@
import logging
-from opentelemetry import metrics

from azure.monitor.opentelemetry import configure_azure_monitor
+from opentelemetry import metrics

from .config import (
-app_insights_connection_string,
+applicationinsights_connection_string,
)


histogram_request_latency: metrics.Histogram

-if app_insights_connection_string:
+if applicationinsights_connection_string:
# Options: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/monitor/azure-monitor-opentelemetry#usage
logging.getLogger("azure").setLevel(logging.WARNING)
-configure_azure_monitor(connection_string=app_insights_connection_string)
+configure_azure_monitor(connection_string=applicationinsights_connection_string)
histogram_request_latency = metrics.get_meter(__name__).create_histogram(
"locust.request_latency", "Request latency", "s"
)
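For context, a Locust request listener that records into this histogram could look roughly like the following (a sketch only; the actual report_request_metric implementation is not shown in this hunk):

```python
def report_request_metric(request_type, name, response_time, exception, **kwargs):
    # Locust reports response_time in milliseconds; the histogram records seconds
    histogram_request_latency.record(
        response_time / 1000,
        attributes={"request_type": request_type, "name": name, "success": exception is None},
    )
```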
8 changes: 4 additions & 4 deletions loadtest/loadtest_chat_completions_1s_latency.py
@@ -1,13 +1,13 @@
import logging
import os
-from locust import HttpUser, task, constant, events
-from locust.env import Environment

-from common.config import api_key, app_insights_connection_string
+from common.config import api_key, applicationinsights_connection_string
from common.latency import set_simulator_chat_completions_latency
from common.locust_app_insights import (
report_request_metric,
)
+from locust import HttpUser, constant, events, task
+from locust.env import Environment

max_tokens = 100
deployment_name = os.getenv("DEPLOYMENT_NAME", None)
@@ -21,7 +21,7 @@ def on_locust_init(environment: Environment, **_):
"""
Configure test
"""
-if app_insights_connection_string:
+if applicationinsights_connection_string:
logging.info("App Insights connection string found - enabling request metrics")
environment.events.request.add_listener(report_request_metric)
else:
8 changes: 4 additions & 4 deletions loadtest/loadtest_chat_completions_no_added_latency.py
@@ -1,13 +1,13 @@
import logging
import os
-from locust import HttpUser, task, constant, events
-from locust.env import Environment

-from common.config import api_key, app_insights_connection_string
+from common.config import api_key, applicationinsights_connection_string
from common.latency import set_simulator_chat_completions_latency
from common.locust_app_insights import (
report_request_metric,
)
+from locust import HttpUser, constant, events, task
+from locust.env import Environment

max_tokens = int(os.getenv("MAX_TOKENS", "100"))
deployment_name = os.getenv("DEPLOYMENT_NAME", None)
@@ -25,7 +25,7 @@ def on_locust_init(environment: Environment, **_):
"""
Configure test
"""
-if app_insights_connection_string:
+if applicationinsights_connection_string:
logging.info("App Insights connection string found - enabling request metrics")
environment.events.request.add_listener(report_request_metric)
else:
2 changes: 2 additions & 0 deletions sample.env
@@ -23,6 +23,8 @@ AZURE_FORM_RECOGNIZER_KEY=
# Open Telemetry Config (used within the simulator)
OTEL_SERVICE_NAME=aoai-api-simulator-local-dev
OTEL_METRIC_EXPORT_INTERVAL=10000
+OTEL_EXPORTER_OTLP_ENDPOINT=http://host.docker.internal:4317
+APPLICATIONINSIGHTS_CONNECTION_STRING=


# Test Client Config (used to direct the tests and test clients)
6 changes: 3 additions & 3 deletions scripts/_run-load-test-aca.sh
@@ -84,8 +84,8 @@ if [[ -z "${key_vault_name}" ]]; then
echo "Key Vault Name not found in output.json"
exit 1
fi
-app_insights_connection_string=$(az keyvault secret show --vault-name "$key_vault_name" --name app-insights-connection-string --query value --output tsv)
-if [[ -z "${app_insights_connection_string}" ]]; then
+applicationinsights_connection_string=$(az keyvault secret show --vault-name "$key_vault_name" --name applicationinsights-connection-string --query value --output tsv)
+if [[ -z "${applicationinsights_connection_string}" ]]; then
echo "App Insights Connection String not found in Key Vault"
exit 1
fi
@@ -127,7 +127,7 @@ az containerapp job create \
--cpu "1" \
--memory "2Gi" \
--command "locust" \
---env-vars "LOCUST_LOCUSTFILE=$TEST_FILE" "LOCUST_HOST=https://${api_fqdn}/" "LOCUST_USERS=$LOCUST_USERS" "LOCUST_SPAWN_RATE=$LOCUST_SPAWN_RATE" "LOCUST_AUTOSTART=true" "LOCUST_RUN_TIME=$LOCUST_RUN_TIME" "LOCUST_AUTOQUIT=10" "SIMULATOR_API_KEY=${SIMULATOR_API_KEY}" "APP_INSIGHTS_CONNECTION_STRING=${app_insights_connection_string}" "MAX_TOKENS=${MAX_TOKENS}" "DEPLOYMENT_NAME=${DEPLOYMENT_NAME}" ALLOW_429_RESPONSES=${ALLOW_429_RESPONSES} 1>&2
+--env-vars "LOCUST_LOCUSTFILE=$TEST_FILE" "LOCUST_HOST=https://${api_fqdn}/" "LOCUST_USERS=$LOCUST_USERS" "LOCUST_SPAWN_RATE=$LOCUST_SPAWN_RATE" "LOCUST_AUTOSTART=true" "LOCUST_RUN_TIME=$LOCUST_RUN_TIME" "LOCUST_AUTOQUIT=10" "SIMULATOR_API_KEY=${SIMULATOR_API_KEY}" "APPLICATIONINSIGHTS_CONNECTION_STRING=${applicationinsights_connection_string}" "MAX_TOKENS=${MAX_TOKENS}" "DEPLOYMENT_NAME=${DEPLOYMENT_NAME}" ALLOW_429_RESPONSES=${ALLOW_429_RESPONSES} 1>&2


start_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
4 changes: 4 additions & 0 deletions src/aoai-api-simulator/requirements.txt
@@ -2,9 +2,13 @@ fastapi==0.109.2
uvicorn[standard]==0.27.0.post1
gunicorn==22.0.0
requests==2.32.0
+opentelemetry-instrumentation-requests==0.48b0
PyYAML==6.0.1
tiktoken==0.6.0
nanoid==2.0.0
limits==3.8.0
+opentelemetry-api==1.27.0
+opentelemetry-sdk==1.27.0
+opentelemetry-exporter-otlp==1.27.0
azure-monitor-opentelemetry==1.3.0
pydantic-settings==2.2.1
68 changes: 63 additions & 5 deletions src/aoai-api-simulator/src/aoai_api_simulator/main.py
@@ -1,33 +1,91 @@
import logging
import os

from aoai_api_simulator.app_builder import app as builder_app
from aoai_api_simulator.app_builder import apply_config
from aoai_api_simulator.config_loader import get_config_from_env_vars, set_config
from azure.monitor.opentelemetry import configure_azure_monitor
from opentelemetry import metrics, trace
from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.requests import RequestsInstrumentor
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

log_level = os.getenv("LOG_LEVEL") or "INFO"

logger = logging.getLogger(__name__)
logging.basicConfig(level=log_level)
logging.getLogger("azure").setLevel(logging.WARNING)

opentelemetry_exporter_otlp_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
application_insights_connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")

using_azure_monitor: bool

if application_insights_connection_string:
    logger.info("🚀 Configuring Azure Monitor telemetry")

    # Options: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/monitor/azure-monitor-opentelemetry#usage
    configure_azure_monitor(connection_string=application_insights_connection_string)
    using_azure_monitor = True
else:
    using_azure_monitor = False
    logger.info("🚀 Azure Monitor telemetry not configured (set APPLICATIONINSIGHTS_CONNECTION_STRING)")

if opentelemetry_exporter_otlp_endpoint:
    logger.info("🚀 Configuring OTLP telemetry")

    # shared resource so traces, metrics and logs all report the same service name
    resource = Resource(attributes={"service.name": os.getenv("OTEL_SERVICE_NAME", "aoai-api-simulator")})

    # tracing
    trace.set_tracer_provider(TracerProvider(resource=resource))
    span_processor = BatchSpanProcessor(OTLPSpanExporter())
    trace.get_tracer_provider().add_span_processor(span_processor)

    # metrics
    reader = PeriodicExportingMetricReader(OTLPMetricExporter())
    meter_provider = MeterProvider(resource=resource, metric_readers=[reader])
    metrics.set_meter_provider(meter_provider)

    # logging
    logger_provider = LoggerProvider(resource=resource)
    logger_provider.add_log_record_processor(BatchLogRecordProcessor(OTLPLogExporter()))

    # attach an OTLP handler to the root logger
    handler = LoggingHandler(level=os.getenv("OTEL_LOG_LEVEL", "INFO"), logger_provider=logger_provider)
    logging.getLogger().addHandler(handler)
else:
    logger.info("🚀 OTLP telemetry exporter not configured (set OTEL_EXPORTER_OTLP_ENDPOINT)")

config = get_config_from_env_vars(logger)
set_config(config)


apply_config()

app = builder_app # expose to gunicorn

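# configure_azure_monitor applies its own instrumentation (including the
# requests library), so the manual instrumentor is only added otherwise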
if not using_azure_monitor:
    RequestsInstrumentor().instrument()
FastAPIInstrumentor.instrument_app(app)
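
For a sense of how application code picks up the providers configured above, here is a minimal, illustrative sketch (the span and metric names are hypothetical, not part of this PR):

```python
from opentelemetry import metrics, trace

tracer = trace.get_tracer(__name__)
meter = metrics.get_meter(__name__)

# hypothetical metric, for illustration only
token_histogram = meter.create_histogram("aoai.simulator.tokens", unit="{token}", description="Tokens per request")

with tracer.start_as_current_span("simulate-completion"):  # hypothetical span name
    token_histogram.record(42, attributes={"deployment": "gpt-35-turbo"})
```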