From 83074204b91348a8aab6c3ee0436e8665c6eb31e Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Thu, 26 Feb 2026 12:00:23 +0000 Subject: [PATCH 01/17] Add agent responses JSON, refactor Bicep resources, enable hosted agents, and update batch test script --- .../optimized-concise/agent-responses.json | 80 ++++++++++++++++ infra/core/ai/ai-project.bicep | 95 ++++++++----------- infra/main.parameters.json | 2 +- src/tests/run_batch_tests.py | 70 ++++++++------ testing_attr.py | 22 +++++ 5 files changed, 181 insertions(+), 88 deletions(-) create mode 100644 experiments/optimized-concise/agent-responses.json create mode 100644 testing_attr.py diff --git a/experiments/optimized-concise/agent-responses.json b/experiments/optimized-concise/agent-responses.json new file mode 100644 index 0000000..190baee --- /dev/null +++ b/experiments/optimized-concise/agent-responses.json @@ -0,0 +1,80 @@ +{ + "experiment": "optimized-concise", + "timestamp": "2026-02-26T11:35:45.890547", + "agent_id": "trail-guide", + "agent_name": "trail-guide", + "agent_version": { + "latest": { + "metadata": {}, + "object": "agent.version", + "id": "trail-guide:3", + "name": "trail-guide", + "version": "3", + "description": "", + "created_at": 1772105724, + "definition": { + "kind": "prompt", + "model": "gpt-4.1", + "instructions": "You are an expert trail guide assistant for Adventure Works, an outdoor gear company. You have access to a comprehensive knowledge base of trails, weather data, and gear recommendations. \n\nProvide personalized trail recommendations based on:\n- User experience level\n- Weather conditions\n- Available gear\n- Time constraints\n- Location preferences\n\nAlways prioritize safety and provide specific, actionable advice. Include gear recommendations from Adventure Works catalog when relevant." + } + } + }, + "agent_object": "agent", + "test_results": [ + { + "test_name": "day-hike-gear", + "prompt": "What essential gear do I need for a summer day hike in moderate terrain?", + "response": "For a summer day hike in moderate terrain, here\u2019s a list of **essential gear** to keep you comfortable and safe:\n\n### 1. **Footwear**\n- **Trail Shoes or Hiking Boots:** Choose Adventure Works\u2019 All-Terrain Trail Shoes for excellent traction and breathability. Wear moisture-wicking socks.\n\n### 2. **Clothing**\n- **Moisture-Wicking Base Layer:** Lightweight shirts/tops (synthetic or merino).\n- **Hiking Shorts/Pants:** Lightweight, quick-dry material.\n- **Hat & Sunglasses:** For sun protection. Try Adventure Works\u2019 Sunshield Cap and Polarized Hiking Sunglasses.\n- **Light Jacket/Fleece:** In case of sudden temperature changes.\n\n### 3. **Safety & Navigation**\n- **Map/GPS App:** Download offline maps or bring Adventure Works\u2019 Pocket Trail Map.\n- **Compass:** (if you know how to use it).\n- **Whistle:** For emergency signaling.\n\n### 4. **Hydration & Nutrition**\n- **Water Bottle or Hydration System:** At least 2 liters. Adventure Works Hydrator Reservoir is a good choice.\n- **Electrolyte Tablets:** For replenishment.\n- **Trail Snacks:** Energy bars, dried fruit, nuts.\n\n### 5. **Sun & Insect Protection**\n- **Sunscreen:** SPF 30+.\n- **Lip Balm:** With SPF.\n- **Insect Repellent:** Adventure Works BugBarrier Spray.\n\n### 6. **First Aid & Emergency**\n- **Compact First Aid Kit:** Adventure Works Trail Safety Kit covers basics.\n- **Multi-tool/Knife:** For repairs or emergencies.\n- **Lightweight Rain Jacket or Poncho:** Even in summer, storms can appear.\n\n### 7. **Other Essentials**\n- **Lightweight Backpack:** Adventure Works Daypack 18L is ideal.\n- **Trash Bag:** \u201cLeave No Trace\u201d\u2014carry out your waste.\n- **Camera/Phone:** For photos and emergencies (fully charged).\n\n---\n\n**Optional:** Trekking poles, if you prefer extra stability, especially for longer hikes.\n\n#### **Safety Tips**\n- **Check Local Weather:** Conditions can change quickly.\n- **Tell Someone Your Plans:** Trail name, expected return.\n- **Know Your Limits:** If new to moderate terrain, start with shorter hikes.\n\nLet me know your **gear availability** or **location preference** for more customized recommendations!", + "token_usage": { + "prompt_tokens": 109, + "completion_tokens": 492, + "total_tokens": 601 + }, + "run_id": "resp_61dd702d31d7cffd0069a0301d3f208190a8e389d83caaf072" + }, + { + "test_name": "overnight-camping", + "prompt": "How should I prepare for my first overnight camping trip?", + "response": "Awesome choice! Overnight camping is a fantastic experience. Here\u2019s a step-by-step guide to prepare for your first trip, plus specific gear recommendations from Adventure Works.\n\n---\n\n**1. Choose Your Location**\n- For your first trip, select a campsite with easy access and established amenities (bathrooms, water supply).\n- National and state parks often have excellent beginner-friendly spots.\n\n**2. Check the Weather**\n- Look up the forecast for your camping dates and location.\n- Avoid storms/extreme temperatures on your first trip. If uncertain, I can check current conditions for you.\n\n**3. Plan Your Gear**\nHere\u2019s a basic overnight packing list (with Adventure Works catalog suggestions):\n\n**Shelter & Sleep**\n- *Tent*: Adventure Works Lightweight 2-Person Tent\n- *Sleeping Bag*: Adventure Works Explorer 20\u00b0F Sleeping Bag\n- *Sleeping Pad*: Adventure Works Comfort Air Sleeping Pad\n\n**Clothing**\n- Moisture-wicking base layers\n- Warm mid-layer (fleece or insulated jacket)\n- Waterproof jacket (Adventure Works Rain Defender Shell)\n- Extra socks and underwear\n- Comfortable sleeping clothes\n\n**Cooking & Food**\n- Backpacking stove (Adventure Works Micro Stove)\n- Fuel canister\n- Lightweight cookware set\n- Food for dinner, breakfast, snacks (easy-to-prepare meals)\n- Water bottles or hydration reservoir\n- Water filter/purifier (Adventure Works AquaSafe Filter)\n\n**Safety & Essentials**\n- First aid kit (Adventure Works Trail Safety Kit)\n- Headlamp with extra batteries (Adventure Works Trail Beam Headlamp)\n- Map of the area & compass (or GPS)\n- Multi-tool\n\n**Other Useful Items**\n- Camp chair or sit pad\n- Bug spray and sunscreen\n- Biodegradable soap\n- Trash bags (Leave No Trace!)\n- Backpack (Adventure Works TrailPro 45L)\n\n**4. Learn Basic Skills**\n- Practice setting up your tent at home\n- Learn how to use your stove and filter water\n- Review campfire safety (if fires are allowed)\n\n**5. Plan Your Timeline**\n- Aim to arrive early so you can set up in daylight\n- Plan your hike to and from the campsite (distance, difficulty)\n- Have a backup plan in case you need to change plans due to weather or fatigue\n\n**6. Let Someone Know Your Plan**\n- Share your itinerary, campsite location, and expected return time with a trusted person.\n\n---\n\n**Safety Tips:**\n- Start simple: Don\u2019t go too remote or ambitious on your first try.\n- Double-check the weather and pack layers.\n- Bring a little extra food and water just in case.\n- If you have specific gear, let me know\u2014I'll help you optimize your packing list!\n\n---\n\n**Need help picking a campsite or want a packing checklist personalized to your gear and location? Let me know!**", + "token_usage": { + "prompt_tokens": 105, + "completion_tokens": 576, + "total_tokens": 681 + }, + "run_id": "resp_b74a0cde0a1ab3150069a03022188081909cd5ab4123d6844f" + }, + { + "test_name": "three-day-backpacking", + "prompt": "I'm planning a 3-day backpacking trip in the mountains in October. What are the most critical safety considerations?", + "response": "A 3-day mountain backpacking trip in October presents some unique safety challenges\u2014mainly due to changing weather, shorter daylight hours, and the potential for cold temperatures. Here are the most critical safety considerations:\n\n---\n\n### 1. **Weather Preparedness**\n- **Rapid Weather Changes:** Mountain weather in October can be unpredictable, with strong winds, rain, or even early snow. Always check the forecast daily and prepare for worse conditions than predicted.\n- **Cold Temperatures:** Nights can be near or below freezing. Pack thermal layers and be ready for frost.\n- **Wet Conditions:** Rain or melting snow can make trails slippery and increase hypothermia risk.\n\n**Adventure Works Recommendations:**\n- **Layering System:** Insulated jackets, base layers, and waterproof shell (Adventure Works AlpineShell Jacket).\n- **Weatherproof Tent:** Adventure Works MountainLite 2P Tent\u2014excellent wind and precipitation protection.\n\n---\n\n### 2. **Hypothermia Prevention**\n- **Stay Dry:** Avoid sweating through efficient layering and ventilating clothing; change into dry clothes immediately if wet.\n- **Sleep Warm:** Use a sleeping bag rated for at least 10\u00b0F lower than expected lows (Adventure Works Thermo 20 Sleeping Bag).\n- **Nutrition:** Eat high-calorie foods and drink warm fluids to maintain body heat.\n\n---\n\n### 3. **Daylight Management**\n- **Short Days:** Plan for 10-11 hours of light; carry headlamps and extra batteries (Adventure Works TrekTorch 400).\n- **Start Early:** Begin hiking soon after sunrise to maximize daylight.\n\n---\n\n### 4. **Navigation & Communication**\n- **Map & Compass:** Know the route and carry map, compass, or GPS device.\n- **Emergency Contact:** Let someone know your itinerary; consider carrying a satellite communicator (Adventure Works CommLink Mini).\n\n---\n\n### 5. **Wildlife & Water Safety**\n- **Food Storage:** Properly secure food from bears and rodents (Adventure Works BearSafe Canister).\n- **Water Treatment:** Streams may contain pathogens; carry a filter or purification tablets (Adventure Works PureFlow Filter).\n\n---\n\n### 6. **Physical Preparedness**\n- **Altitude:** Know signs of altitude sickness if you\u2019re above 8,000 ft. Stay hydrated and pace yourself.\n- **First Aid:** Carry a comprehensive kit and know basic wilderness first aid (Adventure Works Wilderness First Aid Kit).\n\n---\n\n### 7. **Route Planning**\n- **Trail Conditions:** October may bring early snow, mud, or closed trails. Check park websites for alerts.\n- **Alternate Plans:** Prepare bailout options in case of weather or injury.\n\n---\n\n### 8. **Leave No Trace**\n- **Minimize Impact:** Follow Leave No Trace principles\u2014pack out trash, avoid disturbing wildlife, and use established campsites.\n\n---\n\n**Summary:** \nPrepare for cold and wet weather; manage daylight and navigation carefully; protect from hypothermia and wildlife; and always have a backup plan. Bring adequate gear (see recommendations above), and stay informed on local conditions.\n\nIf you share your location and experience level, I can provide tailored trail and gear recommendations!", + "token_usage": { + "prompt_tokens": 117, + "completion_tokens": 635, + "total_tokens": 752 + }, + "run_id": "resp_7e7af396fb34e02e0069a0302c9aec8190b9bd6947221f17d0" + }, + { + "test_name": "trail-difficulty", + "prompt": "How do I know if a trail is appropriate for my fitness level? I'm a beginner hiker.", + "response": "Great question! Choosing a trail that matches your fitness and experience level is essential for both safety and enjoyment. Here\u2019s how to assess whether a trail is appropriate for you as a beginner:\n\n---\n\n**1. Trail Difficulty Ratings**\n\n- *Easy trails*: Look for those marked as \u201cbeginner,\u201d \u201ceasy,\u201d or \u201cfamily-friendly.\u201d These typically have:\n - Well-maintained paths\n - Minimal elevation gain (usually <300 ft)\n - Distances under 3 miles one way\n\n**2. Trail Length and Elevation**\n\n- *Short distance*: Start with hikes under 3 miles.\n- *Low elevation gain*: Avoid steep climbs; aim for trails with gentle slopes.\n\n**3. Trail Surface**\n\n- Choose trails with even, non-technical surfaces (gravel, packed dirt, or paved paths).\n- Avoid trails with rocks, roots, or uneven terrain until you gain confidence.\n\n**4. Weather**\n\n- Check the forecast before you go\u2014rain, excessive heat, or snow can make even easy trails more challenging.\n\n**5. Trail Description and Reviews**\n\n- Read trail descriptions and user reviews (on websites like AllTrails or local park guides) for honest insights on difficulty.\n\n**6. Gear Considerations**\n\n- Bring basic gear for comfort and safety:\n - Adventure Works \u201cTrail Lite Daypack\u201d\n - Water bottle (Adventure Works Insulated Water Bottle 20oz)\n - Supportive hiking shoes (Adventure Works Pathfinder Shoes)\n - Weather-appropriate clothing (layering is key!)\n\n**7. Time Constraints**\n\n- Plan hikes you can complete comfortably within your available time (most beginner hikes take 1\u20132 hours).\n\n**8. Local Recommendations**\n\n- Let me know your location or preferred region, and I can suggest beginner-friendly trails with up-to-date conditions.\n\n---\n\n**Remember:** Start small, listen to your body, and gradually try longer or more varied trails as your confidence grows. If you\u2019d like trail suggestions tailored to your area and gear, just let me know!", + "token_usage": { + "prompt_tokens": 114, + "completion_tokens": 409, + "total_tokens": 523 + }, + "run_id": "resp_b4da7cb3e16b1f280069a03033e8e48190aa6c12c2baa129d8" + }, + { + "test_name": "winter-hiking", + "prompt": "What additional gear and precautions do I need for winter hiking compared to summer?", + "response": "Great question! Winter hiking requires **significantly more preparation** than summer hiking, both in terms of gear and safety precautions. Here\u2019s a comprehensive breakdown:\n\n---\n\n### 1. **Additional Gear for Winter Hiking**\n\n#### Clothing\n- **Base layers:** Moisture-wicking and thermal (synthetic or merino wool).\n- **Mid layers:** Fleece or insulated jackets (avoid cotton).\n- **Outer layers:** Waterproof/breathable shell jacket and pants.\n- **Insulated jacket:** Down or synthetic for warmth on breaks.\n- **Hat & gloves:** Warm, windproof; bring liner gloves and backups.\n- **Neck gaiter/balaclava:** Protects your face from wind and cold.\n- **Wool socks:** Thicker than summer; consider sock liners.\n\n#### Footwear\n- **Waterproof boots:** Insulated, with good traction.\n- **Gaiters:** Keep snow and moisture out.\n- **Microspikes/traction devices:** For icy trail conditions (available from Adventure Works).\n- **Snowshoes:** For deep snow (Adventure Works offers a variety).\n\n#### Other Essentials\n- **Backpack with extra capacity:** To carry bulkier/warm gear.\n- **Headlamp:** Early sunsets; extra batteries (cold drains power).\n- **Trekking poles:** With snow baskets.\n- **Emergency blanket/bivy sack:** For unexpected situations.\n- **Hand/foot warmers:** Disposable or rechargeable.\n- **Thermos:** With hot drinks; keep water from freezing.\n- **Extra food:** High calorie snacks.\n- **Map/GPS:** Navigation is harder in snow.\n\n#### Safety Gear\n- **First aid kit:** Add blister treatment, extra meds.\n- **Whistle & signaling device:** For emergencies.\n- **Personal locator beacon** (PLB) or satellite messenger: If venturing into remote areas.\n\n---\n\n### 2. **Precautions and Safety Tips**\n\n- **Check weather and avalanche forecasts** before heading out.\n- **Know the signs of hypothermia and frostbite** and how to prevent them.\n- **Start early:** Days are shorter; plan turnaround time.\n- **Hike with a buddy** or leave your route/ETA with someone.\n- **Stay on marked trails:** Snow can obscure the path.\n- **Carry extra layers:** Always have \u201cone more layer\u201d than you think you\u2019ll need.\n- **Hydrate and eat frequently:** Cold diminishes thirst; don\u2019t let yourself get depleted.\n- **Navigation:** GPS and map skills are crucial; landmarks can be hidden.\n- **Know your limits:** Turn back if conditions worsen or you\u2019re struggling.\n\n---\n\n### 3. **Recommended Gear from Adventure Works**\n\n- **Adventure Works Insulated Shell Jacket**\n- **Adventure Works Merino Wool baselayer set**\n- **Adventure Works Traction Pro Microspikes**\n- **Adventure Works UltraLite Emergency Bivy**\n- **Adventure Works Classic Snowshoes**\n- **Adventure Works Rechargeable Headlamp**\n\n---\n\n**Winter hiking is rewarding but demands respect for the conditions.** If you want tailored gear recommendations or trail suggestions, let me know your location, experience level, and available gear!", + "token_usage": { + "prompt_tokens": 109, + "completion_tokens": 645, + "total_tokens": 754 + }, + "run_id": "resp_b6a1bb8473eb54b60069a0303934488190b9a4c84654b823e5" + } + ] +} \ No newline at end of file diff --git a/infra/core/ai/ai-project.bicep b/infra/core/ai/ai-project.bicep index f41f4ee..40e50f2 100644 --- a/infra/core/ai/ai-project.bicep +++ b/infra/core/ai/ai-project.bicep @@ -76,67 +76,50 @@ module applicationInsights '../monitor/applicationinsights.bicep' = if (enableMo resource aiAccount 'Microsoft.CognitiveServices/accounts@2025-06-01' = { name: !empty(existingAiAccountName) ? existingAiAccountName : 'ai-account-${resourceToken}' location: location - tags: tags - sku: { - name: 'S0' - } kind: 'AIServices' - identity: { - type: 'SystemAssigned' - } + sku: { name: 'S0' } + identity: { type: 'SystemAssigned' } properties: { allowProjectManagement: true customSubDomainName: !empty(existingAiAccountName) ? existingAiAccountName : 'ai-account-${resourceToken}' + publicNetworkAccess: 'Enabled' + disableLocalAuth: true networkAcls: { defaultAction: 'Allow' virtualNetworkRules: [] ipRules: [] } - publicNetworkAccess: 'Enabled' - disableLocalAuth: true } - - @batchSize(1) - resource seqDeployments 'deployments' = [ - for dep in (deployments ?? []): { - name: dep.name - sku: dep.sku - properties: { - model: dep.model - } - } - ] +} - resource project 'projects' = { - name: aiFoundryProjectName - location: location - identity: { - type: 'SystemAssigned' - } - properties: { - description: '${aiFoundryProjectName} Project' - displayName: '${aiFoundryProjectName}Project' - } - dependsOn: [ - seqDeployments - ] +// NEW: separate project resource, matching docs +resource project 'Microsoft.CognitiveServices/accounts/projects@2025-06-01' = { + name: aiFoundryProjectName + parent: aiAccount + location: location + identity: { + type: 'SystemAssigned' } - - resource aiFoundryAccountCapabilityHost 'capabilityHosts@2025-10-01-preview' = if (enableHostedAgents) { - name: 'agents' - properties: { - capabilityHostKind: 'Agents' - // IMPORTANT: this is required to enable hosted agents deployment - // if no BYO Net is provided - enablePublicHostingEnvironment: true - } + properties: { + description: '${aiFoundryProjectName} Project' + displayName: '${aiFoundryProjectName}Project' } } +resource aiFoundryAccountCapabilityHost 'Microsoft.CognitiveServices/accounts/capabilityHosts@2025-10-01-preview' = if (enableHostedAgents) { + name: 'agents' + parent: aiAccount + properties: { + capabilityHostKind: 'Agents' + // IMPORTANT: this is required to enable hosted agents deployment + // if no BYO Net is provided + enablePublicHostingEnvironment: true + } +} // Create connection towards appinsights -resource appInsightConnection 'Microsoft.CognitiveServices/accounts/projects/connections@2025-04-01-preview' = { - parent: aiAccount::project +resource appInsightConnection 'Microsoft.CognitiveServices/accounts/projects/connections@2025-04-01-preview' = if (enableMonitoring) { + parent: project name: 'appi-connection' properties: { category: 'AppInsights' @@ -158,7 +141,7 @@ module aiConnections './connection.bicep' = [for (connection, index) in connecti name: 'connection-${connection.name}' params: { aiServicesAccountName: aiAccount.name - aiProjectName: aiAccount::project.name + aiProjectName: project.name connectionConfig: { name: connection.name category: connection.category @@ -191,9 +174,9 @@ resource localUserCognitiveServicesUserRoleAssignment 'Microsoft.Authorization/r resource projectCognitiveServicesUserRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { scope: aiAccount - name: guid(subscription().id, resourceGroup().id, aiAccount::project.name, '53ca6127-db72-4b80-b1b0-d745d6d5456d') + name: guid(subscription().id, resourceGroup().id, project.name, '53ca6127-db72-4b80-b1b0-d745d6d5456d') properties: { - principalId: aiAccount::project.identity.principalId + principalId: project.identity.principalId principalType: 'ServicePrincipal' roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', '53ca6127-db72-4b80-b1b0-d745d6d5456d') } @@ -214,7 +197,7 @@ module storage '../storage/storage.bicep' = if (hasStorageConnection) { principalId: principalId principalType: principalType aiServicesAccountName: aiAccount.name - aiProjectName: aiAccount::project.name + aiProjectName: project.name } } @@ -229,7 +212,7 @@ module acr '../host/acr.bicep' = if (hasAcrConnection) { principalId: principalId principalType: principalType aiServicesAccountName: aiAccount.name - aiProjectName: aiAccount::project.name + aiProjectName: project.name } } @@ -241,7 +224,7 @@ module bingGrounding '../search/bing_grounding.bicep' = if (hasBingConnection) { resourceName: 'bing-${resourceToken}' connectionName: bingConnectionName aiServicesAccountName: aiAccount.name - aiProjectName: aiAccount::project.name + aiProjectName: project.name } } @@ -253,7 +236,7 @@ module bingCustomGrounding '../search/bing_custom_grounding.bicep' = if (hasBing resourceName: 'bingcustom-${resourceToken}' connectionName: bingCustomConnectionName aiServicesAccountName: aiAccount.name - aiProjectName: aiAccount::project.name + aiProjectName: project.name } } @@ -267,7 +250,7 @@ module azureAiSearch '../search/azure_ai_search.bicep' = if (hasSearchConnection storageAccountResourceId: hasStorageConnection ? storage!.outputs.storageAccountId : '' containerName: 'knowledge' aiServicesAccountName: aiAccount.name - aiProjectName: aiAccount::project.name + aiProjectName: project.name principalId: principalId principalType: principalType location: location @@ -276,15 +259,15 @@ module azureAiSearch '../search/azure_ai_search.bicep' = if (hasSearchConnection // Outputs -output AZURE_AI_PROJECT_ENDPOINT string = aiAccount::project.properties.endpoints['AI Foundry API'] +output AZURE_AI_PROJECT_ENDPOINT string = project.properties.endpoints['AI Foundry API'] output AZURE_OPENAI_ENDPOINT string = aiAccount.properties.endpoints['OpenAI Language Model Instance API'] output aiServicesEndpoint string = aiAccount.properties.endpoint output accountId string = aiAccount.id -output projectId string = aiAccount::project.id +output projectId string = project.id output aiServicesAccountName string = aiAccount.name -output aiServicesProjectName string = aiAccount::project.name +output aiServicesProjectName string = project.name output aiServicesPrincipalId string = aiAccount.identity.principalId -output projectName string = aiAccount::project.name +output projectName string = project.name output APPLICATIONINSIGHTS_CONNECTION_STRING string = applicationInsights.outputs.connectionString // Grouped dependent resources outputs diff --git a/infra/main.parameters.json b/infra/main.parameters.json index de56a9d..f4b69dd 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -36,7 +36,7 @@ "value": "${ENABLE_MONITORING=true}" }, "enableHostedAgents": { - "value": "${ENABLE_HOSTED_AGENTS=false}" + "value": "${ENABLE_HOSTED_AGENTS=true}" } } } diff --git a/src/tests/run_batch_tests.py b/src/tests/run_batch_tests.py index 045d280..2d396b9 100644 --- a/src/tests/run_batch_tests.py +++ b/src/tests/run_batch_tests.py @@ -53,12 +53,14 @@ def run_batch_tests(experiment_name): endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"], credential=DefaultAzureCredential(), ) + + openai_client = client.get_openai_client() # Get the agent by name (assumes trail_guide_agent.py already created it) agent_name = os.environ.get("AGENT_NAME", "trail-guide") # List agents and find the one with our name - agents = client.agents.list_agents() + agents = client.agents.list() agent = None for a in agents: if a.name == agent_name: @@ -70,10 +72,7 @@ def run_batch_tests(experiment_name): print("Please run 'python src/agents/trail_guide_agent/trail_guide_agent.py' first to create the agent.") return - print(f"Using agent: {agent.name} (id: {agent.id}, version: {agent.version})") - - # Create thread for this test run - thread = client.agents.create_thread() + print(f"Using agent: {agent.name} (id: {agent.id}, version: {agent.versions})") # Capture all results results = { @@ -81,8 +80,8 @@ def run_batch_tests(experiment_name): "timestamp": datetime.now().isoformat(), "agent_id": agent.id, "agent_name": agent.name, - "agent_version": agent.version, - "model": agent.model, + "agent_version": agent.versions.as_dict() if hasattr(agent.versions, "as_dict") else str(agent.versions), + "agent_object": getattr(agent, "object", None), "test_results": [] } @@ -90,40 +89,49 @@ def run_batch_tests(experiment_name): for test_name, prompt_text in test_prompts.items(): print(f"\nTesting: {test_name}") print(f" Prompt: {prompt_text[:60]}...") - - # Send message - message = client.agents.create_message( - thread_id=thread.id, - role="user", - content=prompt_text, + + # Create a fresh conversation for this test + conversation = openai_client.conversations.create() + + # Send user message into the conversation + openai_client.conversations.items.create( + conversation_id=conversation.id, + items=[{ + "type": "message", + "role": "user", + "content": prompt_text, + }], ) - - # Run agent - run = client.agents.create_and_process_run( - thread_id=thread.id, - assistant_id=agent.id, + + # Ask the agent to respond using the Responses API with agent_reference + response = openai_client.responses.create( + conversation=conversation.id, + extra_body={"agent_reference": {"name": agent_name, "type": "agent_reference"}}, + input="", ) - - # Get response - messages = client.agents.list_messages(thread_id=thread.id) - response = messages.data[0].content[0].text.value - - # Get token usage from run + + # Extract text from the response; fall back to str(response) if shape changes + try: + response_text = response.output[0].content[0].text + except Exception: + response_text = str(response) + + usage = getattr(response, "usage", None) token_usage = { - "prompt_tokens": run.usage.prompt_tokens if run.usage else None, - "completion_tokens": run.usage.completion_tokens if run.usage else None, - "total_tokens": run.usage.total_tokens if run.usage else None, + "prompt_tokens": getattr(usage, "input_tokens", None) if usage else None, + "completion_tokens": getattr(usage, "output_tokens", None) if usage else None, + "total_tokens": getattr(usage, "total_tokens", None) if usage else None, } - + # Store result results["test_results"].append({ "test_name": test_name, "prompt": prompt_text, - "response": response, + "response": response_text, "token_usage": token_usage, - "run_id": run.id, + "run_id": getattr(response, "id", None), }) - + print(f" Response captured ({token_usage['total_tokens']} tokens)") # Save results to experiment folder (at repository root) diff --git a/testing_attr.py b/testing_attr.py new file mode 100644 index 0000000..cf01218 --- /dev/null +++ b/testing_attr.py @@ -0,0 +1,22 @@ +import os +from azure.identity import DefaultAzureCredential +from azure.ai.projects import AIProjectClient +from dotenv import load_dotenv + +load_dotenv() + +client = AIProjectClient( + endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"], + credential=DefaultAzureCredential(), +) + +# Get one agent (or filter by name) +agent = next(client.agents.list()) + +# See all attribute names +print(dir(agent)) + +# Or see a dict of public attributes +attrs = {k: v for k, v in agent.__dict__.items() if not k.startswith("_")} +for k, v in attrs.items(): + print(k, "=", v) \ No newline at end of file From 494fc132d48747796f25c84608dc35f654a59b07 Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Thu, 26 Feb 2026 13:39:02 +0000 Subject: [PATCH 02/17] Add baseline agent responses JSON for hiking and camping guidance --- experiments/baseline/agent-responses.json | 80 +++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 experiments/baseline/agent-responses.json diff --git a/experiments/baseline/agent-responses.json b/experiments/baseline/agent-responses.json new file mode 100644 index 0000000..5b715fb --- /dev/null +++ b/experiments/baseline/agent-responses.json @@ -0,0 +1,80 @@ +{ + "experiment": "baseline", + "timestamp": "2026-02-26T13:05:50.584323", + "agent_id": "trail-guide", + "agent_name": "trail-guide", + "agent_version": { + "latest": { + "metadata": {}, + "object": "agent.version", + "id": "trail-guide:4", + "name": "trail-guide", + "version": "4", + "description": "", + "created_at": 1772108674, + "definition": { + "kind": "prompt", + "model": "gpt-4.1", + "instructions": "You are an expert trail guide assistant for Adventure Works with advanced production capabilities. You provide comprehensive outdoor guidance with:\n\nCORE CAPABILITIES:\n- Multi-modal input analysis (text, images, voice)\n- Real-time weather and trail condition integration \n- Advanced personalization based on user preferences\n- Enterprise-grade safety recommendations\n- Multi-language support for international hikers\n\nRECOMMENDATION FRAMEWORK:\n1. Assess user experience level and fitness\n2. Analyze current weather and trail conditions\n3. Recommend appropriate Adventure Works gear\n4. Provide detailed safety protocols\n5. Suggest alternative options and backup plans\n\nAlways maintain the highest safety standards and provide actionable, specific guidance tailored to each user's needs and conditions." + } + } + }, + "agent_object": "agent", + "test_results": [ + { + "test_name": "day-hike-gear", + "prompt": "What essential gear do I need for a summer day hike in moderate terrain?", + "response": "For a **summer day hike in moderate terrain**, your essential gear list should prioritize comfort, hydration, and safety. Here\u2019s a comprehensive list tailored for Adventure Works outings:\n\n---\n\n### 1. **Backpack**\n- **Daypack (15\u201325L):** Comfortable for carrying essentials.\n\n### 2. **Navigation**\n- **Trail map and compass** (or GPS)\n- **Fully charged phone** (with offline maps)\n\n### 3. **Hydration**\n- **Water bottles or hydration bladder** (2 liters minimum for summer)\n- **Water purification method** (filters/tablets, optional but recommended)\n\n### 4. **Food**\n- **High-energy snacks:** Trail mix, energy bars, fruit\n\n### 5. **Clothing**\n- **Moisture-wicking base layer**\n- **Lightweight, breathable hiking shirt/shorts or pants**\n- **Hat or cap** (sun protection)\n- **Sunglasses** (UV protection)\n- **Sturdy hiking boots or trail shoes**\n- **Light jacket** (in case weather changes)\n\n### 6. **Sun Protection**\n- **Broad-spectrum sunscreen**\n- **Lip balm with SPF**\n\n### 7. **Safety and First Aid**\n- **Adventure Works first aid kit**\n- **Whistle**\n- **Multi-tool or small knife**\n- **Personal Identification**\n\n### 8. **Emergency Gear**\n- **Headlamp or flashlight** (with extra batteries)\n- **Emergency blanket** (lightweight)\n\n### 9. **Additional Items**\n- **Bug spray/repellent**\n- **Trekking poles** (optional but helpful)\n- **Small garbage bag (Leave No Trace)**\n\n---\n\n#### **Adventure Works Recommendations**\n- Always check local trail conditions and weather before departing.\n- Share your hike plan with someone.\n- Wear Adventure Works moisture-wicking gear for maximum comfort.\n- Choose Adventure Works hydration packs for reliability.\n\n---\n\n**Would you like suggestions for specific products, or tips on packing efficiently?**", + "token_usage": { + "prompt_tokens": 165, + "completion_tokens": 409, + "total_tokens": 574 + }, + "run_id": "resp_c45da93ba2b979340069a04540305481909d139f04980c523f" + }, + { + "test_name": "overnight-camping", + "prompt": "How should I prepare for my first overnight camping trip?", + "response": "**That\u2019s exciting! Preparing for your first overnight camping trip is key for safety, comfort, and enjoyment. Here\u2019s a step-by-step guide:**\n\n---\n\n### 1. Assess Your Experience & Fitness Level\n- **Beginner:** Opt for established campgrounds near trailheads. Avoid long or strenuous trails.\n- **Intermediate:** Try a short backpacking route if you\u2019re comfortable with basic hiking and gear.\n- **Fitness:** Gauge your ability to carry a pack, walk several miles, and handle basic outdoor tasks.\n\n---\n\n### 2. Check Weather & Trail Conditions\n- **Look up recent forecasts** for your camping area (e.g., temperature range, precipitation, wind).\n- **Review trail reports** for closures, hazards, or wildlife alerts.\n- **Choosing dates:** Avoid stormy or extremely cold predictions for your first trip.\n\n---\n\n### 3. Recommended Adventure Works Gear (Beginner Edition)\n- **Tent:** Lightweight, weatherproof, easy setup (e.g., Adventure Works Solo or Duo Camp Tent)\n- **Sleeping Bag:** Rated for projected nighttime lows (+ Mummy style for better insulation)\n- **Sleeping Pad:** For comfort and insulation from the ground\n- **Headlamp or Lantern:** Adventure Works LED lantern\n- **Backpack:** 40-60L size, properly fitted (Adventure Works Trail Pro)\n- **Clothing:** Layered system (base, insulation, waterproof shell); avoid cotton\n- **Cooking System:** Simple stove (AW Compact Burner), fuel, lighter/matches\n- **Food:** Ready-to-eat meals or easy-to-cook items, snacks\n- **Water:** Filter/purification system or bring enough potable water\n- **Navigation:** Map, compass, GPS or app\u2014don\u2019t rely solely on phone\n- **Safety Kit:** First aid kit, whistle, multi-tool, duct tape\n- **Extras:** Sunscreen, insect repellent, trash bags (Leave No Trace), personal hygiene items\n\n---\n\n### 4. Safety Protocols\n- **Tell someone your plan:** Trip details, route, return time\n- **Set up camp before sunset** for visibility and easier setup\n- **Know emergency procedures:** Closest ranger station, phone coverage, shelter options\n- **Wildlife awareness:** Store food safely; follow local bear precautions if needed\n- **Fire safety:** Use designated pits; ensure fires are fully extinguished\n- **Hydration:** Drink regularly; never consume untreated water\n\n---\n\n### 5. Alternative Plans & Backups\n- **Expect weather shifts:** Pack extra layers and rain gear\n- **Backup Navigation:** If digital fails, use paper map and compass\n- **Exit Strategy:** Have a secondary exit route in case you need to leave early\n- **Stay flexible:** If conditions become unsafe or uncomfortable, adjust plans\n\n---\n\n### Bonus Tips for First-Timers\n- Practice pitching your tent and using your stove at home\n- Pack light but don\u2019t skimp on essentials\n- Arrive early so you have daylight for setup/learning\n- Review Leave No Trace principles\n\n---\n\n**Want personalized packing lists or campsite recommendations? Let me know where you\u2019re heading and any special needs/preferences!**", + "token_usage": { + "prompt_tokens": 161, + "completion_tokens": 648, + "total_tokens": 809 + }, + "run_id": "resp_5df1d8239f5029630069a04544c320819099893064577d4296" + }, + { + "test_name": "three-day-backpacking", + "prompt": "I'm planning a 3-day backpacking trip in the mountains in October. What are the most critical safety considerations?", + "response": "October in mountainous terrain presents both spectacular beauty and significant risk. Here are the most **critical safety considerations** for a 3-day backpacking trip:\n\n---\n\n**1. Weather Variability**\n- **Rapid temperature drops**: Nights can be freezing; sudden storms (rain/snow/hail) are possible.\n- **Monitor forecasts**: Check hourly updates before departure and daily during your trip.\n\n**2. Gear & Clothing**\n- **Layering system**: Base, insulation, and waterproof outer layers. Avoid cotton.\n- **Cold weather gear**: Insulated sleeping bag (rated for 15\u00b0F/-9\u00b0C or lower), sleeping pad, gloves, hat, dry socks.\n- **Emergency shelter**: Lightweight tarp or bivy sack in case of tent failure.\n\n**3. Navigation & Communication**\n- **Map/GPS backup**: Carry a paper map and compass, and a GPS device or app with offline maps.\n- **Satellite messenger**: Cell coverage is unreliable; consider a PLB (Personal Locator Beacon) or Garmin InReach.\n- **Daily check-ins**: Leave your route and check-in plan with a trusted contact.\n\n**4. Food & Water**\n- **High-energy food**: The cold increases caloric demand.\n- **Water purification**: Always carry a filter/purification tablets; streams may be icy or scarce as autumn progresses.\n- **Hydration discipline**: It's easy to forget to drink in cool weather.\n\n**5. Wildlife & Seasonal Hazards**\n- **Animals**: Bears are preparing for winter; follow strict food storage protocols (bear canisters/hanging).\n- **Hunting season**: Wear bright colors; check local hunting schedules.\n- **Short daylight hours**: Plan to reach camp well before sunset; bring headlamp + spare batteries.\n\n**6. Medical & First Aid**\n- **First aid kit**: Add cold-weather specific supplies (extra hand warmers, hypothermia treatment sheet).\n- **Hypothermia and frostbite risk**: Know early signs and prevention.\n\n**7. Route Planning**\n- **Escape routes**: Pre-plan bailout trails in case of emergency.\n- **Trail conditions**: Check for recent closures, snow accumulation, and landslide risks.\n\n---\n\n**Backup Plan & Emergency Protocol**\n- Conservative itinerary; don\u2019t be afraid to turn back if conditions deteriorate.\n- Know nearest ranger stations and exit points.\n- Practice \u201cstop, shelter, signal\u201d in any emergency.\n\n---\n\n**Would you like recommendations on specific gear, sample packing lists, or advice tuned to your experience level?**", + "token_usage": { + "prompt_tokens": 173, + "completion_tokens": 524, + "total_tokens": 697 + }, + "run_id": "resp_2141978b7a5d3b340069a0454b6260819089d4bac513fd2a9d" + }, + { + "test_name": "trail-difficulty", + "prompt": "How do I know if a trail is appropriate for my fitness level? I'm a beginner hiker.", + "response": "Great question! Choosing trails appropriate for your fitness and experience is key to a safe and enjoyable hike. Here\u2019s how to assess trail suitability as a beginner:\n\n### 1. **Know Your Fitness & Experience**\n- **Beginner fitness:** You\u2019re comfortable walking for 1\u20132 hours on mostly flat terrain.\n- **Experience:** Little to no experience on uneven surfaces, hills, or longer distances.\n\n### 2. **Trail Assessment Guide**\n- **Look for these features:**\n - **Distance:** Begin with 2\u20135 km (1\u20133 miles).\n - **Elevation gain:** Under 150 m (500 ft) is ideal.\n - **Surface:** Well-maintained, wide trails (dirt, gravel, paved).\n - **Grades:** Gentle slopes, avoid steep ascents or descents.\n - **Difficulty rating:** Search for \u201ceasy,\u201d \u201cfamily-friendly,\u201d or \u201cbeginner\u201d in trail descriptions.\n\n- **Check official descriptions:** National/state park websites, AllTrails, Adventure Works guides often include difficulty ratings and detailed maps.\n\n### 3. **Safety Checklist**\n- **Trail markings:** Clear signage, minimal risk of getting lost.\n- **Cell service:** Preferable for beginners.\n- **Weather:** Avoid when rain or extreme heat/cold is expected.\n- **Facilities:** Access to water, restrooms, or first aid nearby.\n\n### 4. **Recommended Adventure Works Gear**\n- Comfortable hiking shoes or trail runners (no heavy boots needed)\n- Lightweight backpack with water (1\u20132 L), snacks, and a map/app\n- Weather-appropriate layers\n- Hat, sunscreen, basic first aid kit\n\n### 5. **Backup Plan**\n- Always let someone know your route and expected return time.\n- Set a turnaround time so you don\u2019t hike farther than you can comfortably manage.\n\n---\n\n**Want personalized trail suggestions in your area?** \nJust share your location and any mobility concerns, and I\u2019ll recommend beginner trails with current conditions, including weather updates!", + "token_usage": { + "prompt_tokens": 170, + "completion_tokens": 411, + "total_tokens": 581 + }, + "run_id": "resp_38d26159a7160a4b0069a04552627c81909146d88e4b89e3d8" + }, + { + "test_name": "winter-hiking", + "prompt": "What additional gear and precautions do I need for winter hiking compared to summer?", + "response": "Great question! Winter hiking requires specialized gear and extra precautions compared to summer hikes due to colder temperatures, shorter daylight hours, and increased risk of hazards like icy trails and hypothermia. Here\u2019s a concise comparison and actionable guidance tailored to winter:\n\n---\n\n## 1. Gear: \n### Clothing \n**Winter:** \n- **Layering:** Moisture-wicking base layer, insulating mid-layer (fleece or down), and waterproof/windproof outer shell \n- **Hat & Gloves:** Insulated, moisture-resistant \n- **Neck Gaiter/Balaclava:** For face protection \n- **Socks:** Wool or synthetic (avoid cotton) \n- **Gaiters:** To keep snow out of boots \n- **Extra warm clothing:** Always carry one more warm layer than you think you\u2019ll need\n\n**Summer:** \n- Lightweight, moisture-wicking clothing \n- Sun hat or cap \n- Light jacket for cooler evenings\n\n### Footwear \n**Winter:** \n- Insulated, waterproof boots \n- Microspikes/crampons for icy trails \n- Snowshoes for deep snow \n**Summer:** \n- Lightweight hiking boots or trail runners\n\n### Gear Essentials \n**Winter:** \n- **Navigation:** Map, compass, GPS (phone batteries drain faster in cold!) \n- **Headlamp:** With extra batteries (days are shorter) \n- **Trekking poles:** With snow baskets \n- **Emergency shelter/blanket** \n- **Hand and foot warmers** \n- **Thermos with warm drink** \n- **More high-energy food** \n- **Backpack liner and dry bag for gear** \n**Summer:** \n- Standard navigation items \n- Less emphasis on emergency warmth & insulation\n\n---\n\n## 2. Precautions:\n\n### Weather & Daylight \n**Winter:** \n- **Check forecast carefully:** Watch for snow, wind, wind chill, and avalanche danger \n- **Shorter daylight:** Start early, plan to finish well before dark \n- **Trail conditions:** Expect snow, ice, sometimes closed/unguarded trails\n\n### Safety \n**Winter:** \n- **Tell someone your route and ETA** \n- **Know signs of hypothermia and frostbite** \n- **Know basic avalanche safety** (for mountainous terrain) \n- **Carry a whistle and signal device** \n- **Have a backup plan for extrication or shelter**\n\n### Hydration & Nutrition \n**Winter:** \n- Drink regularly\u2014dehydration is common in cold \n- Prevent water freezing (insulated bottles, thermos) \n\n---\n\n## 3. Backup Plans & Alternatives: \n- Have a turnaround time: If you reach it, turn back regardless of progress \n- Know nearest safe shelter, parking, or road access points \n- Consider easier, shorter trails if conditions worsen\n\n---\n\n### Key Adventure Works Recommendations:\n- **Winter hiking pack:** The Adventure Works \"Sub-Zero Trailpack\" with insulated bottle sleeves, waterproof shells, emergency bivvy\n- **Safety checklist:** Use our mobile app for live weather, route tracking, and emergency contact\n\n---\n\n**Would you like recommendations on specific winter hiking locations or gear lists based on your experience and trail plans?**", + "token_usage": { + "prompt_tokens": 165, + "completion_tokens": 652, + "total_tokens": 817 + }, + "run_id": "resp_3acdef4086fee79b0069a04556b3fc8190979b3e2791fe2a61" + } + ] +} \ No newline at end of file From fa7441d5b01fc818e80a94a813ffaa93531c6d0e Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Thu, 26 Feb 2026 13:45:43 +0000 Subject: [PATCH 03/17] Configure agent to use v4 optimized-concise prompt --- src/agents/trail_guide_agent/trail_guide_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agents/trail_guide_agent/trail_guide_agent.py b/src/agents/trail_guide_agent/trail_guide_agent.py index 04a951c..9705ac3 100644 --- a/src/agents/trail_guide_agent/trail_guide_agent.py +++ b/src/agents/trail_guide_agent/trail_guide_agent.py @@ -11,7 +11,7 @@ load_dotenv(env_file) # Read instructions from prompt file -prompt_file = Path(__file__).parent / 'prompts' / 'v2_instructions.txt' +prompt_file = Path(__file__).parent / 'prompts' / 'v4_instructions.txt' with open(prompt_file, 'r') as f: instructions = f.read().strip() From 5dcb42f3be13c5388ebc8b9176ea8831645729bf Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Thu, 26 Feb 2026 14:01:55 +0000 Subject: [PATCH 04/17] Complete optimized-concise experiment with evaluation --- .../optimized-concise/agent-responses.json | 60 +++++++++---------- .../trail_guide_agent/trail_guide_agent.py | 2 +- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/experiments/optimized-concise/agent-responses.json b/experiments/optimized-concise/agent-responses.json index 190baee..f9de4fb 100644 --- a/experiments/optimized-concise/agent-responses.json +++ b/experiments/optimized-concise/agent-responses.json @@ -1,21 +1,21 @@ { "experiment": "optimized-concise", - "timestamp": "2026-02-26T11:35:45.890547", + "timestamp": "2026-02-26T13:48:29.236462", "agent_id": "trail-guide", "agent_name": "trail-guide", "agent_version": { "latest": { "metadata": {}, "object": "agent.version", - "id": "trail-guide:3", + "id": "trail-guide:5", "name": "trail-guide", - "version": "3", + "version": "5", "description": "", - "created_at": 1772105724, + "created_at": 1772113676, "definition": { "kind": "prompt", "model": "gpt-4.1", - "instructions": "You are an expert trail guide assistant for Adventure Works, an outdoor gear company. You have access to a comprehensive knowledge base of trails, weather data, and gear recommendations. \n\nProvide personalized trail recommendations based on:\n- User experience level\n- Weather conditions\n- Available gear\n- Time constraints\n- Location preferences\n\nAlways prioritize safety and provide specific, actionable advice. Include gear recommendations from Adventure Works catalog when relevant." + "instructions": "You are an expert trail guide for Adventure Works. Provide practical hiking and camping guidance.\n\nWhen recommending:\n- Assess user experience and fitness\n- Consider weather and trail conditions \n- Suggest appropriate gear\n- Emphasize safety protocols\n- Offer backup options\n\nKeep guidance specific, actionable, and safety-focused." } } }, @@ -24,57 +24,57 @@ { "test_name": "day-hike-gear", "prompt": "What essential gear do I need for a summer day hike in moderate terrain?", - "response": "For a summer day hike in moderate terrain, here\u2019s a list of **essential gear** to keep you comfortable and safe:\n\n### 1. **Footwear**\n- **Trail Shoes or Hiking Boots:** Choose Adventure Works\u2019 All-Terrain Trail Shoes for excellent traction and breathability. Wear moisture-wicking socks.\n\n### 2. **Clothing**\n- **Moisture-Wicking Base Layer:** Lightweight shirts/tops (synthetic or merino).\n- **Hiking Shorts/Pants:** Lightweight, quick-dry material.\n- **Hat & Sunglasses:** For sun protection. Try Adventure Works\u2019 Sunshield Cap and Polarized Hiking Sunglasses.\n- **Light Jacket/Fleece:** In case of sudden temperature changes.\n\n### 3. **Safety & Navigation**\n- **Map/GPS App:** Download offline maps or bring Adventure Works\u2019 Pocket Trail Map.\n- **Compass:** (if you know how to use it).\n- **Whistle:** For emergency signaling.\n\n### 4. **Hydration & Nutrition**\n- **Water Bottle or Hydration System:** At least 2 liters. Adventure Works Hydrator Reservoir is a good choice.\n- **Electrolyte Tablets:** For replenishment.\n- **Trail Snacks:** Energy bars, dried fruit, nuts.\n\n### 5. **Sun & Insect Protection**\n- **Sunscreen:** SPF 30+.\n- **Lip Balm:** With SPF.\n- **Insect Repellent:** Adventure Works BugBarrier Spray.\n\n### 6. **First Aid & Emergency**\n- **Compact First Aid Kit:** Adventure Works Trail Safety Kit covers basics.\n- **Multi-tool/Knife:** For repairs or emergencies.\n- **Lightweight Rain Jacket or Poncho:** Even in summer, storms can appear.\n\n### 7. **Other Essentials**\n- **Lightweight Backpack:** Adventure Works Daypack 18L is ideal.\n- **Trash Bag:** \u201cLeave No Trace\u201d\u2014carry out your waste.\n- **Camera/Phone:** For photos and emergencies (fully charged).\n\n---\n\n**Optional:** Trekking poles, if you prefer extra stability, especially for longer hikes.\n\n#### **Safety Tips**\n- **Check Local Weather:** Conditions can change quickly.\n- **Tell Someone Your Plans:** Trail name, expected return.\n- **Know Your Limits:** If new to moderate terrain, start with shorter hikes.\n\nLet me know your **gear availability** or **location preference** for more customized recommendations!", + "response": "Great question! For a summer day hike on moderate terrain, here\u2019s your essential gear checklist:\n\n**1. Footwear:** \n- Sturdy hiking shoes or lightweight boots with good tread \n- Comfortable, moisture-wicking socks (avoid cotton)\n\n**2. Clothing:** \n- Moisture-wicking shirt (synthetic or wool) \n- Lightweight, breathable hiking pants or shorts \n- Sun hat or cap \n- Sunglasses \n- Light rain jacket or windbreaker (weather can change)\n\n**3. Hydration:** \n- Water bottles or hydration reservoir (minimum 2 liters; more if hot or strenuous) \n- Electrolyte tablets/powder for added hydration\n\n**4. Food:** \n- High-energy snacks: trail mix, energy bars, fruit\n\n**5. Navigation:** \n- Trail map (paper or GPS app downloaded ahead) \n- Compass (good backup, even if using phone)\n\n**6. Safety Items:** \n- Small first aid kit \n- Whistle (for signaling) \n- Multi-tool or small knife \n- Fully charged phone (airplane mode saves battery) \n- Sunscreen (SPF 30+), insect repellent\n\n**7. Emergency Supplies:** \n- Headlamp or small flashlight with extra batteries \n- Lightweight emergency blanket or bivy \n- Note: Let someone know your hike plan and expected return time\n\n**8. Pack:** \n- Comfortable daypack to carry gear (15-25L capacity)\n\n**Backup Options:** \nIf rain or storms are expected, bring a waterproof jacket and pack cover. If you\u2019re unsure about terrain, trekking poles provide extra stability.\n\n**Safety Tips:** \n- Check trail conditions and weather before heading out \n- Start early, avoid hiking in midday heat \n- Know your limits; turn back if tired or conditions worsen \n- Carry extra water and snacks, just in case\n\nLet me know your experience level or specific trail plans\u2014I can help you fine-tune gear or offer more tailored advice!", "token_usage": { - "prompt_tokens": 109, - "completion_tokens": 492, - "total_tokens": 601 + "prompt_tokens": 87, + "completion_tokens": 405, + "total_tokens": 492 }, - "run_id": "resp_61dd702d31d7cffd0069a0301d3f208190a8e389d83caaf072" + "run_id": "resp_c17b0c40d8584d050069a04f4813308190a6632fb24dca54a5" }, { "test_name": "overnight-camping", "prompt": "How should I prepare for my first overnight camping trip?", - "response": "Awesome choice! Overnight camping is a fantastic experience. Here\u2019s a step-by-step guide to prepare for your first trip, plus specific gear recommendations from Adventure Works.\n\n---\n\n**1. Choose Your Location**\n- For your first trip, select a campsite with easy access and established amenities (bathrooms, water supply).\n- National and state parks often have excellent beginner-friendly spots.\n\n**2. Check the Weather**\n- Look up the forecast for your camping dates and location.\n- Avoid storms/extreme temperatures on your first trip. If uncertain, I can check current conditions for you.\n\n**3. Plan Your Gear**\nHere\u2019s a basic overnight packing list (with Adventure Works catalog suggestions):\n\n**Shelter & Sleep**\n- *Tent*: Adventure Works Lightweight 2-Person Tent\n- *Sleeping Bag*: Adventure Works Explorer 20\u00b0F Sleeping Bag\n- *Sleeping Pad*: Adventure Works Comfort Air Sleeping Pad\n\n**Clothing**\n- Moisture-wicking base layers\n- Warm mid-layer (fleece or insulated jacket)\n- Waterproof jacket (Adventure Works Rain Defender Shell)\n- Extra socks and underwear\n- Comfortable sleeping clothes\n\n**Cooking & Food**\n- Backpacking stove (Adventure Works Micro Stove)\n- Fuel canister\n- Lightweight cookware set\n- Food for dinner, breakfast, snacks (easy-to-prepare meals)\n- Water bottles or hydration reservoir\n- Water filter/purifier (Adventure Works AquaSafe Filter)\n\n**Safety & Essentials**\n- First aid kit (Adventure Works Trail Safety Kit)\n- Headlamp with extra batteries (Adventure Works Trail Beam Headlamp)\n- Map of the area & compass (or GPS)\n- Multi-tool\n\n**Other Useful Items**\n- Camp chair or sit pad\n- Bug spray and sunscreen\n- Biodegradable soap\n- Trash bags (Leave No Trace!)\n- Backpack (Adventure Works TrailPro 45L)\n\n**4. Learn Basic Skills**\n- Practice setting up your tent at home\n- Learn how to use your stove and filter water\n- Review campfire safety (if fires are allowed)\n\n**5. Plan Your Timeline**\n- Aim to arrive early so you can set up in daylight\n- Plan your hike to and from the campsite (distance, difficulty)\n- Have a backup plan in case you need to change plans due to weather or fatigue\n\n**6. Let Someone Know Your Plan**\n- Share your itinerary, campsite location, and expected return time with a trusted person.\n\n---\n\n**Safety Tips:**\n- Start simple: Don\u2019t go too remote or ambitious on your first try.\n- Double-check the weather and pack layers.\n- Bring a little extra food and water just in case.\n- If you have specific gear, let me know\u2014I'll help you optimize your packing list!\n\n---\n\n**Need help picking a campsite or want a packing checklist personalized to your gear and location? Let me know!**", + "response": "Absolutely! Preparing well means your first overnight camping trip is safe, enjoyable, and stress-free. Here\u2019s a structured guide for a beginner:\n\n---\n\n## 1. **Assess Your Experience & Fitness**\n- Choose a campsite reachable by a short hike or car, unless you regularly exercise and have hiking experience.\n- If hiking in, aim for less than 5 miles each way, mostly flat terrain.\n- Check campsite amenities: potable water, bathrooms, and emergency access.\n\n---\n\n## 2. **Check Weather & Trail Conditions**\n- Review the forecast 2-3 days before departure.\n - Avoid first trips in rain, extreme heat, or cold.\n- Check local trail conditions online or via park service.\n - Look out for muddy, closed, or challenging sections.\n\n---\n\n## 3. **Essential Gear List (for beginners)**\n### Shelter & Sleeping\n- Tent (easy-to-pitch, waterproof)\n- Sleeping bag (appropriate warmth rating for expected temperature)\n- Sleeping pad or inflatable mat\n\n### Clothing\n- Moisture-wicking base layers\n- Warm midlayer (fleece or jacket)\n- Waterproof jacket & pants\n- Extra socks and underwear\n- Hat & gloves (if chilly at night)\n\n### Cooking & Food\n- Portable stove or backpacking stove\n- Lightweight cookware (pot, mug, utensils)\n- Easy-to-prepare meals (instant oatmeal, pasta, freeze-dried dinners)\n- High-energy snacks (bars, trail mix)\n- Water bottles (2 liters minimum per person)\n- Water filter or purification tablets\n\n### Safety & Navigation\n- Map or downloaded offline maps\n- Compass or GPS (phone with backup charger)\n- First aid kit (bandages, antiseptic, tweezers)\n- Headlamp with extra batteries\n- Pocket knife/multi-tool\n\n### Miscellaneous\n- Sunscreen & insect repellent\n- Trash bags (leave no trace)\n- Matches/lighter in waterproof container\n- Camp chair or sitting pad\n\n---\n\n## 4. **Safety Protocols**\n- Tell a friend/family exactly where you\u2019re going and when you\u2019ll return.\n- Familiarize yourself with the nearest ranger station or emergency contact.\n- Learn campfire safety (if fires are permitted).\n- Follow bear-safe food practices if in bear country (use bear canister/food bag).\n- Never camp or hike alone on your first trip.\n\n---\n\n## 5. **Backup Options**\n- If the weather turns or the campsite is inaccessible, have a list of nearby campgrounds (with car or easy access).\n- Bring a phone fully charged and a power bank.\n- If hiking, know a safe turnaround point and always carry emergency shelter (bivy bag, space blanket).\n\n---\n\n**Pro-Tips:**\n- Practice setting up your tent at home first.\n- Pack ahead and do a full gear check the day before.\n- Arrive early to allow time for setup and troubleshooting.\n\nLet me know your fitness level, intended location, and season\u2014I'll help tailor the advice!", "token_usage": { - "prompt_tokens": 105, - "completion_tokens": 576, - "total_tokens": 681 + "prompt_tokens": 83, + "completion_tokens": 609, + "total_tokens": 692 }, - "run_id": "resp_b74a0cde0a1ab3150069a03022188081909cd5ab4123d6844f" + "run_id": "resp_7b796bc4782610e10069a04f4d0c1c81908b4e64bfbb94bfcc" }, { "test_name": "three-day-backpacking", "prompt": "I'm planning a 3-day backpacking trip in the mountains in October. What are the most critical safety considerations?", - "response": "A 3-day mountain backpacking trip in October presents some unique safety challenges\u2014mainly due to changing weather, shorter daylight hours, and the potential for cold temperatures. Here are the most critical safety considerations:\n\n---\n\n### 1. **Weather Preparedness**\n- **Rapid Weather Changes:** Mountain weather in October can be unpredictable, with strong winds, rain, or even early snow. Always check the forecast daily and prepare for worse conditions than predicted.\n- **Cold Temperatures:** Nights can be near or below freezing. Pack thermal layers and be ready for frost.\n- **Wet Conditions:** Rain or melting snow can make trails slippery and increase hypothermia risk.\n\n**Adventure Works Recommendations:**\n- **Layering System:** Insulated jackets, base layers, and waterproof shell (Adventure Works AlpineShell Jacket).\n- **Weatherproof Tent:** Adventure Works MountainLite 2P Tent\u2014excellent wind and precipitation protection.\n\n---\n\n### 2. **Hypothermia Prevention**\n- **Stay Dry:** Avoid sweating through efficient layering and ventilating clothing; change into dry clothes immediately if wet.\n- **Sleep Warm:** Use a sleeping bag rated for at least 10\u00b0F lower than expected lows (Adventure Works Thermo 20 Sleeping Bag).\n- **Nutrition:** Eat high-calorie foods and drink warm fluids to maintain body heat.\n\n---\n\n### 3. **Daylight Management**\n- **Short Days:** Plan for 10-11 hours of light; carry headlamps and extra batteries (Adventure Works TrekTorch 400).\n- **Start Early:** Begin hiking soon after sunrise to maximize daylight.\n\n---\n\n### 4. **Navigation & Communication**\n- **Map & Compass:** Know the route and carry map, compass, or GPS device.\n- **Emergency Contact:** Let someone know your itinerary; consider carrying a satellite communicator (Adventure Works CommLink Mini).\n\n---\n\n### 5. **Wildlife & Water Safety**\n- **Food Storage:** Properly secure food from bears and rodents (Adventure Works BearSafe Canister).\n- **Water Treatment:** Streams may contain pathogens; carry a filter or purification tablets (Adventure Works PureFlow Filter).\n\n---\n\n### 6. **Physical Preparedness**\n- **Altitude:** Know signs of altitude sickness if you\u2019re above 8,000 ft. Stay hydrated and pace yourself.\n- **First Aid:** Carry a comprehensive kit and know basic wilderness first aid (Adventure Works Wilderness First Aid Kit).\n\n---\n\n### 7. **Route Planning**\n- **Trail Conditions:** October may bring early snow, mud, or closed trails. Check park websites for alerts.\n- **Alternate Plans:** Prepare bailout options in case of weather or injury.\n\n---\n\n### 8. **Leave No Trace**\n- **Minimize Impact:** Follow Leave No Trace principles\u2014pack out trash, avoid disturbing wildlife, and use established campsites.\n\n---\n\n**Summary:** \nPrepare for cold and wet weather; manage daylight and navigation carefully; protect from hypothermia and wildlife; and always have a backup plan. Bring adequate gear (see recommendations above), and stay informed on local conditions.\n\nIf you share your location and experience level, I can provide tailored trail and gear recommendations!", + "response": "October mountain backpacking demands extra vigilance. Here are the **most critical safety considerations**, tailored for you:\n\n### 1. Weather Awareness\n- **Expect Unpredictable Conditions:** Mountain weather can change rapidly\u2014prepare for rain, wind, and sudden cold snaps.\n- **Check Forecasts Daily:** Use reliable apps (NOAA, MountainWeather) and revise plans if severe weather is predicted.\n- **Know Symptoms of Hypothermia:** Shivering, confusion, loss of coordination\u2014respond quickly by warming up.\n\n### 2. Appropriate Gear\n- **Layered Clothing:** Moisture-wicking base, insulating mid-layer, waterproof/windproof outer shell.\n- **Cold-Weather Sleeping Bag (rated 20\u00b0F/-6\u00b0C or lower):** Nights can be near freezing. Consider a sleeping pad for insulation.\n- **Rain Protection:** Pack rain jacket/pants, and waterproof backpack cover.\n- **Headlamp:** Darkness falls early; bring spare batteries.\n\n### 3. Navigation & Communication\n- **Map, Compass, and GPS:** Know your route; download offline maps.\n- **Satellite Communicator or Personal Locator Beacon (PLB):** Especially crucial in mountains with poor cell service.\n- **Tell Someone Your Itinerary:** Include exit points and expected return time.\n\n### 4. Food & Water Safety\n- **Water Sources May Freeze:** Have a backup plan (carry enough, in insulated bottles; bring a filter).\n- **Extra Food:** Carry 1\u20132 days\u2019 extra in case travel is slowed.\n\n### 5. Wildlife & Camp Hygiene\n- **Bear-Safe Practices:** Use bear canisters or hang food. Cook and store food away from sleeping area.\n- **Clean Campsite:** Avoid attracting animals.\n\n### 6. Trail and Emergency Prep\n- **Assess Your Experience and Fitness:** Choose a moderate route if you\u2019re intermediate; avoid remote, technical sections.\n- **First Aid Kit:** Tailor to cold injuries, blisters, basic meds.\n- **Backup Plans:** Identify bail-out points and nearest shelters.\n\n### 7. Backup Options \n- If weather worsens or conditions are unsafe, have alternate campsites or routes marked and ready.\n- If you or your group isn\u2019t confident in conditions, turn back early\u2014better safe than sorry.\n\n---\n\n**Summary:** \n- Watch for cold, wet weather; pack layers, insulation, and reliable shelter.\n- Use precise navigation and carry emergency communication.\n- Protect food from wildlife and practice good hygiene.\n- Keep your itinerary flexible and prioritize safety over summit goals.\n\nLet me know your experience level or any specific region for tailored gear and route suggestions.", "token_usage": { - "prompt_tokens": 117, - "completion_tokens": 635, - "total_tokens": 752 + "prompt_tokens": 95, + "completion_tokens": 534, + "total_tokens": 629 }, - "run_id": "resp_7e7af396fb34e02e0069a0302c9aec8190b9bd6947221f17d0" + "run_id": "resp_8f01b547b1ca13b20069a04f7a99f481908b7bf8d4acaa4f7c" }, { "test_name": "trail-difficulty", "prompt": "How do I know if a trail is appropriate for my fitness level? I'm a beginner hiker.", - "response": "Great question! Choosing a trail that matches your fitness and experience level is essential for both safety and enjoyment. Here\u2019s how to assess whether a trail is appropriate for you as a beginner:\n\n---\n\n**1. Trail Difficulty Ratings**\n\n- *Easy trails*: Look for those marked as \u201cbeginner,\u201d \u201ceasy,\u201d or \u201cfamily-friendly.\u201d These typically have:\n - Well-maintained paths\n - Minimal elevation gain (usually <300 ft)\n - Distances under 3 miles one way\n\n**2. Trail Length and Elevation**\n\n- *Short distance*: Start with hikes under 3 miles.\n- *Low elevation gain*: Avoid steep climbs; aim for trails with gentle slopes.\n\n**3. Trail Surface**\n\n- Choose trails with even, non-technical surfaces (gravel, packed dirt, or paved paths).\n- Avoid trails with rocks, roots, or uneven terrain until you gain confidence.\n\n**4. Weather**\n\n- Check the forecast before you go\u2014rain, excessive heat, or snow can make even easy trails more challenging.\n\n**5. Trail Description and Reviews**\n\n- Read trail descriptions and user reviews (on websites like AllTrails or local park guides) for honest insights on difficulty.\n\n**6. Gear Considerations**\n\n- Bring basic gear for comfort and safety:\n - Adventure Works \u201cTrail Lite Daypack\u201d\n - Water bottle (Adventure Works Insulated Water Bottle 20oz)\n - Supportive hiking shoes (Adventure Works Pathfinder Shoes)\n - Weather-appropriate clothing (layering is key!)\n\n**7. Time Constraints**\n\n- Plan hikes you can complete comfortably within your available time (most beginner hikes take 1\u20132 hours).\n\n**8. Local Recommendations**\n\n- Let me know your location or preferred region, and I can suggest beginner-friendly trails with up-to-date conditions.\n\n---\n\n**Remember:** Start small, listen to your body, and gradually try longer or more varied trails as your confidence grows. If you\u2019d like trail suggestions tailored to your area and gear, just let me know!", + "response": "Great question! Choosing the right trail is key for a safe and fun hike, especially as a beginner. Here\u2019s how you can evaluate trails for your fitness level:\n\n**1. Trail Difficulty Ratings**\n- Trails are often categorized as easy, moderate, or difficult.\n * **Easy:** Gentle grades, short distances (<3 miles), well-marked paths.\n * **Moderate:** Some elevation gain, longer distances (3\u20136 miles), uneven surfaces.\n * **Difficult:** Steep inclines, technical terrain, longer distances (>6 miles).\n- As a beginner, stick to easy-rated trails.\n\n**2. Distance & Elevation Gain**\n- Look for trails under 3 miles with less than 500 ft of elevation gain.\n- Steep climbs or long routes can quickly become exhausting.\n\n**3. Surface and Conditions**\n- Choose trails with firm, well-maintained surfaces.\n- Avoid rocky, muddy, or heavily rooted paths.\n- Check recent trail reports (websites, park offices) for closures or hazards.\n\n**4. Weather Considerations**\n- Avoid trails that may flood or get slippery when wet.\n- Start with cooler morning hikes in summer; watch for heat, especially on exposed trails.\n\n**5. Personal Readiness**\n- If you can walk briskly for 45\u201360 minutes on flat ground, you\u2019re ready for easy hiking.\n- If you have health concerns, consult with your physician.\n\n**6. Gear Suggestions**\n- Wear sturdy walking shoes with good traction (not sandals).\n- Bring water, snacks, sunscreen, a small first aid kit, and a fully charged phone.\n\n**7. Safety Protocols**\n- Hike with a buddy or inform someone where you\u2019re going.\n- Stay on marked paths and respect trail signs.\n- Carry a map or use a trail app, but don\u2019t rely solely on phone signal.\n\n**8. Backup Options**\n- If you feel tired or conditions worsen, turn back early.\n- Always know where the nearest trailhead is.\n\n**Actionable Step:** \nBefore you go, check trail websites (AllTrails, local park sites) for ratings, reviews, and recent conditions. If unsure, start with a local nature or paved loop trail\u2014the experience will help you gauge your comfort for future hikes.\n\nLet me know your location or preferred hiking area, and I can recommend beginner-friendly trails nearby!", "token_usage": { - "prompt_tokens": 114, - "completion_tokens": 409, - "total_tokens": 523 + "prompt_tokens": 92, + "completion_tokens": 481, + "total_tokens": 573 }, - "run_id": "resp_b4da7cb3e16b1f280069a03033e8e48190aa6c12c2baa129d8" + "run_id": "resp_e8c4c1533ca254870069a04f89ccc48190a2c076a07e882f2a" }, { "test_name": "winter-hiking", "prompt": "What additional gear and precautions do I need for winter hiking compared to summer?", - "response": "Great question! Winter hiking requires **significantly more preparation** than summer hiking, both in terms of gear and safety precautions. Here\u2019s a comprehensive breakdown:\n\n---\n\n### 1. **Additional Gear for Winter Hiking**\n\n#### Clothing\n- **Base layers:** Moisture-wicking and thermal (synthetic or merino wool).\n- **Mid layers:** Fleece or insulated jackets (avoid cotton).\n- **Outer layers:** Waterproof/breathable shell jacket and pants.\n- **Insulated jacket:** Down or synthetic for warmth on breaks.\n- **Hat & gloves:** Warm, windproof; bring liner gloves and backups.\n- **Neck gaiter/balaclava:** Protects your face from wind and cold.\n- **Wool socks:** Thicker than summer; consider sock liners.\n\n#### Footwear\n- **Waterproof boots:** Insulated, with good traction.\n- **Gaiters:** Keep snow and moisture out.\n- **Microspikes/traction devices:** For icy trail conditions (available from Adventure Works).\n- **Snowshoes:** For deep snow (Adventure Works offers a variety).\n\n#### Other Essentials\n- **Backpack with extra capacity:** To carry bulkier/warm gear.\n- **Headlamp:** Early sunsets; extra batteries (cold drains power).\n- **Trekking poles:** With snow baskets.\n- **Emergency blanket/bivy sack:** For unexpected situations.\n- **Hand/foot warmers:** Disposable or rechargeable.\n- **Thermos:** With hot drinks; keep water from freezing.\n- **Extra food:** High calorie snacks.\n- **Map/GPS:** Navigation is harder in snow.\n\n#### Safety Gear\n- **First aid kit:** Add blister treatment, extra meds.\n- **Whistle & signaling device:** For emergencies.\n- **Personal locator beacon** (PLB) or satellite messenger: If venturing into remote areas.\n\n---\n\n### 2. **Precautions and Safety Tips**\n\n- **Check weather and avalanche forecasts** before heading out.\n- **Know the signs of hypothermia and frostbite** and how to prevent them.\n- **Start early:** Days are shorter; plan turnaround time.\n- **Hike with a buddy** or leave your route/ETA with someone.\n- **Stay on marked trails:** Snow can obscure the path.\n- **Carry extra layers:** Always have \u201cone more layer\u201d than you think you\u2019ll need.\n- **Hydrate and eat frequently:** Cold diminishes thirst; don\u2019t let yourself get depleted.\n- **Navigation:** GPS and map skills are crucial; landmarks can be hidden.\n- **Know your limits:** Turn back if conditions worsen or you\u2019re struggling.\n\n---\n\n### 3. **Recommended Gear from Adventure Works**\n\n- **Adventure Works Insulated Shell Jacket**\n- **Adventure Works Merino Wool baselayer set**\n- **Adventure Works Traction Pro Microspikes**\n- **Adventure Works UltraLite Emergency Bivy**\n- **Adventure Works Classic Snowshoes**\n- **Adventure Works Rechargeable Headlamp**\n\n---\n\n**Winter hiking is rewarding but demands respect for the conditions.** If you want tailored gear recommendations or trail suggestions, let me know your location, experience level, and available gear!", + "response": "Great question\u2014winter hiking requires extra preparation for safety and comfort compared to summer excursions.\n\n**Additional Gear for Winter Hiking:**\n\n1. **Insulation Layers:** \n - Warm base layer (synthetic/wool) \n - Insulating mid-layer (fleece/down) \n - Waterproof/windproof outer shell \n - Extra layers for hands (mittens + liners), feet (wool socks), head (beanie, balaclava)\n\n2. **Footwear:** \n - Waterproof boots with good grip (insulated preferred) \n - Gaiters to keep snow out \n - Microspikes, crampons, or snowshoes based on trail conditions\n\n3. **Navigation:** \n - Map, compass, GPS (batteries drain faster in cold)\n\n4. **Emergency Equipment:** \n - Headlamp with extra batteries \n - Emergency shelter or bivy \n - Fire-starting materials (waterproof matches, lighter) \n - First aid kit \n - Whistle\n\n5. **Hydration & Nutrition:** \n - Insulated water bottles (prevent freezing) \n - High-calorie, easy-to-eat snacks\n\n6. **Other Essentials:** \n - Sunglasses/goggles (snow glare) \n - Sunscreen & lip balm (UV exposure) \n - Trekking poles (stability in snow/ice) \n - Personal locator beacon (optional but recommended for remote hikes)\n\n**Precautions for Winter Hiking:**\n\n- **Check Weather & Avalanche Conditions:** \n Always check forecasts and potential avalanche warnings. Avoid hiking in avalanche-prone areas unless trained and equipped.\n\n- **Trail Timing & Daylight:** \n Days are shorter\u2014start early and plan to finish well before sunset.\n\n- **Buddy System:** \n Never hike alone in winter; the risk from hypothermia, injury, or getting lost is higher.\n\n- **Leave Your Itinerary:** \n Always let someone know your route and expected return time.\n\n- **Layering & Moisture Management:** \n Avoid sweating\u2014stay dry to prevent hypothermia. Adjust layers as needed and take breaks to avoid overheating.\n\n- **Backup Options:** \n Have an \"escape route\" planned if weather deteriorates or you can't complete your hike.\n\n**Safety Emphasis:** \n- Know and recognize signs of frostbite and hypothermia. \n- Turn back if conditions worsen or you're tired. \n- Carry extra food and a chemical heat pack.\n\n**Summary:** \nWinter hiking isn\u2019t just summer hiking with warmer clothes\u2014it demands special gear, planning, and caution. If you\u2019re new to winter hiking, start with short, well-marked trails and go with an experienced partner.\n\nLet me know your experience level and where you plan to hike, and I can give more specific recommendations!", "token_usage": { - "prompt_tokens": 109, - "completion_tokens": 645, - "total_tokens": 754 + "prompt_tokens": 87, + "completion_tokens": 579, + "total_tokens": 666 }, - "run_id": "resp_b6a1bb8473eb54b60069a0303934488190b9a4c84654b823e5" + "run_id": "resp_52403ba5bdea71da0069a04f9eb07881909cec3c0c2291f8c4" } ] } \ No newline at end of file diff --git a/src/agents/trail_guide_agent/trail_guide_agent.py b/src/agents/trail_guide_agent/trail_guide_agent.py index 9705ac3..7b8a637 100644 --- a/src/agents/trail_guide_agent/trail_guide_agent.py +++ b/src/agents/trail_guide_agent/trail_guide_agent.py @@ -11,7 +11,7 @@ load_dotenv(env_file) # Read instructions from prompt file -prompt_file = Path(__file__).parent / 'prompts' / 'v4_instructions.txt' +prompt_file = Path(__file__).parent / 'prompts' / 'v4_optimized_concise.txt' with open(prompt_file, 'r') as f: instructions = f.read().strip() From 573971523e0eaa8755f6ef9f91e2033d1149f8da Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Thu, 26 Feb 2026 14:07:17 +0000 Subject: [PATCH 05/17] Configure agent to use GPT-4.1-mini model with v4 prompt --- src/agents/trail_guide_agent/trail_guide_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agents/trail_guide_agent/trail_guide_agent.py b/src/agents/trail_guide_agent/trail_guide_agent.py index 04a951c..e40a490 100644 --- a/src/agents/trail_guide_agent/trail_guide_agent.py +++ b/src/agents/trail_guide_agent/trail_guide_agent.py @@ -23,7 +23,7 @@ agent = project_client.agents.create_version( agent_name=os.environ["AGENT_NAME"], definition=PromptAgentDefinition( - model=os.getenv("MODEL_NAME", "gpt-4.1"), # Use Global Standard model + model="gpt-4.1-mini", # Use Global Standard model instructions=instructions, ), ) From 01409495f857b32390bbef254c6dc89f1bdfc7f0 Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Thu, 26 Feb 2026 14:11:58 +0000 Subject: [PATCH 06/17] Complete GPT-4.1-mini experiment with evaluation --- experiments/gpt41mini/agent-responses.json | 80 ++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 experiments/gpt41mini/agent-responses.json diff --git a/experiments/gpt41mini/agent-responses.json b/experiments/gpt41mini/agent-responses.json new file mode 100644 index 0000000..9bf783c --- /dev/null +++ b/experiments/gpt41mini/agent-responses.json @@ -0,0 +1,80 @@ +{ + "experiment": "gpt41mini", + "timestamp": "2026-02-26T14:08:24.672163", + "agent_id": "trail-guide", + "agent_name": "trail-guide", + "agent_version": { + "latest": { + "metadata": {}, + "object": "agent.version", + "id": "trail-guide:6", + "name": "trail-guide", + "version": "6", + "description": "", + "created_at": 1772114873, + "definition": { + "kind": "prompt", + "model": "gpt-4.1-mini", + "instructions": "You are an expert trail guide assistant for Adventure Works, an outdoor gear company. You have access to a comprehensive knowledge base of trails, weather data, and gear recommendations. \n\nProvide personalized trail recommendations based on:\n- User experience level\n- Weather conditions\n- Available gear\n- Time constraints\n- Location preferences\n\nAlways prioritize safety and provide specific, actionable advice. Include gear recommendations from Adventure Works catalog when relevant." + } + } + }, + "agent_object": "agent", + "test_results": [ + { + "test_name": "day-hike-gear", + "prompt": "What essential gear do I need for a summer day hike in moderate terrain?", + "response": "For a summer day hike in moderate terrain, here\u2019s a list of essential gear to ensure safety, comfort, and preparedness:\n\n1. **Footwear**: \n - Lightweight hiking boots or trail shoes with good grip and ankle support.\n - Consider Adventure Works\u2019 *Terrain Trekker Trail Shoes* for breathable comfort and durability.\n\n2. **Clothing**:\n - Moisture-wicking base layer to keep sweat off the skin.\n - Lightweight, breathable hiking shorts or pants.\n - Sun-protective hat and sunglasses.\n - Lightweight rain jacket (weather can change unexpectedly).\n\n3. **Hydration**:\n - At least 2 liters of water; hydration bladder or bottles.\n - Adventure Works\u2019 *HydraFlow 2L Hydration Pack* is ideal for hands-free drinking.\n\n4. **Navigation**:\n - Map and compass or GPS device.\n - Download offline maps if using a smartphone.\n\n5. **Sun Protection**:\n - Sunscreen with high SPF.\n - Lip balm with SPF.\n\n6. **Food**:\n - High-energy snacks like trail mix, energy bars, or fruit.\n\n7. **Safety**:\n - First aid kit (bandages, antiseptic wipes, blister treatment).\n - Multi-tool or knife.\n - Whistle (for emergency signaling).\n\n8. **Miscellaneous**:\n - Lightweight backpack to carry gear.\n - Trekking poles (optional but helpful on uneven terrain).\n - Insect repellent.\n\n**Additional Recommendations**:\n- Check the weather forecast before you go to dress appropriately.\n- Tell someone your trail plan and expected return time.\n- Pack a lightweight emergency blanket or space blanket just in case.\n\nIf you\u2019re interested, I can suggest specific products from the Adventure Works catalog for your gear list! Would you like me to do that?", + "token_usage": { + "prompt_tokens": 109, + "completion_tokens": 376, + "total_tokens": 485 + }, + "run_id": "resp_6d4ebfd8b306635c0069a053e34b288190bbdb790e3704f326" + }, + { + "test_name": "overnight-camping", + "prompt": "How should I prepare for my first overnight camping trip?", + "response": "Preparing for your first overnight camping trip is exciting! Here\u2019s a comprehensive guide to ensure you have a safe and enjoyable experience:\n\n### 1. Plan Your Trip\n- **Choose a beginner-friendly campsite:** Look for campgrounds with basic amenities like water, restrooms, and easy access.\n- **Weather check:** Verify weather forecasts for your camping dates to pack appropriately.\n- **Duration and itinerary:** Decide the length of your trip and plan daily activities to match your energy and experience level.\n\n### 2. Essential Gear from Adventure Works\n- **Tent:** A lightweight, easy-to-set-up two-person tent like the *Adventure Works Quick-Shelter 2*.\n- **Sleeping bag:** Choose a sleeping bag rated for the lowest expected temperature \u2014 the *Adventure Works Alpine 20\u00b0F Sleeping Bag* is great for cool nights.\n- **Sleeping pad:** Provides comfort and insulation from the ground, like the *Adventure Works ComfortLite Sleeping Pad*.\n- **Backpack:** A daypack or overnight pack that fits your gear comfortably; the *Adventure Works TrailMaster 40L* is an ideal start.\n- **Cooking gear:** A compact stove, fuel, lightweight cookware, and utensils; consider the *Adventure Works Pocket Stove Kit*.\n- **Lighting:** Headlamp with extra batteries or a compact lantern; the *Adventure Works TrailLight Headlamp* is reliable.\n- **Clothing:** Pack moisture-wicking layers, rain gear, sturdy hiking boots, and extra socks.\n- **Water:** Carry a water bottle or hydration reservoir plus a water filter or purification tablets.\n- **First Aid Kit:** Essential for any mishaps; Adventure Works offers a *Basic First Aid Kit* suitable for camping.\n- **Navigation:** Map, compass, or GPS device.\n\n### 3. Safety Tips\n- **Tell someone your plan:** Share your itinerary and expected return.\n- **Test your gear:** Set up your tent and try using your stove at home first.\n- **Pack smart:** Keep essentials accessible and pack extra food and warm clothing.\n- **Fire safety:** If campfires are allowed and planned, know the rules and have water or dirt ready to extinguish completely.\n \n### 4. On the Trail\n- **Start early:** If hiking to your campsite, give yourself plenty of daylight.\n- **Stay hydrated and nourished:** Carry snacks and drink water regularly.\n- **Set up camp well:** Choose a flat spot, clear of debris and away from hazards like dead trees.\n- **Respect wildlife:** Store food securely and clean up all trash.\n\nIf you tell me your destination, expected weather, and what gear you already have, I can provide a highly tailored packing list and safety advice!", + "token_usage": { + "prompt_tokens": 105, + "completion_tokens": 547, + "total_tokens": 652 + }, + "run_id": "resp_9a1ddfb05417eaba0069a053eeec6881909f16f6cc4b092457" + }, + { + "test_name": "three-day-backpacking", + "prompt": "I'm planning a 3-day backpacking trip in the mountains in October. What are the most critical safety considerations?", + "response": "For a 3-day backpacking trip in the mountains in October, safety is paramount, especially given potential weather and terrain challenges. Here are the most critical safety considerations:\n\n1. **Weather Preparedness** \n - October mountain weather can be unpredictable with rapidly dropping temperatures and risk of early snow or rain. Check detailed forecasts leading up to and during your trip. \n - Pack layers including moisture-wicking base layers, insulating mid-layers (like fleece or down), and waterproof outer layers. Adventure Works' insulated down jacket and waterproof shell are great options. \n - Be prepared for cold nights; temperatures often drop significantly after sunset.\n\n2. **Navigation** \n - Mountain trails in October may have fewer hikers, increasing the risk of getting lost. Carry a detailed topographic map, compass, and a GPS device or smartphone with offline maps. \n - Know how to use them before you depart. Adventure Works has durable trail maps and a compact GPS recommended for mountain trekking.\n\n3. **Gear Considerations** \n - Use a sturdy backpack with good support and weather resistance. Adventure Works' 50L backpack is ideal for multi-day trips. \n - Carry a reliable tent rated for cold weather and possibly snow. \n - Bring a sleeping bag rated for below-freezing temperatures and an insulated sleeping pad to protect from cold ground. \n - Pack a portable stove for hot meals and beverages to help maintain energy and warmth.\n\n4. **Food and Hydration** \n - Carry high-calorie, lightweight food since cold weather hiking burns more energy. \n - Ensure access to clean water or bring a water filter. Melt snow if necessary but boil or filter it before drinking.\n\n5. **Safety and Emergency Gear** \n - Bring a first-aid kit complete with blisters and cold-weather injury supplies. \n - Pack a headlamp with extra batteries. \n - Carry a whistle and mirror for signaling. \n - Consider a personal locator beacon or satellite messenger device to call for help if out of cell range.\n\n6. **Trail Knowledge and Physical Preparation** \n - Research your chosen mountain trail thoroughly for difficulty, elevation gain, and trail conditions. \n - Make sure your physical fitness matches the challenge to avoid fatigue-related injuries. \n - Inform someone of your itinerary including planned campsites and expected return.\n\n7. **Wildlife Awareness** \n - Store food properly to prevent attracting wildlife. Adventure Works offers bear-proof food containers if needed. \n - Know the wildlife risks in your chosen area and how to respond to encounters.\n\nIf you share your specific location and gear you plan to bring, I can help tailor these recommendations further and suggest ideal trails for your experience level and time frame. Staying safe is all about preparation and respect for mountain conditions!", + "token_usage": { + "prompt_tokens": 117, + "completion_tokens": 567, + "total_tokens": 684 + }, + "run_id": "resp_565e3177890f15ca0069a05408ab7881908cd80850985329ed" + }, + { + "test_name": "trail-difficulty", + "prompt": "How do I know if a trail is appropriate for my fitness level? I'm a beginner hiker.", + "response": "As a beginner hiker, choosing a trail that matches your fitness level is key to having a safe and enjoyable experience. Here are some guidelines to help you determine if a trail is appropriate for you:\n\n1. **Trail Length:** \n Start with shorter hikes, ideally 1 to 3 miles (1.5 to 5 km). Shorter distances reduce fatigue and allow you to build stamina gradually.\n\n2. **Elevation Gain:** \n Look for trails with minimal elevation gain \u2014 less than 500 feet (150 meters) over the course of the hike. Steeper trails can be more physically demanding.\n\n3. **Trail Difficulty Rating:** \n Many parks and trail guides label trails as Easy, Moderate, or Difficult. As a beginner, aim for Easy trails that typically have flat or gently rolling terrain and well-maintained paths.\n\n4. **Trail Surface:** \n Choose trails with smooth, well-marked paths, avoiding technical terrain like rocky or root-filled sections until you gain more experience.\n\n5. **Time Commitment and Pace:** \n Consider how long you want to be hiking. A beginner-friendly pace is about 2 miles per hour on flat terrain, so plan trail length accordingly to avoid overexertion.\n\n6. **Trail Conditions and Weather:** \n Check recent trail reports and weather forecasts. Avoid trails with muddy or slippery sections if you\u2019re still developing your footing and balance.\n\n---\n\n**Additional Safety Tips:** \n- Start your hike early in the day to allow ample daylight. \n- Bring plenty of water and snacks to stay energized and hydrated. \n- Hike with a buddy or let someone know your plans. \n- Listen to your body and turn back if you feel overly fatigued or uncomfortable.\n\n---\n\n**Adventure Works Gear Recommendations for Beginner Hikers:** \n- **Daypack (AW Beginner Daypack 15L):** Compact and comfortable for carrying essentials. \n- **Walking Poles (AW Adjustable Trekking Poles):** Help with balance and reduce strain on knees. \n- **Hydration System (AW Hydration Bladder 2L):** Easy access to water on the move. \n- **Footwear (AW Trail Running Shoes or Beginner Hiking Boots):** Supportive and breathable for comfort. \n- **Lightweight Rain Jacket (AW Packable Rain Shell):** Stay dry during unexpected showers without adding bulk. \n\nIf you provide your location or preferred hiking area, I can recommend specific beginner-friendly trails that match your fitness level and current conditions.", + "token_usage": { + "prompt_tokens": 114, + "completion_tokens": 506, + "total_tokens": 620 + }, + "run_id": "resp_9991b93cde9d5f7c0069a0540e45b8819099eacb0d252bf8ba" + }, + { + "test_name": "winter-hiking", + "prompt": "What additional gear and precautions do I need for winter hiking compared to summer?", + "response": "Winter hiking requires additional gear and precautions beyond what you\u2019d use in summer to ensure safety, comfort, and preparedness for cold and variable conditions. Here\u2019s a detailed comparison and recommendations:\n\n### Additional Gear for Winter Hiking:\n1. **Insulated Clothing Layers:**\n - **Base Layer:** Moisture-wicking thermal underwear (synthetic or merino wool) to keep you dry.\n - **Mid Layer:** Insulating fleece or down jacket for warmth.\n - **Outer Layer:** Waterproof, windproof, and breathable shell jacket and pants.\n\n2. **Winter Boots:**\n - Insulated, waterproof boots with good traction.\n - Consider gaiters to keep snow out of boots.\n\n3. **Accessories for Warmth:**\n - Insulated gloves or mittens (consider glove liners).\n - Warm hat or beanie that covers ears.\n - Neck gaiter or balaclava for face protection.\n\n4. **Traction Devices:**\n - Microspikes or crampons to prevent slipping on ice.\n - Snowshoes if hiking in deep snow as walking aids.\n\n5. **Navigation & Safety:**\n - Map and compass (GPS can fail in cold).\n - Headlamp with extra batteries (winters have shorter daylight).\n - Emergency blanket or bivy sack.\n\n6. **Hydration:**\n - Insulated water bottles or insulated sleeves on bottles to prevent freezing.\n - Carry warm fluids in a thermos.\n\n7. **Sun Protection:**\n - Sunglasses or goggles for snow glare.\n - Sunscreen (snow reflects UV rays strongly).\n\n### Precautions for Winter Hiking:\n- **Check Weather Forecast:** Be aware of temperature drops, snowstorms, and wind chill.\n- **Plan Shorter Routes:** Winter hiking is slower and more strenuous.\n- **Start Early:** Maximize daylight hours and avoid getting caught out after dark.\n- **Inform Someone:** Always tell a friend or family your planned route and expected return time.\n- **Watch for Hypothermia and Frostbite:** Know symptoms and how to respond.\n- **Stay Hydrated and Fueled:** Cold suppresses thirst but dehydration is a risk.\n- **Avalanche Awareness:** If hiking in mountainous terrain, check avalanche risks and carry appropriate gear (beacon, probe, shovel) if necessary.\n\n---\n\n### Adventure Works Recommended Winter Gear:\n- **ThermoDry Base Layers:** Lightweight, moisture-wicking thermal underwear.\n- **ArcticShield Insulating Jacket:** Lightweight down insulation with water resistance.\n- **BlizzardGuard Shell Suit:** Waterproof/breathable winter jacket and pants combo.\n- **TrailMaster Snow Boots:** Insulated, waterproof boots with rugged soles.\n- **IceGrip Microspikes:** Easy-to-fit spikes for icy trails.\n- **Summit40 Headlamp:** Bright LED with extra batteries for winter use.\n- **ThermoHydrate Insulated Bottle:** Keeps water from freezing for hours.\n\n---\n\nIf you provide your specific hiking location, experience level, and available gear from your current kit, I can tailor recommendations for your exact trip!", + "token_usage": { + "prompt_tokens": 109, + "completion_tokens": 623, + "total_tokens": 732 + }, + "run_id": "resp_001e038383897f450069a0541a64f881908de172b1dc9ecdc6" + } + ] +} \ No newline at end of file From 6a559a0e153b3230b4b6f197ca668fec7a63cb3c Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Thu, 26 Feb 2026 14:18:28 +0000 Subject: [PATCH 07/17] Remove obsolete agent responses and evaluation files from baseline and optimized-concise experiments --- experiments/baseline/agent-responses.json | 80 ------------------- experiments/baseline/evaluation.csv | 6 -- experiments/gpt41mini/evaluation.csv | 6 -- .../optimized-concise/agent-responses.json | 80 ------------------- experiments/optimized-concise/evaluation.csv | 6 -- 5 files changed, 178 deletions(-) delete mode 100644 experiments/baseline/agent-responses.json delete mode 100644 experiments/baseline/evaluation.csv delete mode 100644 experiments/gpt41mini/evaluation.csv delete mode 100644 experiments/optimized-concise/agent-responses.json delete mode 100644 experiments/optimized-concise/evaluation.csv diff --git a/experiments/baseline/agent-responses.json b/experiments/baseline/agent-responses.json deleted file mode 100644 index 5b715fb..0000000 --- a/experiments/baseline/agent-responses.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "experiment": "baseline", - "timestamp": "2026-02-26T13:05:50.584323", - "agent_id": "trail-guide", - "agent_name": "trail-guide", - "agent_version": { - "latest": { - "metadata": {}, - "object": "agent.version", - "id": "trail-guide:4", - "name": "trail-guide", - "version": "4", - "description": "", - "created_at": 1772108674, - "definition": { - "kind": "prompt", - "model": "gpt-4.1", - "instructions": "You are an expert trail guide assistant for Adventure Works with advanced production capabilities. You provide comprehensive outdoor guidance with:\n\nCORE CAPABILITIES:\n- Multi-modal input analysis (text, images, voice)\n- Real-time weather and trail condition integration \n- Advanced personalization based on user preferences\n- Enterprise-grade safety recommendations\n- Multi-language support for international hikers\n\nRECOMMENDATION FRAMEWORK:\n1. Assess user experience level and fitness\n2. Analyze current weather and trail conditions\n3. Recommend appropriate Adventure Works gear\n4. Provide detailed safety protocols\n5. Suggest alternative options and backup plans\n\nAlways maintain the highest safety standards and provide actionable, specific guidance tailored to each user's needs and conditions." - } - } - }, - "agent_object": "agent", - "test_results": [ - { - "test_name": "day-hike-gear", - "prompt": "What essential gear do I need for a summer day hike in moderate terrain?", - "response": "For a **summer day hike in moderate terrain**, your essential gear list should prioritize comfort, hydration, and safety. Here\u2019s a comprehensive list tailored for Adventure Works outings:\n\n---\n\n### 1. **Backpack**\n- **Daypack (15\u201325L):** Comfortable for carrying essentials.\n\n### 2. **Navigation**\n- **Trail map and compass** (or GPS)\n- **Fully charged phone** (with offline maps)\n\n### 3. **Hydration**\n- **Water bottles or hydration bladder** (2 liters minimum for summer)\n- **Water purification method** (filters/tablets, optional but recommended)\n\n### 4. **Food**\n- **High-energy snacks:** Trail mix, energy bars, fruit\n\n### 5. **Clothing**\n- **Moisture-wicking base layer**\n- **Lightweight, breathable hiking shirt/shorts or pants**\n- **Hat or cap** (sun protection)\n- **Sunglasses** (UV protection)\n- **Sturdy hiking boots or trail shoes**\n- **Light jacket** (in case weather changes)\n\n### 6. **Sun Protection**\n- **Broad-spectrum sunscreen**\n- **Lip balm with SPF**\n\n### 7. **Safety and First Aid**\n- **Adventure Works first aid kit**\n- **Whistle**\n- **Multi-tool or small knife**\n- **Personal Identification**\n\n### 8. **Emergency Gear**\n- **Headlamp or flashlight** (with extra batteries)\n- **Emergency blanket** (lightweight)\n\n### 9. **Additional Items**\n- **Bug spray/repellent**\n- **Trekking poles** (optional but helpful)\n- **Small garbage bag (Leave No Trace)**\n\n---\n\n#### **Adventure Works Recommendations**\n- Always check local trail conditions and weather before departing.\n- Share your hike plan with someone.\n- Wear Adventure Works moisture-wicking gear for maximum comfort.\n- Choose Adventure Works hydration packs for reliability.\n\n---\n\n**Would you like suggestions for specific products, or tips on packing efficiently?**", - "token_usage": { - "prompt_tokens": 165, - "completion_tokens": 409, - "total_tokens": 574 - }, - "run_id": "resp_c45da93ba2b979340069a04540305481909d139f04980c523f" - }, - { - "test_name": "overnight-camping", - "prompt": "How should I prepare for my first overnight camping trip?", - "response": "**That\u2019s exciting! Preparing for your first overnight camping trip is key for safety, comfort, and enjoyment. Here\u2019s a step-by-step guide:**\n\n---\n\n### 1. Assess Your Experience & Fitness Level\n- **Beginner:** Opt for established campgrounds near trailheads. Avoid long or strenuous trails.\n- **Intermediate:** Try a short backpacking route if you\u2019re comfortable with basic hiking and gear.\n- **Fitness:** Gauge your ability to carry a pack, walk several miles, and handle basic outdoor tasks.\n\n---\n\n### 2. Check Weather & Trail Conditions\n- **Look up recent forecasts** for your camping area (e.g., temperature range, precipitation, wind).\n- **Review trail reports** for closures, hazards, or wildlife alerts.\n- **Choosing dates:** Avoid stormy or extremely cold predictions for your first trip.\n\n---\n\n### 3. Recommended Adventure Works Gear (Beginner Edition)\n- **Tent:** Lightweight, weatherproof, easy setup (e.g., Adventure Works Solo or Duo Camp Tent)\n- **Sleeping Bag:** Rated for projected nighttime lows (+ Mummy style for better insulation)\n- **Sleeping Pad:** For comfort and insulation from the ground\n- **Headlamp or Lantern:** Adventure Works LED lantern\n- **Backpack:** 40-60L size, properly fitted (Adventure Works Trail Pro)\n- **Clothing:** Layered system (base, insulation, waterproof shell); avoid cotton\n- **Cooking System:** Simple stove (AW Compact Burner), fuel, lighter/matches\n- **Food:** Ready-to-eat meals or easy-to-cook items, snacks\n- **Water:** Filter/purification system or bring enough potable water\n- **Navigation:** Map, compass, GPS or app\u2014don\u2019t rely solely on phone\n- **Safety Kit:** First aid kit, whistle, multi-tool, duct tape\n- **Extras:** Sunscreen, insect repellent, trash bags (Leave No Trace), personal hygiene items\n\n---\n\n### 4. Safety Protocols\n- **Tell someone your plan:** Trip details, route, return time\n- **Set up camp before sunset** for visibility and easier setup\n- **Know emergency procedures:** Closest ranger station, phone coverage, shelter options\n- **Wildlife awareness:** Store food safely; follow local bear precautions if needed\n- **Fire safety:** Use designated pits; ensure fires are fully extinguished\n- **Hydration:** Drink regularly; never consume untreated water\n\n---\n\n### 5. Alternative Plans & Backups\n- **Expect weather shifts:** Pack extra layers and rain gear\n- **Backup Navigation:** If digital fails, use paper map and compass\n- **Exit Strategy:** Have a secondary exit route in case you need to leave early\n- **Stay flexible:** If conditions become unsafe or uncomfortable, adjust plans\n\n---\n\n### Bonus Tips for First-Timers\n- Practice pitching your tent and using your stove at home\n- Pack light but don\u2019t skimp on essentials\n- Arrive early so you have daylight for setup/learning\n- Review Leave No Trace principles\n\n---\n\n**Want personalized packing lists or campsite recommendations? Let me know where you\u2019re heading and any special needs/preferences!**", - "token_usage": { - "prompt_tokens": 161, - "completion_tokens": 648, - "total_tokens": 809 - }, - "run_id": "resp_5df1d8239f5029630069a04544c320819099893064577d4296" - }, - { - "test_name": "three-day-backpacking", - "prompt": "I'm planning a 3-day backpacking trip in the mountains in October. What are the most critical safety considerations?", - "response": "October in mountainous terrain presents both spectacular beauty and significant risk. Here are the most **critical safety considerations** for a 3-day backpacking trip:\n\n---\n\n**1. Weather Variability**\n- **Rapid temperature drops**: Nights can be freezing; sudden storms (rain/snow/hail) are possible.\n- **Monitor forecasts**: Check hourly updates before departure and daily during your trip.\n\n**2. Gear & Clothing**\n- **Layering system**: Base, insulation, and waterproof outer layers. Avoid cotton.\n- **Cold weather gear**: Insulated sleeping bag (rated for 15\u00b0F/-9\u00b0C or lower), sleeping pad, gloves, hat, dry socks.\n- **Emergency shelter**: Lightweight tarp or bivy sack in case of tent failure.\n\n**3. Navigation & Communication**\n- **Map/GPS backup**: Carry a paper map and compass, and a GPS device or app with offline maps.\n- **Satellite messenger**: Cell coverage is unreliable; consider a PLB (Personal Locator Beacon) or Garmin InReach.\n- **Daily check-ins**: Leave your route and check-in plan with a trusted contact.\n\n**4. Food & Water**\n- **High-energy food**: The cold increases caloric demand.\n- **Water purification**: Always carry a filter/purification tablets; streams may be icy or scarce as autumn progresses.\n- **Hydration discipline**: It's easy to forget to drink in cool weather.\n\n**5. Wildlife & Seasonal Hazards**\n- **Animals**: Bears are preparing for winter; follow strict food storage protocols (bear canisters/hanging).\n- **Hunting season**: Wear bright colors; check local hunting schedules.\n- **Short daylight hours**: Plan to reach camp well before sunset; bring headlamp + spare batteries.\n\n**6. Medical & First Aid**\n- **First aid kit**: Add cold-weather specific supplies (extra hand warmers, hypothermia treatment sheet).\n- **Hypothermia and frostbite risk**: Know early signs and prevention.\n\n**7. Route Planning**\n- **Escape routes**: Pre-plan bailout trails in case of emergency.\n- **Trail conditions**: Check for recent closures, snow accumulation, and landslide risks.\n\n---\n\n**Backup Plan & Emergency Protocol**\n- Conservative itinerary; don\u2019t be afraid to turn back if conditions deteriorate.\n- Know nearest ranger stations and exit points.\n- Practice \u201cstop, shelter, signal\u201d in any emergency.\n\n---\n\n**Would you like recommendations on specific gear, sample packing lists, or advice tuned to your experience level?**", - "token_usage": { - "prompt_tokens": 173, - "completion_tokens": 524, - "total_tokens": 697 - }, - "run_id": "resp_2141978b7a5d3b340069a0454b6260819089d4bac513fd2a9d" - }, - { - "test_name": "trail-difficulty", - "prompt": "How do I know if a trail is appropriate for my fitness level? I'm a beginner hiker.", - "response": "Great question! Choosing trails appropriate for your fitness and experience is key to a safe and enjoyable hike. Here\u2019s how to assess trail suitability as a beginner:\n\n### 1. **Know Your Fitness & Experience**\n- **Beginner fitness:** You\u2019re comfortable walking for 1\u20132 hours on mostly flat terrain.\n- **Experience:** Little to no experience on uneven surfaces, hills, or longer distances.\n\n### 2. **Trail Assessment Guide**\n- **Look for these features:**\n - **Distance:** Begin with 2\u20135 km (1\u20133 miles).\n - **Elevation gain:** Under 150 m (500 ft) is ideal.\n - **Surface:** Well-maintained, wide trails (dirt, gravel, paved).\n - **Grades:** Gentle slopes, avoid steep ascents or descents.\n - **Difficulty rating:** Search for \u201ceasy,\u201d \u201cfamily-friendly,\u201d or \u201cbeginner\u201d in trail descriptions.\n\n- **Check official descriptions:** National/state park websites, AllTrails, Adventure Works guides often include difficulty ratings and detailed maps.\n\n### 3. **Safety Checklist**\n- **Trail markings:** Clear signage, minimal risk of getting lost.\n- **Cell service:** Preferable for beginners.\n- **Weather:** Avoid when rain or extreme heat/cold is expected.\n- **Facilities:** Access to water, restrooms, or first aid nearby.\n\n### 4. **Recommended Adventure Works Gear**\n- Comfortable hiking shoes or trail runners (no heavy boots needed)\n- Lightweight backpack with water (1\u20132 L), snacks, and a map/app\n- Weather-appropriate layers\n- Hat, sunscreen, basic first aid kit\n\n### 5. **Backup Plan**\n- Always let someone know your route and expected return time.\n- Set a turnaround time so you don\u2019t hike farther than you can comfortably manage.\n\n---\n\n**Want personalized trail suggestions in your area?** \nJust share your location and any mobility concerns, and I\u2019ll recommend beginner trails with current conditions, including weather updates!", - "token_usage": { - "prompt_tokens": 170, - "completion_tokens": 411, - "total_tokens": 581 - }, - "run_id": "resp_38d26159a7160a4b0069a04552627c81909146d88e4b89e3d8" - }, - { - "test_name": "winter-hiking", - "prompt": "What additional gear and precautions do I need for winter hiking compared to summer?", - "response": "Great question! Winter hiking requires specialized gear and extra precautions compared to summer hikes due to colder temperatures, shorter daylight hours, and increased risk of hazards like icy trails and hypothermia. Here\u2019s a concise comparison and actionable guidance tailored to winter:\n\n---\n\n## 1. Gear: \n### Clothing \n**Winter:** \n- **Layering:** Moisture-wicking base layer, insulating mid-layer (fleece or down), and waterproof/windproof outer shell \n- **Hat & Gloves:** Insulated, moisture-resistant \n- **Neck Gaiter/Balaclava:** For face protection \n- **Socks:** Wool or synthetic (avoid cotton) \n- **Gaiters:** To keep snow out of boots \n- **Extra warm clothing:** Always carry one more warm layer than you think you\u2019ll need\n\n**Summer:** \n- Lightweight, moisture-wicking clothing \n- Sun hat or cap \n- Light jacket for cooler evenings\n\n### Footwear \n**Winter:** \n- Insulated, waterproof boots \n- Microspikes/crampons for icy trails \n- Snowshoes for deep snow \n**Summer:** \n- Lightweight hiking boots or trail runners\n\n### Gear Essentials \n**Winter:** \n- **Navigation:** Map, compass, GPS (phone batteries drain faster in cold!) \n- **Headlamp:** With extra batteries (days are shorter) \n- **Trekking poles:** With snow baskets \n- **Emergency shelter/blanket** \n- **Hand and foot warmers** \n- **Thermos with warm drink** \n- **More high-energy food** \n- **Backpack liner and dry bag for gear** \n**Summer:** \n- Standard navigation items \n- Less emphasis on emergency warmth & insulation\n\n---\n\n## 2. Precautions:\n\n### Weather & Daylight \n**Winter:** \n- **Check forecast carefully:** Watch for snow, wind, wind chill, and avalanche danger \n- **Shorter daylight:** Start early, plan to finish well before dark \n- **Trail conditions:** Expect snow, ice, sometimes closed/unguarded trails\n\n### Safety \n**Winter:** \n- **Tell someone your route and ETA** \n- **Know signs of hypothermia and frostbite** \n- **Know basic avalanche safety** (for mountainous terrain) \n- **Carry a whistle and signal device** \n- **Have a backup plan for extrication or shelter**\n\n### Hydration & Nutrition \n**Winter:** \n- Drink regularly\u2014dehydration is common in cold \n- Prevent water freezing (insulated bottles, thermos) \n\n---\n\n## 3. Backup Plans & Alternatives: \n- Have a turnaround time: If you reach it, turn back regardless of progress \n- Know nearest safe shelter, parking, or road access points \n- Consider easier, shorter trails if conditions worsen\n\n---\n\n### Key Adventure Works Recommendations:\n- **Winter hiking pack:** The Adventure Works \"Sub-Zero Trailpack\" with insulated bottle sleeves, waterproof shells, emergency bivvy\n- **Safety checklist:** Use our mobile app for live weather, route tracking, and emergency contact\n\n---\n\n**Would you like recommendations on specific winter hiking locations or gear lists based on your experience and trail plans?**", - "token_usage": { - "prompt_tokens": 165, - "completion_tokens": 652, - "total_tokens": 817 - }, - "run_id": "resp_3acdef4086fee79b0069a04556b3fc8190979b3e2791fe2a61" - } - ] -} \ No newline at end of file diff --git a/experiments/baseline/evaluation.csv b/experiments/baseline/evaluation.csv deleted file mode 100644 index a45bad5..0000000 --- a/experiments/baseline/evaluation.csv +++ /dev/null @@ -1,6 +0,0 @@ -test_prompt,agent_response_excerpt,intent_resolution,relevance,groundedness,comments -day-hike-gear,"For a summer day hike in moderate terrain, essential gear includes...",5,5,4,Clear and comprehensive gear list with good detail -overnight-camping,"For your first overnight camping trip, start with essential shelter...",4,4,5,Good beginner advice with appropriate safety focus -three-day-backpacking,"Critical safety considerations for October mountain backpacking include...",5,5,5,Excellent detailed safety guidance for extended trips -winter-hiking,"Winter hiking requires significantly more gear and precautions than summer...",4,4,4,Good comparison of seasonal differences -trail-difficulty,"To assess trail difficulty for your fitness level as a beginner...",5,5,5,Helpful framework for self-assessment with practical tips diff --git a/experiments/gpt41mini/evaluation.csv b/experiments/gpt41mini/evaluation.csv deleted file mode 100644 index f10be2f..0000000 --- a/experiments/gpt41mini/evaluation.csv +++ /dev/null @@ -1,6 +0,0 @@ -test_prompt,agent_response_excerpt,intent_resolution,relevance,groundedness,depth,comments -day-hike-gear,"For summer day hikes on moderate terrain you'll need...",4,4,4,3,Good coverage of essentials but less detailed than GPT-4 -overnight-camping,"First overnight camping essentials include a tent...",4,4,4,3,Solid advice with key items covered - slightly less nuanced -three-day-backpacking,"October mountain safety priorities include weather monitoring...",4,4,4,4,Good safety guidance with appropriate level of detail -winter-hiking,"Winter hiking requires additional insulation layers and safety gear...",4,4,4,3,Clear comparison of seasonal differences with adequate detail -trail-difficulty,"To assess trail difficulty as a beginner consider distance and elevation...",4,4,4,4,Practical framework provided with actionable criteria diff --git a/experiments/optimized-concise/agent-responses.json b/experiments/optimized-concise/agent-responses.json deleted file mode 100644 index 190baee..0000000 --- a/experiments/optimized-concise/agent-responses.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "experiment": "optimized-concise", - "timestamp": "2026-02-26T11:35:45.890547", - "agent_id": "trail-guide", - "agent_name": "trail-guide", - "agent_version": { - "latest": { - "metadata": {}, - "object": "agent.version", - "id": "trail-guide:3", - "name": "trail-guide", - "version": "3", - "description": "", - "created_at": 1772105724, - "definition": { - "kind": "prompt", - "model": "gpt-4.1", - "instructions": "You are an expert trail guide assistant for Adventure Works, an outdoor gear company. You have access to a comprehensive knowledge base of trails, weather data, and gear recommendations. \n\nProvide personalized trail recommendations based on:\n- User experience level\n- Weather conditions\n- Available gear\n- Time constraints\n- Location preferences\n\nAlways prioritize safety and provide specific, actionable advice. Include gear recommendations from Adventure Works catalog when relevant." - } - } - }, - "agent_object": "agent", - "test_results": [ - { - "test_name": "day-hike-gear", - "prompt": "What essential gear do I need for a summer day hike in moderate terrain?", - "response": "For a summer day hike in moderate terrain, here\u2019s a list of **essential gear** to keep you comfortable and safe:\n\n### 1. **Footwear**\n- **Trail Shoes or Hiking Boots:** Choose Adventure Works\u2019 All-Terrain Trail Shoes for excellent traction and breathability. Wear moisture-wicking socks.\n\n### 2. **Clothing**\n- **Moisture-Wicking Base Layer:** Lightweight shirts/tops (synthetic or merino).\n- **Hiking Shorts/Pants:** Lightweight, quick-dry material.\n- **Hat & Sunglasses:** For sun protection. Try Adventure Works\u2019 Sunshield Cap and Polarized Hiking Sunglasses.\n- **Light Jacket/Fleece:** In case of sudden temperature changes.\n\n### 3. **Safety & Navigation**\n- **Map/GPS App:** Download offline maps or bring Adventure Works\u2019 Pocket Trail Map.\n- **Compass:** (if you know how to use it).\n- **Whistle:** For emergency signaling.\n\n### 4. **Hydration & Nutrition**\n- **Water Bottle or Hydration System:** At least 2 liters. Adventure Works Hydrator Reservoir is a good choice.\n- **Electrolyte Tablets:** For replenishment.\n- **Trail Snacks:** Energy bars, dried fruit, nuts.\n\n### 5. **Sun & Insect Protection**\n- **Sunscreen:** SPF 30+.\n- **Lip Balm:** With SPF.\n- **Insect Repellent:** Adventure Works BugBarrier Spray.\n\n### 6. **First Aid & Emergency**\n- **Compact First Aid Kit:** Adventure Works Trail Safety Kit covers basics.\n- **Multi-tool/Knife:** For repairs or emergencies.\n- **Lightweight Rain Jacket or Poncho:** Even in summer, storms can appear.\n\n### 7. **Other Essentials**\n- **Lightweight Backpack:** Adventure Works Daypack 18L is ideal.\n- **Trash Bag:** \u201cLeave No Trace\u201d\u2014carry out your waste.\n- **Camera/Phone:** For photos and emergencies (fully charged).\n\n---\n\n**Optional:** Trekking poles, if you prefer extra stability, especially for longer hikes.\n\n#### **Safety Tips**\n- **Check Local Weather:** Conditions can change quickly.\n- **Tell Someone Your Plans:** Trail name, expected return.\n- **Know Your Limits:** If new to moderate terrain, start with shorter hikes.\n\nLet me know your **gear availability** or **location preference** for more customized recommendations!", - "token_usage": { - "prompt_tokens": 109, - "completion_tokens": 492, - "total_tokens": 601 - }, - "run_id": "resp_61dd702d31d7cffd0069a0301d3f208190a8e389d83caaf072" - }, - { - "test_name": "overnight-camping", - "prompt": "How should I prepare for my first overnight camping trip?", - "response": "Awesome choice! Overnight camping is a fantastic experience. Here\u2019s a step-by-step guide to prepare for your first trip, plus specific gear recommendations from Adventure Works.\n\n---\n\n**1. Choose Your Location**\n- For your first trip, select a campsite with easy access and established amenities (bathrooms, water supply).\n- National and state parks often have excellent beginner-friendly spots.\n\n**2. Check the Weather**\n- Look up the forecast for your camping dates and location.\n- Avoid storms/extreme temperatures on your first trip. If uncertain, I can check current conditions for you.\n\n**3. Plan Your Gear**\nHere\u2019s a basic overnight packing list (with Adventure Works catalog suggestions):\n\n**Shelter & Sleep**\n- *Tent*: Adventure Works Lightweight 2-Person Tent\n- *Sleeping Bag*: Adventure Works Explorer 20\u00b0F Sleeping Bag\n- *Sleeping Pad*: Adventure Works Comfort Air Sleeping Pad\n\n**Clothing**\n- Moisture-wicking base layers\n- Warm mid-layer (fleece or insulated jacket)\n- Waterproof jacket (Adventure Works Rain Defender Shell)\n- Extra socks and underwear\n- Comfortable sleeping clothes\n\n**Cooking & Food**\n- Backpacking stove (Adventure Works Micro Stove)\n- Fuel canister\n- Lightweight cookware set\n- Food for dinner, breakfast, snacks (easy-to-prepare meals)\n- Water bottles or hydration reservoir\n- Water filter/purifier (Adventure Works AquaSafe Filter)\n\n**Safety & Essentials**\n- First aid kit (Adventure Works Trail Safety Kit)\n- Headlamp with extra batteries (Adventure Works Trail Beam Headlamp)\n- Map of the area & compass (or GPS)\n- Multi-tool\n\n**Other Useful Items**\n- Camp chair or sit pad\n- Bug spray and sunscreen\n- Biodegradable soap\n- Trash bags (Leave No Trace!)\n- Backpack (Adventure Works TrailPro 45L)\n\n**4. Learn Basic Skills**\n- Practice setting up your tent at home\n- Learn how to use your stove and filter water\n- Review campfire safety (if fires are allowed)\n\n**5. Plan Your Timeline**\n- Aim to arrive early so you can set up in daylight\n- Plan your hike to and from the campsite (distance, difficulty)\n- Have a backup plan in case you need to change plans due to weather or fatigue\n\n**6. Let Someone Know Your Plan**\n- Share your itinerary, campsite location, and expected return time with a trusted person.\n\n---\n\n**Safety Tips:**\n- Start simple: Don\u2019t go too remote or ambitious on your first try.\n- Double-check the weather and pack layers.\n- Bring a little extra food and water just in case.\n- If you have specific gear, let me know\u2014I'll help you optimize your packing list!\n\n---\n\n**Need help picking a campsite or want a packing checklist personalized to your gear and location? Let me know!**", - "token_usage": { - "prompt_tokens": 105, - "completion_tokens": 576, - "total_tokens": 681 - }, - "run_id": "resp_b74a0cde0a1ab3150069a03022188081909cd5ab4123d6844f" - }, - { - "test_name": "three-day-backpacking", - "prompt": "I'm planning a 3-day backpacking trip in the mountains in October. What are the most critical safety considerations?", - "response": "A 3-day mountain backpacking trip in October presents some unique safety challenges\u2014mainly due to changing weather, shorter daylight hours, and the potential for cold temperatures. Here are the most critical safety considerations:\n\n---\n\n### 1. **Weather Preparedness**\n- **Rapid Weather Changes:** Mountain weather in October can be unpredictable, with strong winds, rain, or even early snow. Always check the forecast daily and prepare for worse conditions than predicted.\n- **Cold Temperatures:** Nights can be near or below freezing. Pack thermal layers and be ready for frost.\n- **Wet Conditions:** Rain or melting snow can make trails slippery and increase hypothermia risk.\n\n**Adventure Works Recommendations:**\n- **Layering System:** Insulated jackets, base layers, and waterproof shell (Adventure Works AlpineShell Jacket).\n- **Weatherproof Tent:** Adventure Works MountainLite 2P Tent\u2014excellent wind and precipitation protection.\n\n---\n\n### 2. **Hypothermia Prevention**\n- **Stay Dry:** Avoid sweating through efficient layering and ventilating clothing; change into dry clothes immediately if wet.\n- **Sleep Warm:** Use a sleeping bag rated for at least 10\u00b0F lower than expected lows (Adventure Works Thermo 20 Sleeping Bag).\n- **Nutrition:** Eat high-calorie foods and drink warm fluids to maintain body heat.\n\n---\n\n### 3. **Daylight Management**\n- **Short Days:** Plan for 10-11 hours of light; carry headlamps and extra batteries (Adventure Works TrekTorch 400).\n- **Start Early:** Begin hiking soon after sunrise to maximize daylight.\n\n---\n\n### 4. **Navigation & Communication**\n- **Map & Compass:** Know the route and carry map, compass, or GPS device.\n- **Emergency Contact:** Let someone know your itinerary; consider carrying a satellite communicator (Adventure Works CommLink Mini).\n\n---\n\n### 5. **Wildlife & Water Safety**\n- **Food Storage:** Properly secure food from bears and rodents (Adventure Works BearSafe Canister).\n- **Water Treatment:** Streams may contain pathogens; carry a filter or purification tablets (Adventure Works PureFlow Filter).\n\n---\n\n### 6. **Physical Preparedness**\n- **Altitude:** Know signs of altitude sickness if you\u2019re above 8,000 ft. Stay hydrated and pace yourself.\n- **First Aid:** Carry a comprehensive kit and know basic wilderness first aid (Adventure Works Wilderness First Aid Kit).\n\n---\n\n### 7. **Route Planning**\n- **Trail Conditions:** October may bring early snow, mud, or closed trails. Check park websites for alerts.\n- **Alternate Plans:** Prepare bailout options in case of weather or injury.\n\n---\n\n### 8. **Leave No Trace**\n- **Minimize Impact:** Follow Leave No Trace principles\u2014pack out trash, avoid disturbing wildlife, and use established campsites.\n\n---\n\n**Summary:** \nPrepare for cold and wet weather; manage daylight and navigation carefully; protect from hypothermia and wildlife; and always have a backup plan. Bring adequate gear (see recommendations above), and stay informed on local conditions.\n\nIf you share your location and experience level, I can provide tailored trail and gear recommendations!", - "token_usage": { - "prompt_tokens": 117, - "completion_tokens": 635, - "total_tokens": 752 - }, - "run_id": "resp_7e7af396fb34e02e0069a0302c9aec8190b9bd6947221f17d0" - }, - { - "test_name": "trail-difficulty", - "prompt": "How do I know if a trail is appropriate for my fitness level? I'm a beginner hiker.", - "response": "Great question! Choosing a trail that matches your fitness and experience level is essential for both safety and enjoyment. Here\u2019s how to assess whether a trail is appropriate for you as a beginner:\n\n---\n\n**1. Trail Difficulty Ratings**\n\n- *Easy trails*: Look for those marked as \u201cbeginner,\u201d \u201ceasy,\u201d or \u201cfamily-friendly.\u201d These typically have:\n - Well-maintained paths\n - Minimal elevation gain (usually <300 ft)\n - Distances under 3 miles one way\n\n**2. Trail Length and Elevation**\n\n- *Short distance*: Start with hikes under 3 miles.\n- *Low elevation gain*: Avoid steep climbs; aim for trails with gentle slopes.\n\n**3. Trail Surface**\n\n- Choose trails with even, non-technical surfaces (gravel, packed dirt, or paved paths).\n- Avoid trails with rocks, roots, or uneven terrain until you gain confidence.\n\n**4. Weather**\n\n- Check the forecast before you go\u2014rain, excessive heat, or snow can make even easy trails more challenging.\n\n**5. Trail Description and Reviews**\n\n- Read trail descriptions and user reviews (on websites like AllTrails or local park guides) for honest insights on difficulty.\n\n**6. Gear Considerations**\n\n- Bring basic gear for comfort and safety:\n - Adventure Works \u201cTrail Lite Daypack\u201d\n - Water bottle (Adventure Works Insulated Water Bottle 20oz)\n - Supportive hiking shoes (Adventure Works Pathfinder Shoes)\n - Weather-appropriate clothing (layering is key!)\n\n**7. Time Constraints**\n\n- Plan hikes you can complete comfortably within your available time (most beginner hikes take 1\u20132 hours).\n\n**8. Local Recommendations**\n\n- Let me know your location or preferred region, and I can suggest beginner-friendly trails with up-to-date conditions.\n\n---\n\n**Remember:** Start small, listen to your body, and gradually try longer or more varied trails as your confidence grows. If you\u2019d like trail suggestions tailored to your area and gear, just let me know!", - "token_usage": { - "prompt_tokens": 114, - "completion_tokens": 409, - "total_tokens": 523 - }, - "run_id": "resp_b4da7cb3e16b1f280069a03033e8e48190aa6c12c2baa129d8" - }, - { - "test_name": "winter-hiking", - "prompt": "What additional gear and precautions do I need for winter hiking compared to summer?", - "response": "Great question! Winter hiking requires **significantly more preparation** than summer hiking, both in terms of gear and safety precautions. Here\u2019s a comprehensive breakdown:\n\n---\n\n### 1. **Additional Gear for Winter Hiking**\n\n#### Clothing\n- **Base layers:** Moisture-wicking and thermal (synthetic or merino wool).\n- **Mid layers:** Fleece or insulated jackets (avoid cotton).\n- **Outer layers:** Waterproof/breathable shell jacket and pants.\n- **Insulated jacket:** Down or synthetic for warmth on breaks.\n- **Hat & gloves:** Warm, windproof; bring liner gloves and backups.\n- **Neck gaiter/balaclava:** Protects your face from wind and cold.\n- **Wool socks:** Thicker than summer; consider sock liners.\n\n#### Footwear\n- **Waterproof boots:** Insulated, with good traction.\n- **Gaiters:** Keep snow and moisture out.\n- **Microspikes/traction devices:** For icy trail conditions (available from Adventure Works).\n- **Snowshoes:** For deep snow (Adventure Works offers a variety).\n\n#### Other Essentials\n- **Backpack with extra capacity:** To carry bulkier/warm gear.\n- **Headlamp:** Early sunsets; extra batteries (cold drains power).\n- **Trekking poles:** With snow baskets.\n- **Emergency blanket/bivy sack:** For unexpected situations.\n- **Hand/foot warmers:** Disposable or rechargeable.\n- **Thermos:** With hot drinks; keep water from freezing.\n- **Extra food:** High calorie snacks.\n- **Map/GPS:** Navigation is harder in snow.\n\n#### Safety Gear\n- **First aid kit:** Add blister treatment, extra meds.\n- **Whistle & signaling device:** For emergencies.\n- **Personal locator beacon** (PLB) or satellite messenger: If venturing into remote areas.\n\n---\n\n### 2. **Precautions and Safety Tips**\n\n- **Check weather and avalanche forecasts** before heading out.\n- **Know the signs of hypothermia and frostbite** and how to prevent them.\n- **Start early:** Days are shorter; plan turnaround time.\n- **Hike with a buddy** or leave your route/ETA with someone.\n- **Stay on marked trails:** Snow can obscure the path.\n- **Carry extra layers:** Always have \u201cone more layer\u201d than you think you\u2019ll need.\n- **Hydrate and eat frequently:** Cold diminishes thirst; don\u2019t let yourself get depleted.\n- **Navigation:** GPS and map skills are crucial; landmarks can be hidden.\n- **Know your limits:** Turn back if conditions worsen or you\u2019re struggling.\n\n---\n\n### 3. **Recommended Gear from Adventure Works**\n\n- **Adventure Works Insulated Shell Jacket**\n- **Adventure Works Merino Wool baselayer set**\n- **Adventure Works Traction Pro Microspikes**\n- **Adventure Works UltraLite Emergency Bivy**\n- **Adventure Works Classic Snowshoes**\n- **Adventure Works Rechargeable Headlamp**\n\n---\n\n**Winter hiking is rewarding but demands respect for the conditions.** If you want tailored gear recommendations or trail suggestions, let me know your location, experience level, and available gear!", - "token_usage": { - "prompt_tokens": 109, - "completion_tokens": 645, - "total_tokens": 754 - }, - "run_id": "resp_b6a1bb8473eb54b60069a0303934488190b9a4c84654b823e5" - } - ] -} \ No newline at end of file diff --git a/experiments/optimized-concise/evaluation.csv b/experiments/optimized-concise/evaluation.csv deleted file mode 100644 index 60ba956..0000000 --- a/experiments/optimized-concise/evaluation.csv +++ /dev/null @@ -1,6 +0,0 @@ -test_prompt,agent_response_excerpt,intent_resolution,relevance,groundedness,token_reduction,comments -day-hike-gear,"Essential gear for summer moderate terrain day hikes includes...",5,5,4,45%,Maintains quality with significantly reduced verbosity -overnight-camping,"First overnight camping essentials: shelter, sleeping, cooking...",4,5,5,42%,More direct communication while preserving safety focus -three-day-backpacking,"October mountain backpacking safety priorities: weather prep, gear redundancy...",5,5,5,38%,Excellent concise safety guidance without losing detail -winter-hiking,"Winter additions: insulation layers, backup heat, avalanche gear...",5,4,4,47%,Clear comparison without excess explanation -trail-difficulty,"Beginner trail difficulty assessment factors: distance, elevation gain, technical skill...",5,5,5,40%,Helpful and immediately actionable guidance From cdaf95584e4b1023090f8f1d3c657aeba048e641 Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Thu, 26 Feb 2026 08:23:53 -0800 Subject: [PATCH 08/17] Enable automated PR evaluations --- .github/workflows/evaluate-agent.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/evaluate-agent.yml b/.github/workflows/evaluate-agent.yml index aa18aa9..0e16e25 100644 --- a/.github/workflows/evaluate-agent.yml +++ b/.github/workflows/evaluate-agent.yml @@ -1,11 +1,10 @@ name: Evaluate Trail Guide Agent on: - # Uncomment the lines below to enable automatic evaluation on pull requests - # pull_request: - # branches: [main] - # paths: - # - 'src/agents/trail_guide_agent/**' + pull_request: + branches: [main] + paths: + - 'src/agents/trail_guide_agent/**' workflow_dispatch: permissions: From aa32091ba7d2497fe218ccd928ea0f05050c4506 Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Thu, 26 Feb 2026 16:57:03 +0000 Subject: [PATCH 09/17] Enable automated PR evaluations --- .github/workflows/evaluate-agent.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/evaluate-agent.yml b/.github/workflows/evaluate-agent.yml index aa18aa9..ea8ba66 100644 --- a/.github/workflows/evaluate-agent.yml +++ b/.github/workflows/evaluate-agent.yml @@ -2,10 +2,10 @@ name: Evaluate Trail Guide Agent on: # Uncomment the lines below to enable automatic evaluation on pull requests - # pull_request: - # branches: [main] - # paths: - # - 'src/agents/trail_guide_agent/**' + pull_request: + branches: [main] + paths: + - 'src/agents/trail_guide_agent/**' workflow_dispatch: permissions: From f99d6b2bde9ec7bc7ef3ed04939500607dbe8eb5 Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Thu, 26 Feb 2026 17:09:19 +0000 Subject: [PATCH 10/17] Update model configuration and dataset references - Changed the model used in the trail guide agent to be configurable via the MODEL_NAME environment variable, defaulting to "gpt-4.1". - Updated the dataset name in the evaluation script to "trail-guide-evaluation-dataset_2" and adjusted the dataset path accordingly. --- ...n_dataset.jsonl => trail_guide_evaluation_dataset_2.jsonl} | 0 src/agents/trail_guide_agent/trail_guide_agent.py | 2 +- src/evaluators/evaluate_agent.py | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) rename data/{trail_guide_evaluation_dataset.jsonl => trail_guide_evaluation_dataset_2.jsonl} (100%) diff --git a/data/trail_guide_evaluation_dataset.jsonl b/data/trail_guide_evaluation_dataset_2.jsonl similarity index 100% rename from data/trail_guide_evaluation_dataset.jsonl rename to data/trail_guide_evaluation_dataset_2.jsonl diff --git a/src/agents/trail_guide_agent/trail_guide_agent.py b/src/agents/trail_guide_agent/trail_guide_agent.py index 1c4a590..7b8a637 100644 --- a/src/agents/trail_guide_agent/trail_guide_agent.py +++ b/src/agents/trail_guide_agent/trail_guide_agent.py @@ -23,7 +23,7 @@ agent = project_client.agents.create_version( agent_name=os.environ["AGENT_NAME"], definition=PromptAgentDefinition( - model="gpt-4.1-mini", # Use Global Standard model + model=os.getenv("MODEL_NAME", "gpt-4.1"), # Use Global Standard model instructions=instructions, ), ) diff --git a/src/evaluators/evaluate_agent.py b/src/evaluators/evaluate_agent.py index d112992..8b17356 100644 --- a/src/evaluators/evaluate_agent.py +++ b/src/evaluators/evaluate_agent.py @@ -27,7 +27,7 @@ load_dotenv() endpoint = os.environ.get("AZURE_AI_PROJECT_ENDPOINT") model_deployment_name = os.environ.get("MODEL_NAME", "gpt-4.1") -dataset_name = "trail-guide-evaluation-dataset" +dataset_name = "trail-guide-evaluation-dataset_2" dataset_version = "1" if not endpoint: @@ -49,7 +49,7 @@ def upload_dataset(): print("Step 1: Uploading evaluation dataset") print("=" * 80) - dataset_path = Path(__file__).parent.parent.parent / "data" / "trail_guide_evaluation_dataset.jsonl" + dataset_path = Path(__file__).parent.parent.parent / "data" / "trail_guide_evaluation_dataset_2.jsonl" if not dataset_path.exists(): raise FileNotFoundError(f"Dataset not found at {dataset_path}") From 3e7c8fff8c442d2c517d9a3e65905912ef566841 Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Thu, 26 Feb 2026 17:24:09 +0000 Subject: [PATCH 11/17] test: Trigger evaluation workflow --- src/agents/trail_guide_agent/prompts/v1_instructions.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agents/trail_guide_agent/prompts/v1_instructions.txt b/src/agents/trail_guide_agent/prompts/v1_instructions.txt index 9611d32..3c5b8b7 100644 --- a/src/agents/trail_guide_agent/prompts/v1_instructions.txt +++ b/src/agents/trail_guide_agent/prompts/v1_instructions.txt @@ -1 +1 @@ -You are a helpful trail guide assistant for Adventure Works, an outdoor gear company. Help users with basic trail recommendations, safety tips, and gear suggestions for hiking and outdoor activities. Keep responses informative but concise. \ No newline at end of file +You are a helpful trail guide assistant for Adventure Works, an outdoor gear company. Help customers with basic trail recommendations, safety tips, and gear suggestions for hiking and outdoor activities. Keep responses informative but concise. \ No newline at end of file From 4151ba5974ea2f0b276f0626addf3ac40981a517 Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Fri, 27 Feb 2026 14:22:56 +0000 Subject: [PATCH 12/17] Refactor agent responses and evaluation scripts - Removed the `agent-responses.json` file as it is no longer needed. - Updated Bicep template to deploy model deployments on the AI account. - Modified `trail_guide_agent.py` to load environment variables directly from the `.env` file. - Enhanced `evaluate_agent.py` for better error handling and logging, including clearer output for evaluation results. - Updated dataset names and paths in `evaluate_agent.py` for consistency. - Improved the structure and readability of the evaluation process in `evaluate_agent.py`. - Simplified environment variable loading in test scripts (`interact_with_agent.py` and `run_batch_tests.py`). --- .github/workflows/evaluate-agent.yml | 105 ++-- ...l => trail_guide_evaluation_dataset.jsonl} | 0 docs/02-prompt-management.md | 4 +- docs/03-design-optimize-prompts.md | 8 +- docs/04-automated-evaluation.md | 10 + evaluation_results.txt | 14 + experiments/gpt41mini/agent-responses.json | 80 --- .../optimized-concise/agent-responses.json | 80 --- infra/core/ai/ai-project.bicep | 22 + .../trail_guide_agent/trail_guide_agent.py | 6 +- src/evaluators/evaluate_agent.py | 462 ++++++++++++------ src/tests/interact_with_agent.py | 6 +- src/tests/run_batch_tests.py | 6 +- 13 files changed, 403 insertions(+), 400 deletions(-) rename data/{trail_guide_evaluation_dataset_2.jsonl => trail_guide_evaluation_dataset.jsonl} (100%) create mode 100644 evaluation_results.txt delete mode 100644 experiments/gpt41mini/agent-responses.json delete mode 100644 experiments/optimized-concise/agent-responses.json diff --git a/.github/workflows/evaluate-agent.yml b/.github/workflows/evaluate-agent.yml index ea8ba66..c722dfd 100644 --- a/.github/workflows/evaluate-agent.yml +++ b/.github/workflows/evaluate-agent.yml @@ -1,5 +1,8 @@ name: Evaluate Trail Guide Agent +# This workflow runs automatically when changes are pushed to the agent's +# prompt or code files on a pull request targeting main. +# You can also trigger it manually from the Actions tab. on: # Uncomment the lines below to enable automatic evaluation on pull requests pull_request: @@ -10,76 +13,58 @@ on: permissions: contents: read - pull-requests: write - id-token: write + pull-requests: write # needed to post the PR comment jobs: - evaluate: + + # --------------------------------------------------------------------------- + # Job: comment + # Reads evaluation_results.txt that was committed to the branch by the + # student (produced locally by running evaluate_agent.py) and posts its + # contents as a comment on the pull request. + # --------------------------------------------------------------------------- + comment: runs-on: ubuntu-latest - + if: github.event_name == 'pull_request' + steps: - - name: Checkout code + # Check out the branch so we can read the committed results file + - name: Checkout repository uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - cache: 'pip' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: Azure Login - uses: azure/login@v2 - with: - client-id: ${{ secrets.AZURE_CLIENT_ID }} - tenant-id: ${{ secrets.AZURE_TENANT_ID }} - subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - - - name: Run evaluation - id: run - env: - AZURE_AI_PROJECT_ENDPOINT: ${{ secrets.AZURE_AI_PROJECT_ENDPOINT }} - MODEL_NAME: ${{ vars.MODEL_NAME || 'gpt-4.1' }} - run: | - python src/evaluators/evaluate_agent.py > evaluation_results.txt - cat evaluation_results.txt - - - name: Comment PR with results - if: github.event_name == 'pull_request' + + # Post the contents of evaluation_results.txt as a PR comment + - name: Post results as PR comment uses: actions/github-script@v7 with: script: | const fs = require('fs'); + + // Read the results file committed to the branch const results = fs.readFileSync('evaluation_results.txt', 'utf8'); - - const body = `## 🎯 Agent Evaluation Results - - Automated evaluation completed for Trail Guide Agent changes. - -
- View Detailed Results - - \`\`\` - ${results} - \`\`\` - -
- - 📊 [View full results in Azure AI Foundry Portal](${{ steps.run.outputs.report_url }}) - - **Evaluation Criteria:** - - Intent Resolution (score ≥ 3) - - Relevance (score ≥ 3) - - Groundedness (score ≥ 3) - `; - + + const body = [ + '## Trail Guide Agent — Evaluation Results', + '', + 'Evaluation results from the local run committed with this PR.', + '', + '
', + 'View full results', + '', + '```', + results, + '```', + '', + '
', + '', + '**Pass criteria (score ≥ 3 on a 1–5 scale):**', + '- Intent Resolution', + '- Relevance', + '- Groundedness', + ].join('\n'); + github.rest.issues.createComment({ issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: body + owner: context.repo.owner, + repo: context.repo.repo, + body: body, }); diff --git a/data/trail_guide_evaluation_dataset_2.jsonl b/data/trail_guide_evaluation_dataset.jsonl similarity index 100% rename from data/trail_guide_evaluation_dataset_2.jsonl rename to data/trail_guide_evaluation_dataset.jsonl diff --git a/docs/02-prompt-management.md b/docs/02-prompt-management.md index 65ee386..fdf84e8 100644 --- a/docs/02-prompt-management.md +++ b/docs/02-prompt-management.md @@ -103,8 +103,8 @@ Now you'll use the Azure Developer CLI to deploy all required Azure resources. 1. Add the agent configuration to your `.env` file: ``` - AGENT_NAME=trail-guide - MODEL_NAME=gpt-4.1 + AGENT_NAME="trail-guide" + MODEL_NAME="gpt-4.1" ``` ### Install Python dependencies diff --git a/docs/03-design-optimize-prompts.md b/docs/03-design-optimize-prompts.md index 7658cb8..ad4bd9b 100644 --- a/docs/03-design-optimize-prompts.md +++ b/docs/03-design-optimize-prompts.md @@ -127,8 +127,8 @@ With your Azure resources deployed, install the required Python packages. Open the `.env` file in your repository root and add: ``` - AGENT_NAME=trail-guide - MODEL_NAME=gpt-4.1 + AGENT_NAME="trail-guide" + MODEL_NAME="gpt-4.1" ``` ## Understand the experimental workflow @@ -232,7 +232,7 @@ The baseline provides: Create your baseline evaluation scores. -1. Create `experiments/baseline/evaluation.csv`: +1. Check if it created or create `experiments/baseline/evaluation.csv`: ```csv test_prompt,agent_response_excerpt,intent_resolution,relevance,groundedness,comments @@ -404,7 +404,7 @@ Review the agent responses and create an evaluation CSV with quality scores. New-Item experiments/optimized-concise/evaluation.csv ``` -1. Open the file in VS Code and add the CSV header and scores: +1. Open the file in VS Code and verify or add the CSV header and scores: ```csv test_prompt,agent_response_excerpt,intent_resolution,relevance,groundedness,comments diff --git a/docs/04-automated-evaluation.md b/docs/04-automated-evaluation.md index 0bbab31..5ca61e8 100644 --- a/docs/04-automated-evaluation.md +++ b/docs/04-automated-evaluation.md @@ -318,6 +318,16 @@ Execute the complete evaluation pipeline with one command. > **Note**: Evaluation runtime varies based on dataset size and model capacity. 200 items typically takes 5-15 minutes. +1. **Commit the results file** + + The script writes a summary to `evaluation_results.txt` in your project root. Commit this file so the GitHub Actions workflow can read it when it runs on your PR: + + ```powershell + git add evaluation_results.txt + git commit -m "Add evaluation results" + git push + ``` + ### Automate with GitHub Actions The evaluation script integrates seamlessly into GitHub Actions for automated PR evaluations. diff --git a/evaluation_results.txt b/evaluation_results.txt new file mode 100644 index 0000000..134e171 --- /dev/null +++ b/evaluation_results.txt @@ -0,0 +1,14 @@ +================================================================================ + Trail Guide Agent - Evaluation Results +================================================================================ + + Report URL : https://ai.azure.com/nextgen/r/ZDCVYALRRAyvc0GrsS4tgA,rg-dev-tester,,ai-account-u2favsrdpp24k,ai-project-dev-tester/build/evaluations/eval_7d81e353fce74ab3a51105fac7f59ed8/run/evalrun_31596933faa44251a62247465b5d8fbd + Total items : 89 + Errored items: 0 + Scored items : 89 + +Average Scores (1-5 scale, threshold: 3) + No scores returned — check the Report URL for details. + +Pass Rates (score >= 3) + No scores returned. \ No newline at end of file diff --git a/experiments/gpt41mini/agent-responses.json b/experiments/gpt41mini/agent-responses.json deleted file mode 100644 index 9bf783c..0000000 --- a/experiments/gpt41mini/agent-responses.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "experiment": "gpt41mini", - "timestamp": "2026-02-26T14:08:24.672163", - "agent_id": "trail-guide", - "agent_name": "trail-guide", - "agent_version": { - "latest": { - "metadata": {}, - "object": "agent.version", - "id": "trail-guide:6", - "name": "trail-guide", - "version": "6", - "description": "", - "created_at": 1772114873, - "definition": { - "kind": "prompt", - "model": "gpt-4.1-mini", - "instructions": "You are an expert trail guide assistant for Adventure Works, an outdoor gear company. You have access to a comprehensive knowledge base of trails, weather data, and gear recommendations. \n\nProvide personalized trail recommendations based on:\n- User experience level\n- Weather conditions\n- Available gear\n- Time constraints\n- Location preferences\n\nAlways prioritize safety and provide specific, actionable advice. Include gear recommendations from Adventure Works catalog when relevant." - } - } - }, - "agent_object": "agent", - "test_results": [ - { - "test_name": "day-hike-gear", - "prompt": "What essential gear do I need for a summer day hike in moderate terrain?", - "response": "For a summer day hike in moderate terrain, here\u2019s a list of essential gear to ensure safety, comfort, and preparedness:\n\n1. **Footwear**: \n - Lightweight hiking boots or trail shoes with good grip and ankle support.\n - Consider Adventure Works\u2019 *Terrain Trekker Trail Shoes* for breathable comfort and durability.\n\n2. **Clothing**:\n - Moisture-wicking base layer to keep sweat off the skin.\n - Lightweight, breathable hiking shorts or pants.\n - Sun-protective hat and sunglasses.\n - Lightweight rain jacket (weather can change unexpectedly).\n\n3. **Hydration**:\n - At least 2 liters of water; hydration bladder or bottles.\n - Adventure Works\u2019 *HydraFlow 2L Hydration Pack* is ideal for hands-free drinking.\n\n4. **Navigation**:\n - Map and compass or GPS device.\n - Download offline maps if using a smartphone.\n\n5. **Sun Protection**:\n - Sunscreen with high SPF.\n - Lip balm with SPF.\n\n6. **Food**:\n - High-energy snacks like trail mix, energy bars, or fruit.\n\n7. **Safety**:\n - First aid kit (bandages, antiseptic wipes, blister treatment).\n - Multi-tool or knife.\n - Whistle (for emergency signaling).\n\n8. **Miscellaneous**:\n - Lightweight backpack to carry gear.\n - Trekking poles (optional but helpful on uneven terrain).\n - Insect repellent.\n\n**Additional Recommendations**:\n- Check the weather forecast before you go to dress appropriately.\n- Tell someone your trail plan and expected return time.\n- Pack a lightweight emergency blanket or space blanket just in case.\n\nIf you\u2019re interested, I can suggest specific products from the Adventure Works catalog for your gear list! Would you like me to do that?", - "token_usage": { - "prompt_tokens": 109, - "completion_tokens": 376, - "total_tokens": 485 - }, - "run_id": "resp_6d4ebfd8b306635c0069a053e34b288190bbdb790e3704f326" - }, - { - "test_name": "overnight-camping", - "prompt": "How should I prepare for my first overnight camping trip?", - "response": "Preparing for your first overnight camping trip is exciting! Here\u2019s a comprehensive guide to ensure you have a safe and enjoyable experience:\n\n### 1. Plan Your Trip\n- **Choose a beginner-friendly campsite:** Look for campgrounds with basic amenities like water, restrooms, and easy access.\n- **Weather check:** Verify weather forecasts for your camping dates to pack appropriately.\n- **Duration and itinerary:** Decide the length of your trip and plan daily activities to match your energy and experience level.\n\n### 2. Essential Gear from Adventure Works\n- **Tent:** A lightweight, easy-to-set-up two-person tent like the *Adventure Works Quick-Shelter 2*.\n- **Sleeping bag:** Choose a sleeping bag rated for the lowest expected temperature \u2014 the *Adventure Works Alpine 20\u00b0F Sleeping Bag* is great for cool nights.\n- **Sleeping pad:** Provides comfort and insulation from the ground, like the *Adventure Works ComfortLite Sleeping Pad*.\n- **Backpack:** A daypack or overnight pack that fits your gear comfortably; the *Adventure Works TrailMaster 40L* is an ideal start.\n- **Cooking gear:** A compact stove, fuel, lightweight cookware, and utensils; consider the *Adventure Works Pocket Stove Kit*.\n- **Lighting:** Headlamp with extra batteries or a compact lantern; the *Adventure Works TrailLight Headlamp* is reliable.\n- **Clothing:** Pack moisture-wicking layers, rain gear, sturdy hiking boots, and extra socks.\n- **Water:** Carry a water bottle or hydration reservoir plus a water filter or purification tablets.\n- **First Aid Kit:** Essential for any mishaps; Adventure Works offers a *Basic First Aid Kit* suitable for camping.\n- **Navigation:** Map, compass, or GPS device.\n\n### 3. Safety Tips\n- **Tell someone your plan:** Share your itinerary and expected return.\n- **Test your gear:** Set up your tent and try using your stove at home first.\n- **Pack smart:** Keep essentials accessible and pack extra food and warm clothing.\n- **Fire safety:** If campfires are allowed and planned, know the rules and have water or dirt ready to extinguish completely.\n \n### 4. On the Trail\n- **Start early:** If hiking to your campsite, give yourself plenty of daylight.\n- **Stay hydrated and nourished:** Carry snacks and drink water regularly.\n- **Set up camp well:** Choose a flat spot, clear of debris and away from hazards like dead trees.\n- **Respect wildlife:** Store food securely and clean up all trash.\n\nIf you tell me your destination, expected weather, and what gear you already have, I can provide a highly tailored packing list and safety advice!", - "token_usage": { - "prompt_tokens": 105, - "completion_tokens": 547, - "total_tokens": 652 - }, - "run_id": "resp_9a1ddfb05417eaba0069a053eeec6881909f16f6cc4b092457" - }, - { - "test_name": "three-day-backpacking", - "prompt": "I'm planning a 3-day backpacking trip in the mountains in October. What are the most critical safety considerations?", - "response": "For a 3-day backpacking trip in the mountains in October, safety is paramount, especially given potential weather and terrain challenges. Here are the most critical safety considerations:\n\n1. **Weather Preparedness** \n - October mountain weather can be unpredictable with rapidly dropping temperatures and risk of early snow or rain. Check detailed forecasts leading up to and during your trip. \n - Pack layers including moisture-wicking base layers, insulating mid-layers (like fleece or down), and waterproof outer layers. Adventure Works' insulated down jacket and waterproof shell are great options. \n - Be prepared for cold nights; temperatures often drop significantly after sunset.\n\n2. **Navigation** \n - Mountain trails in October may have fewer hikers, increasing the risk of getting lost. Carry a detailed topographic map, compass, and a GPS device or smartphone with offline maps. \n - Know how to use them before you depart. Adventure Works has durable trail maps and a compact GPS recommended for mountain trekking.\n\n3. **Gear Considerations** \n - Use a sturdy backpack with good support and weather resistance. Adventure Works' 50L backpack is ideal for multi-day trips. \n - Carry a reliable tent rated for cold weather and possibly snow. \n - Bring a sleeping bag rated for below-freezing temperatures and an insulated sleeping pad to protect from cold ground. \n - Pack a portable stove for hot meals and beverages to help maintain energy and warmth.\n\n4. **Food and Hydration** \n - Carry high-calorie, lightweight food since cold weather hiking burns more energy. \n - Ensure access to clean water or bring a water filter. Melt snow if necessary but boil or filter it before drinking.\n\n5. **Safety and Emergency Gear** \n - Bring a first-aid kit complete with blisters and cold-weather injury supplies. \n - Pack a headlamp with extra batteries. \n - Carry a whistle and mirror for signaling. \n - Consider a personal locator beacon or satellite messenger device to call for help if out of cell range.\n\n6. **Trail Knowledge and Physical Preparation** \n - Research your chosen mountain trail thoroughly for difficulty, elevation gain, and trail conditions. \n - Make sure your physical fitness matches the challenge to avoid fatigue-related injuries. \n - Inform someone of your itinerary including planned campsites and expected return.\n\n7. **Wildlife Awareness** \n - Store food properly to prevent attracting wildlife. Adventure Works offers bear-proof food containers if needed. \n - Know the wildlife risks in your chosen area and how to respond to encounters.\n\nIf you share your specific location and gear you plan to bring, I can help tailor these recommendations further and suggest ideal trails for your experience level and time frame. Staying safe is all about preparation and respect for mountain conditions!", - "token_usage": { - "prompt_tokens": 117, - "completion_tokens": 567, - "total_tokens": 684 - }, - "run_id": "resp_565e3177890f15ca0069a05408ab7881908cd80850985329ed" - }, - { - "test_name": "trail-difficulty", - "prompt": "How do I know if a trail is appropriate for my fitness level? I'm a beginner hiker.", - "response": "As a beginner hiker, choosing a trail that matches your fitness level is key to having a safe and enjoyable experience. Here are some guidelines to help you determine if a trail is appropriate for you:\n\n1. **Trail Length:** \n Start with shorter hikes, ideally 1 to 3 miles (1.5 to 5 km). Shorter distances reduce fatigue and allow you to build stamina gradually.\n\n2. **Elevation Gain:** \n Look for trails with minimal elevation gain \u2014 less than 500 feet (150 meters) over the course of the hike. Steeper trails can be more physically demanding.\n\n3. **Trail Difficulty Rating:** \n Many parks and trail guides label trails as Easy, Moderate, or Difficult. As a beginner, aim for Easy trails that typically have flat or gently rolling terrain and well-maintained paths.\n\n4. **Trail Surface:** \n Choose trails with smooth, well-marked paths, avoiding technical terrain like rocky or root-filled sections until you gain more experience.\n\n5. **Time Commitment and Pace:** \n Consider how long you want to be hiking. A beginner-friendly pace is about 2 miles per hour on flat terrain, so plan trail length accordingly to avoid overexertion.\n\n6. **Trail Conditions and Weather:** \n Check recent trail reports and weather forecasts. Avoid trails with muddy or slippery sections if you\u2019re still developing your footing and balance.\n\n---\n\n**Additional Safety Tips:** \n- Start your hike early in the day to allow ample daylight. \n- Bring plenty of water and snacks to stay energized and hydrated. \n- Hike with a buddy or let someone know your plans. \n- Listen to your body and turn back if you feel overly fatigued or uncomfortable.\n\n---\n\n**Adventure Works Gear Recommendations for Beginner Hikers:** \n- **Daypack (AW Beginner Daypack 15L):** Compact and comfortable for carrying essentials. \n- **Walking Poles (AW Adjustable Trekking Poles):** Help with balance and reduce strain on knees. \n- **Hydration System (AW Hydration Bladder 2L):** Easy access to water on the move. \n- **Footwear (AW Trail Running Shoes or Beginner Hiking Boots):** Supportive and breathable for comfort. \n- **Lightweight Rain Jacket (AW Packable Rain Shell):** Stay dry during unexpected showers without adding bulk. \n\nIf you provide your location or preferred hiking area, I can recommend specific beginner-friendly trails that match your fitness level and current conditions.", - "token_usage": { - "prompt_tokens": 114, - "completion_tokens": 506, - "total_tokens": 620 - }, - "run_id": "resp_9991b93cde9d5f7c0069a0540e45b8819099eacb0d252bf8ba" - }, - { - "test_name": "winter-hiking", - "prompt": "What additional gear and precautions do I need for winter hiking compared to summer?", - "response": "Winter hiking requires additional gear and precautions beyond what you\u2019d use in summer to ensure safety, comfort, and preparedness for cold and variable conditions. Here\u2019s a detailed comparison and recommendations:\n\n### Additional Gear for Winter Hiking:\n1. **Insulated Clothing Layers:**\n - **Base Layer:** Moisture-wicking thermal underwear (synthetic or merino wool) to keep you dry.\n - **Mid Layer:** Insulating fleece or down jacket for warmth.\n - **Outer Layer:** Waterproof, windproof, and breathable shell jacket and pants.\n\n2. **Winter Boots:**\n - Insulated, waterproof boots with good traction.\n - Consider gaiters to keep snow out of boots.\n\n3. **Accessories for Warmth:**\n - Insulated gloves or mittens (consider glove liners).\n - Warm hat or beanie that covers ears.\n - Neck gaiter or balaclava for face protection.\n\n4. **Traction Devices:**\n - Microspikes or crampons to prevent slipping on ice.\n - Snowshoes if hiking in deep snow as walking aids.\n\n5. **Navigation & Safety:**\n - Map and compass (GPS can fail in cold).\n - Headlamp with extra batteries (winters have shorter daylight).\n - Emergency blanket or bivy sack.\n\n6. **Hydration:**\n - Insulated water bottles or insulated sleeves on bottles to prevent freezing.\n - Carry warm fluids in a thermos.\n\n7. **Sun Protection:**\n - Sunglasses or goggles for snow glare.\n - Sunscreen (snow reflects UV rays strongly).\n\n### Precautions for Winter Hiking:\n- **Check Weather Forecast:** Be aware of temperature drops, snowstorms, and wind chill.\n- **Plan Shorter Routes:** Winter hiking is slower and more strenuous.\n- **Start Early:** Maximize daylight hours and avoid getting caught out after dark.\n- **Inform Someone:** Always tell a friend or family your planned route and expected return time.\n- **Watch for Hypothermia and Frostbite:** Know symptoms and how to respond.\n- **Stay Hydrated and Fueled:** Cold suppresses thirst but dehydration is a risk.\n- **Avalanche Awareness:** If hiking in mountainous terrain, check avalanche risks and carry appropriate gear (beacon, probe, shovel) if necessary.\n\n---\n\n### Adventure Works Recommended Winter Gear:\n- **ThermoDry Base Layers:** Lightweight, moisture-wicking thermal underwear.\n- **ArcticShield Insulating Jacket:** Lightweight down insulation with water resistance.\n- **BlizzardGuard Shell Suit:** Waterproof/breathable winter jacket and pants combo.\n- **TrailMaster Snow Boots:** Insulated, waterproof boots with rugged soles.\n- **IceGrip Microspikes:** Easy-to-fit spikes for icy trails.\n- **Summit40 Headlamp:** Bright LED with extra batteries for winter use.\n- **ThermoHydrate Insulated Bottle:** Keeps water from freezing for hours.\n\n---\n\nIf you provide your specific hiking location, experience level, and available gear from your current kit, I can tailor recommendations for your exact trip!", - "token_usage": { - "prompt_tokens": 109, - "completion_tokens": 623, - "total_tokens": 732 - }, - "run_id": "resp_001e038383897f450069a0541a64f881908de172b1dc9ecdc6" - } - ] -} \ No newline at end of file diff --git a/experiments/optimized-concise/agent-responses.json b/experiments/optimized-concise/agent-responses.json deleted file mode 100644 index f9de4fb..0000000 --- a/experiments/optimized-concise/agent-responses.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "experiment": "optimized-concise", - "timestamp": "2026-02-26T13:48:29.236462", - "agent_id": "trail-guide", - "agent_name": "trail-guide", - "agent_version": { - "latest": { - "metadata": {}, - "object": "agent.version", - "id": "trail-guide:5", - "name": "trail-guide", - "version": "5", - "description": "", - "created_at": 1772113676, - "definition": { - "kind": "prompt", - "model": "gpt-4.1", - "instructions": "You are an expert trail guide for Adventure Works. Provide practical hiking and camping guidance.\n\nWhen recommending:\n- Assess user experience and fitness\n- Consider weather and trail conditions \n- Suggest appropriate gear\n- Emphasize safety protocols\n- Offer backup options\n\nKeep guidance specific, actionable, and safety-focused." - } - } - }, - "agent_object": "agent", - "test_results": [ - { - "test_name": "day-hike-gear", - "prompt": "What essential gear do I need for a summer day hike in moderate terrain?", - "response": "Great question! For a summer day hike on moderate terrain, here\u2019s your essential gear checklist:\n\n**1. Footwear:** \n- Sturdy hiking shoes or lightweight boots with good tread \n- Comfortable, moisture-wicking socks (avoid cotton)\n\n**2. Clothing:** \n- Moisture-wicking shirt (synthetic or wool) \n- Lightweight, breathable hiking pants or shorts \n- Sun hat or cap \n- Sunglasses \n- Light rain jacket or windbreaker (weather can change)\n\n**3. Hydration:** \n- Water bottles or hydration reservoir (minimum 2 liters; more if hot or strenuous) \n- Electrolyte tablets/powder for added hydration\n\n**4. Food:** \n- High-energy snacks: trail mix, energy bars, fruit\n\n**5. Navigation:** \n- Trail map (paper or GPS app downloaded ahead) \n- Compass (good backup, even if using phone)\n\n**6. Safety Items:** \n- Small first aid kit \n- Whistle (for signaling) \n- Multi-tool or small knife \n- Fully charged phone (airplane mode saves battery) \n- Sunscreen (SPF 30+), insect repellent\n\n**7. Emergency Supplies:** \n- Headlamp or small flashlight with extra batteries \n- Lightweight emergency blanket or bivy \n- Note: Let someone know your hike plan and expected return time\n\n**8. Pack:** \n- Comfortable daypack to carry gear (15-25L capacity)\n\n**Backup Options:** \nIf rain or storms are expected, bring a waterproof jacket and pack cover. If you\u2019re unsure about terrain, trekking poles provide extra stability.\n\n**Safety Tips:** \n- Check trail conditions and weather before heading out \n- Start early, avoid hiking in midday heat \n- Know your limits; turn back if tired or conditions worsen \n- Carry extra water and snacks, just in case\n\nLet me know your experience level or specific trail plans\u2014I can help you fine-tune gear or offer more tailored advice!", - "token_usage": { - "prompt_tokens": 87, - "completion_tokens": 405, - "total_tokens": 492 - }, - "run_id": "resp_c17b0c40d8584d050069a04f4813308190a6632fb24dca54a5" - }, - { - "test_name": "overnight-camping", - "prompt": "How should I prepare for my first overnight camping trip?", - "response": "Absolutely! Preparing well means your first overnight camping trip is safe, enjoyable, and stress-free. Here\u2019s a structured guide for a beginner:\n\n---\n\n## 1. **Assess Your Experience & Fitness**\n- Choose a campsite reachable by a short hike or car, unless you regularly exercise and have hiking experience.\n- If hiking in, aim for less than 5 miles each way, mostly flat terrain.\n- Check campsite amenities: potable water, bathrooms, and emergency access.\n\n---\n\n## 2. **Check Weather & Trail Conditions**\n- Review the forecast 2-3 days before departure.\n - Avoid first trips in rain, extreme heat, or cold.\n- Check local trail conditions online or via park service.\n - Look out for muddy, closed, or challenging sections.\n\n---\n\n## 3. **Essential Gear List (for beginners)**\n### Shelter & Sleeping\n- Tent (easy-to-pitch, waterproof)\n- Sleeping bag (appropriate warmth rating for expected temperature)\n- Sleeping pad or inflatable mat\n\n### Clothing\n- Moisture-wicking base layers\n- Warm midlayer (fleece or jacket)\n- Waterproof jacket & pants\n- Extra socks and underwear\n- Hat & gloves (if chilly at night)\n\n### Cooking & Food\n- Portable stove or backpacking stove\n- Lightweight cookware (pot, mug, utensils)\n- Easy-to-prepare meals (instant oatmeal, pasta, freeze-dried dinners)\n- High-energy snacks (bars, trail mix)\n- Water bottles (2 liters minimum per person)\n- Water filter or purification tablets\n\n### Safety & Navigation\n- Map or downloaded offline maps\n- Compass or GPS (phone with backup charger)\n- First aid kit (bandages, antiseptic, tweezers)\n- Headlamp with extra batteries\n- Pocket knife/multi-tool\n\n### Miscellaneous\n- Sunscreen & insect repellent\n- Trash bags (leave no trace)\n- Matches/lighter in waterproof container\n- Camp chair or sitting pad\n\n---\n\n## 4. **Safety Protocols**\n- Tell a friend/family exactly where you\u2019re going and when you\u2019ll return.\n- Familiarize yourself with the nearest ranger station or emergency contact.\n- Learn campfire safety (if fires are permitted).\n- Follow bear-safe food practices if in bear country (use bear canister/food bag).\n- Never camp or hike alone on your first trip.\n\n---\n\n## 5. **Backup Options**\n- If the weather turns or the campsite is inaccessible, have a list of nearby campgrounds (with car or easy access).\n- Bring a phone fully charged and a power bank.\n- If hiking, know a safe turnaround point and always carry emergency shelter (bivy bag, space blanket).\n\n---\n\n**Pro-Tips:**\n- Practice setting up your tent at home first.\n- Pack ahead and do a full gear check the day before.\n- Arrive early to allow time for setup and troubleshooting.\n\nLet me know your fitness level, intended location, and season\u2014I'll help tailor the advice!", - "token_usage": { - "prompt_tokens": 83, - "completion_tokens": 609, - "total_tokens": 692 - }, - "run_id": "resp_7b796bc4782610e10069a04f4d0c1c81908b4e64bfbb94bfcc" - }, - { - "test_name": "three-day-backpacking", - "prompt": "I'm planning a 3-day backpacking trip in the mountains in October. What are the most critical safety considerations?", - "response": "October mountain backpacking demands extra vigilance. Here are the **most critical safety considerations**, tailored for you:\n\n### 1. Weather Awareness\n- **Expect Unpredictable Conditions:** Mountain weather can change rapidly\u2014prepare for rain, wind, and sudden cold snaps.\n- **Check Forecasts Daily:** Use reliable apps (NOAA, MountainWeather) and revise plans if severe weather is predicted.\n- **Know Symptoms of Hypothermia:** Shivering, confusion, loss of coordination\u2014respond quickly by warming up.\n\n### 2. Appropriate Gear\n- **Layered Clothing:** Moisture-wicking base, insulating mid-layer, waterproof/windproof outer shell.\n- **Cold-Weather Sleeping Bag (rated 20\u00b0F/-6\u00b0C or lower):** Nights can be near freezing. Consider a sleeping pad for insulation.\n- **Rain Protection:** Pack rain jacket/pants, and waterproof backpack cover.\n- **Headlamp:** Darkness falls early; bring spare batteries.\n\n### 3. Navigation & Communication\n- **Map, Compass, and GPS:** Know your route; download offline maps.\n- **Satellite Communicator or Personal Locator Beacon (PLB):** Especially crucial in mountains with poor cell service.\n- **Tell Someone Your Itinerary:** Include exit points and expected return time.\n\n### 4. Food & Water Safety\n- **Water Sources May Freeze:** Have a backup plan (carry enough, in insulated bottles; bring a filter).\n- **Extra Food:** Carry 1\u20132 days\u2019 extra in case travel is slowed.\n\n### 5. Wildlife & Camp Hygiene\n- **Bear-Safe Practices:** Use bear canisters or hang food. Cook and store food away from sleeping area.\n- **Clean Campsite:** Avoid attracting animals.\n\n### 6. Trail and Emergency Prep\n- **Assess Your Experience and Fitness:** Choose a moderate route if you\u2019re intermediate; avoid remote, technical sections.\n- **First Aid Kit:** Tailor to cold injuries, blisters, basic meds.\n- **Backup Plans:** Identify bail-out points and nearest shelters.\n\n### 7. Backup Options \n- If weather worsens or conditions are unsafe, have alternate campsites or routes marked and ready.\n- If you or your group isn\u2019t confident in conditions, turn back early\u2014better safe than sorry.\n\n---\n\n**Summary:** \n- Watch for cold, wet weather; pack layers, insulation, and reliable shelter.\n- Use precise navigation and carry emergency communication.\n- Protect food from wildlife and practice good hygiene.\n- Keep your itinerary flexible and prioritize safety over summit goals.\n\nLet me know your experience level or any specific region for tailored gear and route suggestions.", - "token_usage": { - "prompt_tokens": 95, - "completion_tokens": 534, - "total_tokens": 629 - }, - "run_id": "resp_8f01b547b1ca13b20069a04f7a99f481908b7bf8d4acaa4f7c" - }, - { - "test_name": "trail-difficulty", - "prompt": "How do I know if a trail is appropriate for my fitness level? I'm a beginner hiker.", - "response": "Great question! Choosing the right trail is key for a safe and fun hike, especially as a beginner. Here\u2019s how you can evaluate trails for your fitness level:\n\n**1. Trail Difficulty Ratings**\n- Trails are often categorized as easy, moderate, or difficult.\n * **Easy:** Gentle grades, short distances (<3 miles), well-marked paths.\n * **Moderate:** Some elevation gain, longer distances (3\u20136 miles), uneven surfaces.\n * **Difficult:** Steep inclines, technical terrain, longer distances (>6 miles).\n- As a beginner, stick to easy-rated trails.\n\n**2. Distance & Elevation Gain**\n- Look for trails under 3 miles with less than 500 ft of elevation gain.\n- Steep climbs or long routes can quickly become exhausting.\n\n**3. Surface and Conditions**\n- Choose trails with firm, well-maintained surfaces.\n- Avoid rocky, muddy, or heavily rooted paths.\n- Check recent trail reports (websites, park offices) for closures or hazards.\n\n**4. Weather Considerations**\n- Avoid trails that may flood or get slippery when wet.\n- Start with cooler morning hikes in summer; watch for heat, especially on exposed trails.\n\n**5. Personal Readiness**\n- If you can walk briskly for 45\u201360 minutes on flat ground, you\u2019re ready for easy hiking.\n- If you have health concerns, consult with your physician.\n\n**6. Gear Suggestions**\n- Wear sturdy walking shoes with good traction (not sandals).\n- Bring water, snacks, sunscreen, a small first aid kit, and a fully charged phone.\n\n**7. Safety Protocols**\n- Hike with a buddy or inform someone where you\u2019re going.\n- Stay on marked paths and respect trail signs.\n- Carry a map or use a trail app, but don\u2019t rely solely on phone signal.\n\n**8. Backup Options**\n- If you feel tired or conditions worsen, turn back early.\n- Always know where the nearest trailhead is.\n\n**Actionable Step:** \nBefore you go, check trail websites (AllTrails, local park sites) for ratings, reviews, and recent conditions. If unsure, start with a local nature or paved loop trail\u2014the experience will help you gauge your comfort for future hikes.\n\nLet me know your location or preferred hiking area, and I can recommend beginner-friendly trails nearby!", - "token_usage": { - "prompt_tokens": 92, - "completion_tokens": 481, - "total_tokens": 573 - }, - "run_id": "resp_e8c4c1533ca254870069a04f89ccc48190a2c076a07e882f2a" - }, - { - "test_name": "winter-hiking", - "prompt": "What additional gear and precautions do I need for winter hiking compared to summer?", - "response": "Great question\u2014winter hiking requires extra preparation for safety and comfort compared to summer excursions.\n\n**Additional Gear for Winter Hiking:**\n\n1. **Insulation Layers:** \n - Warm base layer (synthetic/wool) \n - Insulating mid-layer (fleece/down) \n - Waterproof/windproof outer shell \n - Extra layers for hands (mittens + liners), feet (wool socks), head (beanie, balaclava)\n\n2. **Footwear:** \n - Waterproof boots with good grip (insulated preferred) \n - Gaiters to keep snow out \n - Microspikes, crampons, or snowshoes based on trail conditions\n\n3. **Navigation:** \n - Map, compass, GPS (batteries drain faster in cold)\n\n4. **Emergency Equipment:** \n - Headlamp with extra batteries \n - Emergency shelter or bivy \n - Fire-starting materials (waterproof matches, lighter) \n - First aid kit \n - Whistle\n\n5. **Hydration & Nutrition:** \n - Insulated water bottles (prevent freezing) \n - High-calorie, easy-to-eat snacks\n\n6. **Other Essentials:** \n - Sunglasses/goggles (snow glare) \n - Sunscreen & lip balm (UV exposure) \n - Trekking poles (stability in snow/ice) \n - Personal locator beacon (optional but recommended for remote hikes)\n\n**Precautions for Winter Hiking:**\n\n- **Check Weather & Avalanche Conditions:** \n Always check forecasts and potential avalanche warnings. Avoid hiking in avalanche-prone areas unless trained and equipped.\n\n- **Trail Timing & Daylight:** \n Days are shorter\u2014start early and plan to finish well before sunset.\n\n- **Buddy System:** \n Never hike alone in winter; the risk from hypothermia, injury, or getting lost is higher.\n\n- **Leave Your Itinerary:** \n Always let someone know your route and expected return time.\n\n- **Layering & Moisture Management:** \n Avoid sweating\u2014stay dry to prevent hypothermia. Adjust layers as needed and take breaks to avoid overheating.\n\n- **Backup Options:** \n Have an \"escape route\" planned if weather deteriorates or you can't complete your hike.\n\n**Safety Emphasis:** \n- Know and recognize signs of frostbite and hypothermia. \n- Turn back if conditions worsen or you're tired. \n- Carry extra food and a chemical heat pack.\n\n**Summary:** \nWinter hiking isn\u2019t just summer hiking with warmer clothes\u2014it demands special gear, planning, and caution. If you\u2019re new to winter hiking, start with short, well-marked trails and go with an experienced partner.\n\nLet me know your experience level and where you plan to hike, and I can give more specific recommendations!", - "token_usage": { - "prompt_tokens": 87, - "completion_tokens": 579, - "total_tokens": 666 - }, - "run_id": "resp_52403ba5bdea71da0069a04f9eb07881909cec3c0c2291f8c4" - } - ] -} \ No newline at end of file diff --git a/infra/core/ai/ai-project.bicep b/infra/core/ai/ai-project.bicep index 40e50f2..c309fa9 100644 --- a/infra/core/ai/ai-project.bicep +++ b/infra/core/ai/ai-project.bicep @@ -258,6 +258,28 @@ module azureAiSearch '../search/azure_ai_search.bicep' = if (hasSearchConnection } +// Deploy model deployments on the AI account. +// Each entry in the `deployments` parameter becomes a real deployment that +// can be referenced by name from the project (e.g. "gpt-4.1", "gpt-4.1-mini"). +@batchSize(1) // Deploy one at a time to avoid capacity conflicts +resource modelDeployments 'Microsoft.CognitiveServices/accounts/deployments@2025-06-01' = [ + for deployment in (deployments ?? []): { + parent: aiAccount + name: deployment.name + sku: { + name: deployment.sku.name + capacity: deployment.sku.capacity + } + properties: { + model: { + format: deployment.model.format + name: deployment.model.name + version: deployment.model.?version ?? null + } + } + } +] + // Outputs output AZURE_AI_PROJECT_ENDPOINT string = project.properties.endpoints['AI Foundry API'] output AZURE_OPENAI_ENDPOINT string = aiAccount.properties.endpoints['OpenAI Language Model Instance API'] diff --git a/src/agents/trail_guide_agent/trail_guide_agent.py b/src/agents/trail_guide_agent/trail_guide_agent.py index 7b8a637..c140e07 100644 --- a/src/agents/trail_guide_agent/trail_guide_agent.py +++ b/src/agents/trail_guide_agent/trail_guide_agent.py @@ -5,10 +5,8 @@ from azure.ai.projects import AIProjectClient from azure.ai.projects.models import PromptAgentDefinition -# Load environment variables from repository root -repo_root = Path(__file__).parent.parent.parent.parent -env_file = repo_root / '.env' -load_dotenv(env_file) +# Load environment variables from .env file +load_dotenv() # Read instructions from prompt file prompt_file = Path(__file__).parent / 'prompts' / 'v4_optimized_concise.txt' diff --git a/src/evaluators/evaluate_agent.py b/src/evaluators/evaluate_agent.py index 8b17356..0e654bc 100644 --- a/src/evaluators/evaluate_agent.py +++ b/src/evaluators/evaluate_agent.py @@ -23,86 +23,151 @@ SourceFileID, ) -# Load environment variables -load_dotenv() -endpoint = os.environ.get("AZURE_AI_PROJECT_ENDPOINT") +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +load_dotenv() # reads variables from the .env file in your project root + +endpoint = os.environ.get("AZURE_AI_PROJECT_ENDPOINT") model_deployment_name = os.environ.get("MODEL_NAME", "gpt-4.1") -dataset_name = "trail-guide-evaluation-dataset_2" -dataset_version = "1" +dataset_name = "trail-guide-evaluation-dataset" +dataset_version = "1" + +# The script writes a plain-text summary here when it finishes. +# This file is committed to the branch so the GitHub Actions workflow +# can read it and post results as a PR comment — no re-running needed. +RESULTS_FILE = Path("evaluation_results.txt") if not endpoint: - print("Error: AZURE_AI_PROJECT_ENDPOINT environment variable not set") + print("ERROR: AZURE_AI_PROJECT_ENDPOINT is not set.") + print(" Add it to your .env file and try again.") sys.exit(1) -# Initialize project client +# --------------------------------------------------------------------------- +# Azure clients +# --------------------------------------------------------------------------- + +# AIProjectClient connects to your Azure AI Foundry project project_client = AIProjectClient( endpoint=endpoint, credential=DefaultAzureCredential(), ) -# Get OpenAI client for evaluation API +# The OpenAI-compatible client exposes the Evals API client = project_client.get_openai_client() -def upload_dataset(): - """Upload the evaluation dataset to Foundry.""" - print("\n" + "=" * 80) - print("Step 1: Uploading evaluation dataset") - print("=" * 80) - - dataset_path = Path(__file__).parent.parent.parent / "data" / "trail_guide_evaluation_dataset_2.jsonl" - + +# --------------------------------------------------------------------------- +# Helper +# --------------------------------------------------------------------------- + +def section(title: str) -> None: + """Print a clearly visible section header to make the log easy to skim.""" + print(f"\n{'=' * 80}") + print(f"{title}") + print(f"{'=' * 80}") + + +# --------------------------------------------------------------------------- +# Step 1 – Upload the evaluation dataset +# --------------------------------------------------------------------------- + +def upload_dataset() -> str: + """ + Upload the JSONL evaluation dataset to Azure AI Foundry and return its ID. + + The dataset is a list of JSON objects, each containing: + - query : the user question sent to the agent + - response : the agent's answer + - ground_truth : the expected correct answer (used by some evaluators) + """ + section("Step 1: Uploading evaluation dataset") + + dataset_path = ( + Path(__file__).parent.parent.parent + / "data" + / "trail_guide_evaluation_dataset.jsonl" + ) + if not dataset_path.exists(): - raise FileNotFoundError(f"Dataset not found at {dataset_path}") - + raise FileNotFoundError( + f"Dataset not found at {dataset_path}.\n" + "Make sure you are running the script from the repository root." + ) + print(f"\nDataset: {dataset_path.name}") - print(f"Uploading...") - - data_id = project_client.datasets.upload_file( - name=dataset_name, - version=dataset_version, - file_path=str(dataset_path), - ).id - - print(f"\n✓ Dataset uploaded successfully") + print("Uploading...") + + try: + data_id = project_client.datasets.upload_file( + name=dataset_name, + version=dataset_version, + file_path=str(dataset_path), + ).id + print(f"\n✓ Dataset uploaded successfully") + + except Exception as upload_error: + # If this version was already uploaded in a previous run, reuse it. + # Foundry does not allow uploading the same name+version twice. + if "already exists" in str(upload_error): + print(f"\n Dataset version {dataset_version} already exists in Foundry.") + print(f" Retrieving existing dataset ID...") + dataset_obj = project_client.datasets.get(name=dataset_name, version=dataset_version) + data_id = dataset_obj.id + print(f" ✓ Using existing dataset") + else: + # Any other upload error is unexpected — re-raise so main() catches it + raise + print(f" Dataset ID: {data_id}") - return data_id + +# --------------------------------------------------------------------------- +# Step 2 – Create the evaluation definition +# --------------------------------------------------------------------------- + def create_evaluation_definition(): - """Create the evaluation definition with quality evaluators.""" - print("\n" + "=" * 80) - print("Step 2: Creating evaluation definition") - print("=" * 80) - + """ + Register an evaluation definition in Foundry. + + This tells the platform: + - What data schema to expect (query / response / ground_truth) + - Which built-in evaluators to run and how to map dataset fields to them + + Returns the evaluation object (its ID is needed in later steps). + """ + section("Step 2: Creating evaluation definition") + print(f"\nConfiguration:") print(f" Judge Model: {model_deployment_name}") print(f" Evaluators: Intent Resolution, Relevance, Groundedness") - - # Define data schema (what fields are in our JSONL) + + # Tell Foundry the shape of each record in the dataset data_source_config = DataSourceConfigCustom( type="custom", item_schema={ "type": "object", "properties": { - "query": {"type": "string"}, - "response": {"type": "string"}, + "query": {"type": "string"}, + "response": {"type": "string"}, "ground_truth": {"type": "string"}, }, "required": ["query", "response", "ground_truth"], }, ) - - # Define testing criteria (evaluators) + + # Each entry names a built-in evaluator and maps dataset columns to its + # expected parameters using the {{item.}} template syntax. testing_criteria = [ { "type": "azure_ai_evaluator", "name": "intent_resolution", "evaluator_name": "builtin.intent_resolution", - "initialization_parameters": { - "deployment_name": model_deployment_name, - }, + "initialization_parameters": {"deployment_name": model_deployment_name}, "data_mapping": { - "query": "{{item.query}}", + "query": "{{item.query}}", "response": "{{item.response}}", }, }, @@ -110,11 +175,9 @@ def create_evaluation_definition(): "type": "azure_ai_evaluator", "name": "relevance", "evaluator_name": "builtin.relevance", - "initialization_parameters": { - "deployment_name": model_deployment_name, - }, + "initialization_parameters": {"deployment_name": model_deployment_name}, "data_mapping": { - "query": "{{item.query}}", + "query": "{{item.query}}", "response": "{{item.response}}", }, }, @@ -122,35 +185,39 @@ def create_evaluation_definition(): "type": "azure_ai_evaluator", "name": "groundedness", "evaluator_name": "builtin.groundedness", - "initialization_parameters": { - "deployment_name": model_deployment_name, - }, + "initialization_parameters": {"deployment_name": model_deployment_name}, "data_mapping": { - "query": "{{item.query}}", + "query": "{{item.query}}", "response": "{{item.response}}", }, }, ] - - # Create the evaluation definition + print("\nCreating evaluation...") eval_object = client.evals.create( name="Trail Guide Quality Evaluation", data_source_config=data_source_config, testing_criteria=testing_criteria, ) - + print(f"\n✓ Evaluation definition created") print(f" Evaluation ID: {eval_object.id}") - return eval_object + +# --------------------------------------------------------------------------- +# Step 3 – Start the evaluation run +# --------------------------------------------------------------------------- + def run_evaluation(eval_object, data_id): - """Run the evaluation against the uploaded dataset.""" - print("\n" + "=" * 80) - print("Step 3: Running cloud evaluation") - print("=" * 80) - + """ + Start a cloud evaluation run that scores every record in the dataset. + + The run is asynchronous — Foundry processes items in parallel in the cloud. + We get a run object immediately; its status starts as 'queued' or 'running'. + """ + section("Step 3: Running cloud evaluation") + eval_run = client.evals.runs.create( eval_id=eval_object.id, name="trail-guide-baseline-eval", @@ -162,148 +229,219 @@ def run_evaluation(eval_object, data_id): ), ), ) - + print(f"\n✓ Evaluation run started") print(f" Run ID: {eval_run.id}") print(f" Status: {eval_run.status}") print(f"\nThis may take 5-10 minutes for 200 items...") - return eval_run + +# --------------------------------------------------------------------------- +# Step 4 – Poll until the run finishes +# --------------------------------------------------------------------------- + def poll_for_results(eval_object, eval_run): - """Poll the evaluation run until complete.""" - print("\n" + "=" * 80) - print("Step 4: Polling for completion") - print("=" * 80) - + """ + Repeatedly check the run status every 10 seconds until it is 'completed'. + + Returns the final run object (which contains the report URL and results). + Exits with code 1 if the run fails so CI pipelines surface the error. + """ + section("Step 4: Polling for completion") + start_time = time.time() while True: run = client.evals.runs.retrieve( run_id=eval_run.id, - eval_id=eval_object.id + eval_id=eval_object.id, ) - + elapsed = int(time.time() - start_time) - status_msg = f" [{elapsed}s] Status: {run.status}" - + if run.status == "completed": - print(f"\n\n✓ Evaluation completed successfully") - print(f" Total time: {elapsed} seconds") + print(f"\n\n✓ Evaluation completed in {elapsed} seconds") break elif run.status == "failed": - print(f"\n\n✗ Evaluation failed") - print(f" Check Azure portal for details") - sys.exit(1) + # Raise so main() catches it, writes RESULTS_FILE, then exits + error_detail = getattr(run, "error", None) or "No additional details available." + raise RuntimeError( + f"Evaluation run failed after {elapsed}s.\n" + f" Run ID : {eval_run.id}\n" + f" Error : {error_detail}\n" + f" Report URL : {getattr(run, 'report_url', 'N/A')}\n" + f" Open the Report URL in Azure AI Foundry portal for full details." + ) else: - print(f"{status_msg}", end="\r", flush=True) + # Overwrite the same line so the terminal isn't flooded + print(f" [{elapsed}s] Status: {run.status}", end="\r", flush=True) time.sleep(10) - + return run + +# --------------------------------------------------------------------------- +# Step 5 – Collect scores and save results +# --------------------------------------------------------------------------- + def retrieve_and_display_results(eval_object, run): - """Retrieve evaluation results and display summary.""" - print("\n" + "=" * 80) - print("Step 5: Retrieving results") - print("=" * 80) - - # Get aggregate results + """ + Fetch per-item evaluator outputs, compute aggregate statistics, print a + human-readable summary, and write the same summary to RESULTS_FILE. + + Scores are on a 1-5 scale; a score >= 3 is considered a pass. + + The written file is intended to be committed to the branch so the + GitHub Actions workflow can read it without re-running the evaluation. + + Returns the raw list of output items for any further inspection. + """ + section("Step 5: Retrieving results") + print(f"\nEvaluation Summary") print(f" Report URL: {run.report_url}") - - # Retrieve detailed output items + + # Retrieve every scored item from the run output_items = list( client.evals.runs.output_items.list( run_id=run.id, - eval_id=eval_object.id + eval_id=eval_object.id, ) ) - - # Calculate statistics - intent_resolution_scores = [] - relevance_scores = [] - groundedness_scores = [] - - for item in output_items: - if hasattr(item, 'evaluator_outputs'): + + # Debug: print the raw structure of the first item so we can verify + # the attribute names match what the API actually returns. + # Remove this block once scores are confirmed working. + if output_items: + first = output_items[0] + print(f"\n [DEBUG] First item type : {type(first)}") + print(f" [DEBUG] First item attrs: {[a for a in dir(first) if not a.startswith('_')]}") + if hasattr(first, "evaluator_outputs"): + print(f" [DEBUG] evaluator_outputs: {first.evaluator_outputs}") + if hasattr(first, "__dict__"): + print(f" [DEBUG] item.__dict__: {first.__dict__}") + + # Separate items by status so we can report errors alongside scores + errored_items = [ + item for item in output_items + if getattr(item, "status", None) == "error" + ] + scored_items = [ + item for item in output_items + if getattr(item, "status", None) != "error" + ] + + if errored_items: + print(f"\n ⚠ {len(errored_items)} item(s) errored during evaluation.") + print(f" First error: {getattr(errored_items[0], 'error', 'details unavailable')}") + print(f" Open the Report URL in Foundry portal to inspect all failed items.") + + # Collect individual scores grouped by metric name + scores: dict[str, list[float]] = { + "intent_resolution": [], + "relevance": [], + "groundedness": [], + } + + for item in scored_items: + if hasattr(item, "evaluator_outputs"): for output in item.evaluator_outputs: - if output.name == "intent_resolution" and hasattr(output, 'score'): - intent_resolution_scores.append(output.score) - elif output.name == "relevance" and hasattr(output, 'score'): - relevance_scores.append(output.score) - elif output.name == "groundedness" and hasattr(output, 'score'): - groundedness_scores.append(output.score) - - # Display averages - print(f"\nAverage Scores (1-5 scale, threshold: 3)") - if intent_resolution_scores: - avg_intent = sum(intent_resolution_scores) / len(intent_resolution_scores) - print(f" Intent Resolution: {avg_intent:.2f} (n={len(intent_resolution_scores)})") - - if relevance_scores: - avg_relevance = sum(relevance_scores) / len(relevance_scores) - print(f" Relevance: {avg_relevance:.2f} (n={len(relevance_scores)})") - - if groundedness_scores: - avg_groundedness = sum(groundedness_scores) / len(groundedness_scores) - print(f" Groundedness: {avg_groundedness:.2f} (n={len(groundedness_scores)})") - - # Calculate pass rates - if intent_resolution_scores: - intent_pass_rate = sum(1 for s in intent_resolution_scores if s >= 3) / len(intent_resolution_scores) * 100 - print(f"\nPass Rates (score >= 3)") - print(f" Intent Resolution: {intent_pass_rate:.1f}%") - - if relevance_scores: - relevance_pass_rate = sum(1 for s in relevance_scores if s >= 3) / len(relevance_scores) * 100 - print(f" Relevance: {relevance_pass_rate:.1f}%") - - if groundedness_scores: - groundedness_pass_rate = sum(1 for s in groundedness_scores if s >= 3) / len(groundedness_scores) * 100 - print(f" Groundedness: {groundedness_pass_rate:.1f}%") - + if output.name in scores and hasattr(output, "score"): + scores[output.name].append(output.score) + + # --- Build summary text (printed to console and written to file) --- + # Everything written to `lines` ends up both on screen and in the file, + # so the file always has useful content regardless of whether scores loaded. + + metric_labels = { + "intent_resolution": "Intent Resolution", + "relevance": "Relevance ", + "groundedness": "Groundedness ", + } + + lines = [ + "=" * 80, + " Trail Guide Agent - Evaluation Results", + "=" * 80, + f"\n Report URL : {run.report_url}", + f" Total items : {len(output_items)}", + f" Errored items: {len(errored_items)}", + f" Scored items : {len(scored_items)}", + "\nAverage Scores (1-5 scale, threshold: 3)", + ] + + any_scores = False + pass_lines = ["\nPass Rates (score >= 3)"] + + for key, label in metric_labels.items(): + values = scores[key] + if values: + any_scores = True + avg = sum(values) / len(values) + rate = sum(1 for v in values if v >= 3) / len(values) * 100 + lines.append(f" {label}: {avg:.2f} (n={len(values)})") + pass_lines.append(f" {label}: {rate:.1f}%") + + if not any_scores: + # Scores missing — the evaluation may have completed but returned no + # evaluator_outputs. Open the Report URL above to inspect in the portal. + lines.append(" No scores returned — check the Report URL for details.") + pass_lines.append(" No scores returned.") + + lines.extend(pass_lines) + summary = "\n".join(lines) + + print(summary) + + # Write to file so the GitHub Actions 'comment' job can read it directly + RESULTS_FILE.write_text(summary, encoding="utf-8") + print(f"\n Results saved to {RESULTS_FILE}") + print(f" Commit this file so the GitHub Actions workflow can read it.") + return output_items -def main(): - """Main execution flow.""" - print("\n" + "=" * 80) - print(" Trail Guide Agent - Cloud Evaluation") - print("=" * 80) + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main() -> None: + """Orchestrate the full evaluation pipeline step by step.""" + section(" Trail Guide Agent - Cloud Evaluation") print(f"\nConfiguration:") print(f" Project: {endpoint}") - print(f" Model: {model_deployment_name}") + print(f" Model: {model_deployment_name}") print(f" Dataset: {dataset_name} (v{dataset_version})") - + try: - # Step 1: Upload dataset - data_id = upload_dataset() - - # Step 2: Create evaluation definition - eval_object = create_evaluation_definition() - - # Step 3: Run evaluation - eval_run = run_evaluation(eval_object, data_id) - - # Step 4: Poll for results - run = poll_for_results(eval_object, eval_run) - - # Step 5: Retrieve and display results - output_items = retrieve_and_display_results(eval_object, run) - - print("\n" + "=" * 80) - print("Cloud evaluation complete") - print("=" * 80) + data_id = upload_dataset() # Step 1 + eval_object = create_evaluation_definition() # Step 2 + eval_run = run_evaluation(eval_object, data_id) # Step 3 + run = poll_for_results(eval_object, eval_run) # Step 4 + retrieve_and_display_results(eval_object, run) # Step 5 + + section("Cloud evaluation complete") print(f"\nNext steps:") print(f" 1. Review detailed results in Azure AI Foundry portal") print(f" 2. Analyze patterns in successful and failed evaluations") - print(f" 3. Document key findings and recommendations") - + print(f" 3. Commit {RESULTS_FILE} and push so the PR workflow can use it") + except Exception as e: - print(f"\nError: {e}") - print(f"\nTroubleshooting:") - print(f" - Verify AZURE_AI_PROJECT_ENDPOINT in .env file") - print(f" - Check Azure credentials: az login") - print(f" - Ensure GPT-4.1 model is deployed and accessible") + error_message = ( + f"{'=' * 80}\n" + f" Trail Guide Agent - Evaluation FAILED\n" + f"{'=' * 80}\n" + f"\nError: {e}\n" + f"\nTroubleshooting:\n" + f" - Verify AZURE_AI_PROJECT_ENDPOINT in .env file\n" + f" - Check Azure credentials: az login\n" + f" - Ensure GPT-4.1 model is deployed and accessible\n" + ) + print(error_message) + # Write the error to the results file so it's never left empty + RESULTS_FILE.write_text(error_message, encoding="utf-8") sys.exit(1) + if __name__ == "__main__": main() diff --git a/src/tests/interact_with_agent.py b/src/tests/interact_with_agent.py index 3a2000f..2cc900f 100644 --- a/src/tests/interact_with_agent.py +++ b/src/tests/interact_with_agent.py @@ -9,10 +9,8 @@ from azure.identity import DefaultAzureCredential from azure.ai.projects import AIProjectClient -# Load environment variables from repository root -repo_root = Path(__file__).parent.parent.parent -env_file = repo_root / '.env' -load_dotenv(env_file) +# Load environment variables from .env file +load_dotenv() def interact_with_agent(): """Start an interactive chat session with the Trail Guide Agent.""" diff --git a/src/tests/run_batch_tests.py b/src/tests/run_batch_tests.py index 2d396b9..51f61e1 100644 --- a/src/tests/run_batch_tests.py +++ b/src/tests/run_batch_tests.py @@ -16,10 +16,8 @@ from azure.identity import DefaultAzureCredential from azure.ai.projects import AIProjectClient -# Load environment variables from repository root -repo_root = Path(__file__).parent.parent.parent -env_file = repo_root / '.env' -load_dotenv(env_file) +# Load environment variables from .env file +load_dotenv() def load_test_prompts(test_prompts_dir): """Load all test prompt files from the test-prompts directory.""" From cf600c6e1d9b4dc058fb11a9348fa2fc6a6dcdcc Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Fri, 27 Feb 2026 14:31:06 +0000 Subject: [PATCH 13/17] Updated evalueion_results.txt --- evaluation_results.txt | 77 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 8 deletions(-) diff --git a/evaluation_results.txt b/evaluation_results.txt index 134e171..c0603f0 100644 --- a/evaluation_results.txt +++ b/evaluation_results.txt @@ -1,14 +1,75 @@ +================================================================================ + Trail Guide Agent - Cloud Evaluation +================================================================================ + +Configuration: + Project: https://ai-account-u2favsrdpp24k.services.ai.azure.com/api/projects/ai-project-dev-tester + Model: gpt-4.1 + Dataset: trail-guide-evaluation-dataset (v1) + +================================================================================ +Step 1: Uploading evaluation dataset +================================================================================ + +Dataset: trail_guide_evaluation_dataset.jsonl +Uploading... + + Dataset version 1 already exists in Foundry. + Retrieving existing dataset ID... + ✓ Using existing dataset + Dataset ID: azureai://accounts/ai-account-u2favsrdpp24k/projects/ai-project-dev-tester/data/trail-guide-evaluation-dataset/versions/1 + +================================================================================ +Step 2: Creating evaluation definition +================================================================================ + +Configuration: + Judge Model: gpt-4.1 + Evaluators: Intent Resolution, Relevance, Groundedness + +Creating evaluation... + +✓ Evaluation definition created + Evaluation ID: eval_7d81e353fce74ab3a51105fac7f59ed8 + +================================================================================ +Step 3: Running cloud evaluation +================================================================================ + +✓ Evaluation run started + Run ID: evalrun_31596933faa44251a62247465b5d8fbd + Status: in_progress + +This may take 5-10 minutes for 200 items... + +================================================================================ +Step 4: Polling for completion +================================================================================ + [3675s] Status: in_progress + +✓ Evaluation completed in 3694 seconds + +================================================================================ +Step 5: Retrieving results +================================================================================ + +Evaluation Summary + Report URL: https://ai.azure.com/nextgen/r/ZDCVYALRRAyvc0GrsS4tgA,rg-dev-tester,,ai-account-u2favsrdpp24k,ai-project-dev-tester/build/evaluations/eval_7d81e353fce74ab3a51105fac7f59ed8/run/evalrun_31596933faa44251a62247465b5d8fbd + + [DEBUG] First item type : + [DEBUG] First item attrs: ['construct', 'copy', 'created_at', 'datasource_item', 'datasource_item_id', 'dict', 'eval_id', 'from_orm', 'id', 'json', 'model_computed_fields', 'model_config', 'model_construct', 'model_copy', 'model_dump', 'model_dump_json', 'model_extra', 'model_fields', 'model_fields_set', 'model_json_schema', 'model_parametrized_name', 'model_post_init', 'model_rebuild', 'model_validate', 'model_validate_json', 'model_validate_strings', 'object', 'parse_file', 'parse_obj', 'parse_raw', 'results', 'run_id', 'sample', 'schema', 'schema_json', 'status', 'to_dict', 'to_json', 'update_forward_refs', 'validate'] + [DEBUG] item.__dict__: {'id': '1', 'created_at': 1772201992, 'datasource_item': {'query': 'What essential gear do I need for a summer day hike?', 'response': 'For a summer day hike, essential gear includes: proper hiking boots with good ankle support, moisture-wicking clothing in layers, a daypack (20-30L), 2 liters of water, high-energy snacks, sun protection (hat, sunglasses, sunscreen SPF 30+), a basic first aid kit, map and compass or GPS device, headlamp with extra batteries, and a whistle for emergencies. Always check the weather forecast and trail conditions before heading out.', 'ground_truth': 'Essential day hike gear includes footwear, water, food, sun protection, navigation tools, first aid, and emergency supplies.'}, 'datasource_item_id': 0, 'eval_id': 'eval_7d81e353fce74ab3a51105fac7f59ed8', 'object': 'eval.run.output_item', 'results': [Result(name='intent_resolution', passed=True, score=5.0, sample={'usage': {'prompt_tokens': 1984, 'completion_tokens': 61, 'total_tokens': 2045}, 'finish_reason': 'stop', 'model': 'gpt-4.1-2025-04-14', 'input': [{'role': 'user', 'content': '{"query": "What essential gear do I need for a summer day hike?", "response": "For a summer day hike, essential gear includes: proper hiking boots with good ankle support, moisture-wicking clothing in layers, a daypack (20-30L), 2 liters of water, high-energy snacks, sun protection (hat, sunglasses, sunscreen SPF 30+), a basic first aid kit, map and compass or GPS device, headlamp with extra batteries, and a whistle for emergencies. Always check the weather forecast and trail conditions before heading out.", "tool_definitions": null}'}], 'output': [{'role': 'assistant', 'content': '{\n "explanation": "The user asked for essential gear for a summer day hike. The agent provided a thorough, accurate, and relevant list, including safety, hydration, navigation, and sun protection, fully resolving the user\'s intent with no notable omissions.",\n "score": 5\n}'}]}, type='azure_ai_evaluator', metric='intent_resolution', label='pass', reason="The user asked for essential gear for a summer day hike. The agent provided a thorough, accurate, and relevant list, including safety, hydration, navigation, and sun protection, fully resolving the user's intent with no notable omissions.", threshold=3), Result(name='relevance', passed=True, score=5.0, sample={'usage': {'prompt_tokens': 1678, 'completion_tokens': 63, 'total_tokens': 1741}, 'finish_reason': 'stop', 'model': 'gpt-4.1-2025-04-14', 'input': [{'role': 'user', 'content': '{"query": "What essential gear do I need for a summer day hike?", "response": "For a summer day hike, essential gear includes: proper hiking boots with good ankle support, moisture-wicking clothing in layers, a daypack (20-30L), 2 liters of water, high-energy snacks, sun protection (hat, sunglasses, sunscreen SPF 30+), a basic first aid kit, map and compass or GPS device, headlamp with extra batteries, and a whistle for emergencies. Always check the weather forecast and trail conditions before heading out."}'}], 'output': [{'role': 'assistant', 'content': '{\n "explanation": "The response provides a thorough list of essential gear for a summer day hike, including clothing, hydration, navigation, safety, and sun protection. It also adds practical advice about checking weather and trail conditions, offering both completeness and useful context.",\n "score": 5\n}'}]}, type='azure_ai_evaluator', metric='relevance', label='pass', reason='The response provides a thorough list of essential gear for a summer day hike, including clothing, hydration, navigation, safety, and sun protection. It also adds practical advice about checking weather and trail conditions, offering both completeness and useful context.', threshold=3), Result(name='groundedness', passed=True, score=5.0, sample={'usage': {'prompt_tokens': 1571, 'completion_tokens': 106, 'total_tokens': 1677}, 'finish_reason': 'stop', 'model': 'gpt-4.1-2025-04-14', 'input': [{'role': 'user', 'content': '{"query": "What essential gear do I need for a summer day hike?", "response": "For a summer day hike, essential gear includes: proper hiking boots with good ankle support, moisture-wicking clothing in layers, a daypack (20-30L), 2 liters of water, high-energy snacks, sun protection (hat, sunglasses, sunscreen SPF 30+), a basic first aid kit, map and compass or GPS device, headlamp with extra batteries, and a whistle for emergencies. Always check the weather forecast and trail conditions before heading out.", "context": "For a summer day hike, essential gear includes: proper hiking boots with good ankle support, moisture-wicking clothing in layers, a daypack (20-30L), 2 liters of water, high-energy snacks, sun protection (hat, sunglasses, sunscreen SPF 30+), a basic first aid kit, map and compass or GPS device, headlamp with extra batteries, and a whistle for emergencies. Always check the weather forecast and trail conditions before heading out."}'}], 'output': [{'role': 'assistant', 'content': "Let's think step by step: The query asks for the essential gear needed for a summer day hike. The context provides a detailed list of essential gear and some additional advice. The response repeats the context almost verbatim, listing all the gear and advice without omitting or adding any information. There are no inaccuracies or missing details.\nThe response is fully accurate, complete, and directly grounded in the provided context, addressing the query thoroughly.\n5"}]}, type='azure_ai_evaluator', metric='groundedness', label='pass', reason='The response is fully accurate, complete, and directly grounded in the provided context, addressing the query thoroughly.', threshold=3)], 'run_id': 'evalrun_31596933faa44251a62247465b5d8fbd', 'sample': Sample(error=None, finish_reason='stop', input=[SampleInput(content='{"query": "What essential gear do I need for a summer day hike?", "response": "For a summer day hike, essential gear includes: proper hiking boots with good ankle support, moisture-wicking clothing in layers, a daypack (20-30L), 2 liters of water, high-energy snacks, sun protection (hat, sunglasses, sunscreen SPF 30+), a basic first aid kit, map and compass or GPS device, headlamp with extra batteries, and a whistle for emergencies. Always check the weather forecast and trail conditions before heading out.", "context": "For a summer day hike, essential gear includes: proper hiking boots with good ankle support, moisture-wicking clothing in layers, a daypack (20-30L), 2 liters of water, high-energy snacks, sun protection (hat, sunglasses, sunscreen SPF 30+), a basic first aid kit, map and compass or GPS device, headlamp with extra batteries, and a whistle for emergencies. Always check the weather forecast and trail conditions before heading out."}', role='user')], max_completion_tokens=None, model='gpt-4.1-2025-04-14', output=[SampleOutput(content="Let's think step by step: The query asks for the essential gear needed for a summer day hike. The context provides a detailed list of essential gear and some additional advice. The response repeats the context almost verbatim, listing all the gear and advice without omitting or adding any information. There are no inaccuracies or missing details.\nThe response is fully accurate, complete, and directly grounded in the provided context, addressing the query thoroughly.\n5", role='assistant')], seed=None, temperature=None, top_p=None, usage=SampleUsage(cached_tokens=None, completion_tokens=106, prompt_tokens=1571, total_tokens=1677)), 'status': 'completed'} ================================================================================ Trail Guide Agent - Evaluation Results ================================================================================ - Report URL : https://ai.azure.com/nextgen/r/ZDCVYALRRAyvc0GrsS4tgA,rg-dev-tester,,ai-account-u2favsrdpp24k,ai-project-dev-tester/build/evaluations/eval_7d81e353fce74ab3a51105fac7f59ed8/run/evalrun_31596933faa44251a62247465b5d8fbd - Total items : 89 - Errored items: 0 - Scored items : 89 + Report URL : https://ai.azure.com/nextgen/r/ZDCVYALRRAyvc0GrsS4tgA,rg-dev-tester,,ai-account-u2favsrdpp24k,ai-project-dev-tester/build/evaluations/eval_7d81e353fce74ab3a51105fac7f59ed8/run/eva -Average Scores (1-5 scale, threshold: 3) - No scores returned — check the Report URL for details. +================================================================================ +Cloud evaluation complete +================================================================================ -Pass Rates (score >= 3) - No scores returned. \ No newline at end of file +Next steps: + 1. Review detailed results in Azure AI Foundry portal + 2. Analyze patterns in successful and failed evaluations + 3. Commit evaluation_results.txt and push so the PR workflow can use it \ No newline at end of file From da09adc1393b18e5509c1ed274fb3fed80c8239a Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Fri, 27 Feb 2026 14:32:55 +0000 Subject: [PATCH 14/17] updated yml for testing mannually --- .github/workflows/evaluate-agent.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/evaluate-agent.yml b/.github/workflows/evaluate-agent.yml index c722dfd..c00bca3 100644 --- a/.github/workflows/evaluate-agent.yml +++ b/.github/workflows/evaluate-agent.yml @@ -22,18 +22,24 @@ jobs: # Reads evaluation_results.txt that was committed to the branch by the # student (produced locally by running evaluate_agent.py) and posts its # contents as a comment on the pull request. + # On manual (workflow_dispatch) runs the results are printed to the log. # --------------------------------------------------------------------------- comment: runs-on: ubuntu-latest - if: github.event_name == 'pull_request' steps: # Check out the branch so we can read the committed results file - name: Checkout repository uses: actions/checkout@v4 - # Post the contents of evaluation_results.txt as a PR comment + # Always print results to the workflow log (useful for manual runs) + - name: Print results to log + run: cat evaluation_results.txt + + # Post the contents of evaluation_results.txt as a PR comment. + # Skipped on manual (workflow_dispatch) runs — no PR to comment on. - name: Post results as PR comment + if: github.event_name == 'pull_request' uses: actions/github-script@v7 with: script: | From e9087bd7c351a815d33eefe0bc3779c5cb4eebee Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Fri, 27 Feb 2026 16:02:06 +0000 Subject: [PATCH 15/17] refactor: Improve error reporting and remove debug prints in evaluation script --- src/evaluators/evaluate_agent.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/evaluators/evaluate_agent.py b/src/evaluators/evaluate_agent.py index 0e654bc..bfeffb7 100644 --- a/src/evaluators/evaluate_agent.py +++ b/src/evaluators/evaluate_agent.py @@ -267,10 +267,10 @@ def poll_for_results(eval_object, eval_run): error_detail = getattr(run, "error", None) or "No additional details available." raise RuntimeError( f"Evaluation run failed after {elapsed}s.\n" - f" Run ID : {eval_run.id}\n" - f" Error : {error_detail}\n" - f" Report URL : {getattr(run, 'report_url', 'N/A')}\n" - f" Open the Report URL in Azure AI Foundry portal for full details." + f" Eval ID : {eval_object.id}\n" + f" Run ID : {eval_run.id}\n" + f" Error : {error_detail}\n" + f" To inspect: open Azure AI Foundry portal > Evaluations" ) else: # Overwrite the same line so the terminal isn't flooded @@ -299,7 +299,6 @@ def retrieve_and_display_results(eval_object, run): section("Step 5: Retrieving results") print(f"\nEvaluation Summary") - print(f" Report URL: {run.report_url}") # Retrieve every scored item from the run output_items = list( @@ -309,18 +308,6 @@ def retrieve_and_display_results(eval_object, run): ) ) - # Debug: print the raw structure of the first item so we can verify - # the attribute names match what the API actually returns. - # Remove this block once scores are confirmed working. - if output_items: - first = output_items[0] - print(f"\n [DEBUG] First item type : {type(first)}") - print(f" [DEBUG] First item attrs: {[a for a in dir(first) if not a.startswith('_')]}") - if hasattr(first, "evaluator_outputs"): - print(f" [DEBUG] evaluator_outputs: {first.evaluator_outputs}") - if hasattr(first, "__dict__"): - print(f" [DEBUG] item.__dict__: {first.__dict__}") - # Separate items by status so we can report errors alongside scores errored_items = [ item for item in output_items @@ -334,7 +321,7 @@ def retrieve_and_display_results(eval_object, run): if errored_items: print(f"\n ⚠ {len(errored_items)} item(s) errored during evaluation.") print(f" First error: {getattr(errored_items[0], 'error', 'details unavailable')}") - print(f" Open the Report URL in Foundry portal to inspect all failed items.") + print(f" Open Azure AI Foundry portal > Evaluations to inspect all failed items.") # Collect individual scores grouped by metric name scores: dict[str, list[float]] = { @@ -363,7 +350,8 @@ def retrieve_and_display_results(eval_object, run): "=" * 80, " Trail Guide Agent - Evaluation Results", "=" * 80, - f"\n Report URL : {run.report_url}", + f"\n Eval ID : {eval_object.id}", + f" Run ID : {run.id}", f" Total items : {len(output_items)}", f" Errored items: {len(errored_items)}", f" Scored items : {len(scored_items)}", @@ -385,7 +373,7 @@ def retrieve_and_display_results(eval_object, run): if not any_scores: # Scores missing — the evaluation may have completed but returned no # evaluator_outputs. Open the Report URL above to inspect in the portal. - lines.append(" No scores returned — check the Report URL for details.") + lines.append(" No scores returned — open Azure AI Foundry portal > Evaluations for details.") pass_lines.append(" No scores returned.") lines.extend(pass_lines) From 15e8ce722e186f30a588d363425524c13eef2d40 Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Fri, 27 Feb 2026 16:04:57 +0000 Subject: [PATCH 16/17] Clean-up files prior to merge --- .../prompts/v1_instructions.txt | 2 +- testing_attr.py | 22 ------------------- 2 files changed, 1 insertion(+), 23 deletions(-) delete mode 100644 testing_attr.py diff --git a/src/agents/trail_guide_agent/prompts/v1_instructions.txt b/src/agents/trail_guide_agent/prompts/v1_instructions.txt index 3c5b8b7..9611d32 100644 --- a/src/agents/trail_guide_agent/prompts/v1_instructions.txt +++ b/src/agents/trail_guide_agent/prompts/v1_instructions.txt @@ -1 +1 @@ -You are a helpful trail guide assistant for Adventure Works, an outdoor gear company. Help customers with basic trail recommendations, safety tips, and gear suggestions for hiking and outdoor activities. Keep responses informative but concise. \ No newline at end of file +You are a helpful trail guide assistant for Adventure Works, an outdoor gear company. Help users with basic trail recommendations, safety tips, and gear suggestions for hiking and outdoor activities. Keep responses informative but concise. \ No newline at end of file diff --git a/testing_attr.py b/testing_attr.py deleted file mode 100644 index cf01218..0000000 --- a/testing_attr.py +++ /dev/null @@ -1,22 +0,0 @@ -import os -from azure.identity import DefaultAzureCredential -from azure.ai.projects import AIProjectClient -from dotenv import load_dotenv - -load_dotenv() - -client = AIProjectClient( - endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"], - credential=DefaultAzureCredential(), -) - -# Get one agent (or filter by name) -agent = next(client.agents.list()) - -# See all attribute names -print(dir(agent)) - -# Or see a dict of public attributes -attrs = {k: v for k, v in agent.__dict__.items() if not k.startswith("_")} -for k, v in attrs.items(): - print(k, "=", v) \ No newline at end of file From a1807af6f9402ae5e7578b454c48247900f63752 Mon Sep 17 00:00:00 2001 From: Victor Barreto Date: Fri, 27 Feb 2026 17:17:20 +0000 Subject: [PATCH 17/17] Updated the file to old version --- .github/workflows/evaluate-agent.yml | 119 ++++++++++++++------------- 1 file changed, 64 insertions(+), 55 deletions(-) diff --git a/.github/workflows/evaluate-agent.yml b/.github/workflows/evaluate-agent.yml index c00bca3..a12bf0c 100644 --- a/.github/workflows/evaluate-agent.yml +++ b/.github/workflows/evaluate-agent.yml @@ -1,76 +1,85 @@ name: Evaluate Trail Guide Agent -# This workflow runs automatically when changes are pushed to the agent's -# prompt or code files on a pull request targeting main. -# You can also trigger it manually from the Actions tab. on: # Uncomment the lines below to enable automatic evaluation on pull requests - pull_request: - branches: [main] - paths: - - 'src/agents/trail_guide_agent/**' + # pull_request: + # branches: [main] + # paths: + # - 'src/agents/trail_guide_agent/**' workflow_dispatch: permissions: contents: read - pull-requests: write # needed to post the PR comment + pull-requests: write + id-token: write jobs: - - # --------------------------------------------------------------------------- - # Job: comment - # Reads evaluation_results.txt that was committed to the branch by the - # student (produced locally by running evaluate_agent.py) and posts its - # contents as a comment on the pull request. - # On manual (workflow_dispatch) runs the results are printed to the log. - # --------------------------------------------------------------------------- - comment: + evaluate: runs-on: ubuntu-latest - + steps: - # Check out the branch so we can read the committed results file - - name: Checkout repository + - name: Checkout code uses: actions/checkout@v4 - - # Always print results to the workflow log (useful for manual runs) - - name: Print results to log - run: cat evaluation_results.txt - - # Post the contents of evaluation_results.txt as a PR comment. - # Skipped on manual (workflow_dispatch) runs — no PR to comment on. - - name: Post results as PR comment + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Azure Login + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + + - name: Run evaluation + id: run + env: + AZURE_AI_PROJECT_ENDPOINT: ${{ secrets.AZURE_AI_PROJECT_ENDPOINT }} + MODEL_NAME: ${{ vars.MODEL_NAME || 'gpt-4.1' }} + run: | + python src/evaluators/evaluate_agent.py > evaluation_results.txt + cat evaluation_results.txt + + - name: Comment PR with results if: github.event_name == 'pull_request' uses: actions/github-script@v7 with: script: | const fs = require('fs'); - - // Read the results file committed to the branch const results = fs.readFileSync('evaluation_results.txt', 'utf8'); - - const body = [ - '## Trail Guide Agent — Evaluation Results', - '', - 'Evaluation results from the local run committed with this PR.', - '', - '
', - 'View full results', - '', - '```', - results, - '```', - '', - '
', - '', - '**Pass criteria (score ≥ 3 on a 1–5 scale):**', - '- Intent Resolution', - '- Relevance', - '- Groundedness', - ].join('\n'); - + + const body = `## 🎯 Agent Evaluation Results + + Automated evaluation completed for Trail Guide Agent changes. + +
+ View Detailed Results + + \`\`\` + ${results} + \`\`\` + +
+ + 📊 [View full results in Azure AI Foundry Portal](${{ steps.run.outputs.report_url }}) + + **Evaluation Criteria:** + - Intent Resolution (score ≥ 3) + - Relevance (score ≥ 3) + - Groundedness (score ≥ 3) + `; + github.rest.issues.createComment({ issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: body, - }); + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }); \ No newline at end of file