-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #765 from airtai/dev
5 PRs
- Loading branch information
Showing
21 changed files
with
558 additions
and
288 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5 changes: 0 additions & 5 deletions
5
benchmarking/end2end-benchmark-task-list-2024-06-04T20:57:23-aggregated.csv
This file was deleted.
Oops, something went wrong.
61 changes: 0 additions & 61 deletions
61
benchmarking/end2end-benchmark-task-list-2024-06-04T20:57:23.csv
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
,url,success_percentage,success_with_retry_percentage,failed_percentage,avg_time | ||
0,https://camelbackflowershop.com/,100.0,0.0,0.0,787.82 | ||
1,https://faststream.airt.ai,100.0,0.0,0.0,546.88 | ||
2,https://getbybus.com/hr/,100.0,0.0,0.0,600.33 | ||
3,https://websitedemos.net/organic-shop-02/,100.0,0.0,0.0,632.92 | ||
4,https://www.disneystore.eu,100.0,0.0,0.0,600.73 | ||
5,https://www.hamleys.com/,100.0,0.0,0.0,646.25 | ||
6,https://www.ikea.com/gb/en/,100.0,0.0,0.0,1038.94 | ||
7,https://www.konzum.hr,100.0,0.0,0.0,746.19 | ||
8,https://zagreb.cinestarcinemas.hr/,100.0,0.0,0.0,967.01 | ||
9,www.bbc.com/news,100.0,0.0,0.0,777.56 | ||
10,Total,100.0,0.0,0.0,734.46 |
Large diffs are not rendered by default.
Oops, something went wrong.
3 changes: 3 additions & 0 deletions
3
benchmarking/weekly-analysis-benchmark-task-list-aggregated.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
,url,success_percentage,success_with_retry_percentage,failed_percentage,avg_time | ||
0,faststream-web-search,100.0,0.0,0.0,157.63 | ||
1,Total,100.0,0.0,0.0,157.63 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
,task,url,llm,execution_time,status,success,output,retries | ||
0,weekly_analysis,faststream-web-search,gpt4o,121.84500002861024,DONE,Success,"Response from team '123_234': | ||
{""subject"": ""Capt’n.ai Weekly Analysis"", ""email_content"": ""<html></html>"", ""proposed_user_action"": [""Pause Ad '680002685922' in Campaign 'faststream-web-search' due to high cost and zero conversions."", ""Add negative keywords to filter out irrelevant traffic. Suggested negative keywords could be derived from the client's business context."", ""Create new headlines and descriptions for the ads. Ensure they are relevant and within the character limits.""], ""terminate_groupchat"": true}",0.0 | ||
1,weekly_analysis,faststream-web-search,gpt4o,100.31810402870178,DONE,Success,"Response from team '123_234': | ||
{""subject"": ""Capt’n.ai Weekly Analysis"", ""email_content"": ""<html></html>"", ""proposed_user_action"": [""Pause the ad with ID '680002685922' in the campaign 'faststream-web-search' for customer '7119828439' due to high cost and zero conversions."", ""Add negative keywords to the campaign '20979579987' for customer '2324127278' to filter out irrelevant traffic."", ""Change the ad copy for ads with unreachable final URLs in the campaign 'Website traffic-Search-3-updated-up' for customer '2324127278'.""], ""terminate_groupchat"": true}",0.0 | ||
2,weekly_analysis,faststream-web-search,gpt4o,107.02347493171692,DONE,Success,"Response from team '123_234': | ||
{""subject"": ""Capt’n.ai Weekly Analysis"", ""email_content"": ""<html></html>"", ""proposed_user_action"": [""Pause the ad with ID 688768033895 in the campaign 'Website traffic-Search-3-updated-up' due to high cost and zero conversions."", ""Add negative keywords to filter out irrelevant traffic for better performance."", ""Update the final URLs for the ads with unreachable URLs and improve the ad copy.""], ""terminate_groupchat"": true}",0.0 | ||
3,weekly_analysis,faststream-web-search,gpt4o,127.66284489631651,DONE,Success,"Response from team '123_234': | ||
{""subject"": ""Capt’n.ai Weekly Analysis"", ""email_content"": ""<html></html>"", ""proposed_user_action"": [""Pause Ad 688768033895 in both Ad Group 156261983518 (fastapi get super-dooper-cool) and Ad Group 158468020535 (TVs) in Campaign 20761810762 (Website traffic-Search-3-updated-up) due to unreachable final URLs."", ""Add relevant negative keywords to Campaign 20761810762 (Website traffic-Search-3-updated-up) and Campaign 20750580900 (faststream-web-search) to filter out irrelevant traffic."", ""Update the final URLs in Campaign 20761810762 (Website traffic-Search-3-updated-up) to ensure they are reachable and add more ads to test different variations.""], ""terminate_groupchat"": true}",0.0 | ||
4,weekly_analysis,faststream-web-search,gpt4o,221.9658682346344,DONE,Success,"Response from team '123_234': | ||
{""subject"": ""Capt’n.ai Weekly Analysis"", ""email_content"": ""<html></html>"", ""proposed_user_action"": [""Pause the ads in Campaign 'Website traffic-Search-3-updated-up' (ID: 20761810762) and Campaign 'faststream-web-search' (ID: 20750580900) due to high cost and zero conversions."", ""Add negative keywords to filter out irrelevant traffic."", ""Fix the URLs that are not reachable.""], ""terminate_groupchat"": true}",0.0 | ||
5,weekly_analysis,faststream-web-search,gpt4o,184.42552876472473,DONE,Success,"Response from team '123_234': | ||
{""subject"": ""Capt’n.ai Weekly Analysis"", ""email_content"": ""<html></html>"", ""proposed_user_action"": [""Pause the ads in the campaign 'Website traffic-Search-3-updated-up' due to unreachable final URLs."", ""Pause the campaign 'Empty' as it has no recorded metrics."", ""Add relevant keywords to the ad groups to improve targeting and add negative keywords to filter out irrelevant traffic.""], ""terminate_groupchat"": true}",0.0 | ||
6,weekly_analysis,faststream-web-search,gpt4o,206.85009813308716,DONE,Success,"Response from team '123_234': | ||
{""subject"": ""Capt’n.ai Weekly Analysis"", ""email_content"": ""<html></html>"", ""proposed_user_action"": [""Pause the ads with unreachable final URLs in the campaign 'Website traffic-Search-3-updated-up' for Customer 2324127278."", ""Add relevant keywords to the ad groups in the campaign 'Website traffic-Search-3-updated-up' for Customer 2324127278."", ""Add relevant keywords to the ad group in the campaign 'faststream-web-search' for Customer 7119828439.""], ""terminate_groupchat"": true}",0.0 | ||
7,weekly_analysis,faststream-web-search,gpt4o,227.48140382766724,DONE,Success,"Response from team '123_234': | ||
{""subject"": ""Capt’n.ai Weekly Analysis"", ""email_content"": ""<html></html>"", ""proposed_user_action"": [""Pause Ad '688768033895' in Campaign '20761810762' (Website traffic-Search-3-updated-up) for Customer '2324127278' due to high cost and zero conversions."", ""Pause Ad '680002685922' in Campaign '20750580900' (faststream-web-search) for Customer '7119828439' due to high cost and zero conversions."", ""Add relevant keywords to the ad groups in Campaign '20761810762' (Website traffic-Search-3-updated-up) for Customer '2324127278'.""], ""terminate_groupchat"": true}",0.0 | ||
8,weekly_analysis,faststream-web-search,gpt4o,145.32040429115295,DONE,Success,"Response from team '123_234': | ||
{""subject"": ""Capt’n.ai Weekly Analysis"", ""email_content"": ""<html></html>"", ""proposed_user_action"": [""Pause the ad with ID 688768033895 in the campaign 'Website traffic-Search-3-updated-up' due to high cost and zero conversions."", ""Add relevant keywords to the campaign 'faststream-web-search' to improve targeting."", ""Update the ad copy for the ad with ID 688768033895 in the campaign 'Website traffic-Search-3-updated-up' to ensure it is compelling and relevant.""], ""terminate_groupchat"": true}",0.0 | ||
9,weekly_analysis,faststream-web-search,gpt4o,133.40970301628113,DONE,Success,"Response from team '123_234': | ||
{""subject"": ""Capt’n.ai Weekly Analysis"", ""email_content"": ""<html></html>"", ""proposed_user_action"": [""Pause ads in Campaign 'faststream-web-search' (ID: 20750580900) due to high cost and no conversions."", ""Add negative keywords to filter out irrelevant traffic and positive keywords to target more relevant traffic."", ""Update ad copy to make it more compelling and add more ads to test different variations.""], ""terminate_groupchat"": true}",0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
197 changes: 197 additions & 0 deletions
197
captn/captn_agents/backend/benchmarking/weekly_analysis_team.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
import json | ||
import unittest | ||
from tempfile import TemporaryDirectory | ||
from typing import Tuple | ||
|
||
from autogen.cache import Cache | ||
|
||
from ..teams import Team | ||
from ..teams._weekly_analysis_team import ( | ||
WeeklyAnalysisTeam, | ||
_create_task_message, | ||
_validate_conversation_and_send_email, | ||
construct_weekly_report_email_from_template, | ||
) | ||
from .helpers import get_config_list | ||
from .models import Models | ||
|
||
# Static weekly-report fixture consumed by `benchmark_weekly_analysis`.
#
# Layout: one entry per customer under "weekly_customer_reports"; each customer
# maps campaign id -> campaign, each campaign maps ad-group id -> ad group, and
# each ad group maps ad id -> ad (with its "final_urls").
#
# The fixture covers three campaigns across two customers:
#   * "Website traffic-Search-3-updated-up" (USD customer): two ad groups whose
#     ads point at hosts named "not-reachable" / "also-not-reachable".
#   * "Empty" (USD customer): all metrics are zero and there are no ad groups.
#   * "faststream-web-search" (EUR customer): a single ad group with one ad.
# Some "*_increase" metrics are deliberately `None` (serialized as JSON null).
weekly_report = {
    "weekly_customer_reports": [
        {
            "customer_id": "2324127278",
            "currency": "USD",
            "campaigns": {
                "20761810762": {
                    "id": "20761810762",
                    "metrics": {
                        "clicks": 5,
                        "conversions": 0.0,
                        "cost_micros": 22222,
                        "impressions": 148,
                        "interactions": 10,
                        "clicks_increase": -42.86,
                        "conversions_increase": 0.0,
                        "cost_micros_increase": -32.94,
                        "impressions_increase": None,
                        "interactions_increase": 42.86,
                    },
                    "name": "Website traffic-Search-3-updated-up",
                    "ad_groups": {
                        "156261983518": {
                            "id": "156261983518",
                            "metrics": {},
                            "name": "fastapi get super-dooper-cool",
                            "keywords": {},
                            "ad_group_ads": {
                                "688768033895": {
                                    "id": "688768033895",
                                    "metrics": {},
                                    "final_urls": ["https://not-reachable.airt.ai/"],
                                }
                            },
                        },
                        "158468020535": {
                            "id": "158468020535",
                            "metrics": {},
                            "name": "TVs",
                            "keywords": {},
                            "ad_group_ads": {
                                "688768033895": {
                                    "id": "688768033895",
                                    "metrics": {},
                                    "final_urls": [
                                        "https://also-not-reachable.airt.ai/"
                                    ],
                                }
                            },
                        },
                    },
                },
                "20979579987": {
                    "id": "20979579987",
                    "metrics": {
                        "clicks": 0,
                        "conversions": 0.0,
                        "cost_micros": 0,
                        "impressions": 0,
                        "interactions": 0,
                        "clicks_increase": 0,
                        "conversions_increase": 0,
                        "cost_micros_increase": 0,
                        "impressions_increase": 0,
                        "interactions_increase": 0,
                    },
                    "name": "Empty",
                    "ad_groups": {},
                },
            },
        },
        {
            "customer_id": "7119828439",
            "currency": "EUR",
            "campaigns": {
                "20750580900": {
                    "id": "20750580900",
                    "metrics": {
                        "clicks": 10,
                        "conversions": 0.0,
                        "cost_micros": 2830000,
                        "impressions": 148,
                        "interactions": 10,
                        "clicks_increase": None,
                        "conversions_increase": 0.0,
                        "cost_micros_increase": -32.94,
                        "impressions_increase": None,
                        "interactions_increase": 42.86,
                    },
                    "name": "faststream-web-search",
                    "ad_groups": {
                        "155431182157": {
                            "id": "155431182157",
                            "metrics": {},
                            "name": "Ad group 1",
                            "keywords": {},
                            "ad_group_ads": {
                                "680002685922": {
                                    "id": "680002685922",
                                    "metrics": {},
                                    "final_urls": [
                                        "https://github.com/airtai/faststream"
                                    ],
                                }
                            },
                        }
                    },
                }
            },
        },
    ]
}
|
||
|
||
def benchmark_weekly_analysis(
    url: str = "currently_not_used",
    llm: str = Models.gpt4o,
) -> Tuple[str, int]:
    """Run one weekly-analysis chat against the static `weekly_report` fixture.

    The team's toolbox functions are patched so the run does not depend on
    live query results, and `_update_chat_message_and_send_email` is replaced
    by a mock returning ``None``.

    Args:
        url: Not read anywhere in the body; kept so the signature matches the
            other benchmark entry points (presumably - confirm with caller).
        llm: Model identifier handed to `get_config_list`.

    Returns:
        A ``(last_message, retry_from_scratch_counter)`` tuple taken from the
        team after the chat has finished.

    Raises:
        AssertionError: If `send_email` or the patched
            `_update_chat_message_and_send_email` was not called exactly once,
            or if the team popped during cleanup is not a `Team`.
    """
    # `import unittest` alone does not load the `unittest.mock` submodule, so
    # `unittest.mock.patch` only works if some other module imported it first.
    # Import it explicitly to avoid that hidden dependency.
    from unittest import mock

    date = "2024-04-14"
    weekly_report_message, _ = construct_weekly_report_email_from_template(
        weekly_reports=weekly_report, date=date
    )

    task = _create_task_message(date, json.dumps(weekly_report), weekly_report_message)
    user_id = 123
    conv_id = 234

    config_list = get_config_list(llm)
    weekly_analysis_team = WeeklyAnalysisTeam(
        task=task, user_id=user_id, conv_id=conv_id, config_list=config_list
    )

    try:
        with (
            mock.patch.object(
                weekly_analysis_team.toolbox.functions,
                "list_accessible_customers",
                return_value=["1111"],
            ),
            mock.patch.object(
                weekly_analysis_team.toolbox.functions,
                "execute_query",
                return_value=(
                    "You have all the necessary details. Do not use the execute_query anymore."
                ),
            ),
            # `wraps=` keeps the real send_email behaviour while recording calls.
            mock.patch.object(
                weekly_analysis_team.toolbox.functions,
                "send_email",
                wraps=weekly_analysis_team.toolbox.functions.send_email,  # type: ignore[attr-defined]
            ) as mock_send_email,
            mock.patch(
                "captn.captn_agents.backend.teams._weekly_analysis_team._update_chat_message_and_send_email",
                return_value=None,
            ) as mock_update_chat_message_and_send_email,
        ):
            # Use a throw-away disk cache so each benchmark run starts cold.
            with TemporaryDirectory() as cache_dir, Cache.disk(
                cache_path_root=cache_dir
            ) as cache:
                weekly_analysis_team.initiate_chat(cache=cache)

            mock_send_email.assert_called_once()

            _validate_conversation_and_send_email(
                weekly_analysis_team=weekly_analysis_team,
                conv_uuid="fake_uuid",
                email="fake@email.com",
                weekly_report_message="fake_message",
                main_email_template="fake_template",
            )
            mock_update_chat_message_and_send_email.assert_called_once()

            last_message = weekly_analysis_team.get_last_message()
            return last_message, weekly_analysis_team.retry_from_scratch_counter

    finally:
        # Always remove the team registered under this user/conversation pair,
        # whether the run succeeded or raised.
        popped_team = Team.pop_team(user_id=user_id, conv_id=conv_id)
        assert isinstance(popped_team, Team)  # nosec: [B101]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.