Skip to content

Commit

Permalink
Merge pull request #757 from airtai/dev
Browse files (browse the repository at this point in the history)
Dev
  • Loading branch information
rjambrecic committed Jun 10, 2024
2 parents 4c4c711 + cfac5d6 commit 70c87b4
Show file tree
Hide file tree
Showing 19 changed files with 768 additions and 123 deletions.
8 changes: 4 additions & 4 deletions captn/captn_agents/backend/benchmarking/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def generate_task_table_for_websurfer(
help="File name of the task list",
),
repeat: int = typer.Option(
10,
5,
help="Number of times to repeat each url",
),
introduce_give_up_after: int = typer.Option(
Expand Down Expand Up @@ -132,15 +132,15 @@ def generate_task_table_for_websurfer(
@app.command()
def generate_task_table_for_brief_creation(
llm: Models = typer.Option( # noqa: B008
Models.gpt4,
Models.gpt4o,
help="Model which will be used by all agents",
),
file_name: str = typer.Option(
"brief-creation-benchmark-tasks.csv",
help="File name of the task list",
),
repeat: int = typer.Option(
10,
5,
help="Number of times to repeat each url",
),
output_dir: str = typer.Option( # noqa: B008
Expand Down Expand Up @@ -182,7 +182,7 @@ def generate_task_table_for_campaign_creation(
help="File name of the task list",
),
repeat: int = typer.Option(
10,
5,
help="Number of times to repeat each url",
),
output_dir: str = typer.Option( # noqa: B008
Expand Down
137 changes: 75 additions & 62 deletions captn/captn_agents/backend/benchmarking/brief_creation_team.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from contextlib import contextmanager
from tempfile import TemporaryDirectory
from typing import Any, Iterator, Tuple
from unittest.mock import patch
from unittest.mock import MagicMock, patch

from autogen.cache import Cache

Expand All @@ -11,11 +11,16 @@
)
from .fixtures.brief_creation_team_fixtures import (
BRIEF_CREATION_TEAM_RESPONSE,
WEB_PAGE_SUMMARY_BBC,
WEB_PAGE_SUMMARY_CINESTAR,
WEB_PAGE_SUMMARY_DISNEY,
WEB_PAGE_SUMMARY_FASTSTREAM,
WEB_PAGE_SUMMARY_FLOWERSHOP,
WEB_PAGE_SUMMARY_GET_BY_BUS,
WEB_PAGE_SUMMARY_HAMLEYS,
WEB_PAGE_SUMMARY_IKEA,
WEB_PAGE_SUMMARY_KONZUM,
WEB_PAGE_SUMMARY_WEBSITE_DEMOS,
)
from .helpers import get_client_response, get_config_list
from .models import Models
Expand All @@ -32,16 +37,20 @@
"https://www.hamleys.com/": WEB_PAGE_SUMMARY_HAMLEYS,
"https://www.konzum.hr": WEB_PAGE_SUMMARY_KONZUM,
"https://faststream.airt.ai": WEB_PAGE_SUMMARY_FASTSTREAM,
"https://camelbackflowershop.com": WEB_PAGE_SUMMARY_FLOWERSHOP,
"https://www.bbc.com": WEB_PAGE_SUMMARY_BBC,
"https://zagreb.cinestarcinemas.hr": WEB_PAGE_SUMMARY_CINESTAR,
"https://websitedemos.net/organic-shop-02": WEB_PAGE_SUMMARY_WEBSITE_DEMOS,
"https://getbybus.com/hr": WEB_PAGE_SUMMARY_GET_BY_BUS,
}


@contextmanager
def _patch_vars(
url: str,
team: BriefCreationTeam,
client_system_message: str,
cache: Cache,
) -> Iterator[Tuple[Any, Any, Any, Any]]:
) -> Iterator[Tuple[Any, Any, Any]]:
with (
patch.object(
team.toolbox.functions,
Expand All @@ -53,10 +62,6 @@ def _patch_vars(
client_system_message=client_system_message,
),
) as mock_reply_to_client,
patch(
"captn.captn_agents.backend.tools._brief_creation_team_tools._get_info_from_the_web_page_original",
return_value=URL_SUMMARY_DICT[url],
) as mock_get_info_from_the_web_page,
patch(
"captn.captn_agents.backend.tools._brief_creation_team_tools._change_the_team_and_start_new_chat",
return_value=BRIEF_CREATION_TEAM_RESPONSE,
Expand All @@ -69,7 +74,6 @@ def _patch_vars(
):
yield (
mock_reply_to_client,
mock_get_info_from_the_web_page,
mock_change_the_team_and_start_new_chat,
mock_get_brief_template,
)
Expand Down Expand Up @@ -112,63 +116,72 @@ def _get_task(url: str) -> str:
def benchmark_brief_creation(
url: str,
team_name: str,
llm: str = Models.gpt3_5,
llm: str = Models.gpt4o,
) -> Tuple[str, int]:
config_list = get_config_list(llm)

user_id = 123
conv_id = 234
task = _get_task(url)
team = BriefCreationTeam(
task=task, user_id=user_id, conv_id=conv_id, config_list=config_list
)
client_system_message = _client_system_messages[team_name]
try:
with TemporaryDirectory() as cache_dir:
with Cache.disk(cache_path_root=cache_dir) as cache:
with _patch_vars(
url=url,
team=team,
client_system_message=client_system_message,
cache=cache,
) as (
_,
mock_get_info_from_the_web_page,
mock_change_the_team_and_start_new_chat,
mock_get_brief_template,
):
# return "it's ok"
team.initiate_chat(cache=cache)

mock_get_info_from_the_web_page.assert_called()
mock_get_brief_template.assert_called()
mock_change_the_team_and_start_new_chat.assert_called()
team_class: Team = (
mock_change_the_team_and_start_new_chat.call_args.kwargs[
"team_class"
]
)
assert team_class.get_registred_team_name() == team_name # nosec: [B101]

delegate_task_function_sugestion = team.get_messages()[-2]
assert "tool_calls" in delegate_task_function_sugestion # nosec: [B101]

delegate_task_function_sugestion_function = (
delegate_task_function_sugestion["tool_calls"][0]["function"]
)
assert (
delegate_task_function_sugestion_function["name"]
== "delagate_task"
) # nosec: [B101]

assert "arguments" in delegate_task_function_sugestion_function # nosec: [B101]
assert (
"task" in delegate_task_function_sugestion_function["arguments"]
) # nosec: [B101]

return delegate_task_function_sugestion_function[
"arguments"
], team.retry_from_scratch_counter
finally:
poped_team = Team.pop_team(user_id=user_id, conv_id=conv_id)
assert isinstance(poped_team, Team) # nosec: [B101]

with patch(
"captn.captn_agents.backend.tools._brief_creation_team_tools.WebPageInfo.get_info_from_the_web_page_f",
) as mock_get_info_from_the_web_page:
f = MagicMock()
f.return_value = URL_SUMMARY_DICT[url]
mock_get_info_from_the_web_page.return_value = f

team = BriefCreationTeam(
task=task, user_id=user_id, conv_id=conv_id, config_list=config_list
)
client_system_message = _client_system_messages[team_name]
try:
with TemporaryDirectory() as cache_dir:
with Cache.disk(cache_path_root=cache_dir) as cache:
with _patch_vars(
team=team,
client_system_message=client_system_message,
cache=cache,
) as (
_,
mock_change_the_team_and_start_new_chat,
mock_get_brief_template,
):
# return "it's ok"
team.initiate_chat(cache=cache)

mock_get_info_from_the_web_page.assert_called()
mock_get_brief_template.assert_called()
mock_change_the_team_and_start_new_chat.assert_called()
team_class: Team = (
mock_change_the_team_and_start_new_chat.call_args.kwargs[
"team_class"
]
)
assert team_class.get_registred_team_name() == team_name # nosec: [B101]

delegate_task_function_sugestion = team.get_messages()[-2]
assert "tool_calls" in delegate_task_function_sugestion # nosec: [B101]

delegate_task_function_sugestion_function = (
delegate_task_function_sugestion["tool_calls"][0][
"function"
]
)
assert (
delegate_task_function_sugestion_function["name"]
== "delagate_task"
) # nosec: [B101]

assert "arguments" in delegate_task_function_sugestion_function # nosec: [B101]
assert (
"task"
in delegate_task_function_sugestion_function["arguments"]
) # nosec: [B101]

return delegate_task_function_sugestion_function[
"arguments"
], team.retry_from_scratch_counter
finally:
poped_team = Team.pop_team(user_id=user_id, conv_id=conv_id)
assert isinstance(poped_team, Team) # nosec: [B101]
25 changes: 21 additions & 4 deletions captn/captn_agents/backend/benchmarking/campaign_creation_team.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,31 @@
from ..tools._functions import Context
from ..tools._google_ads_team_tools import _mock_create_campaign
from .fixtures.campaign_creation_team_fixtures import (
CAMPAIGN_CREATION_BBC,
CAMPAIGN_CREATION_CINESTAR,
CAMPAIGN_CREATION_DISNEY,
CAMPAIGN_CREATION_FASTSTREAM,
CAMPAIGN_CREATION_FLOWERSHOP,
CAMPAIGN_CREATION_GETBYBUS,
CAMPAIGN_CREATION_HAMLEYS,
CAMPAIGN_CREATION_IKEA,
CAMPAIGN_CREATION_KONZUM,
CAMPAIGN_CREATION_WEBSITEDEMOS,
)
from .helpers import get_client_response_for_the_team_conv, get_config_list
from .models import Models

URL_TASK_DICT = {
"https://www.ikea.com/gb/en/": CAMPAIGN_CREATION_IKEA,
"https://www.disneystore.eu": CAMPAIGN_CREATION_DISNEY,
# "https://www.hamleys.com/": "",
# "https://www.konzum.hr": "",
"https://faststream.airt.ai": CAMPAIGN_CREATION_FASTSTREAM,
"https://www.hamleys.com/": CAMPAIGN_CREATION_HAMLEYS,
"https://www.konzum.hr": CAMPAIGN_CREATION_KONZUM,
"https://websitedemos.net/organic-shop-02/": CAMPAIGN_CREATION_WEBSITEDEMOS,
"www.bbc.com/news": CAMPAIGN_CREATION_BBC,
"https://zagreb.cinestarcinemas.hr/": CAMPAIGN_CREATION_CINESTAR,
"https://camelbackflowershop.com/": CAMPAIGN_CREATION_FLOWERSHOP,
"https://getbybus.com/hr/": CAMPAIGN_CREATION_GETBYBUS,
}


Expand All @@ -48,6 +60,7 @@ def mock_get_campaign_ids(context: Context, customer_id: str) -> List[str]:

@contextmanager
def _patch_campaign_creation_team_vars() -> Iterator[Tuple[Any, Any, Any, Any]]:
accessible_customers = ["1111"]
with (
# unittest.mock.patch.object(
# campaign_creation_team.toolbox.functions,
Expand All @@ -56,7 +69,11 @@ def _patch_campaign_creation_team_vars() -> Iterator[Tuple[Any, Any, Any, Any]]:
# ),
unittest.mock.patch(
"captn.captn_agents.backend.tools._google_ads_team_tools.list_accessible_customers_client",
return_value=["1111"],
return_value=accessible_customers,
),
unittest.mock.patch(
"captn.captn_agents.backend.tools._functions.list_accessible_customers",
return_value=accessible_customers,
),
# unittest.mock.patch.object(
# campaign_creation_team.toolbox.functions,
Expand Down Expand Up @@ -124,7 +141,7 @@ def _patch_campaign_creation_team_vars() -> Iterator[Tuple[Any, Any, Any, Any]]:
):
mock_requests_get.return_value.ok = True
mock_requests_get.return_value.json.side_effect = [
f"Created resource/new/{random.randint(100, 1000)}" # nosec: [B311]
f"Created resource/{random.randint(100, 1000)}" # nosec: [B311]
for _ in range(200)
]
yield (
Expand Down
8 changes: 4 additions & 4 deletions captn/captn_agents/backend/benchmarking/end2end.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
CampaignCreationTeam,
Team,
)
from ..tools._brief_creation_team_tools import _change_the_team_and_start_new_chat
from ..tools._brief_creation_team_tools import (
_change_the_team_and_start_new_chat,
)
from .brief_creation_team import _client_system_messages, _get_task
from .campaign_creation_team import (
_patch_campaign_creation_team_vars,
Expand Down Expand Up @@ -48,10 +50,8 @@ def _patch_brief_creation_team_vars(

def benchmark_end2end(
url: str,
llm: str = Models.gpt4,
llm: str = Models.gpt4o,
) -> Tuple[str, int]:
# Remove the following line after integrating gpt4-o for brief creation team
llm = Models.gpt4
config_list = get_config_list(llm)

user_id = 123
Expand Down
Loading

0 comments on commit 70c87b4

Please sign in to comment.