diff --git a/django_app/tests/test_consumers.py b/django_app/tests/test_consumers.py index 207771836..e0f32daf6 100644 --- a/django_app/tests/test_consumers.py +++ b/django_app/tests/test_consumers.py @@ -575,7 +575,7 @@ def mocked_connect(uploaded_file: File) -> Connect: "event": "on_custom_event", "name": "on_metadata_generation", "data": RequestMetadata( - llm_calls=[LLMCallMetadata(model_name="gpt-4o", input_tokens=123, output_tokens=1000)], + llm_calls=[LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=123, output_tokens=1000)], selected_files_total_tokens=1000, number_of_selected_files=1, ), diff --git a/redbox-core/redbox/models/chain.py b/redbox-core/redbox/models/chain.py index 74cf56064..a18438f38 100644 --- a/redbox-core/redbox/models/chain.py +++ b/redbox-core/redbox/models/chain.py @@ -147,7 +147,7 @@ class RedboxQuery(BaseModel): class LLMCallMetadata(BaseModel): id: str = Field(default_factory=lambda: str(uuid4())) - model_name: str + llm_model_name: str input_tokens: int output_tokens: int timestamp: datetime = Field(default_factory=lambda: datetime.now(UTC)) @@ -164,8 +164,8 @@ class RequestMetadata(BaseModel): def input_tokens(self): tokens_by_model = dict() for call_metadata in self.llm_calls: - tokens_by_model[call_metadata.model_name] = ( - tokens_by_model.get(call_metadata.model_name, 0) + call_metadata.input_tokens + tokens_by_model[call_metadata.llm_model_name] = ( + tokens_by_model.get(call_metadata.llm_model_name, 0) + call_metadata.input_tokens ) return tokens_by_model @@ -173,8 +173,8 @@ def input_tokens(self): def output_tokens(self): tokens_by_model = dict() for call_metadata in self.llm_calls: - tokens_by_model[call_metadata.model_name] = ( - tokens_by_model.get(call_metadata.model_name, 0) + call_metadata.output_tokens + tokens_by_model[call_metadata.llm_model_name] = ( + tokens_by_model.get(call_metadata.llm_model_name, 0) + call_metadata.output_tokens ) return tokens_by_model diff --git a/redbox-core/redbox/transform.py b/redbox-core/redbox/transform.py index fad790157..550401893 100644 --- a/redbox-core/redbox/transform.py +++ b/redbox-core/redbox/transform.py @@ -141,7 +141,7 @@ def to_request_metadata(prompt_response_model: dict): output_tokens = len(tokeniser.encode(prompt_response_model["response"])) metadata_event = RequestMetadata( - llm_calls=[LLMCallMetadata(model_name=model, input_tokens=input_tokens, output_tokens=output_tokens)] + llm_calls=[LLMCallMetadata(llm_model_name=model, input_tokens=input_tokens, output_tokens=output_tokens)] ) dispatch_custom_event(RedboxEventType.on_metadata_generation.value, metadata_event) diff --git a/redbox-core/tests/graph/test_state.py b/redbox-core/tests/graph/test_state.py index 168d162d2..790e7412b 100644 --- a/redbox-core/tests/graph/test_state.py +++ b/redbox-core/tests/graph/test_state.py @@ -135,30 +135,30 @@ def test_document_reducer(a: DocumentState, b: DocumentState, expected: Document GPT_4o_multiple_calls_1 = [ - LLMCallMetadata(model_name="gpt-4o", input_tokens=0, output_tokens=0), - LLMCallMetadata(model_name="gpt-4o", input_tokens=10, output_tokens=10), - LLMCallMetadata(model_name="gpt-4o", input_tokens=10, output_tokens=10), + LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=0, output_tokens=0), + LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=10, output_tokens=10), + LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=10, output_tokens=10), ] GPT_4o_multiple_calls_1a = GPT_4o_multiple_calls_1 + [ - LLMCallMetadata(model_name="gpt-4o", input_tokens=50, output_tokens=50), - LLMCallMetadata(model_name="gpt-4o", input_tokens=60, output_tokens=60), + LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=50, output_tokens=50), + LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=60, output_tokens=60), ] GPT_4o_multiple_calls_2 = [ - LLMCallMetadata(model_name="gpt-4o", input_tokens=100, output_tokens=200), - LLMCallMetadata(model_name="gpt-4o", input_tokens=0, output_tokens=10), - LLMCallMetadata(model_name="gpt-4o", input_tokens=100, output_tokens=210), + LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=100, output_tokens=200), + LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=0, output_tokens=10), + LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=100, output_tokens=210), ] multiple_models_multiple_calls_1 = [ - LLMCallMetadata(model_name="gpt-4o", input_tokens=100, output_tokens=200), - LLMCallMetadata(model_name="gpt-3.5", input_tokens=20, output_tokens=20), - LLMCallMetadata(model_name="gpt-4o", input_tokens=100, output_tokens=210), + LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=100, output_tokens=200), + LLMCallMetadata(llm_model_name="gpt-3.5", input_tokens=20, output_tokens=20), + LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=100, output_tokens=210), ] multiple_models_multiple_calls_1a = multiple_models_multiple_calls_1 + [ - LLMCallMetadata(model_name="gpt-4o", input_tokens=300, output_tokens=310), + LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=300, output_tokens=310), ] @@ -305,14 +305,14 @@ def test_tool_calls_reducer(a: ToolState, b: ToolState, expected: ToolState): llm_calls=[ { "id": "e7b9c8e4-8c6d-4f9b-8b8e-2f8e8e8e8e8e", - "model_name": "gpt-4o", + "llm_model_name": "gpt-4o", "input_tokens": 80, "output_tokens": 160, "timestamp": datetime(2023, 10, 1, 12, 0, 0, tzinfo=timezone.utc).timestamp(), }, { "id": "d3b9c8e4-8c6d-4f9b-8b8e-2f8e8e8e8e8e", - "model_name": "gpt-3.5", + "llm_model_name": "gpt-3.5", "input_tokens": 60, "output_tokens": 120, "timestamp": datetime(2023, 10, 2, 14, 30, 0, tzinfo=timezone.utc).timestamp(), @@ -341,7 +341,7 @@ def test_tool_calls_reducer(a: ToolState, b: ToolState, expected: ToolState): llm_calls=[ { "id": "c1b9c8e4-8c6d-4f9b-8b8e-2f8e8e8e8e8e", - "model_name": "gpt-4o", + "llm_model_name": "gpt-4o", "input_tokens": 10, "output_tokens": 10, "timestamp": datetime(2023, 10, 3, 16, 45, 0, tzinfo=timezone.utc).timestamp(), @@ -372,21 +372,21 @@ def test_tool_calls_reducer(a: ToolState, b: ToolState, expected: ToolState): llm_calls=[ { "id": "e7b9c8e4-8c6d-4f9b-8b8e-2f8e8e8e8e8e", - "model_name": "gpt-4o", + "llm_model_name": "gpt-4o", "input_tokens": 80, "output_tokens": 160, "timestamp": datetime(2023, 10, 1, 12, 0, 0, tzinfo=timezone.utc).timestamp(), }, { "id": "d3b9c8e4-8c6d-4f9b-8b8e-2f8e8e8e8e8e", - "model_name": "gpt-3.5", + "llm_model_name": "gpt-3.5", "input_tokens": 60, "output_tokens": 120, "timestamp": datetime(2023, 10, 2, 14, 30, 0, tzinfo=timezone.utc).timestamp(), }, { "id": "c1b9c8e4-8c6d-4f9b-8b8e-2f8e8e8e8e8e", - "model_name": "gpt-4o", + "llm_model_name": "gpt-4o", "input_tokens": 10, "output_tokens": 10, "timestamp": datetime(2023, 10, 3, 16, 45, 0, tzinfo=timezone.utc).timestamp(), diff --git a/redbox-core/tests/test_transform.py b/redbox-core/tests/test_transform.py index d07b91a99..b20b299f8 100644 --- a/redbox-core/tests/test_transform.py +++ b/redbox-core/tests/test_transform.py @@ -169,7 +169,7 @@ def test_elbow_filter(scores: list[float], target_len: int): ), "model": "gpt-4o", }, - RequestMetadata(llm_calls={LLMCallMetadata(model_name="gpt-4o", input_tokens=6, output_tokens=23)}), + RequestMetadata(llm_calls={LLMCallMetadata(llm_model_name="gpt-4o", input_tokens=6, output_tokens=23)}), ), ( { @@ -181,7 +181,9 @@ def test_elbow_filter(scores: list[float], target_len: int): ), "model": "unknown-model", }, - RequestMetadata(llm_calls={LLMCallMetadata(model_name="unknown-model", input_tokens=6, output_tokens=23)}), + RequestMetadata( + llm_calls={LLMCallMetadata(llm_model_name="unknown-model", input_tokens=6, output_tokens=23)} + ), ), ], )