test: comment out API-dependent benchmarking tests

kkahadze · kkahadze · commit 9d5582b29658 · 2024-03-19T13:49:17.000-04:00
diff --git a/tests/test_benchmarking.py b/tests/test_benchmarking.py
@@ -17,95 +17,99 @@
 import json
 
 
-@pytest.fixture(scope="module")
-def api_key():
-    load_dotenv(dotenv_path="./.env")  # Adjust the path as needed
-    return os.getenv("OPENAI_API_KEY")
-
-
-@pytest.mark.API_test
-def test_json_output(api_key):
-    openai.api_key = api_key
-    prompts = [
-        "You are a brilliant math professor. Solve the following problem and put your answer after four hashtags like the following example: \nQuestion: What is 4 + 4?\nAnswer: 4 + 4 is ####8\n\n Make your response as short as possible.",
-        "You are a foolish high-school student. Solve the following problem and put your answer after four hashtags like the following example: \nQuestion: What is 4 + 4?\nAnswer: 4 + 4 is ####8\n\n Make your response as short as possible.",
-    ]
-
-    model = "gpt-3.5-turbo-1106"
-
-    response = query_model(
-        prompts[0],
-        "What is 4 + 4?",
-        model_name=model,
-        output_tokens=150,
-    )
-
-    response_dict = response_to_dict(response)
-
-    # Check the main keys
-    assert "id" in response_dict
-    assert "model" in response_dict
-    assert "object" in response_dict
-    assert "created" in response_dict
-    assert "system_fingerprint" in response_dict
-    assert "choices" in response_dict
-    assert "usage" in response_dict
-
-    # Check the types of the main keys
-    assert isinstance(response_dict["id"], str)
-    assert isinstance(response_dict["model"], str)
-    assert isinstance(response_dict["object"], str)
-    assert isinstance(response_dict["created"], int)
-    assert isinstance(response_dict["system_fingerprint"], str)
-    assert isinstance(response_dict["choices"], list)
-    assert isinstance(response_dict["usage"], dict)
-
-    # Check the structure and types of the 'choices' key
-    assert len(response_dict["choices"]) > 0
-    for choice in response_dict["choices"]:
-        assert "finish_reason" in choice
-        assert "index" in choice
-        assert "message" in choice
-        assert isinstance(choice["finish_reason"], str)
-        assert isinstance(choice["index"], int)
-        assert isinstance(choice["message"], dict)
-        assert "content" in choice["message"]
-        assert "role" in choice["message"]
-        assert isinstance(choice["message"]["content"], str)
-        assert isinstance(choice["message"]["role"], str)
-
-    # Check the structure and types of the 'usage' key
-    assert "completion_tokens" in response_dict["usage"]
-    assert "prompt_tokens" in response_dict["usage"]
-    assert "total_tokens" in response_dict["usage"]
-    assert isinstance(response_dict["usage"]["completion_tokens"], int)
-    assert isinstance(response_dict["usage"]["prompt_tokens"], int)
-    assert isinstance(response_dict["usage"]["total_tokens"], int)
-
-
-@pytest.mark.API_test
-def test_query_model(api_key):
-    openai.api_key = api_key
-    prompt = "You are a brilliant math professor. Solve the following problem and put your answer after four hashtags like the following example: \nQuestion: What is 4 + 4?\nAnswer: 4 + 4 is ####8\n\n Make your response as short as possible."
-    question = "What is 4 + 4?"
-    model_name = "gpt-3.5-turbo-1106"
-    output_tokens = 150
-    response = query_model(prompt, question, model_name, output_tokens)
-    assert isinstance(response.choices[0].message.content, str)
-    assert len(response.choices[0].message.content) > 0
-    assert "8" in response.choices[0].message.content
-
-    prompt = 'You are a brilliant math professor. Solve the following problem and return a JSON with the first entry being the reasoning behind the choice labeled as "reasoning", and the second entry being the answer to the question containing only the letter "A", "B", "C" or "D", labeled as "answer". Try to keep your reasoning concise.'
-    question = "What is 4 + 4? A. 8 B. 9 C. 10 D. 11"
-    model_name = "gpt-3.5-turbo-1106"
-    output_tokens = 150
-    json_mode = True
-    response = query_model(
-        prompt, question, model_name, output_tokens, return_json=json_mode
-    )
-    json_response = json.loads(response.choices[0].message.content)
-    assert isinstance(json_response, dict)
-    assert json_response["answer"] == "A"
+# @pytest.fixture(scope="module")
+# def api_key():
+#     load_dotenv(dotenv_path="./.env")  # Adjust the path as needed
+#     return os.getenv("OPENAI_API_KEY")
+
+
+# @pytest.mark.API_test
+# def test_json_output(api_key):
+#     openai.api_key = api_key
+#     prompts = [
+#         "You are a brilliant math professor. Solve the following problem and put your answer after four hashtags like the following example: \nQuestion: What is 4 + 4?\nAnswer: 4 + 4 is ####8\n\n Make your response as short as possible.",
+#         "You are a foolish high-school student. Solve the following problem and put your answer after four hashtags like the following example: \nQuestion: What is 4 + 4?\nAnswer: 4 + 4 is ####8\n\n Make your response as short as possible.",
+#     ]
+
+#     model = "gpt-3.5-turbo-1106"
+
+#     print("Testing the JSON output of the query_model function")
+
+#     response = query_model(
+#         prompts[0],
+#         "What is 4 + 4?",
+#         model_name=model,
+#         output_tokens=150,
+#     )
+
+#     print(response.choices[0].message.content)
+
+#     response_dict = response_to_dict(response)
+
+#     # Check the main keys
+#     assert "id" in response_dict
+#     assert "model" in response_dict
+#     assert "object" in response_dict
+#     assert "created" in response_dict
+#     assert "system_fingerprint" in response_dict
+#     assert "choices" in response_dict
+#     assert "usage" in response_dict
+
+#     # Check the types of the main keys
+#     assert isinstance(response_dict["id"], str)
+#     assert isinstance(response_dict["model"], str)
+#     assert isinstance(response_dict["object"], str)
+#     assert isinstance(response_dict["created"], int)
+#     assert isinstance(response_dict["system_fingerprint"], str)
+#     assert isinstance(response_dict["choices"], list)
+#     assert isinstance(response_dict["usage"], dict)
+
+#     # Check the structure and types of the 'choices' key
+#     assert len(response_dict["choices"]) > 0
+#     for choice in response_dict["choices"]:
+#         assert "finish_reason" in choice
+#         assert "index" in choice
+#         assert "message" in choice
+#         assert isinstance(choice["finish_reason"], str)
+#         assert isinstance(choice["index"], int)
+#         assert isinstance(choice["message"], dict)
+#         assert "content" in choice["message"]
+#         assert "role" in choice["message"]
+#         assert isinstance(choice["message"]["content"], str)
+#         assert isinstance(choice["message"]["role"], str)
+
+#     # Check the structure and types of the 'usage' key
+#     assert "completion_tokens" in response_dict["usage"]
+#     assert "prompt_tokens" in response_dict["usage"]
+#     assert "total_tokens" in response_dict["usage"]
+#     assert isinstance(response_dict["usage"]["completion_tokens"], int)
+#     assert isinstance(response_dict["usage"]["prompt_tokens"], int)
+#     assert isinstance(response_dict["usage"]["total_tokens"], int)
+
+
+# @pytest.mark.API_test
+# def test_query_model(api_key):
+#     openai.api_key = api_key
+#     prompt = "You are a brilliant math professor. Solve the following problem and put your answer after four hashtags like the following example: \nQuestion: What is 4 + 4?\nAnswer: 4 + 4 is ####8\n\n Make your response as short as possible."
+#     question = "What is 4 + 4?"
+#     model_name = "gpt-3.5-turbo-1106"
+#     output_tokens = 150
+#     response = query_model(prompt, question, model_name, output_tokens)
+#     assert isinstance(response.choices[0].message.content, str)
+#     assert len(response.choices[0].message.content) > 0
+#     assert "8" in response.choices[0].message.content
+
+#     prompt = 'You are a brilliant math professor. Solve the following problem and return a JSON with the first entry being the reasoning behind the choice labeled as "reasoning", and the second entry being the answer to the question containing only the letter "A", "B", "C" or "D", labeled as "answer". Try to keep your reasoning concise.'
+#     question = "What is 4 + 4? A. 8 B. 9 C. 10 D. 11"
+#     model_name = "gpt-3.5-turbo-1106"
+#     output_tokens = 150
+#     json_mode = True
+#     response = query_model(
+#         prompt, question, model_name, output_tokens, return_json=json_mode
+#     )
+#     json_response = json.loads(response.choices[0].message.content)
+#     assert isinstance(json_response, dict)
+#     assert json_response["answer"] == "A"
 
 
 def test_with_commas_and_dollar_sign():