diff --git a/coleman4hcs/reward.py b/coleman4hcs/reward.py
index 4d313c5..b510e8f 100644
--- a/coleman4hcs/reward.py
+++ b/coleman4hcs/reward.py
@@ -123,12 +123,23 @@ def get_name(self):
         return 'RNFail'
 
     def evaluate(self, reward: EvaluationMetric, last_prioritization: List[str]):
-        if not reward.detected_failures:
+        """
+        Assign a binary failure-based reward to each prioritized test case.
+
+        :param reward: Evaluation metric containing detection ranks and scheduled test cases.
+        :param last_prioritization: Test case names in prioritization order.
+        :return: List of rewards, one for each test case in the prioritization.
+        """
+        if not reward.detection_ranks:
+            # No failing test cases: all rewards are 0
             return [0.0] * len(last_prioritization)
 
-        rank_idx = np.array(reward.detection_ranks) - 1
-        rewards = np.zeros(len(reward.scheduled_testcases))
-        rewards[rank_idx] = 1
+        # Detection ranks are the 1-based positions of the failing test cases
+        failing_ranks = set(reward.detection_ranks)
 
-        return [rewards[reward.scheduled_testcases.index(tc)] if tc in reward.scheduled_testcases else 0.0 for tc in
-                last_prioritization]
+        # Assign rewards: 1.0 for failing test cases, 0.0 otherwise
+        rewards = [
+            1.0 if i + 1 in failing_ranks else 0.0
+            for i, _ in enumerate(last_prioritization)
+        ]
+        return rewards
diff --git a/coleman4hcs/tests/test_reward.py b/coleman4hcs/tests/test_reward.py
index 7ea49d8..306feaa 100644
--- a/coleman4hcs/tests/test_reward.py
+++ b/coleman4hcs/tests/test_reward.py
@@ -113,3 +113,57 @@ def test_rn_fail_reward_no_failures(mock_empty_evaluation_metric, sample_priorit
     reward = RNFailReward()
     results = reward.evaluate(mock_empty_evaluation_metric, sample_prioritization)
     assert np.allclose(results, [0.0] * len(sample_prioritization))
+
+
+@pytest.mark.benchmark
+def test_time_rank_reward_performance(benchmark):
+    """
+    Performance test for the TimeRankReward evaluation method.
+    It measures how the reward function scales on a larger dataset.
+    """
+    # Mock a large dataset
+    num_testcases = 10000
+    detection_ranks = list(range(1, num_testcases + 1, 2))  # Failures at odd (1-based) ranks
+    scheduled_testcases = [f"Test{i}" for i in range(1, num_testcases + 1)]
+    sample_prioritization = scheduled_testcases  # Assume the prioritization matches the scheduled order
+
+    # Mock the EvaluationMetric
+    mock_metric = MagicMock(spec=EvaluationMetric)
+    mock_metric.detection_ranks = detection_ranks
+    mock_metric.scheduled_testcases = scheduled_testcases
+
+    reward = TimeRankReward()
+
+    # Benchmark the evaluation method
+    def run_evaluation():
+        reward.evaluate(mock_metric, sample_prioritization)
+
+    benchmark(run_evaluation)
+
+
+@pytest.mark.benchmark
+def test_rn_fail_reward_performance(benchmark):
+    """
+    Performance test for the RNFailReward evaluation method.
+    It measures how the reward function scales on a larger dataset.
+    """
+    # Mock a large dataset
+    num_testcases = 10000
+    detection_ranks = list(range(1, num_testcases + 1, 2))  # Failures at odd (1-based) ranks
+    scheduled_testcases = [f"Test{i}" for i in range(1, num_testcases + 1)]
+    sample_prioritization = scheduled_testcases  # Assume the prioritization matches the scheduled order
+
+    # Mock the EvaluationMetric
+    mock_metric = MagicMock(spec=EvaluationMetric)
+    mock_metric.detection_ranks = detection_ranks
+    mock_metric.scheduled_testcases = scheduled_testcases
+    mock_metric.detected_failures = True
+
+    reward = RNFailReward()
+
+    # Benchmark the evaluation method
+    def run_evaluation():
+        reward.evaluate(mock_metric, sample_prioritization)
+
+    benchmark(run_evaluation)
+
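For illustration only (not part of the patch), a minimal sketch of how the rewritten `RNFailReward.evaluate` assigns rewards on a toy cycle. The `SimpleNamespace` stand-in for `EvaluationMetric`, the test-case names, and the expected output are assumptions for this example; the method itself only reads `detection_ranks` from the metric.

```python
from types import SimpleNamespace

from coleman4hcs.reward import RNFailReward

# Hypothetical toy cycle: three test cases, failures detected at ranks 1 and 3.
# SimpleNamespace stands in for EvaluationMetric; only the attributes that
# evaluate() may read are provided here.
metric = SimpleNamespace(
    detection_ranks=[1, 3],
    scheduled_testcases=["TestA", "TestB", "TestC"],
)

prioritization = ["TestA", "TestB", "TestC"]

print(RNFailReward().evaluate(metric, prioritization))
# Expected with the new implementation: [1.0, 0.0, 1.0]
```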