feat(reward): optimize RNFailReward and enhance tests
- Reworked the `RNFailReward` evaluation logic to check detection ranks via set membership instead of building intermediate NumPy arrays, reducing memory usage.
- Updated `RNFailReward` to assign rewards in a single pass over the prioritization, improving performance on large datasets.
- Added performance tests for `TimeRankReward` and `RNFailReward` to ensure scalability and efficiency.
- Enhanced unit test coverage for edge cases and performance scenarios.
- Adjusted benchmark settings to validate the performance improvements.
jacksonpradolima committed Dec 23, 2024
1 parent c7a1253 commit 88842a8
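
For concreteness, here is a minimal sketch of the reward semantics this commit optimizes (values are hypothetical, not from the repository): a test case earns a reward of 1.0 exactly when its 1-based position in the prioritization is one of the detection ranks.

    # Hypothetical example: detection ranks are the 1-based positions
    # in the prioritized order at which failures were detected.
    detection_ranks = {2, 4}
    prioritization = ["TestA", "TestB", "TestC", "TestD"]
    rewards = [1.0 if i + 1 in detection_ranks else 0.0
               for i, tc in enumerate(prioritization)]
    assert rewards == [0.0, 1.0, 0.0, 1.0]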
Showing 2 changed files with 71 additions and 6 deletions.
23 changes: 17 additions & 6 deletions coleman4hcs/reward.py
@@ -123,12 +123,23 @@ def get_name(self):
         return 'RNFail'
 
     def evaluate(self, reward: EvaluationMetric, last_prioritization: List[str]):
-        if not reward.detected_failures:
+        """
+        Evaluate rewards based on failures.
+        :param reward: Evaluation metric containing detection ranks and scheduled test cases.
+        :param last_prioritization: Test case names in prioritization order.
+        :return: List of rewards for each test case in the prioritization.
+        """
+        if not reward.detection_ranks:
+            # No failing test cases: all rewards are 0
             return [0.0] * len(last_prioritization)
 
-        rank_idx = np.array(reward.detection_ranks) - 1
-        rewards = np.zeros(len(reward.scheduled_testcases))
-        rewards[rank_idx] = 1
+        # Collect the 1-based detection ranks in a set for O(1) lookups
+        failing_indices = set(reward.detection_ranks)
 
-        return [rewards[reward.scheduled_testcases.index(tc)] if tc in reward.scheduled_testcases else 0.0 for tc in
-                last_prioritization]
+        # Assign rewards: 1 for failing test cases, 0 otherwise
+        rewards = [
+            1.0 if i + 1 in failing_indices else 0.0
+            for i, tc in enumerate(last_prioritization)
+        ]
+        return rewards
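
To see where the gains come from, here is a standalone sketch contrasting the former NumPy-based logic with the new single-pass version; `evaluate_old` and `evaluate_new` are hypothetical stand-ins, not the repository's API. The two versions coincide whenever the scheduled order matches the prioritization order, as in the benchmarks below.

    import numpy as np

    def evaluate_old(detection_ranks, scheduled, prioritization):
        # Former approach: dense NumPy reward vector over the scheduled
        # order, then a linear .index() lookup per prioritized test case.
        rank_idx = np.array(detection_ranks) - 1
        rewards = np.zeros(len(scheduled))
        rewards[rank_idx] = 1
        return [rewards[scheduled.index(tc)] if tc in scheduled else 0.0
                for tc in prioritization]

    def evaluate_new(detection_ranks, prioritization):
        # New approach: one pass with an O(1) set-membership check,
        # no intermediate NumPy buffers.
        failing = set(detection_ranks)
        return [1.0 if i + 1 in failing else 0.0
                for i, tc in enumerate(prioritization)]

    tests = [f"Test{i}" for i in range(1, 9)]
    assert evaluate_old([1, 4, 7], tests, tests) == evaluate_new([1, 4, 7], tests)

Dropping the per-test-case `.index()` scan (linear in the number of test cases) and the dense reward array is plausibly where the memory and performance gains described in the commit message come from.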
54 changes: 54 additions & 0 deletions coleman4hcs/tests/test_reward.py
@@ -113,3 +113,57 @@ def test_rn_fail_reward_no_failures(mock_empty_evaluation_metric, sample_prioritization):
     reward = RNFailReward()
     results = reward.evaluate(mock_empty_evaluation_metric, sample_prioritization)
     assert np.allclose(results, [0.0] * len(sample_prioritization))
+
+
+@pytest.mark.benchmark
+def test_time_rank_reward_performance(benchmark):
+    """
+    Performance test for the TimeRankReward evaluation method.
+    This test evaluates how the reward function scales on larger datasets.
+    """
+    # Mock a large dataset
+    num_testcases = 10000
+    detection_ranks = list(range(1, num_testcases + 1, 2))  # Failures at every other rank (1, 3, 5, ...)
+    scheduled_testcases = [f"Test{i}" for i in range(1, num_testcases + 1)]
+    sample_prioritization = scheduled_testcases  # Assume prioritization matches the schedule
+
+    # Mock the EvaluationMetric
+    mock_metric = MagicMock(spec=EvaluationMetric)
+    mock_metric.detection_ranks = detection_ranks
+    mock_metric.scheduled_testcases = scheduled_testcases
+
+    reward = TimeRankReward()
+
+    # Benchmark the evaluation method
+    def run_evaluation():
+        reward.evaluate(mock_metric, sample_prioritization)
+
+    benchmark(run_evaluation)
+
+
+@pytest.mark.benchmark
+def test_rn_fail_reward_performance(benchmark):
+    """
+    Performance test for the RNFailReward evaluation method.
+    This test evaluates how the reward function scales on larger datasets.
+    """
+    # Mock a large dataset
+    num_testcases = 10000
+    detection_ranks = list(range(1, num_testcases + 1, 2))  # Failures at every other rank (1, 3, 5, ...)
+    scheduled_testcases = [f"Test{i}" for i in range(1, num_testcases + 1)]
+    sample_prioritization = scheduled_testcases  # Assume prioritization matches the schedule
+
+    # Mock the EvaluationMetric
+    mock_metric = MagicMock(spec=EvaluationMetric)
+    mock_metric.detection_ranks = detection_ranks
+    mock_metric.scheduled_testcases = scheduled_testcases
+    mock_metric.detected_failures = True
+
+    reward = RNFailReward()
+
+    # Benchmark the evaluation method
+    def run_evaluation():
+        reward.evaluate(mock_metric, sample_prioritization)
+
+    benchmark(run_evaluation)
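
The `benchmark` fixture used above is provided by the pytest-benchmark plugin. Assuming it is installed, these tests can be selected by their marker with `pytest coleman4hcs/tests/test_reward.py -m benchmark`, or the run can be limited to benchmark tests with pytest-benchmark's `--benchmark-only` flag.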
