Skip to content

Commit

Permalink
feat: TNL-11812 test file cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
Bernard Szabo committed Jan 27, 2025
1 parent de1b99f commit af93867
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 100 deletions.
14 changes: 0 additions & 14 deletions cms/djangoapps/contentstore/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1315,20 +1315,6 @@ def _check_broken_links(task_instance, user_id, course_key_string, language):
task_instance.status.fail({'raw_error_msg': str(e)})
return

def _record_broken_links(task, broken_or_locked_urls, course_key):
# Save to temp file
file_name = str(course_key)
broken_links_file = NamedTemporaryFile(prefix=file_name + '.', suffix='.json')
LOGGER.debug(f'[Link Check] json file being generated at {broken_links_file.name}')
with open(broken_links_file.name, 'w') as file:
json.dump(broken_or_locked_urls, file, indent=4)

# Now upload temp file to permanent storage
artifact = UserTaskArtifact(status=task.status, name='BrokenLinks')
artifact.file.save(name=os.path.basename(broken_links_file.name), content=File(broken_links_file))
artifact.save()
return artifact.file.name

@shared_task(base=CourseLinkCheckTask, bind=True)
def check_broken_links(self, user_id, course_key_string, language):
"""
Expand Down
137 changes: 51 additions & 86 deletions cms/djangoapps/contentstore/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ def _assert_failed(self, task_result, error_message):
self.assertEqual(error.name, 'Error')
self.assertEqual(error.text, error_message)


@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE)
class ExportLibraryTestCase(LibraryTestCase):
"""
Expand Down Expand Up @@ -226,6 +225,7 @@ class MockCourseLinkCheckTask(Task):
def __init__(self):
self.status = mock.Mock()

############## Course Optimizer tests ##############

class CheckBrokenLinksTaskTest(ModuleStoreTestCase):
def setUp(self):
Expand Down Expand Up @@ -255,11 +255,14 @@ def test_check_broken_links_stores_broken_and_locked_urls(
mock_user_task_artifact
):
'''
The test should verify that the check_broken_links task correctly
identifies and stores broken or locked URLs in the course.
The expected behavior is that the task scans the course,
validates the URLs, filters the results, and stores them in a
The test verifies that the check_broken_links task correctly
stores broken or locked URLs in the course.
The expected behavior is that the after scanning the course,
validating the URLs, and filtering the results, the task stores the results in a
JSON file.
Note that this test mocks all validation functions and therefore
does not test link validation or any of its support functions.
'''
# Arrange
mock_user = UserFactory.create(username='student', password='password')
Expand Down Expand Up @@ -289,21 +292,24 @@ def test_no_course_access_raises_exception(self):
assert True

def test_hash_tags_stripped_from_url_lists(self):
NUM_HASH_TAG_LINES = 2
url_list = '''
href='#'
href='#' # 1 of 2 lines that will be stripped
href='http://google.com'
src='#'
src='#' # 2 of 2 lines that will be stripped
href='https://microsoft.com'
src="/static/resource_name"
'''

original_lines = len(url_list.splitlines()) - 2 # Correct for the two carriage returns surrounding the ''' marks
processed_url_list = _get_urls(url_list)
pprint.pp(processed_url_list)
processed_lines = len(processed_url_list) # regex returns a list
original_lines = 5
assert processed_lines == original_lines-2, \
processed_lines = len(processed_url_list)

assert processed_lines == original_lines - NUM_HASH_TAG_LINES, \
f'Processed URL list lines = {processed_lines}; expected {original_lines - 2}'

# TODO - Document here what counts as a legitimate URL & modify test accordingly
@pytest.mark.skip(reason="Valid URL format to be nailed down")
def test_src_and_href_urls_extracted(self):
FIRST_URL = 'http://google.com'
SECOND_URL = 'https://microsoft.com'
Expand Down Expand Up @@ -352,20 +358,8 @@ def test_container_url_without_url_base_is_recognized_as_studio_url_scheme(self)
def test_slash_url_without_url_base_is_recognized_as_studio_url_scheme(self):
self.assertTrue(_is_studio_url(f'/static/test'))

@pytest.mark.parametrize("url, course_key, post_substitution_url",
[("/static/anything_goes_here?raw", "1", "2")])
def test_url_substitution_on_static_prefixes(self, url, course_key, post_substitution_url):
substitution_result = _convert_to_standard_url(url, course_key)
assert substitution_result == post_substitution_url, \
f'{substitution_result} expected to be {post_substitution_url}'

@pytest.mark.skip(reason="This test is not yet implemented")
def test_url_substitution_on_forward_slash_prefixes(self):
assert True

@pytest.mark.skip(reason="This test is not yet implemented")
def test_url_subsitution_on_containers(self):
assert True
# TODO
# Need additional negative tests on _is_studio_url would be appropriate

@mock.patch('cms.djangoapps.contentstore.tasks.ModuleStoreEnum', autospec=True)
@mock.patch('cms.djangoapps.contentstore.tasks.modulestore', autospec=True)
Expand All @@ -388,69 +382,62 @@ def test_course_scan_occurs_on_published_version(self, mock_modulestore, mock_mo

@mock.patch('cms.djangoapps.contentstore.tasks._get_urls', autospec=True)
def test_number_of_scanned_blocks_equals_blocks_in_course(self, mock_get_urls):
"""_scan_course_for_links should call _get_urls once per block in course"""
mock_get_urls = mock.Mock()
result = self.store.get_items(self.test_course.id)
"""
_scan_course_for_links should call _get_urls once per block in course.
TODO - verify that the created course actually has blocks. This test not meaningful if it doesn't
"""
expected_blocks = self.store.get_items(self.test_course.id)

_scan_course_for_links(self.test_course.id)
self.assertEqual(len(result), mock_get_urls.call_count)

@pytest.mark.skip(reason="This test is not yet implemented")
def test_every_detected_link_is_validated(self):
assert True

@pytest.mark.skip(reason="This test is not yet implemented")
def test_number_of_scanned_blocks_equals_blocks_in_course(self):
assert True
self.assertEqual(len(expected_blocks), mock_get_urls.call_count)

@pytest.mark.asyncio
async def test_every_detected_link_is_validated(self):
logging.info("******** In test_every_detected_link_is_validated *******")
'''
The call to _validate_urls_access_in_batches() should call _validate_batch() three times, once for each
of the three batches of length 2 in url_list. The lambda function supplied for _validate_batch will
simply return the set of urls fed to _validate_batch(), and _validate_urls_access_in_batches() will
aggregate these into a list identical to the original url_list.
What this shows is that each url submitted to _validate_urls_access_in_batches() is ending up as an argument
to one of the generated _validate_batch() calls, and that no input URL is left unprocessed.
'''
url_list = ['1', '2', '3', '4', '5']
course_key = 'course-v1:edX+DemoX+Demo_Course'
batch_size = 2
with patch("cms.djangoapps.contentstore.tasks._validate_batch", new_callable=AsyncMock) as mock_validate_batch:
mock_validate_batch.side_effect = lambda x, y: x
validated_urls = await _validate_urls_access_in_batches(url_list, course_key, batch_size)
pprint.pp(validated_urls)
pprint.pp(url_list)
assert validated_urls == url_list, \
f"List of validated urls {validated_urls} is not identical to sourced urls {url_list}"

@pytest.mark.asyncio
async def test_link_validation_is_batched(self):
logging.info("******** In test_link_validation_is_batched *******")
with patch("cms.djangoapps.contentstore.tasks._validate_batch", new_callable=AsyncMock) as mock_validate_batch:
mock_validate_batch.return_value = {"status": 200}

url_list = ['1', '2', '3', '4', '5']
course_key = 'course-v1:edX+DemoX+Demo_Course'
batch_size=2
results = await _validate_urls_access_in_batches(url_list, course_key, batch_size)
print(" ***** results = ******")
pprint.pp(results)
mock_validate_batch.assert_called()
assert mock_validate_batch.call_count == 3 # two full batches and one partial batch
assert validated_urls == url_list, \
f"List of validated urls {validated_urls} is not identical to sourced urls {url_list}"

@pytest.mark.asyncio
async def test_all_links_are_validated_with_batch_validation(self):
logging.info("******** In test_all_links_are_validated_with_batch_validation *******")
'''
Here the focus is not on batching, but rather that when validation occurs it does so on the intended
URL strings
'''
with patch("cms.djangoapps.contentstore.tasks._validate_url_access", new_callable=AsyncMock) as mock_validate:
mock_validate.return_value = {"status": 200}

url_list = ['1', '2', '3', '4', '5']
course_key = 'course-v1:edX+DemoX+Demo_Course'
batch_size=2
results = await _validate_urls_access_in_batches(url_list, course_key, batch_size)
print(" ***** results = ******")
pprint.pp(results)
await _validate_urls_access_in_batches(url_list, course_key, batch_size)
args_list = mock_validate.call_args_list
urls = [call_args.args[1] for call_args in args_list] # The middle argument in each of the function calls
for i in range(1,len(url_list)+1):
assert str(i) in urls, f'{i} not supplied as a url for validation in batches function'

def test_no_retries_on_403_access_denied_links(self):
logging.info("******** In test_no_retries_on_403_access_denied_links *******")
'''
No mocking required here. Will populate "filtering_input" with simulated results for link checks where
some links time out, some links receive 403 errors, and some receive 200 success. This test then
ensures that "_filter_by_status()" tallies the three categories as expected, and formats the result
as expected.
'''
url_list = ['1', '2', '3', '4', '5']
filtering_input = []
for i in range(1, len(url_list)+1): # Notch out one of the URLs, having it return a '403' status code
Expand All @@ -464,26 +451,13 @@ def test_no_retries_on_403_access_denied_links(self):
filtering_input[4]['status'] = None

broken_or_locked_urls, retry_list = _filter_by_status(filtering_input)
print(" ***** broken_or_locked_urls = ******")
pprint.pp(broken_or_locked_urls)
assert len(broken_or_locked_urls) == 2 # The inputs with status = 403 and 500
assert len(retry_list) == 1 # The input with status = None
assert retry_list[0][1] == '5' # The only URL fit for a retry operation (status == None)

# TODO - test retry logic

# @pytest.mark.asyncio
# def test_retries_attempted_on_connection_errors(self):
# logging.info("******** In test_retries_attempted_on_connection_errors *******")
# with patch("cms.djangoapps.contentstore.tasks._validate_urls_access_in_batches",
# new_callable=AsyncMock) as mock_validate:
# mock_validate.return_value = [], [['block_1', '1']]
# with patch("cms.djangoapps.contentstore.tasks._retry_validation",
# new_callable=AsyncMock) as mock_retry:
# mock_retry.return_value = [['block_1', '1']]
# check_broken_links('user_id', 'course_key_string', 'language')
# assert mock_retry.call_count == 0, \
# f'_retry_validation() called {mock_retry.call_count} times; expected 0'

@pytest.mark.skip(reason="Failing but needs review -- test not yet correct")
@pytest.mark.asyncio
async def test_max_number_of_retries_is_respected(self):
logging.info("******** In test_max_number_of_retries_is_respected *******")
Expand Down Expand Up @@ -511,12 +485,6 @@ async def test_max_number_of_retries_is_respected(self):
assert mock_retry_validation.call_count == MAX_RETRIES, \
f'Got {mock_retry_validation.call_count} retries; expected {MAX_RETRIES}'

# def test_scan_generates_file_named_by_course_key(self, tmp_path):
# course_key = 'course-v1:edX+DemoX+Demo_Course'
# filename = _record_broken_links("", course_key)
# expected_filename = ""
# assert filename == "", f'Got f{filename} as broken links filename; expected {expected_filename}'

@patch("cms.djangoapps.contentstore.tasks._validate_user", return_value=MagicMock())
@patch("cms.djangoapps.contentstore.tasks._scan_course_for_links", return_value=["url1", "url2"])
@patch("cms.djangoapps.contentstore.tasks._validate_urls_access_in_batches",
Expand All @@ -525,9 +493,7 @@ async def test_max_number_of_retries_is_respected(self):
return_value=(["block_1", "url1", True], ["block_2", "url2"]))
@patch("cms.djangoapps.contentstore.tasks._retry_validation",
return_value=['block_2', 'url2'])
@patch("cms.djangoapps.contentstore.tasks._record_broken_links")
def test_broken_links(self,
mock_record_broken_links,
def test_check_broken_links_calls_expected_support_functions(self,
mock_retry_validation,
mock_filter,
mock_validate_urls,
Expand All @@ -538,7 +504,6 @@ def test_broken_links(self,
language = "en"
course_key_string = "course-v1:edX+DemoX+2025"


# Mocking self and status attributes for the test
class MockStatus:
def __init__(self):
Expand Down Expand Up @@ -590,6 +555,6 @@ def __init__(self):
mock_filter.assert_called_once_with(validated_url_list)
if retry_list:
mock_retry_validation.assert_called_once_with(retry_list, course_key, retry_count=3)
mock_record_broken_links.assert_called_once()



0 comments on commit af93867

Please sign in to comment.