From ec323864508bfdfd1d2db52a2e72fd83858ff9d1 Mon Sep 17 00:00:00 2001 From: Anastasia Beglova Date: Tue, 14 Jan 2025 19:58:25 -0500 Subject: [PATCH] add edx block ids to content files --- frontends/api/src/generated/v0/api.ts | 6 ++ frontends/api/src/generated/v1/api.ts | 66 +++++++++++++++++++ learning_resources/etl/utils.py | 21 ++++++ learning_resources/etl/utils_test.py | 7 +- learning_resources/filters.py | 9 +++ learning_resources/filters_test.py | 27 +++++++- .../0083_contentfile_edx_block_id.py | 17 +++++ learning_resources/models.py | 1 + learning_resources/serializers.py | 2 + learning_resources/serializers_test.py | 2 + openapi/specs/v0.yaml | 4 ++ openapi/specs/v1.yaml | 31 +++++++++ 12 files changed, 188 insertions(+), 5 deletions(-) create mode 100644 learning_resources/migrations/0083_contentfile_edx_block_id.py diff --git a/frontends/api/src/generated/v0/api.ts b/frontends/api/src/generated/v0/api.ts index 3dc7b32264..1190999f03 100644 --- a/frontends/api/src/generated/v0/api.ts +++ b/frontends/api/src/generated/v0/api.ts @@ -727,6 +727,12 @@ export interface ContentFile { * @memberof ContentFile */ run_readable_id: string + /** + * + * @type {string} + * @memberof ContentFile + */ + edx_block_id?: string | null } /** diff --git a/frontends/api/src/generated/v1/api.ts b/frontends/api/src/generated/v1/api.ts index 0ed5a27cb0..810b378b72 100644 --- a/frontends/api/src/generated/v1/api.ts +++ b/frontends/api/src/generated/v1/api.ts @@ -397,6 +397,12 @@ export interface ContentFile { * @memberof ContentFile */ run_readable_id: string + /** + * + * @type {string} + * @memberof ContentFile + */ + edx_block_id?: string | null } /** @@ -8654,6 +8660,7 @@ export const ContentfilesApiAxiosParamCreator = function ( * @summary List * @param {number} learning_resource_id id of the parent learning resource * @param {Array} [content_feature_type] Multiple values may be separated by commas. 
+ * @param {Array} [edx_block_id] Multiple values may be separated by commas. * @param {number} [limit] Number of results to return per page. * @param {Array} [offered_by] The organization that offers a learning resource the content file belongs to * `mitx` - MITx * `ocw` - MIT OpenCourseWare * `bootcamps` - Bootcamps * `xpro` - MIT xPRO * `mitpe` - MIT Professional Education * `see` - MIT Sloan Executive Education * @param {number} [offset] The initial index from which to return the results. @@ -8666,6 +8673,7 @@ export const ContentfilesApiAxiosParamCreator = function ( contentfilesList: async ( learning_resource_id: number, content_feature_type?: Array, + edx_block_id?: Array, limit?: number, offered_by?: Array, offset?: number, @@ -8704,6 +8712,12 @@ export const ContentfilesApiAxiosParamCreator = function ( content_feature_type.join(COLLECTION_FORMATS.csv) } + if (edx_block_id) { + localVarQueryParameter["edx_block_id"] = edx_block_id.join( + COLLECTION_FORMATS.csv, + ) + } + if (limit !== undefined) { localVarQueryParameter["limit"] = limit } @@ -8816,6 +8830,7 @@ export const ContentfilesApiFp = function (configuration?: Configuration) { * @summary List * @param {number} learning_resource_id id of the parent learning resource * @param {Array} [content_feature_type] Multiple values may be separated by commas. + * @param {Array} [edx_block_id] Multiple values may be separated by commas. * @param {number} [limit] Number of results to return per page. * @param {Array} [offered_by] The organization that offers a learning resource the content file belongs to * `mitx` - MITx * `ocw` - MIT OpenCourseWare * `bootcamps` - Bootcamps * `xpro` - MIT xPRO * `mitpe` - MIT Professional Education * `see` - MIT Sloan Executive Education * @param {number} [offset] The initial index from which to return the results. 
@@ -8828,6 +8843,7 @@ export const ContentfilesApiFp = function (configuration?: Configuration) { async contentfilesList( learning_resource_id: number, content_feature_type?: Array, + edx_block_id?: Array, limit?: number, offered_by?: Array, offset?: number, @@ -8845,6 +8861,7 @@ export const ContentfilesApiFp = function (configuration?: Configuration) { await localVarAxiosParamCreator.contentfilesList( learning_resource_id, content_feature_type, + edx_block_id, limit, offered_by, offset, @@ -8925,6 +8942,7 @@ export const ContentfilesApiFactory = function ( .contentfilesList( requestParameters.learning_resource_id, requestParameters.content_feature_type, + requestParameters.edx_block_id, requestParameters.limit, requestParameters.offered_by, requestParameters.offset, @@ -8977,6 +8995,13 @@ export interface ContentfilesApiContentfilesListRequest { */ readonly content_feature_type?: Array + /** + * Multiple values may be separated by commas. + * @type {Array} + * @memberof ContentfilesApiContentfilesList + */ + readonly edx_block_id?: Array + /** * Number of results to return per page. * @type {number} @@ -9064,6 +9089,7 @@ export class ContentfilesApi extends BaseAPI { .contentfilesList( requestParameters.learning_resource_id, requestParameters.content_feature_type, + requestParameters.edx_block_id, requestParameters.limit, requestParameters.offered_by, requestParameters.offset, @@ -9462,6 +9488,7 @@ export const CoursesApiAxiosParamCreator = function ( * @summary Learning Resource Content File List * @param {number} learning_resource_id id of the parent learning resource * @param {Array} [content_feature_type] Multiple values may be separated by commas. + * @param {Array} [edx_block_id] Multiple values may be separated by commas. * @param {number} [limit] Number of results to return per page. 
* @param {Array} [offered_by] The organization that offers a learning resource the content file belongs to * `mitx` - MITx * `ocw` - MIT OpenCourseWare * `bootcamps` - Bootcamps * `xpro` - MIT xPRO * `mitpe` - MIT Professional Education * `see` - MIT Sloan Executive Education * @param {number} [offset] The initial index from which to return the results. @@ -9474,6 +9501,7 @@ export const CoursesApiAxiosParamCreator = function ( coursesContentfilesList: async ( learning_resource_id: number, content_feature_type?: Array, + edx_block_id?: Array, limit?: number, offered_by?: Array, offset?: number, @@ -9513,6 +9541,12 @@ export const CoursesApiAxiosParamCreator = function ( content_feature_type.join(COLLECTION_FORMATS.csv) } + if (edx_block_id) { + localVarQueryParameter["edx_block_id"] = edx_block_id.join( + COLLECTION_FORMATS.csv, + ) + } + if (limit !== undefined) { localVarQueryParameter["limit"] = limit } @@ -9816,6 +9850,7 @@ export const CoursesApiFp = function (configuration?: Configuration) { * @summary Learning Resource Content File List * @param {number} learning_resource_id id of the parent learning resource * @param {Array} [content_feature_type] Multiple values may be separated by commas. + * @param {Array} [edx_block_id] Multiple values may be separated by commas. * @param {number} [limit] Number of results to return per page. * @param {Array} [offered_by] The organization that offers a learning resource the content file belongs to * `mitx` - MITx * `ocw` - MIT OpenCourseWare * `bootcamps` - Bootcamps * `xpro` - MIT xPRO * `mitpe` - MIT Professional Education * `see` - MIT Sloan Executive Education * @param {number} [offset] The initial index from which to return the results. 
@@ -9828,6 +9863,7 @@ export const CoursesApiFp = function (configuration?: Configuration) { async coursesContentfilesList( learning_resource_id: number, content_feature_type?: Array, + edx_block_id?: Array, limit?: number, offered_by?: Array, offset?: number, @@ -9845,6 +9881,7 @@ export const CoursesApiFp = function (configuration?: Configuration) { await localVarAxiosParamCreator.coursesContentfilesList( learning_resource_id, content_feature_type, + edx_block_id, limit, offered_by, offset, @@ -10033,6 +10070,7 @@ export const CoursesApiFactory = function ( .coursesContentfilesList( requestParameters.learning_resource_id, requestParameters.content_feature_type, + requestParameters.edx_block_id, requestParameters.limit, requestParameters.offered_by, requestParameters.offset, @@ -10134,6 +10172,13 @@ export interface CoursesApiCoursesContentfilesListRequest { */ readonly content_feature_type?: Array + /** + * Multiple values may be separated by commas. + * @type {Array} + * @memberof CoursesApiCoursesContentfilesList + */ + readonly edx_block_id?: Array + /** * Number of results to return per page. * @type {number} @@ -10361,6 +10406,7 @@ export class CoursesApi extends BaseAPI { .coursesContentfilesList( requestParameters.learning_resource_id, requestParameters.content_feature_type, + requestParameters.edx_block_id, requestParameters.limit, requestParameters.offered_by, requestParameters.offset, @@ -11724,6 +11770,7 @@ export const LearningResourcesApiAxiosParamCreator = function ( * @summary Learning Resource Content File List * @param {number} learning_resource_id id of the parent learning resource * @param {Array} [content_feature_type] Multiple values may be separated by commas. + * @param {Array} [edx_block_id] Multiple values may be separated by commas. * @param {number} [limit] Number of results to return per page. 
* @param {Array} [offered_by] The organization that offers a learning resource the content file belongs to * `mitx` - MITx * `ocw` - MIT OpenCourseWare * `bootcamps` - Bootcamps * `xpro` - MIT xPRO * `mitpe` - MIT Professional Education * `see` - MIT Sloan Executive Education * @param {number} [offset] The initial index from which to return the results. @@ -11736,6 +11783,7 @@ export const LearningResourcesApiAxiosParamCreator = function ( learningResourcesContentfilesList: async ( learning_resource_id: number, content_feature_type?: Array, + edx_block_id?: Array, limit?: number, offered_by?: Array, offset?: number, @@ -11775,6 +11823,12 @@ export const LearningResourcesApiAxiosParamCreator = function ( content_feature_type.join(COLLECTION_FORMATS.csv) } + if (edx_block_id) { + localVarQueryParameter["edx_block_id"] = edx_block_id.join( + COLLECTION_FORMATS.csv, + ) + } + if (limit !== undefined) { localVarQueryParameter["limit"] = limit } @@ -12621,6 +12675,7 @@ export const LearningResourcesApiFp = function (configuration?: Configuration) { * @summary Learning Resource Content File List * @param {number} learning_resource_id id of the parent learning resource * @param {Array} [content_feature_type] Multiple values may be separated by commas. + * @param {Array} [edx_block_id] Multiple values may be separated by commas. * @param {number} [limit] Number of results to return per page. * @param {Array} [offered_by] The organization that offers a learning resource the content file belongs to * `mitx` - MITx * `ocw` - MIT OpenCourseWare * `bootcamps` - Bootcamps * `xpro` - MIT xPRO * `mitpe` - MIT Professional Education * `see` - MIT Sloan Executive Education * @param {number} [offset] The initial index from which to return the results. 
@@ -12633,6 +12688,7 @@ export const LearningResourcesApiFp = function (configuration?: Configuration) { async learningResourcesContentfilesList( learning_resource_id: number, content_feature_type?: Array, + edx_block_id?: Array, limit?: number, offered_by?: Array, offset?: number, @@ -12650,6 +12706,7 @@ export const LearningResourcesApiFp = function (configuration?: Configuration) { await localVarAxiosParamCreator.learningResourcesContentfilesList( learning_resource_id, content_feature_type, + edx_block_id, limit, offered_by, offset, @@ -13171,6 +13228,7 @@ export const LearningResourcesApiFactory = function ( .learningResourcesContentfilesList( requestParameters.learning_resource_id, requestParameters.content_feature_type, + requestParameters.edx_block_id, requestParameters.limit, requestParameters.offered_by, requestParameters.offset, @@ -13420,6 +13478,13 @@ export interface LearningResourcesApiLearningResourcesContentfilesListRequest { */ readonly content_feature_type?: Array + /** + * Multiple values may be separated by commas. + * @type {Array} + * @memberof LearningResourcesApiLearningResourcesContentfilesList + */ + readonly edx_block_id?: Array + /** * Number of results to return per page. 
* @type {number} @@ -14013,6 +14078,7 @@ export class LearningResourcesApi extends BaseAPI { .learningResourcesContentfilesList( requestParameters.learning_resource_id, requestParameters.content_feature_type, + requestParameters.edx_block_id, requestParameters.limit, requestParameters.offered_by, requestParameters.offset, diff --git a/learning_resources/etl/utils.py b/learning_resources/etl/utils.py index 504838ab12..399bfd9a75 100644 --- a/learning_resources/etl/utils.py +++ b/learning_resources/etl/utils.py @@ -325,6 +325,7 @@ def documents_from_olx( path = "/".join(root.split("/")[3:]) for filename in files: extension_lower = Path(filename).suffix.lower() + if extension_lower in VALID_TEXT_FILE_TYPES and "draft" not in root: with Path.open(Path(root, filename), "rb") as f: filebytes = f.read() @@ -345,6 +346,24 @@ def documents_from_olx( ) +def get_edx_block_id(path: str, run: LearningResourceRun) -> str: + """ + Return the XBlock ID from a path + + Args: + path (str): The path to the file + + Returns: + str: The XBlock ID + """ + name = Path(path).stem + module_type = path.split("/")[-2] + return ( + f"block-v1:{run.run_id.replace('course-v1:', '')}" + f"+type@{module_type}+block@{name}" + ) + + def text_from_srt_content(content: str): """ Remove timestamps and other extraneous data from SRT content @@ -402,6 +421,7 @@ def transform_content_files( mime_type = metadata.get("mime_type") file_extension = metadata.get("file_extension") source_path = metadata.get("source_path") + edx_block_id = get_edx_block_id(source_path, run) existing_content = ContentFile.objects.filter(key=key, run=run).first() if ( @@ -453,6 +473,7 @@ def transform_content_files( "checksum": metadata.get("checksum"), "file_extension": file_extension, "source_path": source_path, + "edx_block_id": edx_block_id, **content_dict, } ) diff --git a/learning_resources/etl/utils_test.py b/learning_resources/etl/utils_test.py index 4d91f9cdfb..a3a6acc105 100644 --- a/learning_resources/etl/utils_test.py 
+++ b/learning_resources/etl/utils_test.py @@ -180,7 +180,7 @@ def test_transform_content_files( run = LearningResourceRunFactory.create(published=True) document = "some text in the document" file_extension = ".html" - key = f"root/key{file_extension}" + key = f"key{file_extension}" content_type = "course" checksum = "7s35721d1647f962d59b8120a52210a7" metadata = {"title": "the title of the course"} if has_metadata else None @@ -206,7 +206,7 @@ def test_transform_content_files( "content_type": content_type, "checksum": checksum, "file_extension": file_extension, - "source_path": "root", + "source_path": f"root/folder/{key}", }, ) ], @@ -240,7 +240,8 @@ def test_transform_content_files( "content_type": content_type, "checksum": checksum, "file_extension": file_extension, - "source_path": "root", + "source_path": f"root/folder/{key}", + "edx_block_id": f"block-v1:{run.run_id.replace('course-v1:', '')}+type@folder+block@key", } ] else: diff --git a/learning_resources/filters.py b/learning_resources/filters.py index 38b65be0d5..f13cf4f3ad 100644 --- a/learning_resources/filters.py +++ b/learning_resources/filters.py @@ -215,6 +215,11 @@ class ContentFileFilter(FilterSet): method="filter_content_feature_type", ) + edx_block_id = CharInFilter( + label="The edx block id of the content file", + method="filter_edx_block_id", + ) + offered_by = MultipleChoiceFilter( label="The organization that offers a learning resource the content file " "belongs to", @@ -251,6 +256,10 @@ def filter_content_feature_type(self, queryset, _, value): """Content feature type filter for contentfiles""" return multi_or_filter(queryset, "content_tags__name__iexact", value) + def filter_edx_block_id(self, queryset, _, value): + """Edx block id Filter for contentfiles""" + return multi_or_filter(queryset, "edx_block_id__iexact", value) + class Meta: model = ContentFile fields = [] diff --git a/learning_resources/filters_test.py b/learning_resources/filters_test.py index 8564612a26..4b88276ad9 100644 
--- a/learning_resources/filters_test.py +++ b/learning_resources/filters_test.py @@ -89,10 +89,13 @@ def mock_content_files(): platform=LearningResourcePlatformFactory.create(code=platform), offered_by=LearningResourceOfferorFactory.create(code=offeror), ) - ) + ), + edx_block_id=f"block_{platform}" + if platform != PlatformType.ocw.name + else None, ), ) - ContentFile.objects.exclude(id__in=[cf.id for cf in content_files[:2]]).delete() + ContentFile.objects.exclude(id__in=[cf.id for cf in content_files]).delete() return content_files @@ -572,6 +575,26 @@ def test_content_file_filter_resource_id(mock_content_files, client): ) +def test_content_file_filter_edx_block_id(mock_content_files, client): + """Test that the edx_block_id filter works for contentfiles""" + assert mock_content_files[0].edx_block_id is None + assert mock_content_files[1].edx_block_id == "block_xpro" + assert mock_content_files[2].edx_block_id == "block_mitxonline" + + results = client.get(f"{CONTENT_API_URL}?edx_block_id=block_xpro").json()["results"] + assert len(results) == 1 + assert results[0]["edx_block_id"] == "block_xpro" + + results = client.get( + f"{CONTENT_API_URL}?edx_block_id=block_mitxonline&edx_block_id=block_xpro" + ).json()["results"] + assert len(results) == 2 + assert sorted([result["edx_block_id"] for result in results]) == [ + "block_mitxonline", + "block_xpro", + ] + + def test_content_file_filter_platform(mock_content_files, client): """Test that the platform filter works""" diff --git a/learning_resources/migrations/0083_contentfile_edx_block_id.py b/learning_resources/migrations/0083_contentfile_edx_block_id.py new file mode 100644 index 0000000000..caba0623be --- /dev/null +++ b/learning_resources/migrations/0083_contentfile_edx_block_id.py @@ -0,0 +1,17 @@ +# Generated by Django 4.2.17 on 2025-01-14 18:46 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("learning_resources",
"0082_alter_podcastepisode_audio_url"), + ] + + operations = [ + migrations.AddField( + model_name="contentfile", + name="edx_block_id", + field=models.CharField(blank=True, max_length=1024, null=True), + ), + ] diff --git a/learning_resources/models.py b/learning_resources/models.py index 28af96a1f9..cc4878d522 100644 --- a/learning_resources/models.py +++ b/learning_resources/models.py @@ -881,6 +881,7 @@ class ContentFile(TimestampedModel): checksum = models.CharField(max_length=32, null=True, blank=True) # noqa: DJ001 source_path = models.CharField(max_length=1024, null=True, blank=True) # noqa: DJ001 file_extension = models.CharField(max_length=32, null=True, blank=True) # noqa: DJ001 + edx_block_id = models.CharField(max_length=1024, null=True, blank=True) # noqa: DJ001 class Meta: unique_together = (("key", "run"),) diff --git a/learning_resources/serializers.py b/learning_resources/serializers.py index 01f3a23786..bc4aa05713 100644 --- a/learning_resources/serializers.py +++ b/learning_resources/serializers.py @@ -819,6 +819,8 @@ class Meta: "offered_by", "platform", "run_readable_id", + "file_extension", + "edx_block_id", ] diff --git a/learning_resources/serializers_test.py b/learning_resources/serializers_test.py index 442cbad8ef..ae6ef88638 100644 --- a/learning_resources/serializers_test.py +++ b/learning_resources/serializers_test.py @@ -475,6 +475,7 @@ def test_content_file_serializer(settings, expected_types, has_channels): "content_author": "MIT", "content_language": "en", "content_title": "test title", + "edx_block_id": "edx_block_id", } platform = PlatformType.ocw.name course = factories.CourseFactory.create(platform=platform) @@ -573,6 +574,7 @@ def test_content_file_serializer(settings, expected_types, has_channels): "content_feature_type": sorted( [tag.name for tag in content_file.content_tags.all()] ), + "edx_block_id": content_file.edx_block_id, }, ) diff --git a/openapi/specs/v0.yaml b/openapi/specs/v0.yaml index 9f7e004d7f..1cb7224969 
100644 --- a/openapi/specs/v0.yaml +++ b/openapi/specs/v0.yaml @@ -1858,6 +1858,10 @@ components: $ref: '#/components/schemas/LearningResourcePlatform' run_readable_id: type: string + edx_block_id: + type: string + nullable: true + maxLength: 1024 required: - content_feature_type - course_number diff --git a/openapi/specs/v1.yaml b/openapi/specs/v1.yaml index e3da5f08c7..f7cf0ee287 100644 --- a/openapi/specs/v1.yaml +++ b/openapi/specs/v1.yaml @@ -335,6 +335,15 @@ paths: description: Multiple values may be separated by commas. explode: false style: form + - in: query + name: edx_block_id + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: path name: learning_resource_id schema: @@ -947,6 +956,15 @@ paths: description: Multiple values may be separated by commas. explode: false style: form + - in: query + name: edx_block_id + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: path name: learning_resource_id schema: @@ -2740,6 +2758,15 @@ paths: description: Multiple values may be separated by commas. explode: false style: form + - in: query + name: edx_block_id + schema: + type: array + items: + type: string + description: Multiple values may be separated by commas. + explode: false + style: form - in: path name: learning_resource_id schema: @@ -8609,6 +8636,10 @@ components: $ref: '#/components/schemas/LearningResourcePlatform' run_readable_id: type: string + edx_block_id: + type: string + nullable: true + maxLength: 1024 required: - content_feature_type - course_number