Skip to content

Commit

Permalink
add edx block ids to content files
Browse files Browse the repository at this point in the history
  • Loading branch information
abeglova committed Jan 28, 2025
1 parent 407ee1b commit ec32386
Show file tree
Hide file tree
Showing 12 changed files with 188 additions and 5 deletions.
6 changes: 6 additions & 0 deletions frontends/api/src/generated/v0/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,12 @@ export interface ContentFile {
* @memberof ContentFile
*/
run_readable_id: string
/**
*
* @type {string}
* @memberof ContentFile
*/
edx_block_id?: string | null
}

/**
Expand Down
66 changes: 66 additions & 0 deletions frontends/api/src/generated/v1/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,12 @@ export interface ContentFile {
* @memberof ContentFile
*/
run_readable_id: string
/**
*
* @type {string}
* @memberof ContentFile
*/
edx_block_id?: string | null
}

/**
Expand Down Expand Up @@ -8654,6 +8660,7 @@ export const ContentfilesApiAxiosParamCreator = function (
* @summary List
* @param {number} learning_resource_id id of the parent learning resource
* @param {Array<string>} [content_feature_type] Multiple values may be separated by commas.
* @param {Array<string>} [edx_block_id] Multiple values may be separated by commas.
* @param {number} [limit] Number of results to return per page.
* @param {Array<ContentfilesListOfferedByEnum>} [offered_by] The organization that offers a learning resource the content file belongs to * &#x60;mitx&#x60; - MITx * &#x60;ocw&#x60; - MIT OpenCourseWare * &#x60;bootcamps&#x60; - Bootcamps * &#x60;xpro&#x60; - MIT xPRO * &#x60;mitpe&#x60; - MIT Professional Education * &#x60;see&#x60; - MIT Sloan Executive Education
* @param {number} [offset] The initial index from which to return the results.
Expand All @@ -8666,6 +8673,7 @@ export const ContentfilesApiAxiosParamCreator = function (
contentfilesList: async (
learning_resource_id: number,
content_feature_type?: Array<string>,
edx_block_id?: Array<string>,
limit?: number,
offered_by?: Array<ContentfilesListOfferedByEnum>,
offset?: number,
Expand Down Expand Up @@ -8704,6 +8712,12 @@ export const ContentfilesApiAxiosParamCreator = function (
content_feature_type.join(COLLECTION_FORMATS.csv)
}

if (edx_block_id) {
localVarQueryParameter["edx_block_id"] = edx_block_id.join(
COLLECTION_FORMATS.csv,
)
}

if (limit !== undefined) {
localVarQueryParameter["limit"] = limit
}
Expand Down Expand Up @@ -8816,6 +8830,7 @@ export const ContentfilesApiFp = function (configuration?: Configuration) {
* @summary List
* @param {number} learning_resource_id id of the parent learning resource
* @param {Array<string>} [content_feature_type] Multiple values may be separated by commas.
* @param {Array<string>} [edx_block_id] Multiple values may be separated by commas.
* @param {number} [limit] Number of results to return per page.
* @param {Array<ContentfilesListOfferedByEnum>} [offered_by] The organization that offers a learning resource the content file belongs to * &#x60;mitx&#x60; - MITx * &#x60;ocw&#x60; - MIT OpenCourseWare * &#x60;bootcamps&#x60; - Bootcamps * &#x60;xpro&#x60; - MIT xPRO * &#x60;mitpe&#x60; - MIT Professional Education * &#x60;see&#x60; - MIT Sloan Executive Education
* @param {number} [offset] The initial index from which to return the results.
Expand All @@ -8828,6 +8843,7 @@ export const ContentfilesApiFp = function (configuration?: Configuration) {
async contentfilesList(
learning_resource_id: number,
content_feature_type?: Array<string>,
edx_block_id?: Array<string>,
limit?: number,
offered_by?: Array<ContentfilesListOfferedByEnum>,
offset?: number,
Expand All @@ -8845,6 +8861,7 @@ export const ContentfilesApiFp = function (configuration?: Configuration) {
await localVarAxiosParamCreator.contentfilesList(
learning_resource_id,
content_feature_type,
edx_block_id,
limit,
offered_by,
offset,
Expand Down Expand Up @@ -8925,6 +8942,7 @@ export const ContentfilesApiFactory = function (
.contentfilesList(
requestParameters.learning_resource_id,
requestParameters.content_feature_type,
requestParameters.edx_block_id,
requestParameters.limit,
requestParameters.offered_by,
requestParameters.offset,
Expand Down Expand Up @@ -8977,6 +8995,13 @@ export interface ContentfilesApiContentfilesListRequest {
*/
readonly content_feature_type?: Array<string>

/**
* Multiple values may be separated by commas.
* @type {Array<string>}
* @memberof ContentfilesApiContentfilesList
*/
readonly edx_block_id?: Array<string>

/**
* Number of results to return per page.
* @type {number}
Expand Down Expand Up @@ -9064,6 +9089,7 @@ export class ContentfilesApi extends BaseAPI {
.contentfilesList(
requestParameters.learning_resource_id,
requestParameters.content_feature_type,
requestParameters.edx_block_id,
requestParameters.limit,
requestParameters.offered_by,
requestParameters.offset,
Expand Down Expand Up @@ -9462,6 +9488,7 @@ export const CoursesApiAxiosParamCreator = function (
* @summary Learning Resource Content File List
* @param {number} learning_resource_id id of the parent learning resource
* @param {Array<string>} [content_feature_type] Multiple values may be separated by commas.
* @param {Array<string>} [edx_block_id] Multiple values may be separated by commas.
* @param {number} [limit] Number of results to return per page.
* @param {Array<CoursesContentfilesListOfferedByEnum>} [offered_by] The organization that offers a learning resource the content file belongs to * &#x60;mitx&#x60; - MITx * &#x60;ocw&#x60; - MIT OpenCourseWare * &#x60;bootcamps&#x60; - Bootcamps * &#x60;xpro&#x60; - MIT xPRO * &#x60;mitpe&#x60; - MIT Professional Education * &#x60;see&#x60; - MIT Sloan Executive Education
* @param {number} [offset] The initial index from which to return the results.
Expand All @@ -9474,6 +9501,7 @@ export const CoursesApiAxiosParamCreator = function (
coursesContentfilesList: async (
learning_resource_id: number,
content_feature_type?: Array<string>,
edx_block_id?: Array<string>,
limit?: number,
offered_by?: Array<CoursesContentfilesListOfferedByEnum>,
offset?: number,
Expand Down Expand Up @@ -9513,6 +9541,12 @@ export const CoursesApiAxiosParamCreator = function (
content_feature_type.join(COLLECTION_FORMATS.csv)
}

if (edx_block_id) {
localVarQueryParameter["edx_block_id"] = edx_block_id.join(
COLLECTION_FORMATS.csv,
)
}

if (limit !== undefined) {
localVarQueryParameter["limit"] = limit
}
Expand Down Expand Up @@ -9816,6 +9850,7 @@ export const CoursesApiFp = function (configuration?: Configuration) {
* @summary Learning Resource Content File List
* @param {number} learning_resource_id id of the parent learning resource
* @param {Array<string>} [content_feature_type] Multiple values may be separated by commas.
* @param {Array<string>} [edx_block_id] Multiple values may be separated by commas.
* @param {number} [limit] Number of results to return per page.
* @param {Array<CoursesContentfilesListOfferedByEnum>} [offered_by] The organization that offers a learning resource the content file belongs to * &#x60;mitx&#x60; - MITx * &#x60;ocw&#x60; - MIT OpenCourseWare * &#x60;bootcamps&#x60; - Bootcamps * &#x60;xpro&#x60; - MIT xPRO * &#x60;mitpe&#x60; - MIT Professional Education * &#x60;see&#x60; - MIT Sloan Executive Education
* @param {number} [offset] The initial index from which to return the results.
Expand All @@ -9828,6 +9863,7 @@ export const CoursesApiFp = function (configuration?: Configuration) {
async coursesContentfilesList(
learning_resource_id: number,
content_feature_type?: Array<string>,
edx_block_id?: Array<string>,
limit?: number,
offered_by?: Array<CoursesContentfilesListOfferedByEnum>,
offset?: number,
Expand All @@ -9845,6 +9881,7 @@ export const CoursesApiFp = function (configuration?: Configuration) {
await localVarAxiosParamCreator.coursesContentfilesList(
learning_resource_id,
content_feature_type,
edx_block_id,
limit,
offered_by,
offset,
Expand Down Expand Up @@ -10033,6 +10070,7 @@ export const CoursesApiFactory = function (
.coursesContentfilesList(
requestParameters.learning_resource_id,
requestParameters.content_feature_type,
requestParameters.edx_block_id,
requestParameters.limit,
requestParameters.offered_by,
requestParameters.offset,
Expand Down Expand Up @@ -10134,6 +10172,13 @@ export interface CoursesApiCoursesContentfilesListRequest {
*/
readonly content_feature_type?: Array<string>

/**
* Multiple values may be separated by commas.
* @type {Array<string>}
* @memberof CoursesApiCoursesContentfilesList
*/
readonly edx_block_id?: Array<string>

/**
* Number of results to return per page.
* @type {number}
Expand Down Expand Up @@ -10361,6 +10406,7 @@ export class CoursesApi extends BaseAPI {
.coursesContentfilesList(
requestParameters.learning_resource_id,
requestParameters.content_feature_type,
requestParameters.edx_block_id,
requestParameters.limit,
requestParameters.offered_by,
requestParameters.offset,
Expand Down Expand Up @@ -11724,6 +11770,7 @@ export const LearningResourcesApiAxiosParamCreator = function (
* @summary Learning Resource Content File List
* @param {number} learning_resource_id id of the parent learning resource
* @param {Array<string>} [content_feature_type] Multiple values may be separated by commas.
* @param {Array<string>} [edx_block_id] Multiple values may be separated by commas.
* @param {number} [limit] Number of results to return per page.
* @param {Array<LearningResourcesContentfilesListOfferedByEnum>} [offered_by] The organization that offers a learning resource the content file belongs to * &#x60;mitx&#x60; - MITx * &#x60;ocw&#x60; - MIT OpenCourseWare * &#x60;bootcamps&#x60; - Bootcamps * &#x60;xpro&#x60; - MIT xPRO * &#x60;mitpe&#x60; - MIT Professional Education * &#x60;see&#x60; - MIT Sloan Executive Education
* @param {number} [offset] The initial index from which to return the results.
Expand All @@ -11736,6 +11783,7 @@ export const LearningResourcesApiAxiosParamCreator = function (
learningResourcesContentfilesList: async (
learning_resource_id: number,
content_feature_type?: Array<string>,
edx_block_id?: Array<string>,
limit?: number,
offered_by?: Array<LearningResourcesContentfilesListOfferedByEnum>,
offset?: number,
Expand Down Expand Up @@ -11775,6 +11823,12 @@ export const LearningResourcesApiAxiosParamCreator = function (
content_feature_type.join(COLLECTION_FORMATS.csv)
}

if (edx_block_id) {
localVarQueryParameter["edx_block_id"] = edx_block_id.join(
COLLECTION_FORMATS.csv,
)
}

if (limit !== undefined) {
localVarQueryParameter["limit"] = limit
}
Expand Down Expand Up @@ -12621,6 +12675,7 @@ export const LearningResourcesApiFp = function (configuration?: Configuration) {
* @summary Learning Resource Content File List
* @param {number} learning_resource_id id of the parent learning resource
* @param {Array<string>} [content_feature_type] Multiple values may be separated by commas.
* @param {Array<string>} [edx_block_id] Multiple values may be separated by commas.
* @param {number} [limit] Number of results to return per page.
* @param {Array<LearningResourcesContentfilesListOfferedByEnum>} [offered_by] The organization that offers a learning resource the content file belongs to * &#x60;mitx&#x60; - MITx * &#x60;ocw&#x60; - MIT OpenCourseWare * &#x60;bootcamps&#x60; - Bootcamps * &#x60;xpro&#x60; - MIT xPRO * &#x60;mitpe&#x60; - MIT Professional Education * &#x60;see&#x60; - MIT Sloan Executive Education
* @param {number} [offset] The initial index from which to return the results.
Expand All @@ -12633,6 +12688,7 @@ export const LearningResourcesApiFp = function (configuration?: Configuration) {
async learningResourcesContentfilesList(
learning_resource_id: number,
content_feature_type?: Array<string>,
edx_block_id?: Array<string>,
limit?: number,
offered_by?: Array<LearningResourcesContentfilesListOfferedByEnum>,
offset?: number,
Expand All @@ -12650,6 +12706,7 @@ export const LearningResourcesApiFp = function (configuration?: Configuration) {
await localVarAxiosParamCreator.learningResourcesContentfilesList(
learning_resource_id,
content_feature_type,
edx_block_id,
limit,
offered_by,
offset,
Expand Down Expand Up @@ -13171,6 +13228,7 @@ export const LearningResourcesApiFactory = function (
.learningResourcesContentfilesList(
requestParameters.learning_resource_id,
requestParameters.content_feature_type,
requestParameters.edx_block_id,
requestParameters.limit,
requestParameters.offered_by,
requestParameters.offset,
Expand Down Expand Up @@ -13420,6 +13478,13 @@ export interface LearningResourcesApiLearningResourcesContentfilesListRequest {
*/
readonly content_feature_type?: Array<string>

/**
* Multiple values may be separated by commas.
* @type {Array<string>}
* @memberof LearningResourcesApiLearningResourcesContentfilesList
*/
readonly edx_block_id?: Array<string>

/**
* Number of results to return per page.
* @type {number}
Expand Down Expand Up @@ -14013,6 +14078,7 @@ export class LearningResourcesApi extends BaseAPI {
.learningResourcesContentfilesList(
requestParameters.learning_resource_id,
requestParameters.content_feature_type,
requestParameters.edx_block_id,
requestParameters.limit,
requestParameters.offered_by,
requestParameters.offset,
Expand Down
21 changes: 21 additions & 0 deletions learning_resources/etl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ def documents_from_olx(
path = "/".join(root.split("/")[3:])
for filename in files:
extension_lower = Path(filename).suffix.lower()

if extension_lower in VALID_TEXT_FILE_TYPES and "draft" not in root:
with Path.open(Path(root, filename), "rb") as f:
filebytes = f.read()
Expand All @@ -345,6 +346,24 @@ def documents_from_olx(
)


def get_edx_block_id(path: str, run: "LearningResourceRun") -> "str | None":
    """
    Build the edX block ID for a content file from its source path

    The block type is taken from the name of the directory that directly
    contains the file, and the block name from the filename without its
    extension. The course portion comes from the run's run_id with any
    "course-v1:" prefix removed.

    Args:
        path (str): The source path of the file, using "/" as separator
        run (LearningResourceRun): The run the content file belongs to

    Returns:
        str: The block ID, in the form
            "block-v1:<course key>+type@<directory>+block@<file stem>",
            or None when the path is empty/missing or has no parent
            directory from which a block type could be read
    """
    if not path:
        # The caller reads source_path via metadata.get(), so it may be None
        return None

    segments = path.split("/")
    # A bare filename (or a file directly under "/") carries no block type;
    # indexing [-2] here would otherwise raise IndexError or yield "".
    if len(segments) < 2 or not segments[-2]:  # noqa: PLR2004
        return None

    module_type = segments[-2]
    name = Path(path).stem
    course_key = run.run_id.replace("course-v1:", "")
    return f"block-v1:{course_key}+type@{module_type}+block@{name}"


def text_from_srt_content(content: str):
"""
Remove timestamps and other extraneous data from SRT content
Expand Down Expand Up @@ -402,6 +421,7 @@ def transform_content_files(
mime_type = metadata.get("mime_type")
file_extension = metadata.get("file_extension")
source_path = metadata.get("source_path")
edx_block_id = get_edx_block_id(source_path, run)

existing_content = ContentFile.objects.filter(key=key, run=run).first()
if (
Expand Down Expand Up @@ -453,6 +473,7 @@ def transform_content_files(
"checksum": metadata.get("checksum"),
"file_extension": file_extension,
"source_path": source_path,
"edx_block_id": edx_block_id,
**content_dict,
}
)
Expand Down
7 changes: 4 additions & 3 deletions learning_resources/etl/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def test_transform_content_files(
run = LearningResourceRunFactory.create(published=True)
document = "some text in the document"
file_extension = ".html"
key = f"root/key{file_extension}"
key = f"key{file_extension}"
content_type = "course"
checksum = "7s35721d1647f962d59b8120a52210a7"
metadata = {"title": "the title of the course"} if has_metadata else None
Expand All @@ -206,7 +206,7 @@ def test_transform_content_files(
"content_type": content_type,
"checksum": checksum,
"file_extension": file_extension,
"source_path": "root",
"source_path": f"root/folder/{key}",
},
)
],
Expand Down Expand Up @@ -240,7 +240,8 @@ def test_transform_content_files(
"content_type": content_type,
"checksum": checksum,
"file_extension": file_extension,
"source_path": "root",
"source_path": f"root/folder/{key}",
"edx_block_id": f"block-v1:{run.run_id.replace('course-v1:', '')}+type@folder+block@key",
}
]
else:
Expand Down
9 changes: 9 additions & 0 deletions learning_resources/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,11 @@ class ContentFileFilter(FilterSet):
method="filter_content_feature_type",
)

edx_block_id = CharInFilter(
label="The edx block id of the content file",
method="filter_edx_block_id",
)

offered_by = MultipleChoiceFilter(
label="The organization that offers a learning resource the content file "
"belongs to",
Expand Down Expand Up @@ -251,6 +256,10 @@ def filter_content_feature_type(self, queryset, _, value):
"""Content feature type filter for contentfiles"""
return multi_or_filter(queryset, "content_tags__name__iexact", value)

def filter_edx_block_id(self, queryset, _, value):
    """
    Filter contentfiles by edx block id

    Delegates to multi_or_filter with a case-insensitive exact-match
    lookup on the edx_block_id field.
    """
    lookup = "edx_block_id__iexact"
    return multi_or_filter(queryset, lookup, value)

class Meta:
model = ContentFile
fields = []
Expand Down
Loading

0 comments on commit ec32386

Please sign in to comment.