Skip to content

Commit

Permalink
Updates to harvest_dag to handle list of lists
Browse files Browse the repository at this point in the history
  • Loading branch information
amywieliczka committed Feb 14, 2024
1 parent 445b304 commit 3c1c3b2
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
9 changes: 5 additions & 4 deletions dags/harvest_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
create_merged_version, put_merged_page)
from rikolti.dags.shared_tasks import create_stage_index_task
from rikolti.dags.shared_tasks import cleanup_failed_index_creation_task
from rikolti.dags.shared_tasks import paginate_filepaths_for_fanout


def get_child_records(version, parent_id) -> list:
Expand Down Expand Up @@ -80,10 +79,12 @@ def merge_children(version):

@task()
def get_mapped_page_filenames_task(mapped_pages):
mapped_pages = [mapped['mapped_page_path'] for mapped in mapped_pages
if mapped['mapped_page_path']]
mapped_pages = [
[mapped['mapped_page_path'] for mapped in mapped_page_list
if mapped['mapped_page_path']]
for mapped_page_list in mapped_pages]

return paginate_filepaths_for_fanout(mapped_pages)
return json.dumps(mapped_pages)


@dag(
Expand Down
16 changes: 8 additions & 8 deletions dags/shared_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,18 +218,18 @@ def get_mapping_status_task(collection: dict, paginated_mapped_pages: list):
@task()
def create_mapped_version_task(collection, vernacular_pages):
"""
vernacular pages is a list of the filepaths of the vernacular metadata
relative to the collection id, ex: [
'3433/vernacular_metadata_2023-01-01T00:00:00/data/1',
'3433/vernacular_metadata_2023-01-01T00:00:00/data/2'
vernacular pages is a list of lists of the filepaths of the vernacular
metadata relative to the collection id, ex: [
['3433/vernacular_metadata_2023-01-01T00:00:00/data/1'],
['3433/vernacular_metadata_2023-01-01T00:00:00/data/2']
]
returns the path to a new mapped version, ex:
"3433/vernacular_metadata_2023-01-01T00:00:00/mapped_metadata_2023-01-01T00:00:00/"
"""
vernacular_version = get_version(collection.get('id'), vernacular_pages[0])
vernacular_version = get_version(collection.get('id'), vernacular_pages[0][0])
if not vernacular_version:
raise ValueError(
f"Vernacular version not found in {vernacular_pages[0]}")
f"Vernacular version not found in {vernacular_pages[0][0]}")
mapped_data_version = create_mapped_version(vernacular_version)
return mapped_data_version

Expand Down Expand Up @@ -264,8 +264,8 @@ def validate_collection_task(collection_id: int, mapped_metadata_pages: dict) ->


@task()
def create_with_content_urls_version_task(collection: dict, mapped_pages: list[dict]):
mapped_page_path = [page['mapped_page_path'] for page in mapped_pages
def create_with_content_urls_version_task(collection: dict, mapped_pages: list[list[dict]]):
mapped_page_path = [page['mapped_page_path'] for page in mapped_pages[0]
if page['mapped_page_path']][0]
mapped_version = get_version(collection['id'], mapped_page_path)
return create_with_content_urls_version(mapped_version)
Expand Down

0 comments on commit 3c1c3b2

Please sign in to comment.