Skip to content

Commit

Permalink
Sort checklist data by scientific_name (#4218)
Browse files: browse the repository at this point in the history
  • Loading branch information
dimasciput authored Sep 3, 2024
1 parent 80c6dc0 commit 14fca11
Showing 1 changed file with 19 additions and 9 deletions.
28 changes: 19 additions & 9 deletions bims/api_views/checklist.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -102,8 +102,13 @@ def generate_csv_checklist(download_request, module_name, collection_records, ba
fieldnames = [key for key in get_serializer_keys(ChecklistSerializer) if key != 'id']
custom_header = get_custom_header(fieldnames, CSV_HEADER_TITLE)

taxonomy_collection_records = collection_records.distinct('taxonomy')
taxonomy_collection_records = (
collection_records.distinct(
'taxonomy__scientific_name'
).order_by('taxonomy__scientific_name')
)
taxonomy_collection_records_count = taxonomy_collection_records.count()
taxonomy_ids = list(taxonomy_collection_records.values_list('taxonomy_id', flat=True))

written_taxa_ids = set()

Expand All @@ -115,7 +120,7 @@ def generate_csv_checklist(download_request, module_name, collection_records, ba
writer.writerow(dict(zip(fieldnames, custom_header)))

for start in range(0, taxonomy_collection_records_count, batch_size):
batch = taxonomy_collection_records[start:start + batch_size]
batch = taxonomy_ids[start:start + batch_size]
process_batch(batch, writer, written_taxa_ids, collection_records)
download_request.progress = (
f'{start}/{taxonomy_collection_records_count}'
Expand Down Expand Up @@ -145,7 +150,11 @@ def generate_pdf_checklist(download_request, module_name, collection_records, ba
all_taxa = []
common_names_and_count = {}

taxonomy_collection_records = collection_records.distinct('taxonomy')
taxonomy_collection_records = (
collection_records.distinct(
'taxonomy__scientific_name'
).order_by('taxonomy__scientific_name')
)
taxonomy_collection_records_count = taxonomy_collection_records.count()
taxonomy_ids = list(taxonomy_collection_records.values_list('taxonomy_id', flat=True))

Expand All @@ -156,12 +165,12 @@ def generate_pdf_checklist(download_request, module_name, collection_records, ba
if unique_taxonomy_ids:
taxa = Taxonomy.objects.filter(
id__in=unique_taxonomy_ids
).order_by(
'scientific_name'
).filter(
rank__in=[
TaxonomicRank.SPECIES.name,
TaxonomicRank.SUBSPECIES.name]
).order_by(
'scientific_name'
)
taxon_serializer = ChecklistPDFSerializer(taxa, many=True)
for taxon in taxon_serializer.data:
Expand Down Expand Up @@ -244,20 +253,21 @@ def generate_pdf_checklist(download_request, module_name, collection_records, ba
return True


def process_batch(batch, writer, written_taxa_ids, collection_records):
def process_batch(record_taxonomy_ids, writer, written_taxa_ids, collection_records):
"""
Process a batch of collection records and write unique taxa to the CSV file.
Args:
batch (QuerySet): A batch of collection records.
record_taxonomy_ids (list): list of taxonomy ids
writer (csv.DictWriter): CSV writer object.
written_taxa_ids (set): Set of already written taxa IDs to avoid duplication.
collection_records (QuerySet): Filtered collection records
"""
record_taxonomy_ids = batch.values_list('taxonomy_id', flat=True)
unique_taxonomy_ids = set(record_taxonomy_ids) - written_taxa_ids

if unique_taxonomy_ids:
taxa = Taxonomy.objects.filter(id__in=unique_taxonomy_ids)
taxa = Taxonomy.objects.filter(
id__in=unique_taxonomy_ids
).order_by('scientific_name')
taxon_serializer = ChecklistSerializer(
taxa,
many=True,
Expand Down

0 comments on commit 14fca11

Please sign in to comment.