Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatically retrieve datasets when harvesting data from GBIF #4279

Merged
merged 2 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions bims/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import json

from ckeditor_uploader.widgets import CKEditorUploadingWidget
from django.http import HttpResponse
from django.http import HttpResponse, HttpResponseRedirect
from django.conf import settings
from rangefilter.filter import DateRangeFilter
from preferences.admin import PreferencesAdmin
Expand All @@ -25,7 +25,7 @@
from django.db.models import Q
from django.utils.html import format_html
from django.contrib.auth import get_user_model
from django.urls import reverse
from django.urls import reverse, path
from django.contrib.auth.forms import UserCreationForm

from django_json_widget.widgets import JSONEditorWidget
Expand Down Expand Up @@ -2048,6 +2048,20 @@ def display_order(self, instance):
class DatasetAdmin(admin.ModelAdmin):
    """Admin for GBIF datasets with a button to re-fetch them in the background."""
    list_display = ('uuid', 'name', 'abbreviation')
    search_fields = ('uuid', 'name')
    # Custom changelist template adds the "Fetch Datasets" object-tool button.
    change_list_template = "admin/dataset_changelist.html"

    def get_urls(self):
        """Prepend the custom fetch-datasets URL to the default admin URLs.

        The view is wrapped in ``admin_site.admin_view`` so it inherits the
        admin's authentication/permission checks and never-cache behaviour;
        without the wrapper the endpoint would be reachable anonymously.
        """
        urls = super().get_urls()
        custom_urls = [
            path(
                'fetch-datasets/',
                self.admin_site.admin_view(self.fetch_datasets),
                name='fetch_datasets',
            ),
        ]
        # Custom URLs must come first, otherwise the catch-all admin
        # change-view pattern would shadow them.
        return custom_urls + urls

    def fetch_datasets(self, request):
        """Queue the GBIF dataset harvest as a Celery task and redirect back.

        Imported lazily to avoid a circular import between admin and tasks.
        """
        from bims.tasks.dataset import retrieve_datasets_from_gbif
        retrieve_datasets_from_gbif.delay()
        self.message_user(request, 'Fetching datasets in background')
        return HttpResponseRedirect(reverse('admin:bims_dataset_changelist'))


class TagGroupAdmin(admin.ModelAdmin):
Expand Down
38 changes: 22 additions & 16 deletions bims/scripts/extract_dataset_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,26 @@ def get_dataset_details_from_gbif(dataset_uuid):
return None


def create_dataset_from_gbif(dataset_key):
    """Create or update a local ``Dataset`` from GBIF metadata.

    :param dataset_key: GBIF dataset UUID to look up.
    :return: tuple ``(dataset, created)`` as returned by
        ``update_or_create``, or ``(None, False)`` when GBIF did not
        return any details for the key.
    """
    dataset_details = get_dataset_details_from_gbif(dataset_key)

    # get_dataset_details_from_gbif returns None on a failed lookup;
    # calling .get() on it would raise AttributeError, so bail out early.
    if not dataset_details:
        return None, False

    defaults = {
        'name': dataset_details.get('title', ''),
        'description': dataset_details.get('description', ''),
        'citation': dataset_details.get('citation', ''),
        'url': dataset_details.get('url', ''),
    }

    dataset, created = Dataset.objects.update_or_create(
        uuid=dataset_key,
        defaults=defaults,
    )
    return dataset, created


def extract_dataset_keys():
bio = BiologicalCollectionRecord.objects.filter(
source_collection='gbif'
Expand All @@ -45,20 +65,6 @@ def extract_dataset_keys():
f'with complete information: {dataset.name}')
continue

dataset_details = get_dataset_details_from_gbif(dataset_key)

dataset_name = dataset_details.get('title', '')
description = dataset_details.get('description', '')
citation = dataset_details.get('citation', '')
url = dataset_details.get('url', '')
dataset, created = create_dataset_from_gbif(dataset_key)

dataset, created = Dataset.objects.update_or_create(
uuid=dataset_key,
defaults={
'name': dataset_name,
'description': description,
'citation': citation,
'url': url,
}
)
print(f'{dataset_key} - {dataset_name} - {created}')
print(f'{dataset_key} - {dataset.name}')
16 changes: 11 additions & 5 deletions bims/scripts/import_gbif_occurrences.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,29 @@
import requests
import logging
import datetime
import simplejson
from django.contrib.gis.geos import MultiPolygon
from urllib3.exceptions import ProtocolError

from bims.models.source_reference import DatabaseRecord

from bims.models.location_site import generate_site_code
from dateutil.parser import parse, ParserError
from requests.exceptions import HTTPError
from preferences import preferences
from django.contrib.gis.geos import Point, GEOSGeometry
from django.contrib.gis.db import models
from django.contrib.gis.measure import D

from bims.scripts.extract_dataset_keys import create_dataset_from_gbif
from geonode.people.models import Profile
from bims.models import (
LocationSite,
LocationType,
BiologicalCollectionRecord,
collection_post_save_handler,
HarvestSession, SourceReferenceDatabase,
Boundary
Boundary,
Dataset
)
from bims.utils.gbif import round_coordinates
from bims.models.site_setting import SiteSetting

logger = logging.getLogger('bims')

Expand All @@ -43,6 +42,7 @@
LOCALITY_KEY = 'locality'
DEFAULT_LOCALITY = 'No locality, from GBIF'
SPECIES_KEY = 'species'
DATASET_KEY = 'datasetKey'
MODIFIED_DATE_KEY = 'modified'
LIMIT = 20

Expand Down Expand Up @@ -183,6 +183,12 @@ def process_gbif_response(json_result,
reference = result.get(REFERENCE_KEY, '')
species = result.get(SPECIES_KEY, None)
collection_date = None
dataset_key = result.get(DATASET_KEY, None)

if dataset_key:
datasets = Dataset.objects.filter(uuid=dataset_key)
if not datasets.exists():
dataset, created = create_dataset_from_gbif(dataset_key)

if event_date:
try:
Expand Down
1 change: 1 addition & 0 deletions bims/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from bims.tasks.virtual_museum_import import import_data_task
from bims.tasks.taxon_group import delete_occurrences_by_taxon_group
from bims.tasks.caches import reset_caches
from bims.tasks.dataset import retrieve_datasets_from_gbif


@shared_task(name='bims.tasks.test_celery', queue='update')
Expand Down
7 changes: 7 additions & 0 deletions bims/tasks/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from celery import shared_task


@shared_task(name='bims.tasks.retrieve_datasets_from_gbif', queue='geocontext')
def retrieve_datasets_from_gbif():
    """Celery task: harvest GBIF dataset metadata via the extraction script.

    The script module is imported inside the task body so that importing
    this tasks module stays cheap and avoids circular imports at startup.
    """
    from bims.scripts import extract_dataset_keys as _extract_module
    _extract_module.extract_dataset_keys()
8 changes: 8 additions & 0 deletions bims/templates/admin/dataset_changelist.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{# Custom changelist for Dataset admin: adds a "Fetch Datasets" button. #}
{% extends "admin/change_list.html" %}

{% block object-tools %}
{{ block.super }}
{# Links to the custom admin view registered in DatasetAdmin.get_urls, #}
{# which queues the background GBIF dataset harvest task. #}
<div class="object-tools">
<a href="{% url 'admin:fetch_datasets' %}" class="grp-button" title="Run the selected action">Fetch Datasets</a>
</div>
{% endblock %}
1 change: 1 addition & 0 deletions deployment/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ COPY deployment/docker/REQUIREMENTS.txt /REQUIREMENTS.txt
RUN pip install --no-cache-dir -r /REQUIREMENTS.txt

# Add and install Node.js dependencies
RUN npm --quiet -g install yuglify && npm install -g grunt-cli
COPY deployment/docker/package.json /package.json
COPY deployment/docker/Gruntfile.js /Gruntfile.js
RUN npm install
Expand Down
Loading