diff --git a/.gitignore b/.gitignore index 290d48a9e..ea177d08f 100644 --- a/.gitignore +++ b/.gitignore @@ -86,7 +86,6 @@ venv*/ apache* /oep-django-5 - .DS_Store # Deployment files diff --git a/api/serializers.py b/api/serializers.py index 0f80d4faa..4689c5a4b 100644 --- a/api/serializers.py +++ b/api/serializers.py @@ -1,8 +1,13 @@ +from re import match +from uuid import UUID + from django.urls import reverse from rest_framework import serializers +from dataedit.helper import get_readable_table_name from dataedit.models import Table from modelview.models import Energyframework, Energymodel +from oeplatform.settings import URL class EnergyframeworkSerializer(serializers.ModelSerializer): @@ -53,3 +58,104 @@ class Meta: model = Table # fields = ["id", "model_name", "acronym", "url"] fields = ["id", "name", "human_readable_name", "url"] + + +class DatasetSerializer(serializers.Serializer): + name = serializers.CharField(max_length=255, required=True) + external_url = serializers.URLField( + max_length=1000, required=False, allow_null=True + ) + type = serializers.ChoiceField(choices=["input", "output"], required=True) + # title = serializers.SerializerMethodField() + + # ✅ Basic validation for 'name' (regex check only) + def validate_name(self, value): + if not match(r"^[\w]+$", value): + raise serializers.ValidationError( + "Dataset name should contain only alphanumeric characters " + "and underscores." 
+ ) + return value # Don't check DB here, do it in validate() + + # ✅ Main validation logic (includes db check for object existence) + def validate(self, data): + name = data.get("name") + external_url = data.get("external_url") + + if external_url: + # ✅ External URL provided → Skip DB check for 'name' + if not external_url.startswith("https://databus.openenergyplatform.org"): + raise serializers.ValidationError( + { + "external_url": ( + "If you want to link distributions stored outside the OEP, " + "please use the Databus: " + "https://databus.openenergyplatform.org/app/publish-wizard " + "to register your data and use the file or version URI as " + "a persistent identifier." + ) + } + ) + data["name"] = f"{name} (external dataset)" + else: + # ✅ No external URL → Validate 'name' in the database + if not Table.objects.filter(name=name).exists(): + raise serializers.ValidationError( + { + "name": f"Dataset '{name}' does not exist in the database." + "If you want to add links to external distributions please " + "add 'external_url' to the request body." 
+ } + ) + full_label = self.get_title(data) + if full_label: + data["name"] = full_label + + # ✅ Generate internal distribution URL + reversed_url = reverse( + "dataedit:view", + kwargs={"schema": "scenario", "table": name}, + ) + data["external_url"] = f"{URL}{reversed_url}" + + return data # Return updated data with 'distribution_url' if applicable + + def get_title(self, data): + name = data.get("name") + # ✅ Generate internal distribution label + full_label = get_readable_table_name(table_obj=Table.objects.get(name=name)) + if full_label: + return full_label + else: + return None + + +class ScenarioBundleScenarioDatasetSerializer(serializers.Serializer): + scenario_bundle = serializers.UUIDField( + required=True + ) # Validate the scenario bundle UUID + scenario = serializers.UUIDField(required=True) # Validate the scenario UUID + datasets = serializers.ListField( + child=DatasetSerializer(), required=True + ) # List of datasets with 'name' and 'type' + + # Custom validation for 'scenario' + def validate_scenario(self, value): + try: + UUID(str(value)) + except ValueError: + raise serializers.ValidationError("Invalid UUID format for scenario.") + + return value + + # Custom validation for the entire dataset list + def validate_dataset(self, value): + if not value: + raise serializers.ValidationError("The dataset list cannot be empty.") + + # Check for duplicates in dataset names + dataset_names = [dataset["name"] for dataset in value] + if len(dataset_names) != len(set(dataset_names)): + raise serializers.ValidationError("Dataset names must be unique.") + + return value diff --git a/api/urls.py b/api/urls.py index 726fd8abe..d66f8075c 100644 --- a/api/urls.py +++ b/api/urls.py @@ -203,4 +203,9 @@ views.ScenarioDataTablesListAPIView.as_view(), name="list-scenario-datasets", ), + re_path( + r"^v0/scenario-bundle/scenario/manage-datasets/?$", + views.ManageOekgScenarioDatasets.as_view(), + name="add-scenario-datasets", + ), ] diff --git a/api/utils.py 
b/api/utils.py new file mode 100644 index 000000000..de18f06d0 --- /dev/null +++ b/api/utils.py @@ -0,0 +1,14 @@ +""" +Collection of utility functions for the API used to define various action +like processing steps. +""" + +from oekg.sparqlModels import DatasetConfig + + +def get_dataset_configs(validated_data) -> list[DatasetConfig]: + """Converts validated serializer data into a list of DatasetConfig objects.""" + return [ + DatasetConfig.from_serializer_data(validated_data, dataset_entry) + for dataset_entry in validated_data["datasets"] + ] diff --git a/api/views.py b/api/views.py index 7b3871a60..9db97c07b 100644 --- a/api/views.py +++ b/api/views.py @@ -31,6 +31,10 @@ from omi.dialects.oep.compiler import JSONCompiler from omi.structure import OEPMetadata from rest_framework import generics, status +from rest_framework.permissions import IsAuthenticated + +# views.py +from rest_framework.response import Response from rest_framework.views import APIView import api.parser @@ -43,13 +47,19 @@ from api.serializers import ( EnergyframeworkSerializer, EnergymodelSerializer, + ScenarioBundleScenarioDatasetSerializer, ScenarioDataTablesSerializer, ) +from api.utils import get_dataset_configs from dataedit.models import Embargo from dataedit.models import Schema as DBSchema from dataedit.models import Table as DBTable from dataedit.views import get_tag_keywords_synchronized_metadata, schema_whitelist +from factsheet.permission_decorator import post_only_if_user_is_owner_of_scenario_bundle from modelview.models import Energyframework, Energymodel + +# from oekg.sparqlQuery import remove_datasets_from_scenario +from oekg.utils import process_datasets_sparql_query from oeplatform.settings import PLAYGROUNDS, UNVERSIONED_SCHEMAS, USE_LOEP, USE_ONTOP if USE_LOEP: @@ -244,11 +254,11 @@ class Sequence(APIView): @api_exception def put(self, request, schema, sequence): if schema not in PLAYGROUNDS and schema not in UNVERSIONED_SCHEMAS: - raise APIError('Schema is not in 
allowed set of schemes for upload') + raise APIError("Schema is not in allowed set of schemes for upload") if schema.startswith("_"): - raise APIError('Schema starts with _, which is not allowed') + raise APIError("Schema starts with _, which is not allowed") if request.user.is_anonymous: - raise APIError('User is anonymous', 401) + raise APIError("User is anonymous", 401) if actions.has_table(dict(schema=schema, sequence_name=sequence), {}): raise APIError("Sequence already exists", 409) return self.__create_sequence(request, schema, sequence, request.data) @@ -257,11 +267,11 @@ def put(self, request, schema, sequence): @require_delete_permission def delete(self, request, schema, sequence): if schema not in PLAYGROUNDS and schema not in UNVERSIONED_SCHEMAS: - raise APIError('Schema is not in allowed set of schemes for upload') + raise APIError("Schema is not in allowed set of schemes for upload") if schema.startswith("_"): - raise APIError('Schema starts with _, which is not allowed') + raise APIError("Schema starts with _, which is not allowed") if request.user.is_anonymous: - raise APIError('User is anonymous', 401) + raise APIError("User is anonymous", 401) return self.__delete_sequence(request, schema, sequence, request.data) @load_cursor() @@ -371,9 +381,9 @@ def post(self, request, schema, table): :return: """ if schema not in PLAYGROUNDS and schema not in UNVERSIONED_SCHEMAS: - raise APIError('Schema is not in allowed set of schemes for upload') + raise APIError("Schema is not in allowed set of schemes for upload") if schema.startswith("_"): - raise APIError('Schema starts with _, which is not allowed') + raise APIError("Schema starts with _, which is not allowed") json_data = request.data if "column" in json_data["type"]: @@ -423,11 +433,11 @@ def put(self, request, schema, table): :return: """ if schema not in PLAYGROUNDS and schema not in UNVERSIONED_SCHEMAS: - raise APIError('Schema is not in allowed set of schemes for upload') + raise APIError("Schema 
is not in allowed set of schemes for upload") if schema.startswith("_"): - raise APIError('Schema starts with _, which is not allowed') + raise APIError("Schema starts with _, which is not allowed") if request.user.is_anonymous: - raise APIError('User is anonymous', 401) + raise APIError("User is anonymous", 401) if actions.has_table(dict(schema=schema, table=table), {}): raise APIError("Table already exists", 409) json_data = request.data.get("query", {}) @@ -967,10 +977,10 @@ def get(self, request, schema, table, row_id=None): content_type="text/csv", session=session, ) - response["Content-Disposition"] = ( - 'attachment; filename="{schema}__{table}.csv"'.format( - schema=schema, table=table - ) + response[ + "Content-Disposition" + ] = 'attachment; filename="{schema}__{table}.csv"'.format( + schema=schema, table=table ) return response elif format == "datapackage": @@ -998,10 +1008,10 @@ def get(self, request, schema, table, row_id=None): content_type="application/zip", session=session, ) - response["Content-Disposition"] = ( - 'attachment; filename="{schema}__{table}.zip"'.format( - schema=schema, table=table - ) + response[ + "Content-Disposition" + ] = 'attachment; filename="{schema}__{table}.zip"'.format( + schema=schema, table=table ) return response else: @@ -1574,3 +1584,56 @@ class ScenarioDataTablesListAPIView(generics.ListAPIView): topic = "scenario" queryset = DBTable.objects.filter(schema__name=topic) serializer_class = ScenarioDataTablesSerializer + + +class ManageOekgScenarioDatasets(APIView): + permission_classes = [IsAuthenticated] # Require authentication + + @post_only_if_user_is_owner_of_scenario_bundle + def post(self, request): + serializer = ScenarioBundleScenarioDatasetSerializer(data=request.data) + if not serializer.is_valid(): + return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) + + try: + dataset_configs = get_dataset_configs(serializer.validated_data) + response_data = process_datasets_sparql_query(dataset_configs) 
+ except APIError as e: + return Response({"error": str(e)}, status=e.status) + except Exception: + return Response({"error": "An unexpected error occurred."}, status=500) + + if "error" in response_data: + return Response(response_data, status=status.HTTP_400_BAD_REQUEST) + + return Response(response_data, status=status.HTTP_200_OK) + + # @post_only_if_user_is_owner_of_scenario_bundle + # def delete(self, request): + # serializer = ScenarioBundleScenarioDatasetSerializer(data=request.data) + # if serializer.is_valid(): + # scenario_uuid = serializer.validated_data["scenario"] + # datasets = serializer.validated_data["datasets"] + + # # Iterate over each dataset to process it properly + # for dataset in datasets: + # dataset_name = dataset["name"] + # dataset_type = dataset["type"] + + # # Remove the dataset from the scenario in the bundle + # success = remove_datasets_from_scenario( + # scenario_uuid, dataset_name, dataset_type + # ) + + # if not success: + # return Response( + # {"error": f"Failed to remove dataset {dataset_name}"}, + # status=status.HTTP_400_BAD_REQUEST, + # ) + + # return Response( + # {"message": "Datasets removed successfully"}, + # status=status.HTTP_200_OK, + # ) + + # return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) diff --git a/dataedit/models.py b/dataedit/models.py index 1ea828af4..e80747154 100644 --- a/dataedit/models.py +++ b/dataedit/models.py @@ -16,6 +16,7 @@ IntegerField, JSONField, ) +from django.urls import reverse from django.utils import timezone # Create your models here. 
@@ -77,6 +78,9 @@ class Table(Tagable): is_publish = BooleanField(null=False, default=False) human_readable_name = CharField(max_length=1000, null=True) + def get_absolute_url(self): + return reverse("dataedit:view", kwargs={"pk": self.pk}) + @classmethod def load(cls, schema, table): """ @@ -719,5 +723,6 @@ def filter_opr_by_table(schema, table): """ return PeerReview.objects.filter(schema=schema, table=table) + @staticmethod def filter_opr_by_id(opr_id): return PeerReview.objects.filter(id=opr_id).first() diff --git a/dataedit/views.py b/dataedit/views.py index ecc1edfa1..e4e80600d 100644 --- a/dataedit/views.py +++ b/dataedit/views.py @@ -2355,9 +2355,6 @@ def post(self, request, schema, table, review_id): Handle POST requests for contributor's review. Merges and updates the review data in the PeerReview table. - Missing parts: - - merge contributor field review and reviewer field review - Args: request (HttpRequest): The incoming HTTP POST request. schema (str): The schema of the table. @@ -2367,9 +2364,6 @@ def post(self, request, schema, table, review_id): Returns: HttpResponse: Rendered HTML response for contributor review. - Note: - This method has some missing parts regarding the merging of contributor - and reviewer field review. """ context = {} diff --git a/docs/oeplatform-code/web-api/oekg-api/scenario-dataset.md b/docs/oeplatform-code/web-api/oekg-api/scenario-dataset.md new file mode 100644 index 000000000..4976f0d82 --- /dev/null +++ b/docs/oeplatform-code/web-api/oekg-api/scenario-dataset.md @@ -0,0 +1,86 @@ +# API to manipulate dataset for in a scenario + +## Basics + +This functionality is part of the oeplatform web api and can be accessed sending POST requests to this endpoint: + +- `https://openenergyplatform.org/api/v0/scenario-bundle/scenario/manage-datasets/` + +You need a client to send http requests. 
+ +- Python: requests +- Linux: curl +- Client software: HTTPie +- and more + +For authorization you must use your API token, which can be obtained from the profile page on the OEP. If you leaked it, you can also reset the token. See the section "Access restrictions and future consideration". + +The POST request must contain a body with payload: + +``` json +{ + "scenario_bundle": "1970ba29-155b-6e70-7c22-c12a33244a24", + "scenario": "5d95247d-df75-a95b-7286-dd4b3bc1c92a", + "datasets": [ + { + "name": "eu_leg_data_2017_eio_ir_article23_t3", + "type": "input" + }, + { + "name": "testetstetst", + "type": "output" + }, + { + "name": "WS_23_24_B665_2025_01_23", + "external_url": "https://databus.openenergyplatform.org/koubaa/LLEC_Dataset/WS_23_24_B665_2025_01_23/WS_23_24_B665_2025_01_23", + "type": "output" + } + ] +} +``` + +- scenario_bundle: can be obtained from the scenario bundle website (copy it from the URL) +- scenario: can also be obtained from the website; in the scenario tab there is a button to copy each scenario UID +- datasets: a list of all datasets you want to add +- name: you can look up a table name that is available on the OEP and published in the scenario topic. The technical name is required here. +- type: Choose either "input" or "output" here; the dataset will be added to the related section in the scenario +- external_url: This parameter is OPTIONAL: you don't have to use it if you are adding a dataset that is available on the OEP. You can use it to link external datasets, but it requires you to first register them on the databus to get a persistent ID. The databus offers a Publishing page. After the dataset is registered you can copy the file or version URL and add it to the external_url field. 
+ +- Publishing page: https://databus.openenergyplatform.org/app/publish-wizard +- The databus also offers an API in case you want to register datasets in bulk + +## Example using curl + +``` bash +curl --request POST \ + --url https://openenergyplatform.org/api/v0/scenario-bundle/scenario/manage-datasets/ \ + --header 'Authorization: Token <your-api-token>' \ + --header 'Content-Type: application/json' \ + --data '{ + "scenario_bundle": "1970ba29-155b-6e70-7c22-c12a33244a24", + "scenario": "5d95247d-df75-a95b-7286-dd4b3bc1c92a", + "datasets": [ + { + "name": "eu_leg_data_2017_eio_ir_article23_t3", + "type": "input" + }, + { + "name": "testetstetst", + "type": "output" + }, + { + "name": "WS_23_24_B665_2025_01_23", + "external_url": "https://databus.openenergyplatform.org/koubaa/LLEC_Dataset/WS_23_24_B665_2025_01_23/WS_23_24_B665_2025_01_23", + "type": "output" + }, + { + "name": "first_test_table", + "type": "output" + } + ] +}' +``` + +## Access restrictions and future consideration + +Currently, only the person who created a scenario bundle is able to edit its content. Soon this will change and users will be able to assign a group to a bundle. Groups are also used to manage access to dataset resources on the OEP; the same groups will be used here. Once this is implemented you will have to create/assign a group to your bundle and then you can collaborate on the editing. 
diff --git a/factsheet/frontend/src/components/scenarioBundle.js b/factsheet/frontend/src/components/scenarioBundle.js index 5186d7418..70e6e5680 100644 --- a/factsheet/frontend/src/components/scenarioBundle.js +++ b/factsheet/frontend/src/components/scenarioBundle.js @@ -26,6 +26,7 @@ import Tabs from '@mui/material/Tabs'; import Tab from '@mui/material/Tab'; import Box from '@mui/material/Box'; import Fab from '@mui/material/Fab'; +import ContentCopyOutlinedIcon from '@mui/icons-material/ContentCopyOutlined'; import AddIcon from '@mui/icons-material/Add.js'; import Checkbox from '@mui/material/Checkbox'; import FormGroup from '@mui/material/FormGroup'; @@ -410,7 +411,8 @@ function Factsheet(props) { const filteredResult = filterByValue(selectedTechnologies, technologies); - setSelectedTechnologiesTree(filteredResult[0]["children"]); + // setSelectedTechnologiesTree(filteredResult[0]); + // setSelectedTechnologies(s) function getAllNodeIds(nodes) { let ids = []; @@ -426,13 +428,9 @@ function Factsheet(props) { const allIds = getAllNodeIds(filteredResult[0]["children"]); setAllNodeIds(allIds); - }, []); - - - - + }, []); - }, [selectedTechnologies, technologies]); + }, []); // Todo: check if the empty dependency array raises errors const handleSaveFactsheet = () => { setOpenBackDrop(true); @@ -2010,11 +2008,30 @@ function Factsheet(props) { ] } const scenario_count = 'Scenarios' + ' (' + scenarios.length + ')'; - const renderScenariosOverview = () => ( - - { - scenarios.map((v, i) => - v.acronym !== '' && +console.log(scenarios); + +const renderScenariosOverview = () => ( + + {scenarios.map((v, i) => + v.acronym !== '' ? ( + + + @@ -2228,10 +2245,12 @@ function Factsheet(props) {
- ) - } -
- ) + + ) : null + )} +
+); + const renderPublicationOverview = () => ( diff --git a/factsheet/oekg/connection.py b/factsheet/oekg/connection.py index c43bf48f6..b96759078 100644 --- a/factsheet/oekg/connection.py +++ b/factsheet/oekg/connection.py @@ -39,6 +39,7 @@ update_endpoint = "http://%(host)s:%(port)s/%(name)s/update" % rdfdb sparql = SPARQLWrapper(query_endpoint) +sparql_wrapper_update = SPARQLWrapper(update_endpoint) store = sparqlstore.SPARQLUpdateStore() diff --git a/factsheet/permission_decorator.py b/factsheet/permission_decorator.py index 773e6854b..e24514e66 100644 --- a/factsheet/permission_decorator.py +++ b/factsheet/permission_decorator.py @@ -1,12 +1,21 @@ import json from functools import wraps -from django.http import HttpResponseForbidden +from django.http import HttpResponse, HttpResponseForbidden from factsheet.models import ScenarioBundleAccessControl def only_if_user_is_owner_of_scenario_bundle(view_func): + """ + Wrapper that checks if the current user is the owner of + the Scenario bundle. + + It determines the owner of the Scenario bundle by checking + the ScenarioBundleEditAccess model. The uid of the scenario + bundle is passed as a URL parameter or in the request body. + """ + @wraps(view_func) def _wrapped_view(request, *args, **kwargs): # Get the uid from the URL parameters or any other source. @@ -15,6 +24,7 @@ def _wrapped_view(request, *args, **kwargs): kwargs.get("uid") or json.loads(request.body).get("uid") or json.loads(request.body).get("id") + or request.GET.get("id") ) except Exception: uid = request.GET.get("id") @@ -38,3 +48,54 @@ def _wrapped_view(request, *args, **kwargs): return HttpResponseForbidden("Access Denied") return _wrapped_view + + +def post_only_if_user_is_owner_of_scenario_bundle(view_func): + """ + Wrapper that checks if the current user is the owner of + the Scenario bundle. This is a decorator for POST requests. 
+ + It differs from the only_if_user_is_owner_of_scenario_bundle + as it depends on data from the request body instead of URL parameters. + + It determines the owner of the Scenario bundle by checking + the ScenarioBundleEditAccess model. The uid of the scenario + bundle is passed as a URL parameter or in the request body. + """ + + @wraps(view_func) + def _wrapped_view(view_instance, request, *args, **kwargs): + # Get the uid from the URL parameters or any other source. + + bundle_uid = kwargs.get("uid") or request.data.get("scenario_bundle") + if not bundle_uid: + return HttpResponse( + "The bundle_uid (scenario bundle) was not found in" + "the request body or URL parameters", + ) + + user_id = request.user + if not user_id: + return HttpResponse( + "The user id was not found in the request body or URL parameters", + ) + + try: + # Retrieve the ScenarioBundleEditAccess object based on the uid. + scenario_bundle_access = ScenarioBundleAccessControl.objects.get( + bundle_id=bundle_uid + ) + except ScenarioBundleAccessControl.DoesNotExist: + # Handle the case where the ScenarioBundleEditAccess with the + # provided uid is not found. + return HttpResponseForbidden( + "UID not available or scenario bundle does not exist. Access denied" + ) + + # Check if the current user is the owner (creator) of the Scenario bundle. 
+ if request.user == scenario_bundle_access.owner_user: + return view_func(view_instance, request, *args, **kwargs) + else: + return HttpResponseForbidden("Access Denied") + + return _wrapped_view diff --git a/mkdocs.yml b/mkdocs.yml index 62a2de40c..fb82ad5aa 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -116,6 +116,7 @@ nav: # - Draft open-API schema: oeplatform-code/web-api/oedb-rest-api/swagger-ui.html - OEKG API: - oeplatform-code/web-api/oekg-api/index.md + - Edit scenario datasets: oeplatform-code/web-api/oekg-api/scenario-dataset.md - Features: - oeplatform-code/features/index.md - metaBuilder Metadata creation: diff --git a/oekg/README.md b/oekg/README.md new file mode 100644 index 000000000..aa285f205 --- /dev/null +++ b/oekg/README.md @@ -0,0 +1,5 @@ +# What is this app used for? + +The OEKG Django app is used to encapsulate functionality to interact with the OEKG within the OEP. If one needs such functionality in another Django app like `api`, then the oekg app should be imported there. New functionality should also extend the oekg app. + +This includes variables and functions to connect to databases (like Jena Fuseki) and to access or edit their content. The main libraries used here are rdflib (broadly used in the factsheet app to create scenario bundles) and SPARQLWrapper to formulate a query as a string. The latter approach is more efficient as it avoids parsing data (like the Graph) to Python data types. 
diff --git a/oekg/sparqlModels.py b/oekg/sparqlModels.py new file mode 100644 index 000000000..df4ba35f1 --- /dev/null +++ b/oekg/sparqlModels.py @@ -0,0 +1,24 @@ +from dataclasses import dataclass +from uuid import UUID, uuid4 + + +@dataclass +class DatasetConfig: + bundle_uuid: UUID + scenario_uuid: UUID + dataset_label: str + dataset_url: str + dataset_id: UUID + dataset_type: str + + @classmethod + def from_serializer_data(cls, validated_data: dict, dataset_entry: dict): + """Converts validated serializer data into a DatasetConfig object.""" + return cls( + bundle_uuid=validated_data["scenario_bundle"], + scenario_uuid=validated_data["scenario"], + dataset_label=dataset_entry["name"], + dataset_url=dataset_entry["external_url"], + dataset_id=uuid4(), + dataset_type=dataset_entry["type"], # "input" or "output" + ) diff --git a/oekg/sparqlQuery.py b/oekg/sparqlQuery.py new file mode 100644 index 000000000..0e1812f2c --- /dev/null +++ b/oekg/sparqlQuery.py @@ -0,0 +1,135 @@ +import logging +from uuid import UUID + +import requests +from SPARQLWrapper import JSON, POST + +from factsheet.oekg.connection import sparql, sparql_wrapper_update, update_endpoint +from oekg.sparqlModels import DatasetConfig + +logger = logging.getLogger("oeplatform") + + +def scenario_in_bundle(bundle_uuid: UUID, scenario_uuid: UUID) -> bool: + """ + Check if a scenario is part of a scenario bundle in the KG. + """ + sparql_query = f""" + PREFIX oeo: + + ASK {{ + ?p + . + }} + """ + sparql.setQuery(sparql_query) + sparql.setMethod(POST) + sparql.setReturnFormat(JSON) + response = sparql.query().convert() + + return response.get( + "boolean", False + ) # Returns True if scenario is part of the bundle + + +def dataset_exists(scenario_uuid: UUID, dataset_url: str) -> bool: + """ + Check if a dataset with the same label already exists. + """ + + sparql_query = f""" + PREFIX oeo: + PREFIX rdfs: + + ASK {{ + ?p ?dataset . + ?dataset oeo:has_iri "{dataset_url}" . 
+ }} + + """ # noqa + + sparql.setQuery(sparql_query) + sparql.setMethod(POST) + sparql.setReturnFormat(JSON) + response = sparql.query().convert() + + return response.get("boolean", False) # Returns True if dataset exists + + +def add_datasets_to_scenario(oekgDatasetConfig: DatasetConfig): + """ + Function to add datasets to a scenario bundle in Jena Fuseki. + """ + + # Check if a dataset with the same label exists + if dataset_exists(oekgDatasetConfig.scenario_uuid, oekgDatasetConfig.dataset_url): + return False # Skip insertion + + # Check: used constant string values here. Get ids from oeo + # graph to make sure ids still exists? + if oekgDatasetConfig.dataset_type == "input": + rel_property = "RO_0002233" + type_entity = "OEO_00030029" + elif oekgDatasetConfig.dataset_type == "output": + rel_property = "RO_0002234" + type_entity = "OEO_00030030" + + # oeo:has_id "{oekgDatasetConfig.dataset_id}" ; + # The above seems to be deprecated in the OEKG + sparql_query = f""" + PREFIX oeo: + PREFIX rdfs: + + INSERT DATA {{ + a oeo:{type_entity} ; + rdfs:label "{oekgDatasetConfig.dataset_label}" ; + oeo:has_iri "{oekgDatasetConfig.dataset_url}" ; + oeo:has_key "{oekgDatasetConfig.dataset_id}" . + + oeo:{rel_property} + . + }} + """ # noqa + + print(sparql_query) + # response = send_sparql_update(sparql_query) + sparql_wrapper_update.setQuery(sparql_query) + sparql_wrapper_update.setMethod(POST) + sparql_wrapper_update.setReturnFormat(JSON) + try: + response = sparql_wrapper_update.query() + http_response = response.response + if not http_response.status == 200: + return False # Return False if any query fails + except Exception as e: + logger.error(f"Failed to update datasets in OEKG: {e}") + return False + + return True + + +def remove_datasets_from_scenario(scenario_uuid, dataset_name, dataset_type): + """ + Function to remove datasets from a scenario bundle in Jena Fuseki. 
+ """ + sparql_query = f""" + PREFIX oeo: + DELETE DATA {{ + GRAPH {{ + oeo:{dataset_name} a oeo:{dataset_type}Dataset . + }} + }} + """ + response = send_sparql_update(sparql_query) + if not response.ok: + return False # Return False if any query fails + return True + + +def send_sparql_update(query): + """ + Helper function to send a SPARQL update query to Fuseki. + """ + headers = {"Content-Type": "application/sparql-update"} + response = requests.post(update_endpoint, data=query, headers=headers) + return response diff --git a/oekg/utils.py b/oekg/utils.py index 45efc56c2..6df506b58 100644 --- a/oekg/utils.py +++ b/oekg/utils.py @@ -1,7 +1,10 @@ import re +from oekg.sparqlModels import DatasetConfig +from oekg.sparqlQuery import add_datasets_to_scenario, scenario_in_bundle -def validate_sparql_query(query): + +def validate_public_sparql_query(query): """ Validate the SPARQL query to prevent injection attacks. """ @@ -25,3 +28,46 @@ def validate_sparql_query(query): return False return True + + +def process_datasets_sparql_query(dataset_configs: list[DatasetConfig]): + """ + Attempts to add each dataset to the scenario. + Returns a count of added datasets and a list of skipped ones. 
+ """ + total_datasets = len(dataset_configs) + added_count = 0 + skipped_datasets = [] + + for dataset_config in dataset_configs: + # Check if scenario is part of the scenario bundle + + if not scenario_in_bundle( + dataset_config.bundle_uuid, dataset_config.scenario_uuid + ): + response: dict = {} + response["error"] = ( + f"Scenario {dataset_config.scenario_uuid} is not part" + f"of bundle {dataset_config.bundle_uuid}" + ) + return response + + success = add_datasets_to_scenario(dataset_config) + + if success: + added_count += 1 + else: + skipped_datasets.append(dataset_config.dataset_label) + + # Construct a clear response + response: dict = { + "info": "successfully processed your request", + "added_count": f"{added_count} / {total_datasets}", + } + + if skipped_datasets: + # TODO: Add return a reason from add_datasets_to_scenario if needed + response["reason"] = "Dataset already exists in the scenario." + response["skipped"] = skipped_datasets + + return response diff --git a/oekg/views.py b/oekg/views.py index 8291ea4a9..c7328b4d4 100644 --- a/oekg/views.py +++ b/oekg/views.py @@ -4,8 +4,8 @@ from django.shortcuts import render from django.views.decorators.http import require_POST +from oekg.utils import validate_public_sparql_query from oeplatform.settings import OEKG_SPARQL_ENDPOINT_URL -from oekg.utils import validate_sparql_query def main_view(request): @@ -16,19 +16,22 @@ def main_view(request): @require_POST def sparql_endpoint(request): + """ + Public SPARQL endpoint. Must only allow read queries. 
+ """ sparql_query = request.POST.get("query", "") if not sparql_query: return HttpResponseBadRequest("Missing 'query' parameter.") - if not validate_sparql_query(sparql_query): + if not validate_public_sparql_query(sparql_query): raise SuspiciousOperation("Invalid SPARQL query.") endpoint_url = OEKG_SPARQL_ENDPOINT_URL headers = {"Accept": "application/sparql-results+json"} - response = requests.get( + response = requests.post( endpoint_url, params={"query": sparql_query}, headers=headers ) diff --git a/versions/changelogs/current.md b/versions/changelogs/current.md index 1ba472ebe..f6ed25cca 100644 --- a/versions/changelogs/current.md +++ b/versions/changelogs/current.md @@ -14,6 +14,8 @@ ## Features +- Implement new API Endpoint to add new datasets to a scenario bundle -> scenario -> input or output datasets. This eases bulk adding datasets. The API provides extensive error messages. Datasets listed in the scenario topic on the OEP and external datasets registered on the databus.openenergyplatform.org can be used. [(#1914)](https://github.com/OpenEnergyPlatform/oeplatform/pull/1894) + - divide metadata builder flow into subsections [(#1747)](https://github.com/OpenEnergyPlatform/oeplatform/pull/1747) - update tab indicators once all fields in a specific metadata category are reviewed [(#1900)](https://github.com/OpenEnergyPlatform/oeplatform/pull/1900) @@ -21,3 +23,5 @@ ## Bugs ## Documentation updates + +- Provide documentation for the OEKG:Scenario Bundle dataset management as described in #1890 [(#1914)](https://github.com/OpenEnergyPlatform/oeplatform/pull/1894)