From 59471bb29a72d6b828d03f27f8fe9155cf9d93c3 Mon Sep 17 00:00:00 2001 From: Fabio Lopes Date: Fri, 26 Jul 2024 10:18:59 +0200 Subject: [PATCH] Correct connection to openBIS --- notebooks/eln_import.ipynb | 133 ++++++++++++------------------------- 1 file changed, 42 insertions(+), 91 deletions(-) diff --git a/notebooks/eln_import.ipynb b/notebooks/eln_import.ipynb index 79dd9dd27..3b12310d0 100644 --- a/notebooks/eln_import.ipynb +++ b/notebooks/eln_import.ipynb @@ -47,9 +47,11 @@ "from aiidalab_widgets_base import viewer\n", "from traitlets import dlink\n", "\n", - "from jsonschema import validate, RefResolver, Draft7Validator, ValidationError\n", "import json\n", - "from pyld import jsonld" + "from pyld import jsonld\n", + "\n", + "import sys\n", + "import os" ] }, { @@ -59,75 +61,16 @@ "metadata": {}, "outputs": [], "source": [ - "def load_json(filepath: str) -> dict:\n", - " return json.load(open(filepath, \"r\"))\n", - "\n", - "def find_refs(obj, refs=None) -> list[str]:\n", - " if refs is None:\n", - " refs = []\n", - " \n", - " if isinstance(obj, dict):\n", - " for key, value in obj.items():\n", - " if key == '$ref':\n", - " refs.append(value[2:])\n", - " else:\n", - " find_refs(value, refs)\n", - " \n", - " return refs\n", - "\n", - "def find_ref_schemas(schemas_filenames: list[str], refs = None):\n", - " if refs is None:\n", - " refs = []\n", - " \n", - " for schema_filename in schemas_filenames:\n", - " schema = load_json(f\"/home/jovyan/aiida-openbis/Notebooks/New_Metadata_Schemas/{schema_filename}\")\n", - " ref_schemas_filenames = find_refs(schema)\n", - " \n", - " if schema_filename not in refs:\n", - " refs.append(schema_filename)\n", - " find_ref_schemas(ref_schemas_filenames, refs)\n", - " \n", - " return refs\n", - "\n", - "def set_up_validator(json_schema_filename):\n", - " schemas_filenames = [json_schema_filename]\n", - " all_schemas_filenames = find_ref_schemas(schemas_filenames)\n", - "\n", - " first_schema = load_json(f\"/home/jovyan/aiida-openbis/Notebooks/New_Metadata_Schemas/{all_schemas_filenames[0]}\")\n", - " resolver = RefResolver(base_uri=\"http://example.com/\", referrer = first_schema)\n", - "\n", - " for referenced_schema_filename in all_schemas_filenames:\n", - " referenced_schema = load_json(f\"/home/jovyan/aiida-openbis/Notebooks/New_Metadata_Schemas/{referenced_schema_filename}\")\n", - " resolver.store[referenced_schema[\"$id\"]] = referenced_schema\n", - "\n", - " validator = Draft7Validator(first_schema, resolver=resolver)\n", - " \n", - " return validator\n", - "\n", - "# Function to expand context terms\n", - "def expand_context(base_context):\n", - " expanded_context = {}\n", - " for key, value in base_context.items():\n", - " if isinstance(value, str) and ':' in value:\n", - " prefix, suffix = value.split(':', 1)\n", - " if prefix in base_context:\n", - " expanded_context[key] = base_context[prefix] + suffix\n", - " else:\n", - " expanded_context[key] = value\n", - " else:\n", - " expanded_context[key] = value\n", - " return expanded_context\n", - "\n", - "def replace_keys_recursive(data, old_key, new_key):\n", - " if isinstance(data, dict):\n", - " for key in list(data.keys()): # Create a copy of keys to avoid RuntimeError\n", - " if key == old_key:\n", - " key = new_key\n", - " data[new_key] = data.pop(old_key)\n", - " replace_keys_recursive(data[key], old_key, new_key)\n", - " elif isinstance(data, list):\n", - " for item in data:\n", - " replace_keys_recursive(item, old_key, new_key)" + "#sys.path.append(os.path.dirname(\"/home/jovyan/aiida-openbis/Notebooks/Metadata_Schemas_LinkML/\"))\n", + "# from materialMLinfo import Molecule, slots\n", + "# def extract_molecule_data_with_linkml(molecule_jsonld):\n", + "# expanded = jsonld.expand(molecule_jsonld)\n", + "# molecule_data = {}\n", + "# for obj in jsonld_object:\n", + "# if '@type' in obj and str(Molecule.class_class_uri) in obj['@type']:\n", + "# molecule_data['name'] = obj.get(str(slots.name.uri), [None])[0]['@value']\n", + "# molecule_data['smiles'] = obj.get(str(slots.smiles.uri), [None])[0]['@value']\n", + "# return molecule_data" ] }, { @@ -137,35 +80,43 @@ "metadata": {}, "outputs": [], "source": [ + "# Extract data from the converted JSON-LD\n", + "def extract_molecule_data(jsonld_object, context):\n", + " expanded = jsonld.expand(molecule_jsonld)\n", + " compacted = jsonld.compact(expanded, context)\n", + " return {\"name\": compacted[\"name\"], \"smiles\": compacted[\"smiles\"]}\n", + "\n", + "def read_json(filepath: str) -> dict:\n", + " return json.load(open(filepath, \"r\"))\n", + "\n", "url = urlparse.urlsplit(jupyter_notebook_url)\n", "parsed_url = urlparse.parse_qs(url.query)\n", "params = {key:value[0] for key, value in parsed_url.items()}\n", + "molecule_jsonld = json.loads(params[\"molecule_info\"])\n", "\n", - "molecule_json = json.loads(params[\"molecule_info\"])\n", + "molecule_info_valid = False\n", "\n", - "molecule_validator = set_up_validator(\"molecule.schema.json\")\n", - "schema_context = molecule_validator.schema[\"@context\"]\n", - "schema_context = expand_context(schema_context)\n", - "json_context = expand_context(molecule_json[\"@context\"])\n", + "jsonld_context_filename = os.path.join(\n", + " os.sep, \n", + " \"home\", \n", + " \"jovyan\", \n", + " \"aiida-openbis\", \n", + " \"Notebooks\", \n", + " \"Metadata_Schemas_LinkML\",\n", + " \"materialMLinfoContext.jsonld\"\n", + ")\n", "\n", - "if \"@context\" in molecule_json:\n", - " _ = molecule_json.pop(\"@context\")\n", - " \n", - "for key1, value1 in schema_context.items():\n", - " for key2, value2 in json_context.items():\n", - " if value1 == value2:\n", - " replace_keys_recursive(molecule_json, key2, key1)\n", - " \n", - "molecule_info_valid = False\n", + "\n", + "aiida_context = read_json(jsonld_context_filename)\n", "\n", "try:\n", - " molecule_validator.validate(instance = molecule_json)\n", - " molecule_info_valid = True\n", - " params[\"molecule_info\"] = json.dumps(molecule_json)\n", + " molecule_data = extract_molecule_data(molecule_jsonld, aiida_context)\n", + " params[\"molecule_info\"] = json.dumps(molecule_data)\n", " eln_widget = ElnImportWidget(path_to_root=\"../../\", **params)\n", - "except ValidationError as e:\n", - " message = e.schema[\"error_msg\"] if \"error_msg\" in e.schema else e.message\n", - " print(f'Invalid data: {message}')" + " molecule_info_valid = True\n", + "except ValueError as e:\n", + " print(e)\n", + "\n" ] }, {