diff --git a/Kahi_openalex_sources/LICENSE b/Kahi_openalex_sources/LICENSE new file mode 100644 index 0000000..cef2ea0 --- /dev/null +++ b/Kahi_openalex_sources/LICENSE @@ -0,0 +1,30 @@ +Copyright (c) 2005-2020, Colav Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the Colav Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/Kahi_openalex_sources/MANIFEST.in b/Kahi_openalex_sources/MANIFEST.in new file mode 100644 index 0000000..eab13ec --- /dev/null +++ b/Kahi_openalex_sources/MANIFEST.in @@ -0,0 +1,2 @@ +recursive-include kahi_openalex_sources/ *.py +recursive-include kahi_openalex_sources/ *.* \ No newline at end of file diff --git a/Kahi_openalex_sources/README.md b/Kahi_openalex_sources/README.md new file mode 100644 index 0000000..5e2ab30 --- /dev/null +++ b/Kahi_openalex_sources/README.md @@ -0,0 +1,44 @@ +
from kahi.KahiBase import KahiBase
from pymongo import MongoClient
from datetime import datetime as dt
from time import time


class Kahi_openalex_sources(KahiBase):
    """Kahi plugin that inserts or updates journal sources from OpenAlex.

    Records are read from the MongoDB collection described under
    ``config["openalex_sources"]`` and written to the ``sources``
    collection of the database named by ``config["database_name"]``.
    """

    config = {}

    # Seconds of work after which the server session used by the OpenAlex
    # cursor is refreshed (same threshold the loop has always used).
    session_refresh_seconds = 240

    def __init__(self, config):
        """Open the target and OpenAlex MongoDB connections.

        Parameters
        ----------
        config : dict
            Must provide ``database_url`` and ``database_name`` for the
            target database, and an ``openalex_sources`` mapping with
            ``database_url``, ``database_name`` and ``collection_name``.
        """
        self.config = config

        self.mongodb_url = config["database_url"]
        self.client = MongoClient(self.mongodb_url)
        self.db = self.client[config["database_name"]]
        self.collection = self.db["sources"]

        openalex_config = config["openalex_sources"]
        self.openalex_client = MongoClient(openalex_config["database_url"])
        self.openalex_db = self.openalex_client[
            openalex_config["database_name"]]
        self.openalex_collection = self.openalex_db[
            openalex_config["collection_name"]]

        # Ids of the OpenAlex records written (inserted or merged) so far.
        self.already_processed = []

    def process_openalex(self):
        """Merge every OpenAlex record into the ``sources`` collection.

        A record whose ISSN or ISSN-L matches an existing source is merged
        into that source unless the source already carries an ``openalex``
        entry in ``updated``; any other record is inserted as a new source.
        Ids of written records are appended to ``self.already_processed``.
        """
        openalex_config = self.config["openalex_sources"]
        with self.openalex_client.start_session() as session:
            self.openalex_db = self.openalex_client[
                openalex_config["database_name"]]
            self.openalex_collection = self.openalex_db[
                openalex_config["collection_name"]]

            # Set mirror of already_processed: O(1) membership per record
            # instead of scanning an ever-growing list.
            seen = set(self.already_processed)
            last_refresh = dt.now()

            # The cursor must run inside the explicit session, otherwise
            # the refreshSessions command below refreshes a session the
            # cursor is not using.
            cursor = self.openalex_collection.find(
                {"id": {"$nin": self.already_processed}}, session=session)
            for source in cursor:
                source_id = source["id"]
                if source_id not in seen and self._process_source(source):
                    self.already_processed.append(source_id)
                    seen.add(source_id)

                # Checked on every iteration (also for skipped records) so
                # a long run of skips cannot let the session expire.
                # total_seconds() is used because timedelta.seconds drops
                # the days component.
                elapsed = (dt.now() - last_refresh).total_seconds()
                if elapsed > self.session_refresh_seconds:
                    self.openalex_client.admin.command(
                        'refreshSessions', [session.session_id],
                        session=session)
                    last_refresh = dt.now()

    def _process_source(self, source):
        """Insert or merge one OpenAlex record; return True if written."""
        source_db = self._find_existing(source)
        if not source_db:
            self.collection.insert_one(self._build_entry(source))
            return True
        if any(up["source"] == "openalex" for up in source_db["updated"]):
            # This source was already enriched from OpenAlex.
            return False
        self._update_existing(source_db, source)
        return True

    def _find_existing(self, source):
        """Return the stored source matching the record's ISSN/ISSN-L."""
        # NOTE(review): OpenAlex documents `issn` as a list of strings; it
        # is queried and stored here as-is - confirm this matches how the
        # other plugins store ISSNs in `external_ids`.
        for key in ("issn", "issn_l"):
            value = source.get(key)
            if not value:
                # A null value would make {"external_ids.id": None} match
                # documents that simply lack the field (false positive).
                continue
            found = self.collection.find_one({"external_ids.id": value})
            if found:
                return found
        return None

    def _update_existing(self, source_db, source):
        """Append the OpenAlex provenance, id, type and name to a source."""
        source_db["updated"].append(
            {"source": "openalex", "time": int(time())})
        source_db["external_ids"].append(
            {"source": "openalex", "id": source["id"]})
        source_db["types"].append(
            {"source": "openalex", "type": source["type"]})
        source_db["names"].append(
            {"name": source["display_name"], "lang": "en",
             "source": "openalex"})

        # Only the fields modified above are written back.
        self.collection.update_one({"_id": source_db["_id"]}, {"$set": {
            "updated": source_db["updated"],
            "names": source_db["names"],
            "external_ids": source_db["external_ids"],
            "types": source_db["types"],
        }})

    def _build_entry(self, source):
        """Build a new source document from one OpenAlex record."""
        # empty_source() is not defined in this class; presumably it is
        # inherited from KahiBase and returns the blank source template.
        entry = self.empty_source()
        entry["updated"] = [{"source": "openalex", "time": int(time())}]
        entry["names"].append(
            {"name": source["display_name"], "lang": "en",
             "source": "openalex"})
        entry["external_ids"].append(
            {"source": "openalex", "id": source["id"]})
        for key in ("issn", "issn_l"):
            # Skip null/empty identifiers instead of storing them.
            if source.get(key):
                entry["external_ids"].append(
                    {"source": key, "id": source[key]})
        entry["types"].append(
            {"source": "openalex", "type": source["type"]})

        if source.get("publisher"):
            entry["publisher"] = {
                "name": source["publisher"], "country_code": ""}
        if source.get("apc_usd"):
            entry["apc"] = {"currency": "USD",
                            "charges": source["apc_usd"]}
        if source.get("abbreviated_title"):
            entry["abbreviations"].append(source["abbreviated_title"])

        # The remaining fields are optional too: treat a missing or null
        # value like an empty one instead of raising.
        entry["abbreviations"].extend(source.get("alternate_titles") or [])
        if source.get("homepage_url"):
            entry["external_urls"].append(
                {"source": "site", "url": source["homepage_url"]})
        for soc in source.get("societies") or []:
            entry["external_urls"].append(
                {"source": soc["organization"], "url": soc["url"]})
        return entry

    def run(self):
        """Plugin entry point: process the OpenAlex sources and return 0."""
        self.process_openalex()
        return 0
__version__ diff --git a/Kahi_openalex_sources/setup.py b/Kahi_openalex_sources/setup.py new file mode 100644 index 0000000..f11bb39 --- /dev/null +++ b/Kahi_openalex_sources/setup.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +# Copyright (c) Colav. +# Distributed under the terms of the Modified BSD License. + +# ----------------------------------------------------------------------------- +# Minimal Python version sanity check (from IPython) +# ----------------------------------------------------------------------------- + +# See https://stackoverflow.com/a/26737258/2268280 +# sudo pip3 install twine +# python3 setup.py sdist bdist_wheel +# twine upload dist/* +# For test purposes +# twine upload --repository-url https://test.pypi.org/legacy/ dist/* + +from __future__ import print_function +from setuptools import setup, find_packages + +import os +import sys +import codecs + + +v = sys.version_info + + +def read(rel_path): + here = os.path.abspath(os.path.dirname(__file__)) + with codecs.open(os.path.join(here, rel_path), 'r') as fp: + return fp.read() + + +def get_version(rel_path): + for line in read(rel_path).splitlines(): + if line.startswith('__version__'): + delim = '"' if '"' in line else "'" + return line.split(delim)[1] + else: + raise RuntimeError("Unable to find version string.") + + +shell = False +if os.name in ('nt', 'dos'): + shell = True + warning = "WARNING: Windows is not officially supported" + print(warning, file=sys.stderr) + + +def main(): + setup( + # Application name: + name="Kahi_openalex_sources", + + # Version number (initial): + version=get_version('kahi_openalex_sources/_version.py'), + + # Application author details: + author="Colav", + author_email="colav@udea.edu.co", + + # Packages + packages=find_packages(exclude=['tests']), + + # Include additional files into the package + include_package_data=True, + + # Details + url="https://github.com/colav/Kahi_plugins", + # + license="BSD", + + description="Kahi plugin to 
insert and update the sources from openalex", + + long_description=open("README.md").read(), + + long_description_content_type="text/markdown", + + # Dependent packages (distributions) + # put you packages here + install_requires=[ + 'kahi', + 'pymongo' + ], + ) + + +if __name__ == "__main__": + main()