diff --git a/Kahi_openalex_sources/LICENSE b/Kahi_openalex_sources/LICENSE
new file mode 100644
index 0000000..cef2ea0
--- /dev/null
+++ b/Kahi_openalex_sources/LICENSE
@@ -0,0 +1,30 @@
+Copyright (c) 2005-2020, Colav Developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of the Colav Developers nor the names of any
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Kahi_openalex_sources/MANIFEST.in b/Kahi_openalex_sources/MANIFEST.in
new file mode 100644
index 0000000..eab13ec
--- /dev/null
+++ b/Kahi_openalex_sources/MANIFEST.in
@@ -0,0 +1,2 @@
+recursive-include kahi_openalex_sources/ *.py
+recursive-include kahi_openalex_sources/ *.*
\ No newline at end of file
diff --git a/Kahi_openalex_sources/README.md b/Kahi_openalex_sources/README.md
new file mode 100644
index 0000000..5e2ab30
--- /dev/null
+++ b/Kahi_openalex_sources/README.md
@@ -0,0 +1,44 @@
+
+
+# Kahi OpenAlex sources plugin
+Kahi uses this plugin to insert or update journal information from OpenAlex.
+
+# Description
+This plugin reads OpenAlex records from a MongoDB collection and inserts or updates the corresponding journals in CoLav's database format.
+
+# Installation
+Download the repository from GitHub, go to the folder where `setup.py` is located, and run
+```shell
+pip3 install .
+```
+To install the released package instead, run
+```shell
+pip3 install kahi_openalex_sources
+```
+
+## Dependencies
+Software dependencies will automatically be installed when installing the plugin.
+The user must have a copy of the OpenAlex dump with the venues collection, which can be downloaded from the [OpenAlex data dump website](https://docs.openalex.org/download-all-data/openalex-snapshot "OpenAlex data dump website"), imported into a MongoDB database.
+
+# Usage
+To use this plugin you must have kahi installed in your system and construct a yaml file such as
+```yaml
+config:
+ database_url: localhost:27017
+ database_name: kahi
+ log_database: kahi_log
+ log_collection: log
+workflow:
+ openalex_sources:
+ database_url: localhost:27017
+ database_name: openalex
+ collection_name: venues
+```
+
+
+# License
+BSD-3-Clause License
+
+# Links
+http://colav.udea.edu.co/
+
diff --git a/Kahi_openalex_sources/kahi_openalex_sources/Kahi_openalex_sources.py b/Kahi_openalex_sources/kahi_openalex_sources/Kahi_openalex_sources.py
new file mode 100644
index 0000000..83f328d
--- /dev/null
+++ b/Kahi_openalex_sources/kahi_openalex_sources/Kahi_openalex_sources.py
@@ -0,0 +1,121 @@
+from kahi.KahiBase import KahiBase
+from pymongo import MongoClient
+from datetime import datetime as dt
+from time import time
+
+
+class Kahi_openalex_sources(KahiBase):
+
+ config = {}
+
+ def __init__(self, config):
+ self.config = config
+
+ self.mongodb_url = config["database_url"]
+
+ self.client = MongoClient(self.mongodb_url)
+
+ self.db = self.client[config["database_name"]]
+ self.collection = self.db["sources"]
+
+ self.openalex_client = MongoClient(
+ config["openalex_sources"]["database_url"])
+ self.openalex_db = self.openalex_client[config["openalex_sources"]
+ ["database_name"]]
+ self.openalex_collection = self.openalex_db[config["openalex_sources"]
+ ["collection_name"]]
+
+ self.already_processed = []
+
+ def process_openalex(self):
+ with self.openalex_client.start_session() as session:
+ self.openalex_db = self.openalex_client[self.config["openalex_sources"]
+ ["database_name"]]
+ self.openalex_collection = self.openalex_db[self.config["openalex_sources"]
+ ["collection_name"]]
+ old = dt.now()
+ for source in self.openalex_collection.find({"id": {"$nin": self.already_processed}}):
+ if source["id"] in self.already_processed:
+ continue
+ source_db = None
+ if "issn" in source.keys():
+ source_db = self.collection.find_one(
+ {"external_ids.id": source["issn"]})
+ if not source_db:
+ if "issn_l" in source.keys():
+ source_db = self.collection.find_one(
+ {"external_ids.id": source["issn_l"]})
+ if source_db:
+ oa_found = False
+ for up in source_db["updated"]:
+ if up["source"] == "openalex":
+ oa_found = True
+ break
+ if oa_found:
+ continue
+
+ source_db["updated"].append(
+ {"source": "openalex", "time": int(time())})
+ source_db["external_ids"].append(
+ {"source": "openalex", "id": source["id"]})
+ source_db["types"].append(
+ {"source": "openalex", "type": source["type"]})
+ source_db["names"].append(
+ {"name": source["display_name"], "lang": "en", "source": "openalex"})
+
+ self.collection.update_one({"_id": source_db["_id"]}, {"$set": {
+ "updated": source_db["updated"],
+ "names": source_db["names"],
+ "external_ids": source_db["external_ids"],
+ "types": source_db["types"],
+ "subjects": source_db["subjects"]
+ }})
+ else:
+ entry = self.empty_source()
+ entry["updated"] = [
+ {"source": "openalex", "time": int(time())}]
+ entry["names"].append(
+ {"name": source["display_name"], "lang": "en", "source": "openalex"})
+ entry["external_ids"].append(
+ {"source": "openalex", "id": source["id"]})
+ if "issn" in source.keys():
+ entry["external_ids"].append(
+ {"source": "issn", "id": source["issn"]})
+ if "issn_l" in source.keys():
+ entry["external_ids"].append(
+ {"source": "issn_l", "id": source["issn_l"]})
+ entry["types"].append(
+ {"source": "openalex", "type": source["type"]})
+ if "publisher" in source.keys():
+ if source["publisher"]:
+ entry["publisher"] = {
+ "name": source["publisher"], "country_code": ""}
+ if "apc_usd" in source.keys():
+ if source["apc_usd"]:
+ entry["apc"] = {"currency": "USD",
+ "charges": source["apc_usd"]}
+ if "abbreviated_title" in source.keys():
+ if source["abbreviated_title"]:
+ entry["abbreviations"].append(
+ source["abbreviated_title"])
+ for name in source["alternate_titles"]:
+ entry["abbreviations"].append(name)
+ if source["homepage_url"]:
+ entry["external_urls"].append(
+ {"source": "site", "url": source["homepage_url"]})
+ if source["societies"]:
+ for soc in source["societies"]:
+ entry["external_urls"].append(
+ {"source": soc["organization"], "url": soc["url"]})
+
+ self.collection.insert_one(entry)
+ self.already_processed.append(source["id"])
+ delta = dt.now() - old
+ if delta.seconds > 240:
+ self.openalex_client.admin.command(
+ 'refreshSessions', [session.session_id], session=session)
+ old = dt.now()
+
+ def run(self):
+ self.process_openalex()
+ return 0
diff --git a/Kahi_openalex_sources/kahi_openalex_sources/__init__.py b/Kahi_openalex_sources/kahi_openalex_sources/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/Kahi_openalex_sources/kahi_openalex_sources/_version.py b/Kahi_openalex_sources/kahi_openalex_sources/_version.py
new file mode 100644
index 0000000..7b6ecff
--- /dev/null
+++ b/Kahi_openalex_sources/kahi_openalex_sources/_version.py
@@ -0,0 +1,6 @@
+# flake8: noqa
+# Package version. setup.py reads it by scanning this file for the line that
+# starts with ``__version__`` and taking the quoted text, so keep the
+# assignment on a single line with a quoted literal.
+__version__ = '0.0.1-alpha'
+
+
+def get_version():
+ """Return the package version string."""
+ return __version__
diff --git a/Kahi_openalex_sources/setup.py b/Kahi_openalex_sources/setup.py
new file mode 100644
index 0000000..f11bb39
--- /dev/null
+++ b/Kahi_openalex_sources/setup.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+# Copyright (c) Colav.
+# Distributed under the terms of the Modified BSD License.
+
+# -----------------------------------------------------------------------------
+# Minimal Python version sanity check (from IPython)
+# -----------------------------------------------------------------------------
+
+# See https://stackoverflow.com/a/26737258/2268280
+# sudo pip3 install twine
+# python3 setup.py sdist bdist_wheel
+# twine upload dist/*
+# For test purposes
+# twine upload --repository-url https://test.pypi.org/legacy/ dist/*
+
+from __future__ import print_function
+from setuptools import setup, find_packages
+
+import os
+import sys
+import codecs
+
+
+# Interpreter version information captured when the script is loaded.
+# NOTE(review): `v` is not referenced anywhere else in the visible part of this script.
+v = sys.version_info
+
+
+def read(rel_path):
+ here = os.path.abspath(os.path.dirname(__file__))
+ with codecs.open(os.path.join(here, rel_path), 'r') as fp:
+ return fp.read()
+
+
+def get_version(rel_path):
+ for line in read(rel_path).splitlines():
+ if line.startswith('__version__'):
+ delim = '"' if '"' in line else "'"
+ return line.split(delim)[1]
+ else:
+ raise RuntimeError("Unable to find version string.")
+
+
+shell = False
+if os.name in ('nt', 'dos'):
+ shell = True
+ warning = "WARNING: Windows is not officially supported"
+ print(warning, file=sys.stderr)
+
+
+def main():
+ setup(
+ # Application name:
+ name="Kahi_openalex_sources",
+
+ # Version number (initial):
+ version=get_version('kahi_openalex_sources/_version.py'),
+
+ # Application author details:
+ author="Colav",
+ author_email="colav@udea.edu.co",
+
+ # Packages
+ packages=find_packages(exclude=['tests']),
+
+ # Include additional files into the package
+ include_package_data=True,
+
+ # Details
+ url="https://github.com/colav/Kahi_plugins",
+ #
+ license="BSD",
+
+ description="Kahi plugin to insert and update the sources from openalex",
+
+ long_description=open("README.md").read(),
+
+ long_description_content_type="text/markdown",
+
+ # Dependent packages (distributions)
+ # put you packages here
+ install_requires=[
+ 'kahi',
+ 'pymongo'
+ ],
+ )
+
+
+if __name__ == "__main__":
+ main()