diff --git a/Kahi_scienti_sources/LICENSE b/Kahi_scienti_sources/LICENSE
new file mode 100644
index 0000000..cef2ea0
--- /dev/null
+++ b/Kahi_scienti_sources/LICENSE
@@ -0,0 +1,30 @@
+Copyright (c) 2005-2020, Colav Developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of the Colav Developers nor the names of any
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Kahi_scienti_sources/MANIFEST.in b/Kahi_scienti_sources/MANIFEST.in
new file mode 100644
index 0000000..15271b5
--- /dev/null
+++ b/Kahi_scienti_sources/MANIFEST.in
@@ -0,0 +1,2 @@
+recursive-include kahi_scienti_sources/ *.py
+recursive-include kahi_scienti_sources/ *.*
\ No newline at end of file
diff --git a/Kahi_scienti_sources/README.md b/Kahi_scienti_sources/README.md
new file mode 100644
index 0000000..1abe201
--- /dev/null
+++ b/Kahi_scienti_sources/README.md
@@ -0,0 +1,66 @@
+
+
+# Kahi scienti sources plugin
+Kahi will use this plugin to insert or update the journal information from a scienti dump
+
+# Description
+Plugin that reads the information from a scienti dump to insert or update journals in colav's database.
+
+# Installation
+You could download the repository from github. Go into the folder where the setup.py is located and run
+```shell
+pip3 install .
+```
+From the package you can install by running
+```shell
+pip3 install kahi_scienti_sources
+```
+
+## Dependencies
+Software dependencies will automatically be installed when installing the plugin.
+The user must have at least one database obtained from minciencias, previously processed by [KayPacha](https://github.com/colav/KayPacha "KayPacha") and uploaded to a mongodb database.
+
+# Usage
+To use this plugin you must have kahi installed in your system and construct a yaml file such as
+```yaml
+config:
+ database_url: localhost:27017
+ database_name: kahi
+ log_database: kahi_log
+ log_collection: log
+workflow:
+ scienti_sources:
+ - database_url: localhost:27017
+ database_name: scienti_111
+ collection_name: products
+```
+Where each entry under the scienti_sources task gives the url (database_url), the database name (database_name) and the collection (collection_name) of a scienti dump loaded in mongodb.
+
+If you have several scienti databases, use the yaml structure shown below
+```yaml
+config:
+ database_url: localhost:27017
+ database_name: kahi
+ log_database: kahi_log
+ log_collection: log
+workflow:
+ scienti_sources:
+ - database_url: localhost:27017
+ database_name: scienti_111
+ collection_name: products
+ - database_url: localhost:27017
+ database_name: scienti_uec_2022
+ collection_name: products
+ - database_url: localhost:27017
+ database_name: scienti_univalle_2022
+ collection_name: products
+```
+
+# License
+BSD-3-Clause License
+
+# Links
+http://colav.udea.edu.co/
+
+
+
diff --git a/Kahi_scienti_sources/kahi_scienti_sources/Kahi_scienti_sources.py b/Kahi_scienti_sources/kahi_scienti_sources/Kahi_scienti_sources.py
new file mode 100644
index 0000000..c462f7e
--- /dev/null
+++ b/Kahi_scienti_sources/kahi_scienti_sources/Kahi_scienti_sources.py
@@ -0,0 +1,233 @@
+from kahi.KahiBase import KahiBase
+from pymongo import MongoClient
+from datetime import datetime as dt
+from time import time
+from langid import classify
+
+
+class Kahi_scienti_sources(KahiBase):
+    """Kahi plugin that inserts or updates journal records from scienti dumps.
+
+    For every ISSN found in a scienti products collection, the document of the
+    target ``sources`` collection that carries that ISSN as an external id is
+    updated, or a new document is inserted when there is none.
+    """
+
+    config = {}
+
+    # Dotted path of the journal ISSN inside a scienti product.
+    _ISSN_FIELD = "details.article.journal.TXT_ISSN_SEP"
+
+    # Layouts tried, in order, when parsing DTA_CREACION.
+    _DATE_FORMATS = ("%a, %d %b %Y %H:%M:%S %Z", "%Y-%m-%d %H:%M:%S")
+
+    def __init__(self, config):
+        """Connect to the target database described by ``config``.
+
+        ``config`` must provide ``database_url`` and ``database_name``; ``run``
+        additionally reads its ``scienti_sources`` list.
+        """
+        self.config = config
+
+        self.mongodb_url = config["database_url"]
+
+        self.client = MongoClient(self.mongodb_url)
+
+        self.db = self.client[config["database_name"]]
+        self.collection = self.db["sources"]
+
+        self.already_in_db = []
+
+    @classmethod
+    def _parse_date(cls, value):
+        """Return ``value`` as an integer UNIX timestamp, or None if unparsable."""
+        for layout in cls._DATE_FORMATS:
+            try:
+                return int(dt.strptime(value, layout).timestamp())
+            except (TypeError, ValueError, OverflowError, OSError):
+                continue
+        return None
+
+    @staticmethod
+    def _find_paper_and_journal(reg, issn):
+        """Return the ``(paper, journal)`` pair of ``reg`` that matches ``issn``.
+
+        Falls back to the first journal found when none has exactly that
+        TXT_ISSN_SEP, and to ``(None, None)`` when ``reg`` has no journal.
+        """
+        fallback = (None, None)
+        for detail in reg.get("details") or []:
+            for paper in detail.get("article") or []:
+                for journal in paper.get("journal") or []:
+                    if journal.get("TXT_ISSN_SEP") == issn:
+                        return paper, journal
+                    if fallback[1] is None:
+                        fallback = (paper, journal)
+        return fallback
+
+    @staticmethod
+    def _widen_range(ranking, start, end):
+        """Grow ``ranking``'s from_date/to_date in place to cover start..end."""
+        low = ranking.get("from_date")
+        if start is not None and (low is None or start < low):
+            ranking["from_date"] = start
+        high = ranking.get("to_date")
+        if end is not None and (high is None or end > high):
+            ranking["to_date"] = end
+
+    def _collect_rankings(self, issn):
+        """Return one scienti ranking per distinct TPO_CLASIFICACION of ``issn``.
+
+        Each ranking spans the earliest to the latest parsable DTA_CREACION
+        among the products that carry that classification.
+        """
+        # NOTE(review): assumes TPO_CLASIFICACION is hashable (e.g. a string).
+        by_rank = {}
+        for reg in self.scienti_collection.find({self._ISSN_FIELD: issn}):
+            paper, journal = self._find_paper_and_journal(reg, issn)
+            # Products without a journal or a classification add no ranking.
+            if not journal or "TPO_CLASIFICACION" not in journal:
+                continue
+            rank = journal["TPO_CLASIFICACION"]
+            stamp = self._parse_date(paper.get("DTA_CREACION"))
+            if rank in by_rank:
+                self._widen_range(by_rank[rank], stamp, stamp)
+            else:
+                by_rank[rank] = {
+                    "from_date": stamp,
+                    "to_date": stamp,
+                    "rank": rank,
+                    "issn": issn,
+                    "order": None,
+                    "source": "scienti"
+                }
+        return list(by_rank.values())
+
+    def _merge_rankings(self, current, found):
+        """Return ``current`` plus ``found`` without duplicating scienti ranks.
+
+        A found ranking whose rank and issn already exist among the scienti
+        rankings of ``current`` only widens the date range of that ranking.
+        """
+        merged = list(current)
+        for ranking in found:
+            for known in merged:
+                if (known.get("source") == "scienti"
+                        and known.get("rank") == ranking["rank"]
+                        and known.get("issn") == ranking["issn"]):
+                    self._widen_range(
+                        known, ranking["from_date"], ranking["to_date"])
+                    break
+            else:
+                merged.append(ranking)
+        return merged
+
+    def update_scienti(self, reg, entry, issn):
+        """Update the sources document ``entry`` with the scienti data of ``issn``.
+
+        ``reg`` is a scienti product that references the journal. The update
+        timestamp, type, scienti id and rankings of ``entry`` are refreshed and
+        written back; nothing is written when ``reg`` holds no journal.
+        """
+        # Keep a single, fresh "scienti" marker in the update history.
+        entry["updated"] = [
+            upd for upd in entry["updated"] if upd["source"] != "scienti"]
+        entry["updated"].append({"source": "scienti", "time": int(time())})
+
+        _, journal = self._find_paper_and_journal(reg, issn)
+        if not journal:
+            return
+
+        # Skip values already stored so that re-running adds no duplicates.
+        if "TPO_REVISTA" in journal:
+            journal_type = {"source": "scienti", "type": journal["TPO_REVISTA"]}
+            if journal_type not in entry["types"]:
+                entry["types"].append(journal_type)
+        if "COD_REVISTA" in journal:
+            scienti_id = {"source": "scienti", "id": journal["COD_REVISTA"]}
+            if scienti_id not in entry["external_ids"]:
+                entry["external_ids"].append(scienti_id)
+
+        self.collection.update_one({"_id": entry["_id"]}, {"$set": {
+            "types": entry["types"],
+            "external_ids": entry["external_ids"],
+            "updated": entry["updated"],
+            "ranking": self._merge_rankings(
+                entry["ranking"], self._collect_rankings(issn))
+        }})
+
+    def _insert_source(self, reg, issn):
+        """Insert a new sources document for ``issn`` built from ``reg``.
+
+        Nothing is inserted when ``reg`` holds no journal.
+        """
+        _, journal = self._find_paper_and_journal(reg, issn)
+        if not journal:
+            return
+        # NOTE(review): empty_source() is not defined in this class; presumably
+        # KahiBase provides it and it returns a blank source document.
+        entry = self.empty_source()
+        entry["updated"] = [{"source": "scienti", "time": int(time())}]
+        # classify() returns a (language, score) pair; only the code is kept.
+        lang = classify(journal["TXT_NME_REVISTA"])[0]
+        entry["names"] = [
+            {"lang": lang, "name": journal["TXT_NME_REVISTA"], "source": "scienti"}]
+        entry["external_ids"].append(
+            {"source": "issn", "id": journal["TXT_ISSN_SEP"]})
+        entry["external_ids"].append(
+            {"source": "scienti", "id": journal["COD_REVISTA"]})
+        if "TPO_REVISTA" in journal:
+            entry["types"].append(
+                {"source": "scienti", "type": journal["TPO_REVISTA"]})
+        if "editorial" in journal:
+            entry["publisher"] = {
+                "country_code": "", "name": journal["editorial"][0]["TXT_NME_EDITORIAL"]}
+        entry["ranking"] = self._collect_rankings(issn)
+        self.collection.insert_one(entry)
+
+    def process_scienti(self, config, verbose=0):
+        """Insert or update one source per ISSN of a scienti collection.
+
+        ``config`` provides the ``database_url``, ``database_name`` and
+        ``collection_name`` of the scienti dump. Each ISSN is printed when
+        ``verbose`` is non-zero.
+
+        Raises Exception when the database or the collection does not exist.
+        """
+        # One client is opened per configured database; drop the previous one.
+        previous = getattr(self, "scienti_client", None)
+        if previous is not None:
+            previous.close()
+        self.scienti_client = MongoClient(config["database_url"])
+
+        if config["database_name"] not in self.scienti_client.list_database_names():
+            raise Exception("Database {} not found".format(config["database_name"]))
+
+        self.scienti_db = self.scienti_client[config["database_name"]]
+
+        if config["collection_name"] not in self.scienti_db.list_collection_names():
+            raise Exception("Collection {} not found".format(config["collection_name"]))
+
+        self.scienti_collection = self.scienti_db[config["collection_name"]]
+        for issn in self.scienti_collection.distinct(self._ISSN_FIELD):
+            # A null or empty ISSN cannot identify a journal.
+            if not issn:
+                continue
+            if verbose:
+                print(issn)
+            reg_scienti = self.scienti_collection.find_one(
+                {self._ISSN_FIELD: issn})
+            if not reg_scienti:
+                continue
+            reg_db = self.collection.find_one({"external_ids.id": issn})
+            if reg_db:
+                self.update_scienti(reg_scienti, reg_db, issn)
+            else:
+                self._insert_source(reg_scienti, issn)
+
+    def run(self):
+        """Process every database listed under ``scienti_sources``; return 0."""
+        for config in self.config["scienti_sources"]:
+            print("Processing {} database".format(config["database_name"]))
+            self.process_scienti(config, verbose=5)
+        return 0
diff --git a/Kahi_scienti_sources/kahi_scienti_sources/__init__.py b/Kahi_scienti_sources/kahi_scienti_sources/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/Kahi_scienti_sources/kahi_scienti_sources/_version.py b/Kahi_scienti_sources/kahi_scienti_sources/_version.py
new file mode 100644
index 0000000..7b6ecff
--- /dev/null
+++ b/Kahi_scienti_sources/kahi_scienti_sources/_version.py
@@ -0,0 +1,7 @@
+# flake8: noqa
+__version__ = '0.0.1-alpha'
+
+
+def get_version():
+    """Return the version string of the kahi_scienti_sources package."""
+    return __version__
diff --git a/Kahi_scienti_sources/setup.py b/Kahi_scienti_sources/setup.py
new file mode 100644
index 0000000..e730f9c
--- /dev/null
+++ b/Kahi_scienti_sources/setup.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+# Copyright (c) Colav.
+# Distributed under the terms of the Modified BSD License.
+
+# -----------------------------------------------------------------------------
+# Minimal Python version sanity check (from IPython)
+# -----------------------------------------------------------------------------
+
+# See https://stackoverflow.com/a/26737258/2268280
+# sudo pip3 install twine
+# python3 setup.py sdist bdist_wheel
+# twine upload dist/*
+# For test purposes
+# twine upload --repository-url https://test.pypi.org/legacy/ dist/*
+
+from __future__ import print_function
+from setuptools import setup, find_packages
+
+import os
+import sys
+import codecs
+
+
+v = sys.version_info
+
+
+def read(rel_path):
+    """Return the text of ``rel_path``, resolved against this file's folder."""
+    here = os.path.abspath(os.path.dirname(__file__))
+    # Decode explicitly so the result does not depend on the build locale.
+    with codecs.open(os.path.join(here, rel_path), 'r', encoding='utf-8') as fp:
+        return fp.read()
+
+
+def get_version(rel_path):
+    """Return the value of ``__version__`` assigned in the file ``rel_path``.
+
+    Raises RuntimeError when no line starts with ``__version__``.
+    """
+    for line in read(rel_path).splitlines():
+        if line.startswith('__version__'):
+            delim = '"' if '"' in line else "'"
+            return line.split(delim)[1]
+    # Only reached when the loop finished without returning.
+    raise RuntimeError("Unable to find version string.")
+
+
+shell = False
+if os.name in ('nt', 'dos'):
+    shell = True
+    warning = "WARNING: Windows is not officially supported"
+    print(warning, file=sys.stderr)
+
+
+def main():
+    """Register the package metadata with setuptools."""
+    setup(
+        # Application name:
+        name="Kahi_scienti_sources",
+
+        # Version number (initial):
+        version=get_version('kahi_scienti_sources/_version.py'),
+
+        # Application author details:
+        author="Colav",
+        author_email="colav@udea.edu.co",
+
+        # Packages
+        packages=find_packages(exclude=['tests']),
+
+        # Include additional files into the package
+        include_package_data=True,
+
+        # Details
+        url="https://github.com/colav/Kahi_plugins",
+        #
+        license="BSD",
+
+        description="Kahi plugin to insert or update sources from scienti",
+
+        # Read through read() so the file is closed and found from any cwd.
+        long_description=read("README.md"),
+
+        long_description_content_type="text/markdown",
+
+        # Dependent packages (distributions)
+        # put your packages here
+        install_requires=[
+            'kahi',
+            'langid',
+            'pymongo'
+        ],
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Kahi_template/kahi_template/_version.py b/Kahi_template/kahi_template/_version.py
index e3f75fa..7b6ecff 100644
--- a/Kahi_template/kahi_template/_version.py
+++ b/Kahi_template/kahi_template/_version.py
@@ -1,5 +1,6 @@
# flake8: noqa
__version__ = '0.0.1-alpha'
+
def get_version():
return __version__