Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Openalex sources #8

Merged
merged 4 commits into from
Aug 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions Kahi_openalex_sources/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
Copyright (c) 2005-2020, Colav Developers.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.

* Neither the name of the Colav Developers nor the names of any
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2 changes: 2 additions & 0 deletions Kahi_openalex_sources/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
recursive-include kahi_openalex_sources/ *.py
recursive-include kahi_openalex_sources/ *.*
44 changes: 44 additions & 0 deletions Kahi_openalex_sources/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<center><img src="https://raw.githubusercontent.com/colav/colav.github.io/master/img/Logo.png"/></center>

# Kahi OpenAlex sources plugin
Kahi will use this plugin to insert or update the journal information from openalex

# Description
Plugin that reads the information from a mongodb collection with openalex information to update or insert the information of the journals in CoLav's database format.

# Installation
You could download the repository from github. Go into the folder where the setup.py is located and run
```shell
pip3 install .
```
From the package you can install by running
```shell
pip3 install kahi_openalex_sources
```

## Dependencies
Software dependencies will automatically be installed when installing the plugin.
The user must have a copy of the OpenAlex dump with the collection of venues, which can be downloaded from the [OpenAlex data dump website](https://docs.openalex.org/download-all-data/openalex-snapshot "OpenAlex data dump website"), and must import it into a MongoDB database.

# Usage
To use this plugin you must have kahi installed in your system and construct a yaml file such as
```yaml
config:
  database_url: localhost:27017
  database_name: kahi
  log_database: kahi_log
  log_collection: log
workflow:
  openalex_sources:
    database_url: localhost:27017
    database_name: openalex
    collection_name: venues
```


# License
BSD-3-Clause License

# Links
http://colav.udea.edu.co/

121 changes: 121 additions & 0 deletions Kahi_openalex_sources/kahi_openalex_sources/Kahi_openalex_sources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from kahi.KahiBase import KahiBase
from pymongo import MongoClient
from datetime import datetime as dt
from time import time


class Kahi_openalex_sources(KahiBase):
    """Kahi plugin that inserts or updates sources using OpenAlex records.

    Records are read from the MongoDB collection named in
    ``config["openalex_sources"]`` and written to the ``sources`` collection
    of the database named in ``config["database_name"]``.
    """

    config = {}

    def __init__(self, config):
        """Open the MongoDB connections described by ``config``.

        Parameters
        ----------
        config : dict
            Must provide ``database_url`` and ``database_name`` for the
            target database, and an ``openalex_sources`` mapping with
            ``database_url``, ``database_name`` and ``collection_name``
            for the OpenAlex data.
        """
        self.config = config

        self.mongodb_url = config["database_url"]

        self.client = MongoClient(self.mongodb_url)

        self.db = self.client[config["database_name"]]
        self.collection = self.db["sources"]

        openalex_config = config["openalex_sources"]
        self.openalex_client = MongoClient(openalex_config["database_url"])
        self.openalex_db = self.openalex_client[
            openalex_config["database_name"]]
        self.openalex_collection = self.openalex_db[
            openalex_config["collection_name"]]

        # OpenAlex ids handled so far; kept as a list because it is sent
        # to MongoDB inside a ``$nin`` filter.
        self.already_processed = []

    def _find_existing_source(self, source):
        """Return the stored source matching the record's ISSN or ISSN-L.

        The ``issn`` value is tried first and ``issn_l`` second; ``None``
        is returned when neither lookup finds a document.
        """
        # NOTE(review): if ``issn`` is stored as a list in the OpenAlex dump,
        # this equality filter only matches an identical array -- confirm
        # whether an ``$in`` lookup was intended.
        for key in ("issn", "issn_l"):
            if key in source:
                found = self.collection.find_one(
                    {"external_ids.id": source[key]})
                if found:
                    return found
        return None

    def _update_existing_source(self, source_db, source):
        """Append the OpenAlex data to ``source_db`` and persist it."""
        source_db["updated"].append(
            {"source": "openalex", "time": int(time())})
        source_db["external_ids"].append(
            {"source": "openalex", "id": source["id"]})
        source_db["types"].append(
            {"source": "openalex", "type": source["type"]})
        source_db["names"].append(
            {"name": source["display_name"], "lang": "en",
             "source": "openalex"})

        self.collection.update_one({"_id": source_db["_id"]}, {"$set": {
            "updated": source_db["updated"],
            "names": source_db["names"],
            "external_ids": source_db["external_ids"],
            "types": source_db["types"],
            # Written back unchanged, exactly as it was read.
            "subjects": source_db["subjects"]
        }})

    def _build_new_entry(self, source):
        """Build a new source document from an OpenAlex record.

        Optional OpenAlex fields are read with ``dict.get`` so that a record
        lacking one of them is still inserted instead of raising
        ``KeyError``.
        """
        # ``empty_source`` is not defined in this class; presumably it is
        # inherited from KahiBase.
        entry = self.empty_source()
        entry["updated"] = [{"source": "openalex", "time": int(time())}]
        entry["names"].append(
            {"name": source["display_name"], "lang": "en",
             "source": "openalex"})
        entry["external_ids"].append(
            {"source": "openalex", "id": source["id"]})
        if "issn" in source:
            entry["external_ids"].append(
                {"source": "issn", "id": source["issn"]})
        if "issn_l" in source:
            entry["external_ids"].append(
                {"source": "issn_l", "id": source["issn_l"]})
        entry["types"].append(
            {"source": "openalex", "type": source["type"]})
        if source.get("publisher"):
            entry["publisher"] = {
                "name": source["publisher"], "country_code": ""}
        if source.get("apc_usd"):
            entry["apc"] = {"currency": "USD",
                            "charges": source["apc_usd"]}
        if source.get("abbreviated_title"):
            entry["abbreviations"].append(source["abbreviated_title"])
        for name in source.get("alternate_titles") or []:
            entry["abbreviations"].append(name)
        if source.get("homepage_url"):
            entry["external_urls"].append(
                {"source": "site", "url": source["homepage_url"]})
        for soc in source.get("societies") or []:
            entry["external_urls"].append(
                {"source": soc["organization"], "url": soc["url"]})
        return entry

    def process_openalex(self):
        """Copy every unprocessed OpenAlex record into the sources collection.

        For each record, an existing source is looked up by ISSN / ISSN-L.
        A match that has no ``openalex`` entry in ``updated`` is extended
        with the OpenAlex id, type and name; a match that already has one is
        skipped. Records without a match are inserted as new sources.
        """
        openalex_config = self.config["openalex_sources"]
        with self.openalex_client.start_session() as session:
            self.openalex_db = self.openalex_client[
                openalex_config["database_name"]]
            self.openalex_collection = self.openalex_db[
                openalex_config["collection_name"]]

            # Set mirror of ``already_processed`` so the per-document
            # membership test does not scan the whole list.
            seen = set(self.already_processed)
            last_refresh = dt.now()
            # The cursor must run inside the explicit session; otherwise
            # refreshing that session below has no effect on the cursor.
            cursor = self.openalex_collection.find(
                {"id": {"$nin": self.already_processed}}, session=session)
            for source in cursor:
                if source["id"] in seen:
                    continue

                source_db = self._find_existing_source(source)
                if source_db:
                    if any(up["source"] == "openalex"
                           for up in source_db["updated"]):
                        continue
                    self._update_existing_source(source_db, source)
                else:
                    self.collection.insert_one(self._build_new_entry(source))

                seen.add(source["id"])
                self.already_processed.append(source["id"])

                # Refresh the session every four minutes during long runs.
                if (dt.now() - last_refresh).total_seconds() > 240:
                    self.openalex_client.admin.command(
                        'refreshSessions', [session.session_id],
                        session=session)
                    last_refresh = dt.now()

    def run(self):
        """Run the plugin workflow and return 0."""
        self.process_openalex()
        return 0
Empty file.
6 changes: 6 additions & 0 deletions Kahi_openalex_sources/kahi_openalex_sources/_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# flake8: noqa
__version__ = '0.0.1-alpha'


def get_version() -> str:
    """Return the package version string held in ``__version__``."""
    return __version__
90 changes: 90 additions & 0 deletions Kahi_openalex_sources/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env python3
# coding: utf-8

# Copyright (c) Colav.
# Distributed under the terms of the Modified BSD License.

# -----------------------------------------------------------------------------
# Minimal Python version sanity check (from IPython)
# -----------------------------------------------------------------------------

# See https://stackoverflow.com/a/26737258/2268280
# sudo pip3 install twine
# python3 setup.py sdist bdist_wheel
# twine upload dist/*
# For test purposes
# twine upload --repository-url https://test.pypi.org/legacy/ dist/*

from __future__ import print_function
from setuptools import setup, find_packages

import os
import sys
import codecs


# Version info of the running interpreter.
# NOTE(review): `v` is not referenced anywhere else in the visible part of
# this file -- confirm whether it is still needed.
v = sys.version_info


def read(rel_path):
    """Return the text content of a file located relative to this script.

    Parameters
    ----------
    rel_path : str
        Path of the file, relative to the directory containing this file.

    Returns
    -------
    str
        The decoded content of the file.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale of the machine building the package.
    with codecs.open(os.path.join(here, rel_path), 'r',
                     encoding='utf-8') as fp:
        return fp.read()


def get_version(rel_path):
    """Extract the version string from the ``__version__`` line of a file.

    The first line starting with ``__version__`` is split on its quote
    character (double quote if one is present, otherwise single quote) and
    the text between the first pair of quotes is returned.

    Raises
    ------
    RuntimeError
        If no line of the file starts with ``__version__``.
    """
    version_line = next(
        (text for text in read(rel_path).splitlines()
         if text.startswith('__version__')),
        None,
    )
    if version_line is None:
        raise RuntimeError("Unable to find version string.")
    quote = '"' if '"' in version_line else "'"
    return version_line.split(quote)[1]


# True on Windows/DOS hosts, where a warning is printed to stderr because
# the platform is not officially supported.
shell = os.name in ('nt', 'dos')
if shell:
    warning = "WARNING: Windows is not officially supported"
    print(warning, file=sys.stderr)


def main():
    """Call ``setuptools.setup`` with the metadata of this plugin package.

    The version is parsed from ``kahi_openalex_sources/_version.py`` and the
    long description is read from ``README.md``, both located relative to
    this file.
    """
    setup(
        # Application name:
        name="Kahi_openalex_sources",

        # Version number (initial):
        version=get_version('kahi_openalex_sources/_version.py'),

        # Application author details:
        author="Colav",
        author_email="colav@udea.edu.co",

        # Packages
        packages=find_packages(exclude=['tests']),

        # Include additional files into the package
        include_package_data=True,

        # Details
        url="https://github.com/colav/Kahi_plugins",
        #
        license="BSD",

        description="Kahi plugin to insert and update the sources from openalex",

        # read() closes the file handle and resolves the path relative to
        # this script rather than the current working directory.
        long_description=read("README.md"),

        long_description_content_type="text/markdown",

        # Dependent packages (distributions)
        # put you packages here
        install_requires=[
            'kahi',
            'pymongo'
        ],
    )


if __name__ == "__main__":
    main()
Loading