Skip to content

Commit

Permalink
Merge pull request #10 from TheScienceMuseum/develop
Browse files Browse the repository at this point in the history
v0.3.4: get_labels
  • Loading branch information
kdutia authored Oct 7, 2020
2 parents d6bdaa8 + 51456d8 commit 8ef738b
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 1 deletion.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

All notable changes documented below.

## 0.3.4

- **enhancement:** `wd_entities.get_entities` now has a `get_labels` method to get labels for a list of QIDs in a particular language using the wbgetentities API.

## 0.3.2

- **enhancement:** add `labels_aliases` field for faster text search of both labels and aliases using an [Elasticsearch match query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html)
Expand Down
19 changes: 19 additions & 0 deletions elastic_wikidata/wd_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,25 @@ def result_generator(
response = s.get(url, headers=headers, timeout=timeout).json()
yield [v for _, v in response["entities"].items()]

def get_labels(self, qcodes, lang="en", page_limit=50, timeout: int = None) -> dict:
    """
    Get labels from Wikidata qcodes.

    Duplicate qcodes are requested only once. If the item associated with a qcode has no
    label in `lang`, or its returned document carries no `labels` field at all, its value
    in the dictionary is an empty string.

    Args:
        qcodes: iterable of Wikidata QIDs (hashable values, e.g. strings).
        lang: language code of the labels to return. Defaults to "en".
        page_limit: forwarded unchanged to `get_all_results`.
        timeout: forwarded unchanged to `get_all_results`.

    Returns:
        dict: {qid1: label1, qid2: label2, ...}
    """

    # dict.fromkeys de-duplicates like set() did, but keeps first-seen order so the
    # sequence of QIDs handed to get_all_results is the same on every run.
    unique_qcodes = list(dict.fromkeys(qcodes))

    docs = self.get_all_results(unique_qcodes, lang, page_limit, timeout)

    # Use .get at every level: a document without a "labels" field (presumably how the
    # API reports a missing entity -- TODO confirm) must yield "" instead of a KeyError.
    return {
        doc["id"]: doc.get("labels", {}).get(lang, {}).get("value", "")
        for doc in docs
    }


def simplify_wbgetentities_result(
doc: Union[dict, List[dict]], lang: str, properties: list
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="elastic-wikidata",
version="0.3.3",
version="0.3.4",
author="Science Museum Group",
description="elastic-wikidata",
long_description=long_description,
Expand Down
33 changes: 33 additions & 0 deletions tests/test_wd_entities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from elastic_wikidata import wd_entities
import pytest


@pytest.fixture
def ge():
    """Provide a fresh `wd_entities.get_entities` instance to each test."""
    return wd_entities.get_entities()


def test_get_all_results(ge):
    """get_all_results returns a list with one document per requested QID, in request order."""
    requested = ["Q203545", "Q706475", "Q18637243"]

    entities = ge.get_all_results(requested, timeout=6)

    assert isinstance(entities, list)
    assert len(entities) == len(requested)

    returned_ids = [entity["id"] for entity in entities]
    assert returned_ids == requested


def test_get_labels(ge):
    """get_labels maps every requested QID to its English label, or "" when it has none."""
    # The last QID has no English label, so a blank string is expected as its value.
    expected = {
        "Q203545": "Michael Gambon",
        "Q706475": "Steve McQueen",
        "Q18637243": "Michaela Coel",
        "Q82340": "",
    }

    # Iterating the dict yields the QIDs in the insertion order written above.
    labels = ge.get_labels(list(expected), timeout=6)

    assert labels == expected

0 comments on commit 8ef738b

Please sign in to comment.