Skip to content

Commit

Permalink
Merge pull request #12 from TheScienceMuseum/develop
Browse files Browse the repository at this point in the history
v0.3.6
  • Loading branch information
kdutia authored Oct 9, 2020
2 parents 9dc7d88 + 685d3cb commit 2d49800
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 21 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

All notable changes documented below.

## 0.3.6

- **fix:** handles documents which are missing any of the *labels/aliases/descriptions/claims* fields.
- **enhancement:** `wd_entities.simplify_wbgetentities_result` gives the option to return the redirected QID for Wikidata pages which redirect. By default it returns the original (non-redirected) QID: the same one that was passed into the function.

## 0.3.5

- **fix:** `wd_entities.simplify_wbgetentities_result` can handle type *quantity*, and returns the value of *amount*.
Expand Down
56 changes: 36 additions & 20 deletions elastic_wikidata/wd_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,10 @@ def get_labels(self, qcodes, lang="en", page_limit=50, timeout: int = None) -> d


def simplify_wbgetentities_result(
doc: Union[dict, List[dict]], lang: str, properties: list
doc: Union[dict, List[dict]],
lang: str,
properties: list,
use_redirected_qid: bool = False,
) -> Union[dict, List[dict]]:
"""
Processes a single document or set of documents from the JSON result of wbgetentities, returning a simplified version of that document.
Expand All @@ -119,6 +122,8 @@ def simplify_wbgetentities_result(
doc (Union[dict, List[dict]]): JSON result from Wikidata wbgetentities API
lang (str): Wikimedia language code
properties (list): list of Wikidata properties
use_redirected_qid (bool, optional): whether to return the redirected QID value under the 'id' field instead of the original QID
if there is one. Defaults to False.
Returns:
Union[dict, List[dict]]: dict if single record passed in; list if multiple records
Expand All @@ -135,40 +140,51 @@ def simplify_wbgetentities_result(
"quantity": "amount",
}

newdoc = {"id": doc["id"]}
# check for redirected URL
if "redirects" in doc:
if use_redirected_qid:
newdoc = {"id": doc["redirects"]["to"]}
else:
newdoc = {"id": doc["redirects"]["from"]}

else:
newdoc = {"id": doc["id"]}

# add label(s)
if lang in doc["labels"]:
if lang in doc.get("labels", {}):
newdoc["labels"] = doc["labels"][lang]["value"]

# add descriptions(s)
if lang in doc["descriptions"]:
if lang in doc.get("descriptions", {}):
newdoc["descriptions"] = doc["descriptions"][lang]["value"]

# add aliases
if (len(doc["aliases"]) > 0) and (lang in doc["aliases"]):
if (len(doc.get("aliases", {})) > 0) and (lang in doc.get("aliases", {})):
newdoc["aliases"] = [i["value"] for i in doc["aliases"][lang]]
else:
newdoc["aliases"] = []

# add claims (property values)
newdoc["claims"] = {}

for p in properties:
if p in doc["claims"]:
claims = []
for i in doc["claims"][p]:
try:
value_type = i["mainsnak"]["datavalue"]["type"]
if value_type == "string":
claims.append(i["mainsnak"]["datavalue"]["value"])
else:
value_name = wd_type_mapping[value_type]
claims.append(i["mainsnak"]["datavalue"]["value"][value_name])
except KeyError:
pass

newdoc["claims"][p] = claims
if "claims" in doc:
for p in properties:
if p in doc["claims"]:
claims = []
for i in doc["claims"][p]:
try:
value_type = i["mainsnak"]["datavalue"]["type"]
if value_type == "string":
claims.append(i["mainsnak"]["datavalue"]["value"])
else:
value_name = wd_type_mapping[value_type]
claims.append(
i["mainsnak"]["datavalue"]["value"][value_name]
)
except KeyError:
pass

newdoc["claims"][p] = claims

return newdoc

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="elastic-wikidata",
version="0.3.5",
version="0.3.6",
author="Science Museum Group",
description="elastic-wikidata",
long_description=long_description,
Expand Down

0 comments on commit 2d49800

Please sign in to comment.