diff --git a/CHANGELOG.md b/CHANGELOG.md index 5541b62..ef9893c 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes documented below. +## 0.3.2 + +- **enhancement:** add `labels_aliases` field for faster text search of both labels and aliases using an [Elasticsearch match query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html) + ## 0.3.1 - **fix:** property values without types are ignored diff --git a/cli.py b/cli.py index a2e30d6..8c706cd 100755 --- a/cli.py +++ b/cli.py @@ -186,7 +186,7 @@ def check_es_credentials(credentials: dict): # path="../wikidata/all_no_articles.ndjson", # properties="p31,p279", # config="./config.ini", - # index='wikidump', + # index='wikidump2', # cluster=None, # user=None, # password=None, @@ -195,5 +195,6 @@ def check_es_credentials(credentials: dict): # page_size=100, # language='en', # timeout=6, + # disable_refresh=True # ) main() diff --git a/elastic_wikidata/dump_to_es.py b/elastic_wikidata/dump_to_es.py index 3c4d33b..8f5b83c 100755 --- a/elastic_wikidata/dump_to_es.py +++ b/elastic_wikidata/dump_to_es.py @@ -93,11 +93,21 @@ def start_elasticsearch(self): print("Connecting to Elasticsearch on localhost") self.es = Elasticsearch() - self.es.indices.create(index=self.index_name, ignore=400) + mappings = { + "mappings": { + "properties": { + "labels": {"type": "text", "copy_to": "labels_aliases"}, + "aliases": {"type": "text", "copy_to": "labels_aliases"}, + "labels_aliases": {"type": "text", "store": "true"}, + } + } + } + + self.es.indices.create(index=self.index_name, ignore=400, body=mappings) if self.disable_refresh_on_index: print( - "Temporary disabling refresh for the index. Will reset refresh interval for the default (1s) after load is complete." + "Temporary disabling refresh for the index. Will reset refresh interval to the default (1s) after load is complete." ) self.es.indices.put_settings({"index": {"refresh_interval": -1}}) diff --git a/setup.py b/setup.py index d3f9ea3..1fcadd5 100755 --- a/setup.py +++ b/setup.py @@ -5,13 +5,13 @@ setuptools.setup( name="elastic-wikidata", - version="0.3.1", + version="0.3.2", author="Science Museum Group", description="elastic-wikidata", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/TheScienceMuseum/elastic-wikidata", - download_url="https://github.com/TheScienceMuseum/elastic-wikidata/archive/v0.2.0.tar.gz", + download_url="https://github.com/TheScienceMuseum/elastic-wikidata/archive/v0.3.2.tar.gz", classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License",