Skip to content

Commit

Permalink
Merge pull request #442 from PyThaiNLP/fix-update-bug
Browse files Browse the repository at this point in the history
Fix update bug
  • Loading branch information
wannaphong authored Jun 27, 2020
2 parents 6c83fd5 + ab74589 commit a158dd4
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 8 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Using PyThaiNLP:
- More tutorials at [https://www.thainlp.org/pythainlp/tutorials/](https://www.thainlp.org/pythainlp/tutorials/)
- See full documentation at [https://thainlp.org/pythainlp/docs/2.2/](https://thainlp.org/pythainlp/docs/2.2/)
- Some additional data (like word lists and language models) may be automatically downloaded during runtime and will be kept under the directory `~/pythainlp-data` by default. See corpus catalog at [https://github.com/PyThaiNLP/pythainlp-corpus](https://github.com/PyThaiNLP/pythainlp-corpus).
- The data location can be changed, using `PYTHAINLP_DATA_DIR` environment variable.
- The data location can be changed, using `PYTHAINLP_DATA_DIR` environment variable.
- For PyThaiNLP tokenization performance and measurement methods, see [tokenization benchmark](tokenization-benchmark.md)
- 📫 follow our [PyThaiNLP](https://www.facebook.com/pythainlp/) Facebook page

Expand Down
2 changes: 1 addition & 1 deletion pythainlp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
__version__ = "2.2.0"
__version__ = "2.2.1"

thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars

Expand Down
27 changes: 23 additions & 4 deletions pythainlp/corpus/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,20 @@ def get_corpus(filename: str) -> frozenset:
return frozenset(lines)


def _update_all():
    """
    Migrate the local corpus catalog to the ``filename`` key.

    Catalog records written by earlier releases store the corpus file
    name under ``file_name`` or ``file``. For every record carrying one
    of those legacy keys, copy its value into ``filename`` (the key that
    ``get_corpus_path`` reads). ``file_name`` takes precedence over
    ``file`` when both are present. Legacy keys are left in place.

    The catalog is updated on disk; nothing is returned.
    """
    print("Update Corpus...")
    # The context manager closes the database on exit, so no explicit
    # close() call is needed.
    with TinyDB(corpus_db_path()) as local_db:
        query = Query()
        # all() returns a materialized list, so updating the table while
        # looping over it is safe.
        for item in local_db.all():
            name = item["name"]
            if "file_name" in item:
                legacy_filename = item["file_name"]
            elif "file" in item:
                legacy_filename = item["file"]
            else:
                # Record has no legacy key; nothing to migrate.
                continue
            local_db.update(
                {"filename": legacy_filename}, query.name == name
            )


def get_corpus_path(name: str) -> Union[str, None]:
"""
Get corpus path.
Expand Down Expand Up @@ -125,13 +139,18 @@ def get_corpus_path(name: str) -> Union[str, None]:
"""
# check if the corpus is in local catalog, download if not
corpus_db_detail = get_corpus_db_detail(name)
if not corpus_db_detail or not corpus_db_detail.get("file_name"):
if corpus_db_detail.get("file_name") is not None and corpus_db_detail.get("filename") is None:
_update_all()
elif corpus_db_detail.get("file") is not None and corpus_db_detail.get("filename") is None:
_update_all()

if not corpus_db_detail or not corpus_db_detail.get("filename"):
download(name)
corpus_db_detail = get_corpus_db_detail(name)

if corpus_db_detail and corpus_db_detail.get("file_name"):
if corpus_db_detail and corpus_db_detail.get("filename"):
# corpus is in the local catalog, get full path to the file
path = get_full_data_path(corpus_db_detail.get("file_name"))
path = get_full_data_path(corpus_db_detail.get("filename"))
# check if the corpus file actually exists, download if not
if not os.path.exists(path):
download(name)
Expand Down Expand Up @@ -263,7 +282,7 @@ def download(name: str, force: bool = False, url: str = None, version: str = Non
{
"name": name,
"version": version,
"file_name": file_name,
"filename": file_name,
}
)
else:
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 2.2.0
current_version = 2.2.1
commit = True
tag = True
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@

setup(
name="pythainlp",
version="2.2.0",
version="2.2.1",
description="Thai Natural Language Processing library",
long_description=readme,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit a158dd4

Please sign in to comment.