From dbcef8844b3ac47bde1270e65fcbf52024036367 Mon Sep 17 00:00:00 2001 From: severinsimmler Date: Mon, 4 Jan 2021 21:27:56 +0100 Subject: [PATCH 1/6] chore: log message format --- chaine/logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chaine/logging.py b/chaine/logging.py index 23b1f79..7e4b983 100644 --- a/chaine/logging.py +++ b/chaine/logging.py @@ -132,7 +132,7 @@ def __init__(self): self.loss = None def __str__(self) -> str: - return f"Iteration: {self.iteration}\tLoss: {self.loss}" + return f"Iteration {self.iteration}, train loss: {self.loss}" class LogParser: From c2548063cdfa7856ddfe635939d66cce9f788f6c Mon Sep 17 00:00:00 2001 From: severinsimmler Date: Mon, 4 Jan 2021 21:28:01 +0100 Subject: [PATCH 2/6] chore: update readme --- README.md | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 3c546cb..0f32f09 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # Chaine -A linear-chain conditional random field implementation. +Linear-chain conditional random fields for natural language processing. -Chaine is a modern Python library without any third-party dependencies and a backend written in C implementing conditional random fields for natural language processing tasks like named entity recognition or part-of-speech tagging. +Chaine is a modern Python library without any third-party dependencies and a backend written in C. You can train conditional random fields for natural language processing tasks like [named entity recognition](https://en.wikipedia.org/wiki/Named-entity_recognition) or [part-of-speech tagging](https://en.wikipedia.org/wiki/Part-of-speech_tagging). 
-- **Lightweight:** explain +- **Lightweight**: Chaine explain - **Fast:** explain - **Easy to use:** explain @@ -14,25 +14,18 @@ You can install the latest stable version from [PyPI](https://pypi.org/project/c $ pip install chaine ``` -If you are interested in the theoretical concepts behind conditional random fields, refer to the introducing paper by [Lafferty et al](https://repository.upenn.edu/cgi/viewcontent.cgi?article=1162&context=cis_papers). +If you are interested in the theoretical concepts behind conditional random fields, please refer to the introducing paper by [Lafferty et al](https://repository.upenn.edu/cgi/viewcontent.cgi?article=1162&context=cis_papers). -## How it works +## Example -``` +```python >>> import chaine ->>> tokens = [["John", "Lennon", "was", "rhythm", "guitarist" "of", "The", "Beatles"]] ->>> labels = [["B-PER", "I-PER", "O", "O", "O", "O", "B-ORG", "I-ORG"]] +>>> tokens = [["John", "Lennon", "was", "born", "in" "Liverpool"]] +>>> labels = [["B-PER", "I-PER", "O", "O", "O", "B-LOC"]] >>> model = chaine.train(tokens, labels, max_iterations=5) -Loading data -Start training -Iteration 1, train loss: 14.334076 -Iteration 2, train loss: 14.334064 -Iteration 3, train loss: 14.334053 -Iteration 4, train loss: 14.334041 -Iteration 5, train loss: 14.334029 >>> model.predict(tokens) -[['B-PER', 'I-PER', 'O', 'O', 'O', 'B-ORG', 'I-ORG']] +[["B-PER", "I-PER", "O", "O", "O", "B-LOC"]] ``` Check out the introducing [Jupyter notebook](https://github.com/severinsimmler/chaine/blob/master/notebooks/tutorial.ipynb). From 7f75bc050f3039703dae90d87791b975c98c47a6 Mon Sep 17 00:00:00 2001 From: severinsimmler Date: Mon, 4 Jan 2021 21:49:41 +0100 Subject: [PATCH 3/6] chore: update readme --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 0f32f09..5d098d4 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,11 @@ Linear-chain conditional random fields for natural language processing. 
-Chaine is a modern Python library without any third-party dependencies and a backend written in C. You can train conditional random fields for natural language processing tasks like [named entity recognition](https://en.wikipedia.org/wiki/Named-entity_recognition) or [part-of-speech tagging](https://en.wikipedia.org/wiki/Part-of-speech_tagging). +Chaine is a modern Python library without third-party dependencies and a backend written in C. You can train conditional random fields for natural language processing tasks like [named entity recognition](https://en.wikipedia.org/wiki/Named-entity_recognition) or [part-of-speech tagging](https://en.wikipedia.org/wiki/Part-of-speech_tagging). -- **Lightweight**: Chaine explain -- **Fast:** explain -- **Easy to use:** explain +- **Lightweight**: No use of bloated third-party libraries — only pure Python and C. +- **Fast**: Performance critical parts are written in C and thus [blazingly fast](http://www.chokkan.org/software/crfsuite/benchmark.html). +- **Easy to use**: Designed with special focus on usability and a beautiful high-level API. You can install the latest stable version from [PyPI](https://pypi.org/project/chaine): @@ -25,7 +25,7 @@ If you are interested in the theoretical concepts behind conditional random fiel >>> labels = [["B-PER", "I-PER", "O", "O", "O", "B-LOC"]] >>> model = chaine.train(tokens, labels, max_iterations=5) >>> model.predict(tokens) -[["B-PER", "I-PER", "O", "O", "O", "B-LOC"]] +[['B-PER', 'I-PER', 'O', 'O', 'O', 'B-LOC']] ``` Check out the introducing [Jupyter notebook](https://github.com/severinsimmler/chaine/blob/master/notebooks/tutorial.ipynb). 
From 85ee5fe8fa51f486667ec8458c63655ab5d271b1 Mon Sep 17 00:00:00 2001 From: severinsimmler Date: Mon, 4 Jan 2021 21:50:30 +0100 Subject: [PATCH 4/6] chore: update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5d098d4..5b390e3 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Linear-chain conditional random fields for natural language processing. Chaine is a modern Python library without third-party dependencies and a backend written in C. You can train conditional random fields for natural language processing tasks like [named entity recognition](https://en.wikipedia.org/wiki/Named-entity_recognition) or [part-of-speech tagging](https://en.wikipedia.org/wiki/Part-of-speech_tagging). -- **Lightweight**: No use of bloated third-party libraries — only pure Python and C. +- **Lightweight**: No use of bloated third-party libraries. - **Fast**: Performance critical parts are written in C and thus [blazingly fast](http://www.chokkan.org/software/crfsuite/benchmark.html). - **Easy to use**: Designed with special focus on usability and a beautiful high-level API. 
From 4bb80c236538591dc00b1ea4926cc5fe32417408 Mon Sep 17 00:00:00 2001 From: severinsimmler Date: Mon, 4 Jan 2021 22:08:55 +0100 Subject: [PATCH 5/6] chore: tune version number --- pyproject.toml | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index cb5797f..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,27 +0,0 @@ -[tool.poetry] -name = "chaine" -version = "0.2.1" -description = "A Lightweight Conditional Random Field" -authors = ["Severin Simmler "] -readme = "README.md" -build = "build.py" - -[tool.poetry.dependencies] -python = "^3.8" - -[tool.poetry.dev-dependencies] -black = "^20.8b1" -isort = "^5.6.4" -pytest = "^6.1.2" -cython = "^0.29.21" -jupyterlab = "^2.2.9" - -[tool.isort] -line_length = 88 -use_parentheses = true -include_trailing_comma = true -multi_line_output = 3 - -[build-system] -requires = ["poetry-core>=1.0.0", "setuptools", "wheel", "cython"] -build-backend = "poetry.core.masonry.api" From d923839e8ed37c29f90bcab8db3277780390f90b Mon Sep 17 00:00:00 2001 From: severinsimmler Date: Mon, 4 Jan 2021 22:09:14 +0100 Subject: [PATCH 6/6] chore: tune version number --- pyproject.toml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..bce7743 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,27 @@ +[tool.poetry] +name = "chaine" +version = "0.2.2" +description = "A Lightweight Conditional Random Field" +authors = ["Severin Simmler "] +readme = "README.md" +build = "build.py" + +[tool.poetry.dependencies] +python = "^3.8" + +[tool.poetry.dev-dependencies] +black = "^20.8b1" +isort = "^5.6.4" +pytest = "^6.1.2" +cython = "^0.29.21" +jupyterlab = "^2.2.9" + +[tool.isort] +line_length = 88 +use_parentheses = true +include_trailing_comma = true +multi_line_output = 3 + +[build-system] +requires = 
["poetry-core>=1.0.0", "setuptools", "wheel", "cython"] +build-backend = "poetry.core.masonry.api"