severinsimmler
diff --git a/‎README.md
Lines changed: 23 additions & 10 deletions b/‎README.md
Lines changed: 23 additions & 10 deletions
diff --git a/‎chaine/__init__.py
Lines changed: 1 addition & 1 deletion b/‎chaine/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎chaine/crfsuite/include/crfsuite.hpp
Lines changed: 5 additions & 5 deletions b/‎chaine/crfsuite/include/crfsuite.hpp
Lines changed: 5 additions & 5 deletions
diff --git a/‎chaine/data.py
Lines changed: 0 additions & 2 deletions b/‎chaine/data.py
Lines changed: 0 additions & 2 deletions
diff --git a/‎chaine/core.py renamed to ‎chaine/training.py b/‎chaine/core.py renamed to ‎chaine/training.py
diff --git a/‎notebooks/tutorial.ipynb
Lines changed: 21 additions & 0 deletions b/‎notebooks/tutorial.ipynb
Lines changed: 21 additions & 0 deletions
@@ -1,25 +1,38 @@
-# A Lightweight Conditional Random Field
+# Chaine
 
-This is a modern Python library without any third-party dependencies and a backend written in C implementing conditional random fields for natural language processing tasks like named entity recognition or part-of-speech tagging.
+A linear-chain conditional random field implementation.
+
+Chaine is a modern Python library without any third-party dependencies and a backend written in C implementing conditional random fields for natural language processing tasks like named entity recognition or part-of-speech tagging.
+
+- **Lightweight:** explain
+- **Fast:** explain
+- **Easy to use:** explain
 
 You can install the latest stable version from [PyPI](https://pypi.org/project/chaine):
 
 ```
 $ pip install chaine
 ```
 
-If you are interested in the theoretical concepts behind conditional random fields, I can recommend the introducing paper by [Lafferty et al](https://repository.upenn.edu/cgi/viewcontent.cgi?article=1162&context=cis_papers).
+If you are interested in the theoretical concepts behind conditional random fields, refer to the introductory paper by [Lafferty et al.](https://repository.upenn.edu/cgi/viewcontent.cgi?article=1162&context=cis_papers).
 
 
-## Example
+## How it works
 
-```python
+```
 >>> import chaine
->>> sequences = [[["a", "a"], ["b", "b"]]]
->>> labels = [["0", "1"]]
->>> model = chaine.train(sequences, labels)
->>> model.predict(sequences)
-[['0', '1']]
+>>> tokens = [["John", "Lennon", "was", "rhythm", "guitarist", "of", "The", "Beatles"]]
+>>> labels = [["B-PER", "I-PER", "O", "O", "O", "O", "B-ORG", "I-ORG"]]
+>>> model = chaine.train(tokens, labels, max_iterations=5)
+Loading data
+Start training
+Iteration 1, train loss: 14.334076
+Iteration 2, train loss: 14.334064
+Iteration 3, train loss: 14.334053
+Iteration 4, train loss: 14.334041
+Iteration 5, train loss: 14.334029
+>>> model.predict(tokens)
+[['B-PER', 'I-PER', 'O', 'O', 'O', 'O', 'B-ORG', 'I-ORG']]
 ```
 
 Check out the introductory [Jupyter notebook](https://github.com/severinsimmler/chaine/blob/master/notebooks/tutorial.ipynb).
 
@@ -1,2 +1,2 @@
-from chaine.core import train
+from chaine.training import train
 from chaine.crf import Model, Trainer
@@ -364,13 +364,13 @@ namespace CRFSuite
 
         if (model == NULL)
         {
-            throw std::invalid_argument("The tagger is not opened");
+            throw std::invalid_argument("The tagger is not opened.");
         }
 
         // Obtain the dictionary interface representing the labels in the model.
         if ((ret = model->get_labels(model, &labels)))
         {
-            throw std::runtime_error("Failed to obtain the dictionary interface for labels");
+            throw std::runtime_error("Failed to obtain the dictionary interface for labels.");
         }
 
         // Collect all label strings to lseq.
@@ -405,13 +405,13 @@ namespace CRFSuite
 
         if (model == NULL || tagger == NULL)
         {
-            throw std::invalid_argument("The tagger is not opened");
+            throw std::invalid_argument("The tagger is not opened.");
         }
 
         // Obtain the dictionary interface representing the attributes in the model.
         if ((ret = model->get_attrs(model, &attrs)))
         {
-            throw std::runtime_error("Failed to obtain the dictionary interface for attributes");
+            throw std::runtime_error("Failed to obtain the dictionary interface for attributes.");
         }
 
         // Build an instance.
@@ -468,7 +468,7 @@ namespace CRFSuite
         // Obtain the dictionary interface representing the labels in the model.
         if ((ret = model->get_labels(model, &labels)))
         {
-            throw std::runtime_error("Failed to obtain the dictionary interface for labels");
+            throw std::runtime_error("Failed to obtain the dictionary interface for labels.");
         }
 
         // Run the Viterbi algorithm.
 
@@ -8,8 +8,6 @@
 import re
 from dataclasses import dataclass
 
-from chaine.typing import Iterable
-
 
 @dataclass
 class Token:
 
@@ -68,6 +68,27 @@
    "source": [
     "crf.predict(tokens)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Feature extraction\n",
+    "\n",
+    "```\n",
+    "identity of wi, identity of neighboring words\n",
+    "embeddings for wi, embeddings for neighboring words\n",
+    "part of speech of wi, part of speech of neighboring words\n",
+    "base-phrase syntactic chunk label of wi and neighboring words\n",
+    "presence of wi in a gazetteer\n",
+    "wi contains a particular prefix (from all prefixes of length ≤ 4)\n",
+    "wi contains a particular suffix (from all suffixes of length ≤ 4)\n",
+    "wi is all upper case\n",
+    "word shape of wi, word shape of neighboring words\n",
+    "short word shape of wi, short word shape of neighboring words\n",
+    "presence of hyphen\n",
+    "```"
+   ]
   }
  ],
  "metadata": {
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`		`-from chaine.core import train`
	`1`	`+from chaine.training import train`
`2`	`2`	`from chaine.crf import Model, Trainer`
Original file line number	Diff line number	Diff line change
`@@ -364,13 +364,13 @@ namespace CRFSuite`
`364`	`364`
`365`	`365`	`if (model == NULL)`
`366`	`366`	`{`
`367`		`- throw std::invalid_argument("The tagger is not opened");`
	`367`	`+ throw std::invalid_argument("The tagger is not opened.");`
`368`	`368`	`}`
`369`	`369`
`370`	`370`	`// Obtain the dictionary interface representing the labels in the model.`
`371`	`371`	`if ((ret = model->get_labels(model, &labels)))`
`372`	`372`	`{`
`373`		`- throw std::runtime_error("Failed to obtain the dictionary interface for labels");`
	`373`	`+ throw std::runtime_error("Failed to obtain the dictionary interface for labels.");`
`374`	`374`	`}`
`375`	`375`
`376`	`376`	`// Collect all label strings to lseq.`
`@@ -405,13 +405,13 @@ namespace CRFSuite`
`405`	`405`
`406`	`406`	`if (model == NULL \|\| tagger == NULL)`
`407`	`407`	`{`
`408`		`- throw std::invalid_argument("The tagger is not opened");`
	`408`	`+ throw std::invalid_argument("The tagger is not opened.");`
`409`	`409`	`}`
`410`	`410`
`411`	`411`	`// Obtain the dictionary interface representing the attributes in the model.`
`412`	`412`	`if ((ret = model->get_attrs(model, &attrs)))`
`413`	`413`	`{`
`414`		`- throw std::runtime_error("Failed to obtain the dictionary interface for attributes");`
	`414`	`+ throw std::runtime_error("Failed to obtain the dictionary interface for attributes.");`
`415`	`415`	`}`
`416`	`416`
`417`	`417`	`// Build an instance.`
`@@ -468,7 +468,7 @@ namespace CRFSuite`
`468`	`468`	`// Obtain the dictionary interface representing the labels in the model.`
`469`	`469`	`if ((ret = model->get_labels(model, &labels)))`
`470`	`470`	`{`
`471`		`- throw std::runtime_error("Failed to obtain the dictionary interface for labels");`
	`471`	`+ throw std::runtime_error("Failed to obtain the dictionary interface for labels.");`
`472`	`472`	`}`
`473`	`473`
`474`	`474`	`// Run the Viterbi algorithm.`