From 05e5be081c79590ed84b5baacec82fe4a3edd699 Mon Sep 17 00:00:00 2001 From: Steven Loria Date: Fri, 13 Sep 2013 19:53:46 -0500 Subject: [PATCH] Docs update --- HISTORY.rst | 5 ++++- docs/install.rst | 17 +++++++++++++++++ text/_perceptron.py | 5 ++++- text/taggers.py | 8 +++++++- 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index ebdffd35..8dd6406a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,8 +3,11 @@ Changelog 0.6.3 (unreleased) ------------------ -- Word tokenization fix: Words that stem from a contraction will still have an apostrophe, e.g. ``"Let's" => ["Let", "'s"]``. This makes it easier to identify contractions. +.. module:: text.taggers + +- Word tokenization fix: Words that stem from a contraction will still have an apostrophe, e.g. ``"Let's" => ["Let", "'s"]``. - Fix bug with comparing blobs to strings. +- Add :class:`text.taggers.PerceptronTagger`, a fast and accurate POS tagger. Thanks `@syllog1sm `_. - Note for Python 3 users: You may need to update your corpora, since NLTK master has reorganized its corpus system. Just run ``curl https://raw.github.com/sloria/TextBlob/master/download_corpora.py | python`` again. - Add ``download_corpora_lite.py`` script for getting the minimum corpora requirements for TextBlob's basic features. diff --git a/docs/install.rst b/docs/install.rst index 35642002..7cede1b8 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -57,6 +57,23 @@ To get the latest development version of TextBlob, run $ pip install -U git+https://github.com/sloria/TextBlob.git@dev +Getting Extra Models and Data +----------------------------- + +.. module:: text.taggers + +Some features, such as the :class:`PerceptronTagger`, require data that is not available from the NLTK downloader. These data will be made available on the GitHub `release page`_ for TextBlob. + +To install a model or corpus: + +1. Download the file from the `release page`_. +2. Unzip/untar the downloaded file. +3. 
Place the uncompressed file in your TextBlob installation directory. To find out where this is, you can run :: + + $ python -c "import text; print(text.__path__[0])" + +.. _release page: https://github.com/sloria/TextBlob/releases + Python ++++++ diff --git a/text/_perceptron.py b/text/_perceptron.py index d40bc12a..7c1ec3dc 100644 --- a/text/_perceptron.py +++ b/text/_perceptron.py @@ -56,6 +56,7 @@ def upd_feat(c, f, w, v): weights = self.weights.setdefault(f, {}) upd_feat(truth, f, weights.get(truth, 0.0), 1.0) upd_feat(guess, f, weights.get(guess, 0.0), -1.0) + return None def average_weights(self): for feat, weights in self.weights.items(): @@ -68,14 +69,16 @@ def average_weights(self): if averaged: new_feat_weights[clas] = averaged self.weights[feat] = new_feat_weights + return None def save(self, path): '''Save the pickled model weights.''' - pickle.dump(dict(self.weights), open(path, 'w')) + return pickle.dump(dict(self.weights), open(path, 'w')) def load(self, path): '''Load the pickled model weights.''' self.weights = pickle.load(open(path)) + return None def train(nr_iter, examples): diff --git a/text/taggers.py b/text/taggers.py index 16d22b9e..ef98738c 100644 --- a/text/taggers.py +++ b/text/taggers.py @@ -66,8 +66,14 @@ class PerceptronTagger(BaseTagger): '''Greedy Averaged Perceptron tagger, as implemented by Matthew Honnibal. Requires that ``trontagger.pickle`` exists in the text/ package directory. + For more information on how to get and install the pickled model, see + the install guide here: - See more info at this blog post: http://honnibal.wordpress.com/2013/09/11/a-good-part-of-speechpos-tagger-in-about-200-lines-of-python/ + https://textblob.readthedocs.org/en/latest/install.html + + See more info about the Averaged Perceptron Tagger at this blog post: + + http://honnibal.wordpress.com/2013/09/11/a-good-part-of-speechpos-tagger-in-about-200-lines-of-python/ The tagger is about 96.8%% accurate.