diff --git a/text/base.py b/text/base.py index 5900aba2..ba1c7d06 100644 --- a/text/base.py +++ b/text/base.py @@ -7,12 +7,11 @@ from __future__ import absolute_import from abc import ABCMeta, abstractmethod from text.packages import nltk -from text.compat import add_metaclass +from text.compat import with_metaclass ##### POS TAGGERS ##### -@add_metaclass(ABCMeta) -class BaseTagger(object): +class BaseTagger(with_metaclass(ABCMeta)): '''Abstract tagger class from which all taggers inherit from. All descendants must implement a @@ -27,8 +26,7 @@ def tag(self, text, tokenize=True): ##### NOUN PHRASE EXTRACTORS ##### -@add_metaclass(ABCMeta) -class BaseNPExtractor(object): +class BaseNPExtractor(with_metaclass(ABCMeta)): '''Abstract base class from which all NPExtractor classes inherit. Descendant classes must implement an ``extract(text)`` method @@ -42,8 +40,7 @@ def extract(self, text): ##### TOKENIZERS ##### -@add_metaclass(ABCMeta) -class BaseTokenizer(nltk.tokenize.api.TokenizerI): +class BaseTokenizer(with_metaclass(ABCMeta, nltk.tokenize.api.TokenizerI)): '''Abstract base class from which all Tokenizer classes inherit. Descendant classes must implement a ``tokenize(text)`` method @@ -71,8 +68,8 @@ def itokenize(self, text, *args, **kwargs): DISCRETE = 'ds' CONTINUOUS = 'co' -@add_metaclass(ABCMeta) -class BaseSentimentAnalyzer(object): + +class BaseSentimentAnalyzer(with_metaclass(ABCMeta)): '''Abstract base class from which all sentiment analyzers inherit. Should implement an ``analyze(text)`` method which returns either the @@ -99,8 +96,7 @@ def analyze(self, text): ##### PARSERS ##### -@add_metaclass(ABCMeta) -class BaseParser(object): +class BaseParser(with_metaclass(ABCMeta)): '''Abstract parser class from which all parsers inherit from. All descendants must implement a `parse()` method. 
diff --git a/text/classifiers.py b/text/classifiers.py index 35ccd6c6..6aabd41d 100644 --- a/text/classifiers.py +++ b/text/classifiers.py @@ -34,7 +34,7 @@ from __future__ import absolute_import from text.packages import nltk from text.tokenizers import WordTokenizer -from text.compat import basestring, u +from text.compat import basestring import text.formats as formats from text.utils import lowerstrip from text.decorators import cached_property @@ -74,7 +74,7 @@ def basic_extractor(document, train_set): for w in tokenizer.itokenize(document, include_punc=False)]) else: tokens = set((lowerstrip(w, all=False) for w in document)) - features = dict([(u('contains({0})').format(word), (word in tokens)) + features = dict([(u'contains({0})'.format(word), (word in tokens)) for word in word_features]) return features @@ -89,7 +89,7 @@ def contains_extractor(document): for w in tokenizer.itokenize(document, include_punc=False)]) else: tokens = set((lowerstrip(w, all=False) for w in document)) - features = dict((u('contains({0})'.format(w)), True) for w in tokens) + features = dict((u'contains({0})'.format(w), True) for w in tokens) return features ##### CLASSIFIERS ##### diff --git a/text/compat.py b/text/compat.py index c33b4693..932efeab 100644 --- a/text/compat.py +++ b/text/compat.py @@ -4,10 +4,6 @@ PY2 = int(sys.version[0]) == 2 if PY2: - def b(s): - return s - def u(s): - return unicode(s, "unicode_escape") from itertools import imap, izip import urllib2 as request from urllib import quote as urlquote @@ -27,12 +23,7 @@ def implements_to_string(cls): cls.__unicode__ = cls.__str__ cls.__str__ = lambda x: x.__unicode__().encode('utf-8') return cls - else: # PY3 - def b(s): - return s.encode("latin-1") - def u(s): - return s from urllib import request from urllib.parse import quote as urlquote text_type = str @@ -47,15 +38,21 @@ def u(s): implements_to_string = lambda x: x -def add_metaclass(metaclass): - """Class decorator for creating a class with a metaclass. 
- From the six library. - """ - def wrapper(cls): - orig_vars = cls.__dict__.copy() - orig_vars.pop('__dict__', None) - orig_vars.pop('__weakref__', None) - for slots_var in orig_vars.get('__slots__', ()): - orig_vars.pop(slots_var) - return metaclass(cls.__name__, cls.__bases__, orig_vars) - return wrapper +def with_metaclass(meta, *bases): + '''Create a temporary base class with metaclass ``meta`` and the given bases. + + Creates a dummy class with a dummy metaclass. When subclassed, the dummy + metaclass is used, which has a constructor that instantiates a + new class from the original parent. This ensures that the dummy class and + dummy metaclass are not in the inheritance tree. + + Credit to Armin Ronacher. + ''' + class metaclass(meta): + __call__ = type.__call__ + __init__ = type.__init__ + def __new__(cls, name, this_bases, d): + if this_bases is None: + return type.__new__(cls, name, (), d) + return meta(name, bases, d) + return metaclass('temporary_class', None, {})