Skip to content

Commit

Permalink
Add lemmas for pronouns
Browse files Browse the repository at this point in the history
  • Loading branch information
bjascob committed Sep 30, 2022
1 parent 719686a commit 43f9358
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 1 deletion.
2 changes: 1 addition & 1 deletion lemminflect/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .core.Inflections import Inflections
from .core.Lemmatizer import Lemmatizer

__version__ = '0.2.2'
__version__ = '0.2.3'


# Lemmatizer is a singleton so this will only instantiate and load the data
Expand Down
87 changes: 87 additions & 0 deletions lemminflect/resources/lemma_overrides.csv
Original file line number Diff line number Diff line change
@@ -1 +1,88 @@
# inflection,upos,lemma (note that upos must be upper-case, e.g. NOUN)

# The following pronouns are not in the lookup
all,NOUN,all
another,NOUN,another
any,NOUN,any
anybody,NOUN,anybody
anyone,NOUN,anyone
anything,NOUN,anything
aught,NOUN,aught
both,NOUN,both
each,NOUN,each
everybody,NOUN,everybody
everyone,NOUN,everyone
everything,NOUN,everything
he,NOUN,he
her,NOUN,her
hers,NOUN,hers
herself,NOUN,herself
him,NOUN,him
himself,NOUN,himself
his,NOUN,his
idem,NOUN,idem
it,NOUN,it
its,NOUN,its
itself,NOUN,itself
many,NOUN,many
me,NOUN,me
my,NOUN,my
myself,NOUN,myself
naught,NOUN,naught
neither,NOUN,neither
none,NOUN,none
our,NOUN,our
ours,NOUN,ours
ourself,NOUN,ourself
ourselves,NOUN,ourselves
she,NOUN,she
some,NOUN,some
somebody,NOUN,somebody
someone,NOUN,someone
something,NOUN,something
such,NOUN,such
suchlike,NOUN,suchlike
that,NOUN,that
thee,NOUN,thee
theirs,NOUN,theirs
their,NOUN,their
theirself,NOUN,theirself
theirselves,NOUN,theirselves
them,NOUN,them
themself,NOUN,themself
themselves,NOUN,themselves
these,NOUN,these
they,NOUN,they
thine,NOUN,thine
this,NOUN,this
those,NOUN,those
thou,NOUN,thou
thy,NOUN,thy
thyself,NOUN,thyself
us,NOUN,us
we,NOUN,we
what,NOUN,what
whatever,NOUN,whatever
whatnot,NOUN,whatnot
whether,NOUN,whether
which,NOUN,which
whichever,NOUN,whichever
whichsoever,NOUN,whichsoever
who,NOUN,who
whoever,NOUN,whoever
whom,NOUN,whom
whomever,NOUN,whomever
whomso,NOUN,whomso
whomsoever,NOUN,whomsoever
whose,NOUN,whose
whosever,NOUN,whosever
whosesoever,NOUN,whosesoever
whoso,NOUN,whoso
whosoever,NOUN,whosoever
ye,NOUN,ye
yon,NOUN,yon
you,NOUN,you
your,NOUN,your
yours,NOUN,yours
yourself,NOUN,yourself
yourselves,NOUN,yourselves
15 changes: 15 additions & 0 deletions tests/auto/LemmatizerTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,21 @@ def testProperNouns(self):
token = self.nlp('The Axxlaskans went South.')[1]
self.assertEqual(token._.lemma(lemmatize_oov=True), 'Axxlaskan')

def testPronouns(self):
    """Check that every pronoun override lemmatizes to itself.

    The word list mirrors the pronoun rows of
    lemminflect/resources/lemma_overrides.csv, each of which maps the
    word to itself under the NOUN tag. Keep the two in sync when a row
    is added or removed.
    """
    pronouns = [
        'all', 'another', 'any', 'anybody', 'anyone', 'anything', 'aught',
        'both', 'each', 'everybody', 'everyone', 'everything', 'he', 'her',
        'hers', 'herself', 'him', 'himself', 'his', 'idem', 'it', 'its',
        'itself', 'many', 'me', 'my', 'myself', 'naught', 'neither', 'none',
        'our', 'ours', 'ourself', 'ourselves', 'she', 'some', 'somebody',
        'someone', 'something', 'such', 'suchlike', 'that', 'thee', 'their',
        'theirs', 'theirself', 'theirselves', 'them', 'themself',
        'themselves', 'these', 'they', 'thine', 'this', 'those', 'thou',
        'thy', 'thyself', 'us', 'we', 'what', 'whatever', 'whatnot',
        'whether', 'which', 'whichever', 'whichsoever', 'who', 'whoever',
        'whom', 'whomever', 'whomso', 'whomsoever', 'whose', 'whosever',
        'whosesoever', 'whoso', 'whosoever', 'ye', 'yon', 'you', 'your',
        'yours', 'yourself', 'yourselves',
    ]
    # Each case is (word, upos, expected lemma); the overrides file stores
    # these pronouns under NOUN with the word itself as the lemma.
    tests = [(word, 'NOUN', word) for word in pronouns]
    self.runGetAllLemmasTests(tests)
    self.runGetLemmaTests(tests)

def testOverrides(self):
# run the lemmatizer once to ensure the overrides are loaded (i.e. lazy loading)
lemminflect.getLemma('Alaskans', 'NOUN', lemmatize_oov=False)
Expand Down

0 comments on commit 43f9358

Please sign in to comment.