Skip to content

Commit

Permalink
add some mislabeled names, closes #38
Browse files Browse the repository at this point in the history
  • Loading branch information
fgregg committed May 30, 2017
1 parent 5e690fc commit 20a5b37
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 8 deletions.
11 changes: 10 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,13 @@ install:
- make all
after_success: coveralls
sudo: false
script: nosetests --with-coverage --cover-package=probablepeople
script: nosetests --with-coverage --cover-package=probablepeople
deploy:
provider: pypi
skip_cleanup: true
user: datamade.wheelbuilder
on:
tags: true
distributions: "sdist bdist_wheel"
password:
secure: d29239p1FSbCuW9v5cnxRbKUPidAD0GosW6HtUF2LIVO0epLPCHlOpG/REpnxYv5f/Ug0rL2luzglcpCb9ZqrRZw0WhzQAX3GT4qlYQ88xiVYYRErYbMnkP0l4Suls3IWmQ13Y5xwe8rDRwyHdL8rhJZmq358YnpiI7/2fYHoEs=
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ all : probablepeople/generic_learned_settings.crfsuite \
probablepeople/person_learned_settings.crfsuite

probablepeople/generic_learned_settings.crfsuite: name_data/labeled/company_labeled.xml name_data/labeled/person_labeled.xml
parserator train $<,$(word 2,$^) probablepeople --modelfile=generic
parserator train $^ probablepeople --modelfile=generic

probablepeople/company_learned_settings.crfsuite: name_data/labeled/company_labeled.xml
parserator train $< probablepeople --modelfile=company
Expand Down
5 changes: 4 additions & 1 deletion name_data/labeled/company_labeled.xml
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,6 @@
<Name><CorporationName>bell</CorporationName> <CorporationName>boyd</CorporationName> <CorporationName>&amp;</CorporationName> <CorporationName>lloyd</CorporationName> <CorporationNameOrganization>co.</CorporationNameOrganization></Name>
<Name><CorporationLegalType>p.c.</CorporationLegalType> <CorporationName>shekar,</CorporationName> <CorporationName>m.d.</CorporationName></Name>
<Name><CorporationName>karl</CorporationName> <CorporationName>productions</CorporationName></Name>
<Name><GivenName>joseph</GivenName> <MiddleName>trindl</MiddleName> <SuffixGenerational>iii</SuffixGenerational></Name>
<Name><CorporationName>illinois</CorporationName> <CorporationName>veternary</CorporationName> <CorporationName>medical</CorporationName> <CorporationCommitteeType>pac</CorporationCommitteeType></Name>
<Name><CorporationName>BEST</CorporationName> <CorporationName>PRACTICE</CorporationName> <CorporationNameOrganization>HS</CorporationNameOrganization></Name>
<Name><CorporationName>Velma</CorporationName> <CorporationName>F</CorporationName> <CorporationName>Thomas</CorporationName> <CorporationNameOrganization>Early</CorporationNameOrganization> <CorporationNameOrganization>Childhood</CorporationNameOrganization> <CorporationNameOrganization>Center</CorporationNameOrganization></Name>
Expand Down Expand Up @@ -1416,4 +1415,8 @@
<Name><CorporationName>yale</CorporationName> <CorporationNameOrganization>university</CorporationNameOrganization> <CorporationNameOrganization>school</CorporationNameOrganization> <CorporationNameOrganization>of</CorporationNameOrganization> <CorporationNameOrganization>medicine</CorporationNameOrganization></Name>
<Name><CorporationName>HARVARD</CorporationName> <CorporationNameOrganization>BUSINESS</CorporationNameOrganization> <CorporationNameOrganization>SCHOOL</CorporationNameOrganization></Name>
<Name><CorporationName>st</CorporationName> <CorporationName>johns</CorporationName> <CorporationNameOrganization>church</CorporationNameOrganization></Name>
<Name><CorporationName>cook,</CorporationName> <CorporationName>ysursa,</CorporationName> <CorporationNameAndCompany>et</CorporationNameAndCompany> <CorporationNameAndCompany>al</CorporationNameAndCompany> <CorporationNameOrganization>law</CorporationNameOrganization> <CorporationNameOrganization>firm</CorporationNameOrganization></Name>
<Name><CorporationName>b</CorporationName> <CorporationName>p</CorporationName> <CorporationName>consultants</CorporationName></Name>
<Name><CorporationName>schuyler</CorporationName> <CorporationName>roche</CorporationName> <CorporationName>&amp;</CorporationName> <CorporationName>zwirner</CorporationName></Name>

</NameCollection>
15 changes: 11 additions & 4 deletions name_data/labeled/person_labeled.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3918,7 +3918,6 @@
<Name><GivenName>patrick</GivenName> <MiddleInitial>w</MiddleInitial> <Surname>mc</Surname> <Surname>arthur</Surname></Name>
<Name><GivenName>tita</GivenName> <MiddleInitial>e.</MiddleInitial> <Surname>roach</Surname></Name>
<Name><GivenName>david</GivenName> <Surname>delre</Surname> <SuffixOther>atty</SuffixOther> <SuffixOther>at</SuffixOther> <SuffixOther>law</SuffixOther></Name>
<Name><GivenName>sheila</GivenName> <MiddleInitial>m.</MiddleInitial> <Surname>mcginnis</Surname> <CorporationCommitteeType>camp.</CorporationCommitteeType> <CorporationCommitteeType>comm.</CorporationCommitteeType></Name>
<Name><GivenName>david</GivenName> <MiddleInitial>b.</MiddleInitial> <SuffixGenerational>iii</SuffixGenerational> <Surname>hill</Surname></Name>
<Name><GivenName>david</GivenName> <Surname>spiegel</Surname> <And>and</And> <GivenName>suzanne</GivenName> <Surname>courtney</Surname> <SuffixOther>ttees</SuffixOther> <SuffixOther>of</SuffixOther> <SuffixOther>spiegel-cortney</SuffixOther> <SuffixOther>fam.tr</SuffixOther></Name>
<Name><PrefixOther>mayor</PrefixOther> <GivenName>john</GivenName> <Surname>rodgers</Surname></Name>
Expand All @@ -3937,7 +3936,6 @@
<Name><GivenName>bruce</GivenName> <MiddleInitial>a</MiddleInitial> <Surname>hackel</Surname> <SuffixOther>gri</SuffixOther></Name>
<Name><GivenName>olatunij</GivenName> <MiddleName>tommy</MiddleName> <Surname>abina</Surname></Name>
<Name><GivenName>sondra</GivenName> <MiddleName>berman</MiddleName> <Surname>epstein</Surname></Name>
<Name><Surname>cook,</Surname> <CorporationName>ysursa,</CorporationName> <CorporationNameAndCompany>et</CorporationNameAndCompany> <CorporationNameAndCompany>al</CorporationNameAndCompany> <CorporationNameOrganization>law</CorporationNameOrganization> <CorporationNameOrganization>firm</CorporationNameOrganization></Name>
<Name><GivenName>anuradha</GivenName> <Surname>ghogale</Surname></Name>
<Name><GivenName>richard</GivenName> <MiddleInitial>a.</MiddleInitial> <Surname>shapiro</Surname> <SuffixOther>attorney</SuffixOther> <SuffixOther>at</SuffixOther> <SuffixOther>law</SuffixOther></Name>
<Name><GivenName>chaka</GivenName> <Surname>patterson,</Surname></Name>
Expand Down Expand Up @@ -3982,10 +3980,19 @@
<Name><PrefixOther>attorney</PrefixOther> <GivenName>paul</GivenName> <MiddleInitial>l.</MiddleInitial> <Surname>williams</Surname></Name>
<Name><GivenName>raymondd</GivenName> <Surname>prybill</Surname></Name>
<Name><GivenName>semir</GivenName> <And>and</And> <GivenName>lilya</GivenName> <Surname>sirazi</Surname></Name>
<Name><CorporationName>b</CorporationName> <CorporationName>p</CorporationName> <Surname>consultants</Surname></Name>
<Name><GivenName>young</GivenName> <MiddleName>sun</MiddleName> <And>&amp;</And> <GivenName>myung</GivenName> <MiddleName>wo</MiddleName> <Surname>yoo</Surname></Name>
<Name><CorporationName>schuyler</CorporationName> <CorporationName>roche</CorporationName> <CorporationName>&amp;</CorporationName> <Surname>zwirner</Surname></Name>
<Name><FirstInitial>w.</FirstInitial> <MiddleName>russell</MiddleName> <SuffixGenerational>(jr.)</SuffixGenerational> <Surname>withers</Surname></Name>
<Name><GivenName>james</GivenName> <MiddleInitial>w.</MiddleInitial> <Surname>paleks</Surname></Name>
<Name><GivenName>patricia</GivenName> <PrefixMarital>mrs,</PrefixMarital> <Surname>natke</Surname></Name>
<Name><GivenName>Wm.</GivenName> <MiddleName>Sam</MiddleName> <Surname>McCann</Surname></Name>
<Name><GivenName>Charles</GivenName> <MiddleInitial>W.</MiddleInitial> <Surnam$
<Name><GivenName>Miguel</GivenName> <Surname>del</Surname> <Surname>Valle</Surname></Name>
<Name><GivenName>Emanuel</GivenName> <MiddleName>Chris</MiddleName> <Surname>W$
<Name><GivenName>C.D.</GivenName> <Surname>Davidsmeyer</Surname></Name>
<Name><GivenName>Michael</GivenName> <Surname>Unes</Surname></Name>
<Name><GivenName>James</GivenName> <MiddleName>Pate</MiddleName> <Surname>Phil$
<Name><GivenName>Annazette</GivenName> <Surname>Collins</Surname></Name>
<Name><GivenName>Franco</GivenName> <Surname>Coladipietro</Surname></Name>
<Name><GivenName>Carol</GivenName> <Surname>Sente</Surname></Name>
<Name><GivenName>joseph</GivenName> <MiddleName>trindl</MiddleName> <SuffixGenerational>iii</SuffixGenerational></Name>
</NameCollection>
5 changes: 4 additions & 1 deletion probablepeople/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ def _loadTagger(model_type) :

return tagger

TAGGERS = {model_type : _loadTagger(model_type) for model_type in MODEL_FILES}
TAGGERS = {model_type : _loadTagger(model_type) for model_type in MODEL_FILES}

TAGGER = _loadTagger('generic')

def parse(raw_string, type=None):
if type is None:
type='generic'
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[bdist_wheel]
universal=1

0 comments on commit 20a5b37

Please sign in to comment.