Skip to content

Commit

Permalink
add subspecies patterns; related to #161
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorrit Poelen committed Apr 5, 2024
1 parent d04d166 commit 498aadb
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,33 @@
public class DiscoverLifeUtil2 {

public static final List<String> RANKS = Arrays.asList("Family", "Subfamily", "Tribe", "Subtribe", "Genus", "Subgenus");
public static final String NAME_PATTERN_AUTHORSHIP_PARENTHESES = "(?<name>[A-Z][a-z]+[ ][a-z]+)[ ]+(?<authorship>[(][^,]+[,][ ][0-9]{4}[)])";
public static final String NAME_PATTERN_WITH_NOTE = "(?<name>[A-Z][a-z]+[ ][a-z]+)(?<note>[_][_a-z]+)[ ]+(?<authorship>[^,]+[,][ ][0-9]{4})";
public static final String NAME_PATTERN_WITH_PARENTHESIS = "(?<name>[A-Z][a-z]+[ ][(][A-Z][a-z]+[)][ ][a-z]+)[ ]+(?<authorship>[^,]+[,][ ][0-9]{4})";
public static final String NAME_PATTERN_AUTHORSHIP_MULTIPLE_AUTHORS = "(?<name>[A-Z][a-z]+[ ][a-z]+)[ ]+(?<authorship>([A-Z][a-z]+)([ ]and[ ])([A-Z][a-z]+)[,][ ][0-9]{4})";
public static final List<String> NAME_PATTERNS = Arrays.asList(NAME_PATTERN_AUTHORSHIP_PARENTHESES, NAME_PATTERN_WITH_NOTE, NAME_PATTERN_WITH_PARENTHESIS, NAME_PATTERN_AUTHORSHIP_MULTIPLE_AUTHORS);

// Regular-expression fragments that are concatenated below into full name patterns.
// Fragments that declare the same named group (e.g. NAME and NAME_PARENTHESIS both
// declare "name") are alternatives: java.util.regex rejects a pattern that declares
// the same group name twice, so only one of them may appear in a composed pattern.

// "name" group: a capitalized word, a space, a lowercase word and, optionally, one more
// space-separated lowercase word (e.g. "Agapostemon texanus" or "Agapostemon texanus subtilior").
public static final String NAME = "(?<name>([A-Z][a-z]+)([ ][a-z]+)([ ][a-z]+){0,1})";
// "name" group: a capitalized word, a capitalized word in literal parentheses, and a
// lowercase word, each separated by a single space (e.g. "Andrena (Holandrena) cressonii").
public static final String NAME_PARENTHESIS = "(?<name>[A-Z][a-z]+[ ][(][A-Z][a-z]+[)][ ][a-z]+)";
// "authorship" group: one or more non-comma characters, then ", " and four digits
// (e.g. "Cockerell, 1898").
public static final String AUTHORSHIP = "(?<authorship>[^,]+[,][ ][0-9]{4})";
// "authorship" group: same shape as AUTHORSHIP, but enclosed in literal parentheses,
// which are part of the captured text (e.g. "(Gonzalez and Ruz, 2007)").
public static final String AUTHORSHIP_PARENTHESIS = "(?<authorship>[(][^,]+[,][ ][0-9]{4}[)])";
// "authorship" group: exactly two capitalized words joined by " and ", then ", " and
// four digits (e.g. "Gonzalez and Ruz, 2007").
public static final String AUTHORSHIP_AND = "(?<authorship>([A-Z][a-z]+)([ ]and[ ])([A-Z][a-z]+)[,][ ][0-9]{4})";
// One or more literal space characters; tabs and other whitespace are not matched.
public static final String SPACE = "[ ]+";
// "note" group: an underscore followed by one or more underscores or lowercase letters.
public static final String NOTE = "(?<note>[_][_a-z]+)";

// Full patterns composed from the fragments above. Each one declares the "name" and
// "authorship" groups exactly once; NAME_WITH_NOTE additionally declares "note".

// Name, spaces, then an authorship wrapped in parentheses.
public static final String NAME_AUTHORSHIP_PARENTHESES
= NAME + SPACE + AUTHORSHIP_PARENTHESIS;
// Name, spaces, then a plain "<text>, <year>" authorship.
public static final String NAME_AUTHORSHIP
= NAME + SPACE + AUTHORSHIP;
// Name immediately followed by an underscore-prefixed note (no space in between),
// then spaces and a plain authorship.
public static final String NAME_WITH_NOTE
= NAME + NOTE + SPACE + AUTHORSHIP;
// Name containing a parenthesized capitalized word, then spaces and a plain authorship.
public static final String NAME_WITH_PARENTHESIS
= NAME_PARENTHESIS + SPACE + AUTHORSHIP;
// Name, spaces, then a two-author "<A> and <B>, <year>" authorship.
public static final String NAME_AUTHORSHIP_MULTIPLE_AUTHORS
= NAME + SPACE + AUTHORSHIP_AND;


// All composed name/authorship patterns, as regex source strings (not compiled Patterns).
// NOTE(review): the code that consumes this list is not visible here; if it stops at the
// first pattern that matches, the order below determines precedence — confirm before reordering.
public static final List<String> NAME_PATTERNS = Arrays.asList(
NAME_AUTHORSHIP,
NAME_AUTHORSHIP_PARENTHESES,
NAME_WITH_NOTE,
NAME_WITH_PARENTHESIS,
NAME_AUTHORSHIP_MULTIPLE_AUTHORS);

public static void splitRecords(InputStream is, Consumer<String> lineConsumer) {
Scanner scanner = new Scanner(is, StandardCharsets.UTF_8.name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,9 @@ public void parseNames() throws ParserConfigurationException, IOException {
@Override
public void foundTaxonForTerm(Long requestId, Term providedTerm, NameType nameType, Taxon resolvedTaxon) {
counter.incrementAndGet();
Taxon providedTaxon = (Taxon) providedTerm;
if (!NameType.HAS_ACCEPTED_NAME.equals(nameType)) {
System.out.println(providedTaxon.getName());
System.out.println(providedTaxon.getAuthorship());
}
}
});
assertThat(counter.get(), Is.is(51164));
assertThat(counter.get(), Is.is(31278));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ public void parseRelatedNames() throws ParserConfigurationException, XPathExpres

List<Taxon> taxons = DiscoverLifeUtil2.parseRelatedNames(doc);

assertThat(taxons.size(), is(13));
assertThat(taxons.size(), is(17));

Taxon lastTaxon = taxons.get(12);
Taxon lastTaxon = taxons.get(taxons.size() - 1);
assertThat(lastTaxon.getName(), is("Andrena (Holandrena) cressonii"));
assertThat(lastTaxon.getAuthorship(), is("Robertson, 1891"));
}
Expand Down Expand Up @@ -168,6 +168,14 @@ public void parseNameAlt4() {

}

/**
 * A three-word name followed by a plain "author, year" authorship is split
 * into the full three-word name and the authorship.
 */
@Test
public void parseNameAlt5() {
final String verbatim = "Agapostemon texanus subtilior Cockerell, 1898";
final String expectedName = "Agapostemon texanus subtilior";
final String expectedAuthorship = "Cockerell, 1898";

Taxon parsed = DiscoverLifeUtil2.parse(verbatim);

assertThat(parsed.getName(), is(expectedName));
assertThat(parsed.getAuthorship(), is(expectedAuthorship));
}

@Test
public void patchCommonNames() {
String name = "Protandrena bachue Gonzalez and Ruz, 2007 Rhophitulus bachue (Gonzalez and Ruz, 2007)";
Expand Down

0 comments on commit 498aadb

Please sign in to comment.