Skip to content

Commit

Permalink
rename ner: fields to :ner-kw
Browse files Browse the repository at this point in the history
  • Loading branch information
BeritJanssen committed Nov 21, 2024
1 parent 92f77a5 commit 0a06b4e
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 15 deletions.
2 changes: 1 addition & 1 deletion backend/addcorpus/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def has_named_entities(self):
try:
# we check if any fields exist for filtering named entities
ner_exists = client.search(
index=self.es_index, query={"exists": {"field": "ner:*"}}, size=0
index=self.es_index, query={"exists": {"field": "*:ner-kw"}}, size=0
)
if total_hits(ner_exists):
return True
Expand Down
4 changes: 2 additions & 2 deletions backend/addcorpus/tests/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def test_validate_ner_slug():
validate_ner_slug(keyword_mapping(), "slug:ner")
validate_ner_slug(annotated_text_mapping(), "slug:ner")
with pytest.raises(ValidationError):
validate_ner_slug(date_mapping(), "ner:slug")
validate_ner_slug(keyword_mapping(), "ner:slug")
validate_ner_slug(date_mapping(), "slug:ner-kw")
validate_ner_slug(keyword_mapping(), "slug:ner-kw")


def test_validate_es_mapping():
Expand Down
6 changes: 3 additions & 3 deletions backend/addcorpus/validation/creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,19 +146,19 @@ def validate_field_name_permissible_characters(slug: str):
def validate_ner_slug(es_mapping: dict, name: str):
"""
Checks whether the field name contains a colon; if it does, raises a ValidationError unless the field meets one of the following requirements:
- starts with `ner:` prefix and is a keyword field
- ends with `:ner` suffix and is an annotated_text field
- ends with `:ner-kw` suffix and is a keyword field
"""
if ":" in name:
if name.endswith(":ner"):
if primary_mapping_type(es_mapping) != MappingType.ANNOTATED_TEXT.value:
raise ValidationError(
f"{name} cannot be used as a field name: the suffix `:ner` is reserved for annotated_text fields"
)
elif name.startswith("ner:"):
elif name.endswith(":ner-kw"):
if primary_mapping_type(es_mapping) != MappingType.KEYWORD.value:
raise ValidationError(
f"{name} cannot be used as a field name: the prefix `ner:` is reserved for Named Entity keyword fields"
f"{name} cannot be used as a field name: the suffix `:ner-kw` is reserved for Named Entity keyword fields"
)
else:
raise ValidationError(
Expand Down
8 changes: 4 additions & 4 deletions backend/corpora/parliament/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,10 +284,10 @@ def parliament_corpora_settings(settings):
"page": None,
"url": None,
"sequence": 1,
"ner:location": [],
"ner:miscellaneous": [],
"ner:organization": ["Economische Zaken"],
"ner:person": [],
"location:ner-kw": [],
"miscellaneous:ner-kw": [],
"organization:ner-kw": ["Economische Zaken"],
"person:ner-kw": [],
}
],
"n_documents": 2,
Expand Down
2 changes: 1 addition & 1 deletion backend/corpora/parliament/utils/parlamint.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def get_entity_list(extracted_data: tuple[str, dict], entity: str) -> list[str]:

def ner_keyword_field(entity: str):
return FieldDefinition(
name=f"ner:{entity}",
name=f"{entity}:ner-kw",
display_name=f"Named Entity: {entity.capitalize()}",
searchable=True,
es_mapping=keyword_mapping(enable_full_text_search=True),
Expand Down
8 changes: 4 additions & 4 deletions documentation/Named-entities.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ If the main content field is called `speech`, the field containing named entity
```

Moreover, an enriched corpus should contain the following keyword fields:
- `ner:person`
- `ner:location`
- `ner:organization`
- `ner:miscellaneous`
- `person:ner-kw`
- `location:ner-kw`
- `organization:ner-kw`
- `miscellaneous:ner-kw`
These can be used to search or filter (to be implemented).

## Enriching a corpus with named entities
Expand Down

0 comments on commit 0a06b4e

Please sign in to comment.