Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
0e842b4
Ajout relations
pidoux7 May 15, 2024
fd7f6f2
feat: add support for reading and dumping relations in standoff files…
pidoux7 Jul 3, 2024
452c52f
fix: Fix import statement in relations module 'eds.relations'
pidoux7 Jul 3, 2024
9ef378d
Added: relation testing for eds.relations and brat relations connector
pidoux7 Jul 3, 2024
943ecd3
Added: relations module documentation
pidoux7 Jul 3, 2024
47add63
Fix: modified gitignore in order to run relations test
pidoux7 Jul 3, 2024
0b4eda6
Modified: changelog
pidoux7 Jul 3, 2024
83ce9f9
merge: add previous commits on branch master
pidoux7 Jul 3, 2024
6d11710
Fix: test_relations path
pidoux7 Jul 3, 2024
bd75048
Updated: translation of french comments in english
pidoux7 Jul 3, 2024
16f3194
Ajout relations
pidoux7 May 15, 2024
189b5df
feat: add support for reading and dumping relations in standoff files…
pidoux7 Jul 3, 2024
ed7adf9
fix: Fix import statement in relations module 'eds.relations'
pidoux7 Jul 3, 2024
0f9586c
Added: relation testing for eds.relations and brat relations connector
pidoux7 Jul 3, 2024
367fa79
Added: relations module documentation
pidoux7 Jul 3, 2024
fc5af63
Fix: modified gitignore in order to run relations test
pidoux7 Jul 3, 2024
9d43e01
Modified: changelog
pidoux7 Jul 3, 2024
f368eca
Fix: test_relations path
pidoux7 Jul 3, 2024
2ccf29e
Updated: translation of french comments in english
pidoux7 Jul 3, 2024
02de693
Update changelog
Aremaki Oct 8, 2024
b18370c
fix error when relation is None
Aremaki Sep 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ _build/
*.tar.gz
*.tsv
*.ann
!text.ann

# Editors
.idea
Expand Down
8 changes: 8 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@

- `eds.tables` accepts a minimum_table_size (default 2) argument to reduce pollution
- `RuleBasedQualifier` now exposes a `process` method that only returns qualified entities and tokens without actually tagging them, deferring this task to the `__call__` method.
- Relation implementation in `doc.spans["<label>"][i]._.rel = [{'type':'rel_type', 'target': <span>},]`
- Relation connector with brat2docs and docs2brat in `edsnlp.connectors.brat` compatible with `edsnlp.data.read_*` and `edsnlp.data.write_*` (modified files : `edsnlp.data.converters`, `edsnlp.data.standoff`)
- Rule-based relation model using proximity and/or sentence in `edsnlp.pipes.misc.relations` registered as `eds.relations`
- Documentation using Mkdocs for relations `docs.pipes.misc.relations.md` and `docs.pipes.misc.index.md`
- Tests for relations `tests.pipelines.misc.test_relations` and resources `ressources.relations`
- `data.set_processing(...)` now exposes an `autocast` parameter to disable or tweak the automatic casting of the tensor
during the processing. Autocasting should result in a slight speedup, but may lead to numerical instability.
- Use `torch.inference_mode` to disable view tracking and version counter bumps during inference.

### Fixed

Expand Down
1 change: 1 addition & 0 deletions docs/pipes/misc/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@ For instance, the date detection and normalisation pipeline falls in this catego
| `eds.sections` | Section detection |
| `eds.reason` | Rule-based hospitalisation reason detection |
| `eds.tables` | Tables detection |
| `eds.relations` | Relations extraction |

<!-- --8<-- [end:components] -->
8 changes: 8 additions & 0 deletions docs/pipes/misc/relations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Relations {: #edsnlp.pipes.misc.relations.factory.create_component }

::: edsnlp.pipes.misc.relations.factory.create_component
options:
heading_level: 2
show_bases: false
show_source: false
only_class_level: true
142 changes: 132 additions & 10 deletions edsnlp/data/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,12 +244,19 @@ def __call__(self, obj):
if not Span.has_extension(dst):
Span.set_extension(dst, default=None)

############## Modifications for relations ###############
dict_entities = {} ## dict for entity storage
for ent in obj.get("entities") or ():
begin = min(f["begin"] for f in ent["fragments"]) # start of the entity
end = max(f["end"] for f in ent["fragments"]) # end of the entity
dict_entities[ent["entity_id"]] = (
ent["label"] + ";" + str(begin) + ";" + str(end)
)
fragments = (
[
{
"begin": min(f["begin"] for f in ent["fragments"]),
"end": max(f["end"] for f in ent["fragments"]),
"begin": begin,
"end": end,
}
]
if not self.split_fragments
Expand All @@ -267,6 +274,11 @@ def __call__(self, obj):
if isinstance(ent["attributes"], list)
else ent["attributes"]
)
attributes = (
{a["label"]: a["value"] for a in ent["attributes"]}
if isinstance(ent["attributes"], list)
else ent["attributes"]
)
if self.notes_as_span_attribute and ent["notes"]:
ent["attributes"][self.notes_as_span_attribute] = "|".join(
note["value"] for note in ent["notes"]
Expand Down Expand Up @@ -302,6 +314,67 @@ def __call__(self, obj):
if span._.get(attr) is None:
span._.set(attr, value)

############## Modifications for relations ###############
# add relations in spans
if not Span.has_extension("rel"):
Span.set_extension("rel", default=[])

for rel in obj.get("relations") or (): # iterates relations
for label in doc.spans: # iterates source labels
for i, spa in enumerate(doc.spans[label]): # iterates source spans
bo = False

# relations
if dict_entities[rel["from_entity_id"]].split(";") == [
label,
str(spa.start_char),
str(spa.end_char),
]: # sif source entity is the same as the span
for label2 in doc.spans: # iiterates target labels
for j, spa2 in enumerate(
doc.spans[label2]
): # iterates target label
if dict_entities[rel["to_entity_id"]].split(";") == [
label2,
str(spa2.start_char),
str(spa2.end_char),
]: # if target entity is the same as the span
relation = {
"type": rel["relation_label"],
"target": doc.spans[label2][j],
} # create the relation
doc.spans[label][i]._.rel.append(
relation
) # add the relation to the span
bo = True
break
if bo:
break
bo = False

# inverse relations
if dict_entities[rel["to_entity_id"]].split(";") == [
label,
str(spa.start_char),
str(spa.end_char),
]:
for label2 in doc.spans:
for j, spa2 in enumerate(doc.spans[label2]):
if dict_entities[rel["from_entity_id"]].split(";") == [
label2,
str(spa2.start_char),
str(spa2.end_char),
]:
relation = {
"type": "inv_" + rel["relation_label"],
"target": doc.spans[label2][j],
}
doc.spans[label][i]._.rel.append(relation)
bo = True
break
if bo:
break

return doc


Expand Down Expand Up @@ -346,12 +419,9 @@ def __init__(

def __call__(self, doc):
spans = get_spans(doc, self.span_getter)
obj = {
FILENAME: doc._.note_id,
"doc_id": doc._.note_id,
"text": doc.text,
"entities": [
{
entities = []
for i, ent in enumerate(sorted(dict.fromkeys(spans))):
entity = {
"entity_id": i,
"fragments": [
{
Expand All @@ -366,9 +436,61 @@ def __call__(self, doc):
},
"label": ent.label_,
}
for i, ent in enumerate(sorted(dict.fromkeys(spans)))
],
if ent._.has("note") and ent._.note is not None:
entity["note"] = ent._.note
entities.append(entity)

# mapping between entities and their `entity_id`
entity_map = {
(
ent["fragments"][0]["begin"],
ent["fragments"][0]["end"],
ent["label"],
): ent["entity_id"]
for ent in entities
}

# doesn't include 'inv_' relations
relations = []
relation_idx = 1
for span_label, span_list in doc.spans.items():
for spa in span_list:
if spa._.has("rel") and len(spa._.rel) > 0:
source_entity_id = entity_map.get(
(spa.start_char, spa.end_char, spa.label_)
)
for rel in spa._.rel:
if not rel["type"].startswith("inv_"):
target_entity_id = entity_map.get(
(
rel["target"].start_char,
rel["target"].end_char,
rel["target"].label_,
)
)
if (
source_entity_id is not None
and target_entity_id is not None
):
relations.append(
{
"rel_id": relation_idx,
"from_entity_id": source_entity_id,
"relation_type": rel["type"],
"to_entity_id": target_entity_id,
}
)
relation_idx += 1

# final object
obj = {
FILENAME: doc._.note_id,
"doc_id": doc._.note_id,
"text": doc.text,
"entities": entities,
"relations": relations,
}

return obj


Expand Down
43 changes: 27 additions & 16 deletions edsnlp/data/standoff.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,15 @@ def dump_standoff_file(
if parent_dir and not fs.exists(parent_dir):
fs.makedirs(parent_dir, exist_ok=True)
if not fs.exists(txt_filename) or overwrite_txt:
with fs.open(txt_filename, "w") as f:
with fs.open(txt_filename, "w", encoding="utf-8") as f:
f.write(doc["text"])

ann_filename = txt_filename.replace(".txt", ".ann")
attribute_idx = 1
note_idx = 1
entities_ids = defaultdict(lambda: "T" + str(len(entities_ids) + 1))
if not fs.exists(ann_filename) or overwrite_ann:
with fs.open(ann_filename, "w") as f:
with fs.open(ann_filename, "w", encoding="utf-8") as f:
if "entities" in doc:
for entity in doc["entities"]:
spans = []
Expand Down Expand Up @@ -264,20 +265,30 @@ def dump_standoff_file(
file=f,
)
attribute_idx += 1

# fmt: off
# if "relations" in doc:
# for i, relation in enumerate(doc["relations"]):
# entity_from = entities_ids[relation["from_entity_id"]]
# entity_to = entities_ids[relation["to_entity_id"]]
# print(
# "R{}\t{} Arg1:{} Arg2:{}\t".format(
# i + 1, str(relation["label"]), entity_from,
# entity_to
# ),
# file=f,
# )
# fmt: on
if "note" in entity:
print(
"#{}\tAnnotatorNotes {}\t{}".format(
note_idx,
brat_entity_id,
(" " + str(entity["note"])),
),
file=f,
)
note_idx += 1
# Add relation handling
relation_idx = 1
if "relations" in doc:
for relation in doc["relations"]:
print(
"R{}\t{} Arg1:{} Arg2:{}".format(
relation_idx,
relation["relation_type"],
entities_ids[relation["from_entity_id"]],
entities_ids[relation["to_entity_id"]],
),
file=f,
)
relation_idx += 1


class StandoffReader(BaseReader):
Expand Down
1 change: 1 addition & 0 deletions edsnlp/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .misc.dates.factory import create_component as dates
from .misc.quantities.factory import create_component as quantities
from .misc.reason.factory import create_component as reason
from .misc.relations.factory import create_component as relations
from .misc.sections.factory import create_component as sections
from .misc.tables.factory import create_component as tables
from .ner.adicap.factory import create_component as adicap
Expand Down
1 change: 1 addition & 0 deletions edsnlp/pipes/misc/relations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .relations import RelationsMatcher
17 changes: 17 additions & 0 deletions edsnlp/pipes/misc/relations/factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from edsnlp.core import registry

from .relations import RelationsMatcher

# Default configuration values declared alongside the `eds.relations` factory.
# NOTE(review): the meaning of each option is defined by RelationsMatcher,
# which is not visible here — confirm against that class before changing them.
DEFAULT_CONFIG = {
    "scheme": None,
    "use_sentences": False,
    "clean_rel": False,
    "proximity_method": "right",
    "max_dist": 45,
}

# Build the registration decorator for the "eds.relations" factory name,
# declaring that the component assigns `doc.spans` and keeping the legacy
# "relations" name as a deprecated alias.
_register_relations = registry.factory.register(
    "eds.relations",
    assigns=["doc.spans"],
    deprecated=["relations"],
)

# Apply it to the matcher class; the result is the public factory entry point.
create_component = _register_relations(RelationsMatcher)
17 changes: 17 additions & 0 deletions edsnlp/pipes/misc/relations/patterns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Relation scheme: a list of rules, each pairing "source" span descriptions
# with "target" span descriptions and naming the relation ("type") and its
# inverse ("inv_type"). Each description gives a span label plus, per
# attribute name, the list of accepted values.
# NOTE(review): presumably consumed by RelationsMatcher — confirm how `None`
# in an attribute value list is interpreted there.
scheme = [
    dict(
        source=[dict(label="Chemical_and_drugs", attr=dict(Tech=[None]))],
        target=[
            dict(
                label="Temporal",
                attr=dict(AttTemp=[None, "Duration", "Date", "Frequency"]),
            ),
            dict(
                label="Chemical_and_drugs",
                attr=dict(Tech=["dosage", "route", "strength", "form"]),
            ),
        ],
        type="Depend",
        inv_type="inv_Depend",
    ),
]
Loading