From a9560597073684fcce788dc0d69c43e8bccef186 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kwasigroch <51204632+akwasigroch@users.noreply.github.com> Date: Fri, 21 Feb 2025 20:05:01 +0100 Subject: [PATCH] Tokenizer fix (#234) Co-authored-by: Arkadiusz Kwasigroch --- src/femr/models/tokenizer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/femr/models/tokenizer.py b/src/femr/models/tokenizer.py index 26922dca..2bced19d 100644 --- a/src/femr/models/tokenizer.py +++ b/src/femr/models/tokenizer.py @@ -190,14 +190,14 @@ def convert_statistics_to_msgpack( if start_val == end_val: continue - entry = { - "type": "numeric", - "code_string": code, - "val_start": start_val, - "val_end": end_val, - "weight": weight * math.log(weight) + (1 - weight) * math.log(1 - weight), - } - vocab.append(entry) + entry = { + "type": "numeric", + "code_string": code, + "val_start": start_val, + "val_end": end_val, + "weight": weight * math.log(weight) + (1 - weight) * math.log(1 - weight), + } + vocab.append(entry) else: assert ontology for code, weight in statistics["code_counts"].items():