From a9560597073684fcce788dc0d69c43e8bccef186 Mon Sep 17 00:00:00 2001
From: Arkadiusz Kwasigroch <51204632+akwasigroch@users.noreply.github.com>
Date: Fri, 21 Feb 2025 20:05:01 +0100
Subject: [PATCH] Tokenizer fix (#234)

Co-authored-by: Arkadiusz Kwasigroch <arkadiusz.kwasigroch@hpi.de>
---
 src/femr/models/tokenizer.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/femr/models/tokenizer.py b/src/femr/models/tokenizer.py
index 26922dca..2bced19d 100644
--- a/src/femr/models/tokenizer.py
+++ b/src/femr/models/tokenizer.py
@@ -190,14 +190,14 @@ def convert_statistics_to_msgpack(
                 if start_val == end_val:
                     continue
 
-            entry = {
-                "type": "numeric",
-                "code_string": code,
-                "val_start": start_val,
-                "val_end": end_val,
-                "weight": weight * math.log(weight) + (1 - weight) * math.log(1 - weight),
-            }
-            vocab.append(entry)
+                entry = {
+                    "type": "numeric",
+                    "code_string": code,
+                    "val_start": start_val,
+                    "val_end": end_val,
+                    "weight": weight * math.log(weight) + (1 - weight) * math.log(1 - weight),
+                }
+                vocab.append(entry)
     else:
         assert ontology
         for code, weight in statistics["code_counts"].items():