diff --git a/resources/benchmarks/benchmarks_tl_correctness.json b/resources/benchmarks/benchmarks_tl_correctness.json
deleted file mode 100644
index f3fee769..00000000
--- a/resources/benchmarks/benchmarks_tl_correctness.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "version": "0.11.2",
-  "guessing_the_correctness_of_the_text": {
-    "percentage_of_guessed_correct_tl": 0.9785407725321889,
-    "list_of_file_with_incorrect_tl": [
-      "hogans-federal-motion-for-a-preliminary-injunction_1616093696_24.pdf",
-      "afcea-spy.pdf",
-      "b96a__usmc-combat-camera-directory.pdf",
-      "access-the-vision-for-2013.pdf",
-      "demystifying-nge-rock-ridge_1643518222_537.pdf"
-    ],
-    "percentage_of_guessed_incorrect_tl": 0.7916666666666666,
-    "list_of_file_with_correct_tl": [
-      "PE20_1616439522_1.pdf",
-      "slides.pdf",
-      "PE157_1616278053_181.pdf",
-      "EXTERNAL FORMS - SUPPORTING DOCUMENTATION-ESHS9615401 2017_07_27 11_22_39_1616049888_455.pdf",
-      "ЧММФ_Абакумов_учебник.pdf"
-    ]
-  }
-}
\ No newline at end of file
diff --git a/scripts/benchmark_tl_correctness.py b/scripts/benchmark_tl_correctness.py
index 8379fd7b..5469f309 100644
--- a/scripts/benchmark_tl_correctness.py
+++ b/scripts/benchmark_tl_correctness.py
@@ -5,7 +5,6 @@
 import numpy as np
 import requests
 import wget
-from Cryptodome.Random.random import shuffle
 from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_recall_fscore_support
 from tqdm import tqdm
 
@@ -45,7 +44,7 @@ def download_dataset(data_dir: str) -> str:
     return benchmark_data_dir
 
 
-def get_metrics(max_eval_pdf: int = 10000, with_shuffle: bool = False) -> None:
+def get_metrics(max_eval_pdf: int = 10000) -> None:
     data_dir = os.path.join(get_config()["intermediate_data_path"], "text_layer_correctness_data")
     os.makedirs(data_dir, exist_ok=True)
 
@@ -70,8 +69,6 @@ def get_metrics(max_eval_pdf: int = 10000, with_shuffle: bool = False) -> None:
 
     parameters = dict(pdf_with_text_layer="auto", pages="1:1")
     times_correct, times_incorrect = [], []
-    if with_shuffle:
-        shuffle(files)
 
     count = min(max_eval_pdf, len(files))
     for i, file_path in enumerate(tqdm(files[:count])):
@@ -111,9 +108,9 @@ def get_metrics(max_eval_pdf: int = 10000, with_shuffle: bool = False) -> None:
     output += f"--- Class corrected --- : Precision = {avg[0][0]}, Recall={avg[1][0]}, F1={avg[2][0]}\n"
     output += f"--- Class incorrected --- : Precision = {avg[0][1]}, Recall={avg[1][1]}, F1={avg[2][1]}\n"
 
-    output += f"--- AVG Time corrected pdfs --- = {np.array(times_correct).mean()}\n"
-    output += f"--- AVG Time incorrected pdfs --- = {np.array(times_incorrect).mean()}\n"
-    output += f"--- AVG Time all pdfs --- = {np.array(times_correct + times_incorrect).mean()}\n"
+    output += f"--- AVG Time corrected pdfs --- = {np.mean(times_correct)}\n"
+    output += f"--- AVG Time incorrected pdfs --- = {np.mean(times_incorrect)}\n"
+    output += f"--- AVG Time all pdfs --- = {np.mean(times_correct + times_incorrect)}\n"
 
     output += "\n\n--- Failed corrected pdfs --- : \n" + '\n'.join(failed_corrected_pdfs)  # noqa
     output += "\n\n--- Failed incorrected pdfs --- : \n" + '\n'.join(failed_incorrected_pdfs)  # noqa