From fc222163d8eb9f33d4248869c06316cefb266a1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Santanch=C3=A8?= <santanche@gmail.com>
Date: Sat, 16 Dec 2023 16:08:57 -0300
Subject: [PATCH] feat (annotation): new clustering metric

---
 .../public/editor/annotate/js/annotator.js    |   6 +-
 .../public/editor/annotate/js/metrics.js      |  52 ++++++
 .../public/editor/annotate/metrics/index.html |   1 +
 .../public/editor/annotate/metrics/metrics.js | 148 ++++++++++++++----
 .../cases/play/schemas/heart-robot-kolb.csv   |   1 +
 .../public/report/js/report-annotations.js    |   9 +-
 6 files changed, 186 insertions(+), 31 deletions(-)
 create mode 100644 src/adonisjs/public/report/category/cases/play/schemas/heart-robot-kolb.csv
diff --git a/src/adonisjs/public/editor/annotate/js/annotator.js b/src/adonisjs/public/editor/annotate/js/annotator.js
index dffa3361..9c121757 100644
--- a/src/adonisjs/public/editor/annotate/js/annotator.js
+++ b/src/adonisjs/public/editor/annotate/js/annotator.js
@@ -539,6 +539,9 @@ class Annotator {
       o2html += '<tr><td>' + catList[g[0]-1] + '</td><td>' + g[2] + '</td></tr>'
     }
     const ctcategories = Object.keys(catIndex).length
+
+    const clustering = Math.round(AnnotationMetrics.i._clusteringFreeRecall(catOrder)*100) / 100
+
     if (isAnnotations) {
       document.querySelector('#memory-scores').innerHTML =
         `<ul>
@@ -556,7 +559,8 @@ class Annotator {
           <li><b>accuracy score (inferred):</b> ${(ctideas == 0) ? '' : ctinfright / ctideas}</li>
           <li><b>encapsulated score:</b> ${(ctideas == 0) ? '' : (ctrightencap + ctwrongencap) / ctideas}</li>
           <li><b>self order score:</b> ${selfOrder.score}</li>
-          <li><b>normalized self order score:</b> ${(ctideas == 0) ? 0 : selfOrder.score / ctideas}</li>
+          <li><b>normalized self order score:</b> ${(ctideas == 0) ? 0 : Math.round(selfOrder.score / ctideas * 100) / 100}</li>
+          <li><b>clustering in free recall:</b> ${clustering}</li>
           <li> ${o1html} </li>
           <li> ${o2html} </li>
         </ul>`
diff --git a/src/adonisjs/public/editor/annotate/js/metrics.js b/src/adonisjs/public/editor/annotate/js/metrics.js
index fe451dd3..6c7e69bd 100644
--- a/src/adonisjs/public/editor/annotate/js/metrics.js
+++ b/src/adonisjs/public/editor/annotate/js/metrics.js
@@ -60,6 +60,58 @@ class AnnotationMetrics {
       score: subs
     }
   }
+
+  /*
+  * Category clustering in free recall: adjusted ratio of clustering (ARC)
+  * https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3665324/
+  *
+  * categoriesOrder: array of [category, position] pairs, one per recalled
+  *   item; the array itself is left untouched (a sorted copy is used)
+  * returns ARC = (r - E(r)) / (max - E(r)), or 0 when that ratio is undefined
+  *   (no items, or max equal to E(r) as with a single recalled category)
+  */
+  _clusteringFreeRecall (categoriesOrder) {
+    const n = categoriesOrder.length  // number of recalled items
+
+    // sort a copy by text position (second element)
+    const sortedL = categoriesOrder.slice().sort((a, b) => a[1] - b[1])
+
+    const nc = {}  // number of recalled items in each recalled category
+    let r = 0  // number of category repetitions
+    for (let i = 0; i < sortedL.length; i++) {
+      const cat = sortedL[i][0]
+      nc[cat] = (nc[cat] || 0) + 1
+
+      // skip the items that share the current position (ties)
+      let nextPos = i + 1
+      while (nextPos < sortedL.length && sortedL[nextPos][1] === sortedL[i][1])
+        nextPos++
+
+      // repetition: same category found among the items at the next position
+      if (nextPos < sortedL.length) {
+        let sp = nextPos
+        while (sp < sortedL.length && sortedL[sp][1] === sortedL[nextPos][1]) {
+          if (cat == sortedL[sp][0]) {  // == treats 1 and '1' alike, as the keys of nc do
+            r++
+            break
+          }
+          sp++
+        }
+      }
+    }
+
+    const c = Object.keys(nc).length  // number of recalled categories
+    const max = n - c  // maximum possible number of category repetitions
+
+    let sumSquares = 0  // sum of the squared category sizes
+    for (const cat in nc)
+      sumSquares += nc[cat] * nc[cat]
+    const er = sumSquares / n - 1  // expected number of category repetitions
+
+    // a zero denominator would yield NaN or Infinity, so 0 is returned instead
+    const denominator = max - er
+    return (n === 0 || denominator === 0) ? 0 : (r - er) / denominator
+  }
 }
 
 (function () {
diff --git a/src/adonisjs/public/editor/annotate/metrics/index.html b/src/adonisjs/public/editor/annotate/metrics/index.html
index c33b35e5..dfc93750 100644
--- a/src/adonisjs/public/editor/annotate/metrics/index.html
+++ b/src/adonisjs/public/editor/annotate/metrics/index.html
@@ -1,6 +1,7 @@
 <!DOCTYPE html>
 <html>
   <body>
+    <textarea id="console" rows="50" style="width:100%" readonly></textarea>
     <script type="text/javascript" src="metrics.js">
     </script>
   </body>
diff --git a/src/adonisjs/public/editor/annotate/metrics/metrics.js b/src/adonisjs/public/editor/annotate/metrics/metrics.js
index ae19295e..71f1e5b7 100644
--- a/src/adonisjs/public/editor/annotate/metrics/metrics.js
+++ b/src/adonisjs/public/editor/annotate/metrics/metrics.js
@@ -1,7 +1,7 @@
 function sortSubstitutionCount(numbersList) {
   const sortedL = numbersList.slice().sort((a, b) => a - b)
-  console.log(numbersList)
-  console.log(sortedL)
+  present(numbersList)
+  present(sortedL)
   let subs = 0
   for (let i = 0; i < numbersList.length; i++) {
     if (numbersList[i] !== sortedL[i]) {
@@ -11,18 +11,18 @@ function sortSubstitutionCount(numbersList) {
   return Math.ceil(subs/2)
 }
 
-// console.log(sortSubstitutionCount([1, 1, 1, 2, 3, 3, 4]))
-// console.log(sortSubstitutionCount([1, 1, 1, 3, 2, 3, 4]))
-// console.log(sortSubstitutionCount([1, 1, 1, 3, 3, 2, 4]))
-// console.log(sortSubstitutionCount([1, 1, 1, 4, 3, 2, 3]))
-// console.log(sortSubstitutionCount([1, 4, 1, 3, 3, 2, 1]))
-// console.log(sortSubstitutionCount([4, 3, 3, 2, 1, 1, 1]))
+// present(sortSubstitutionCount([1, 1, 1, 2, 3, 3, 4]))
+// present(sortSubstitutionCount([1, 1, 1, 3, 2, 3, 4]))
+// present(sortSubstitutionCount([1, 1, 1, 3, 3, 2, 4]))
+// present(sortSubstitutionCount([1, 1, 1, 4, 3, 2, 3]))
+// present(sortSubstitutionCount([1, 4, 1, 3, 3, 2, 1]))
+// present(sortSubstitutionCount([4, 3, 3, 2, 1, 1, 1]))
 
 function selfOrderCount(categoriesOrder) {
   // sort by text position (second element)
   const sortedL = categoriesOrder.sort((a, b) => a[1] - b[1])
-  console.log('Sorted by position')
-  console.log(sortedL)
+  present('Sorted by position')
+  present(sortedL)
 
   // group by category (first element)
   // group = [category, position, count]
@@ -46,13 +46,13 @@ function selfOrderCount(categoriesOrder) {
         prev = i
     }
   }
-  console.log('Grouped by category')
-  console.log(JSON.parse(JSON.stringify(grouped)))
+  present('Grouped by category')
+  present(JSON.parse(JSON.stringify(grouped)))
 
   // sort groups by position (second element)
   const sortedG = grouped.sort((a, b) => a[1] - b[1])
-  console.log('Group sorted by position')
-  console.log(JSON.parse(JSON.stringify(sortedG)))
+  present('Group sorted by position')
+  present(JSON.parse(JSON.stringify(sortedG)))
 
   // count order change to group together categories
   let subs = 0
@@ -75,21 +75,113 @@ function selfOrderCount(categoriesOrder) {
       }
     }
   }
-  console.log('Final group after ordering')
-  console.log(sortedG)
+  present('Final group after ordering')
+  present(sortedG)
 
   return subs
 }
 
-console.log(selfOrderCount(
-  [[1, 10], [2, 20], [1, 20], [2, 30]]))
-console.log(selfOrderCount(
-  [[1, 10], [2, 20], [1, 25], [2, 30]]))
-console.log(selfOrderCount(
-  [[1, 10], [2, 20], [1, 20], [3, 20], [1, 30], [2, 30], [3, 30]]))
-console.log(selfOrderCount(
-  [[2, 71], [2, 96], [3, 98], [2, 100], [5, 120], [5, 130], [5, 135], [3, 140], [5, 180]]))
-console.log(selfOrderCount(
-  [[5, 135], [2, 100], [2, 71], [2, 96], [5, 130], [3, 98], [5, 180], [5, 120], [3, 140]]))
-console.log(selfOrderCount(
-  [[2, 71], [2, 96], [3, 98], [2, 98], [5, 98], [5, 130], [5, 135], [3, 140], [5, 180]]))
\ No newline at end of file
+// present(selfOrderCount(
+//   [[1, 10], [2, 20], [1, 20], [2, 30]]))
+// present(selfOrderCount(
+//   [[1, 10], [2, 20], [1, 25], [2, 30]]))
+// present(selfOrderCount(
+//   [[1, 10], [2, 20], [1, 20], [3, 20], [1, 30], [2, 30], [3, 30]]))
+// present(selfOrderCount(
+//   [[2, 71], [2, 96], [3, 98], [2, 100], [5, 120], [5, 130], [5, 135], [3, 140], [5, 180]]))
+// present(selfOrderCount(
+//   [[5, 135], [2, 100], [2, 71], [2, 96], [5, 130], [3, 98], [5, 180], [5, 120], [3, 140]]))
+// present(selfOrderCount(
+//   [[2, 71], [2, 96], [3, 98], [2, 98], [5, 98], [5, 130], [5, 135], [3, 140], [5, 180]]))
+
+/*
+ * Category clustering calculator for free recall
+ * https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3665324/
+ * tested here and transferred to /editor/annotate/js/metrics.js
+ * categoriesOrder: array of [category, position] pairs; it is not modified
+ * console: when truthy, the intermediate values are written with present()
+ * returns the adjusted ratio of clustering (ARC), or 0 when it is undefined
+*/
+function clusteringFreeRecall(categoriesOrder, console) {
+  const n = categoriesOrder.length  // number of recalled items
+
+  // sort a copy by text position (second element), keeping the input intact
+  const sortedL = categoriesOrder.slice().sort((a, b) => a[1] - b[1])
+
+  const nc = {}  // number of recalled items in each recalled category
+  let r = 0  // number of category repetitions
+  for (let i = 0; i < sortedL.length; i++) {
+    const cat = sortedL[i][0]
+    nc[cat] = (nc[cat] || 0) + 1
+    // skip the items that share the current position (ties)
+    let nextPos = i + 1
+    while (nextPos < sortedL.length && sortedL[nextPos][1] === sortedL[i][1])
+      nextPos++
+    // repetition: same category found among the items at the next position
+    if (nextPos < sortedL.length) {
+      let sp = nextPos
+      while (sp < sortedL.length && sortedL[sp][1] === sortedL[nextPos][1]) {
+        if (cat == sortedL[sp][0]) {
+          r++
+          break
+        }
+        sp++
+      }
+    }
+  }
+
+  const c = Object.keys(nc).length  // number of recalled categories
+  const max = n - c  // maximum possible number of category repetitions
+
+  let er = 0  // expected number of category repetitions
+  for (const cat in nc)
+    er += nc[cat] * nc[cat]
+  er = er / n - 1
+
+  const rr = r / (n - 1)  // ratio of repetition
+  const mrr = r / max  // modified ratio of repetition
+  const ds = r - er  // deviation score
+
+  // adjusted ratio of clustering; 0 replaces the NaN/Infinity of a zero denominator
+  const arc = (n === 0 || max === er) ? 0 : (r - er) / (max - er)
+
+  if (console) {
+    present('\n\n=== Clustering Free Recall ===')
+    present(JSON.stringify(categoriesOrder))
+    present('--- n = ' + n)
+    present('--- sorted by position')
+    present(JSON.stringify(sortedL))
+    present('--- c = ' + c)
+    present('--- ni')
+    present(JSON.stringify(nc))
+    present('--- r = ' + r)
+    present('--- max = ' + max)
+    present('--- E(r) = ' + Math.round(er * 100) / 100)
+    present('--- RR = ' + Math.round(rr * 100) / 100)
+    present('--- MRR = ' + Math.round(mrr * 100) / 100)
+    present('--- DS = ' + Math.round(ds * 100) / 100)
+    present('--- ARC = ' + Math.round(arc * 100) / 100)
+  }
+
+  return arc
+}
+
+function present (output) {  // appends a line to #console; objects/arrays are shown as JSON
+  document.querySelector('#console').value += ((output !== null && typeof output === 'object') ? JSON.stringify(output) : output) + '\n'
+}
+
+present(clusteringFreeRecall(  // 9 items, categories 1-4, positions 1..9 already in order
+  [[2, 1], [4, 2], [4, 3], [3, 4], [2, 5], [3, 6], [1, 7], [4, 8], [4, 9]], true
+))
+present(clusteringFreeRecall(  // 15 items, categories 1-4, positions 1..15
+  [[3, 1], [4, 2], [4, 3], [3, 4], [1, 5], [1, 6], [3, 7], [1, 8], [1, 9], [2, 10], [2, 11], [2, 12], [4, 13], [4, 14], [3, 15]], true
+))
+present(clusteringFreeRecall(  // 20 items, categories 1-4, positions 1..20
+  [[2, 1], [2, 2], [3, 3], [1, 4], [1, 5], [1, 6], [1, 7], [2, 8], [3, 9], [3, 10], [2, 11], [1, 12], [4, 13], [4, 14], [4, 15], [4, 16], [2, 17], [2, 18], [3, 19], [1, 20]], true
+))
+present(clusteringFreeRecall(  // 9 items listed out of position order
+  [[5, 135], [2, 100], [2, 71], [2, 96], [5, 130], [3, 98], [5, 180], [5, 120], [3, 140]], true))
+present(clusteringFreeRecall(  // three items (categories 3, 2 and 5) share position 98
+  [[2, 71], [2, 96], [3, 98], [2, 98], [5, 98], [5, 130], [5, 135], [3, 140], [5, 180]], true))
+present(clusteringFreeRecall(  // four items share position 98; category 7 occurs only there
+  [[2, 71], [2, 96], [3, 98], [2, 98], [5, 98], [7,98], [5, 130], [5, 135], [3, 140], [5, 180]], true))
\ No newline at end of file
diff --git a/src/adonisjs/public/report/category/cases/play/schemas/heart-robot-kolb.csv b/src/adonisjs/public/report/category/cases/play/schemas/heart-robot-kolb.csv
new file mode 100644
index 00000000..da9f4889
--- /dev/null
+++ b/src/adonisjs/public/report/category/cases/play/schemas/heart-robot-kolb.csv
@@ -0,0 +1 @@
+Kolb1.kb1,Kolb2.kb2,Kolb3.kb3,Kolb4.kb4,Kolb5.kb5,Kolb6.kb6,Kolb7.kb7,Kolb8.kb8,Kolb9.kb9,Kolb10.kb10,Kolb11.kb11,Kolb12.kb12
\ No newline at end of file
diff --git a/src/adonisjs/public/report/js/report-annotations.js b/src/adonisjs/public/report/js/report-annotations.js
index e0716aff..f798548d 100644
--- a/src/adonisjs/public/report/js/report-annotations.js
+++ b/src/adonisjs/public/report/js/report-annotations.js
@@ -111,6 +111,8 @@ class ReportManager {
     }
     const selfOrder = AnnotationMetrics.i._selfOrderCount(catOrder)
 
+    const clustering = AnnotationMetrics.i._clusteringFreeRecall(catOrder)
+
     let o1csv = ''
     let sep = ''
     for (const g of selfOrder.groups) {
@@ -133,7 +135,10 @@ class ReportManager {
     
     const ctcategories = Object.keys(catIndex).length
 
-    return `${ctcategories},${ctright},${ctinfright},${ctideas},${ctrightencap},${ctinfrightencap},${ctwrong},${ctwrongencap},${ctcategories * ctideas},${(ctideas == 0) ? 0 : ctright / ctideas},${(ctideas == 0) ? 0 : ctinfright / ctideas},${(ctideas == 0) ? 0 : (ctrightencap + ctwrongencap) / ctideas},${selfOrder.score},${(ctideas == 0) ? 0 : selfOrder.score / ctideas}${countCat},"${o1csv}","${o2csv}"`
+    return `${ctcategories},${ctright},${ctinfright},${ctideas},${ctrightencap},${ctinfrightencap},${ctwrong},${ctwrongencap},` +
+           `${ctcategories * ctideas},${(ctideas == 0) ? 0 : ctright / ctideas},${(ctideas == 0) ? 0 : ctinfright / ctideas},` +
+           `${(ctideas == 0) ? 0 : (ctrightencap + ctwrongencap) / ctideas},${selfOrder.score},` +
+           `${(ctideas == 0) ? 0 : selfOrder.score / ctideas},${clustering}${countCat},"${o1csv}","${o2csv}"`
   }
 
   async _download () {
@@ -146,7 +151,7 @@ class ReportManager {
         '"used categories","right","right (inferred)","total ideas","right encapsulated",' +
         '"right encapsulated (inferred)","wrong","wrong encapsulated","coverage score",' +
         '"accuracy score","accuracy score (inferred)","encapsulated score","self order score",' +
-        '"normalized self order score"'
+        '"normalized self order score","clustering in free recall"'
 
       for (const m in ReportManager.catList)
         table += ',"' + ReportManager.catList[m] + '"'