From fc222163d8eb9f33d4248869c06316cefb266a1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Santanch=C3=A8?= Date: Sat, 16 Dec 2023 16:08:57 -0300 Subject: [PATCH] feat (annotation): new clustering metric --- .../public/editor/annotate/js/annotator.js | 6 +- .../public/editor/annotate/js/metrics.js | 52 ++++++ .../public/editor/annotate/metrics/index.html | 1 + .../public/editor/annotate/metrics/metrics.js | 148 ++++++++++++++---- .../cases/play/schemas/heart-robot-kolb.csv | 1 + .../public/report/js/report-annotations.js | 9 +- 6 files changed, 186 insertions(+), 31 deletions(-) create mode 100644 src/adonisjs/public/report/category/cases/play/schemas/heart-robot-kolb.csv diff --git a/src/adonisjs/public/editor/annotate/js/annotator.js b/src/adonisjs/public/editor/annotate/js/annotator.js index dffa3361..9c121757 100644 --- a/src/adonisjs/public/editor/annotate/js/annotator.js +++ b/src/adonisjs/public/editor/annotate/js/annotator.js @@ -539,6 +539,9 @@ class Annotator { o2html += '' + catList[g[0]-1] + '' + g[2] + '' } const ctcategories = Object.keys(catIndex).length + + const clustering = Math.round(AnnotationMetrics.i._clusteringFreeRecall(catOrder)*100) / 100 + if (isAnnotations) { document.querySelector('#memory-scores').innerHTML = `` diff --git a/src/adonisjs/public/editor/annotate/js/metrics.js b/src/adonisjs/public/editor/annotate/js/metrics.js index fe451dd3..6c7e69bd 100644 --- a/src/adonisjs/public/editor/annotate/js/metrics.js +++ b/src/adonisjs/public/editor/annotate/js/metrics.js @@ -60,6 +60,58 @@ class AnnotationMetrics { score: subs } } + + /* + * Category clustering calculator for free recall + * https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3665324/ + */ + _clusteringFreeRecall (categoriesOrder) { + const n = categoriesOrder.length // number of recalled items + + // sort by text position (second element) + const sortedL = categoriesOrder.sort((a, b) => a[1] - b[1]) + + const nc = {} // number of recalled items in each recalled category 
+ let r = 0 // number of category repetition + for (let i = 0; i < sortedL.length; i++) { + const cat = sortedL[i][0] + if (!nc[cat]) + nc[cat] = 1 + else + nc[cat]++ + let nextPos = i + 1 + while (nextPos < sortedL.length && sortedL[nextPos][1] === sortedL[i][1]) + nextPos++ + if (nextPos < sortedL.length) { + let sp = nextPos + while (sp < sortedL.length && sortedL[sp][1] === sortedL[nextPos][1]) { + if (cat == sortedL[sp][0]) { + r++ + break + } + sp++ + } + } + } + + const c = Object.keys(nc).length // number of recalled categories + const max = n - c // maximum possible number of category repetitions + + let er = 0 // expected number of category repetitions + for (const cat in nc) + er += nc[cat] * nc[cat] + er = er / n - 1 + + const rr = r / (n - 1) // ratio of repetition + + const mrr = r / max // modified ratio of repetition + + const ds = r - er // deviation score + + const arc = (r - er) / (max - er) // adjusted ratio of clustering + + return arc + } } (function () { diff --git a/src/adonisjs/public/editor/annotate/metrics/index.html b/src/adonisjs/public/editor/annotate/metrics/index.html index c33b35e5..dfc93750 100644 --- a/src/adonisjs/public/editor/annotate/metrics/index.html +++ b/src/adonisjs/public/editor/annotate/metrics/index.html @@ -1,6 +1,7 @@ + diff --git a/src/adonisjs/public/editor/annotate/metrics/metrics.js b/src/adonisjs/public/editor/annotate/metrics/metrics.js index ae19295e..71f1e5b7 100644 --- a/src/adonisjs/public/editor/annotate/metrics/metrics.js +++ b/src/adonisjs/public/editor/annotate/metrics/metrics.js @@ -1,7 +1,7 @@ function sortSubstitutionCount(numbersList) { const sortedL = numbersList.slice().sort((a, b) => a - b) - console.log(numbersList) - console.log(sortedL) + present(numbersList) + present(sortedL) let subs = 0 for (let i = 0; i < numbersList.length; i++) { if (numbersList[i] !== sortedL[i]) { @@ -11,18 +11,18 @@ function sortSubstitutionCount(numbersList) { return Math.ceil(subs/2) } -// 
console.log(sortSubstitutionCount([1, 1, 1, 2, 3, 3, 4])) -// console.log(sortSubstitutionCount([1, 1, 1, 3, 2, 3, 4])) -// console.log(sortSubstitutionCount([1, 1, 1, 3, 3, 2, 4])) -// console.log(sortSubstitutionCount([1, 1, 1, 4, 3, 2, 3])) -// console.log(sortSubstitutionCount([1, 4, 1, 3, 3, 2, 1])) -// console.log(sortSubstitutionCount([4, 3, 3, 2, 1, 1, 1])) +// present(sortSubstitutionCount([1, 1, 1, 2, 3, 3, 4])) +// present(sortSubstitutionCount([1, 1, 1, 3, 2, 3, 4])) +// present(sortSubstitutionCount([1, 1, 1, 3, 3, 2, 4])) +// present(sortSubstitutionCount([1, 1, 1, 4, 3, 2, 3])) +// present(sortSubstitutionCount([1, 4, 1, 3, 3, 2, 1])) +// present(sortSubstitutionCount([4, 3, 3, 2, 1, 1, 1])) function selfOrderCount(categoriesOrder) { // sort by text position (second element) const sortedL = categoriesOrder.sort((a, b) => a[1] - b[1]) - console.log('Sorted by position') - console.log(sortedL) + present('Sorted by position') + present(sortedL) // group by category (first element) // group = [category, position, count] @@ -46,13 +46,13 @@ function selfOrderCount(categoriesOrder) { prev = i } } - console.log('Grouped by category') - console.log(JSON.parse(JSON.stringify(grouped))) + present('Grouped by category') + present(JSON.parse(JSON.stringify(grouped))) // sort groups by position (second element) const sortedG = grouped.sort((a, b) => a[1] - b[1]) - console.log('Group sorted by position') - console.log(JSON.parse(JSON.stringify(sortedG))) + present('Group sorted by position') + present(JSON.parse(JSON.stringify(sortedG))) // count order change to group together categories let subs = 0 @@ -75,21 +75,113 @@ function selfOrderCount(categoriesOrder) { } } } - console.log('Final group after ordering') - console.log(sortedG) + present('Final group after ordering') + present(sortedG) return subs } -console.log(selfOrderCount( - [[1, 10], [2, 20], [1, 20], [2, 30]])) -console.log(selfOrderCount( - [[1, 10], [2, 20], [1, 25], [2, 30]])) 
-console.log(selfOrderCount( - [[1, 10], [2, 20], [1, 20], [3, 20], [1, 30], [2, 30], [3, 30]])) -console.log(selfOrderCount( - [[2, 71], [2, 96], [3, 98], [2, 100], [5, 120], [5, 130], [5, 135], [3, 140], [5, 180]])) -console.log(selfOrderCount( - [[5, 135], [2, 100], [2, 71], [2, 96], [5, 130], [3, 98], [5, 180], [5, 120], [3, 140]])) -console.log(selfOrderCount( - [[2, 71], [2, 96], [3, 98], [2, 98], [5, 98], [5, 130], [5, 135], [3, 140], [5, 180]])) \ No newline at end of file +// present(selfOrderCount( +// [[1, 10], [2, 20], [1, 20], [2, 30]])) +// present(selfOrderCount( +// [[1, 10], [2, 20], [1, 25], [2, 30]])) +// present(selfOrderCount( +// [[1, 10], [2, 20], [1, 20], [3, 20], [1, 30], [2, 30], [3, 30]])) +// present(selfOrderCount( +// [[2, 71], [2, 96], [3, 98], [2, 100], [5, 120], [5, 130], [5, 135], [3, 140], [5, 180]])) +// present(selfOrderCount( +// [[5, 135], [2, 100], [2, 71], [2, 96], [5, 130], [3, 98], [5, 180], [5, 120], [3, 140]])) +// present(selfOrderCount( +// [[2, 71], [2, 96], [3, 98], [2, 98], [5, 98], [5, 130], [5, 135], [3, 140], [5, 180]])) + +/* + * Category clustering calculator for free recall + * https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3665324/ + * tested here and transferred to /editor/annotate/js/metrics.js +*/ +function clusteringFreeRecall(categoriesOrder, console) { + + const n = categoriesOrder.length // number of recalled items + + // sort by text position (second element) + const sortedL = categoriesOrder.sort((a, b) => a[1] - b[1]) + + const nc = {} // number of recalled items in each recalled category + let r = 0 // number of category repetition + for (let i = 0; i < sortedL.length; i++) { + const cat = sortedL[i][0] + if (!nc[cat]) + nc[cat] = 1 + else + nc[cat]++ + let nextPos = i + 1 + while (nextPos < sortedL.length && sortedL[nextPos][1] === sortedL[i][1]) + nextPos++ + if (nextPos < sortedL.length) { + let sp = nextPos + while (sp < sortedL.length && sortedL[sp][1] === sortedL[nextPos][1]) { + if (cat == 
sortedL[sp][0]) { + r++ + break + } + sp++ + } + } + } + + const c = Object.keys(nc).length // number of recalled categories + const max = n - c // maximum possible number of category repetitions + + let er = 0 // expected number of category repetitions + for (const cat in nc) + er += nc[cat] * nc[cat] + er = er / n - 1 + + const rr = r / (n - 1) // ratio of repetition + + const mrr = r / max // modified ratio of repetition + + const ds = r - er // deviation score + + const arc = (r - er) / (max - er) // adjusted ratio of clustering + + if (console) { + present('\n\n=== Clustering Free Recall ===') + present(JSON.stringify(categoriesOrder)) + present('--- n = ' + n) + present('--- sorted by position') + present(JSON.stringify(sortedL)) + present('--- c = ' + c) + present('--- ni') + present(nc) + present('--- r = ' + r) + present('--- max = ' + max) + present('--- E(r) = ' + Math.round(er * 100) / 100) + present('--- RR = ' + Math.round(rr * 100) / 100) + present('--- MRR = ' + Math.round(mrr * 100) / 100) + present('--- DS = ' + Math.round(ds * 100) / 100) + present('--- ARC = ' + Math.round(arc * 100) / 100) + } + + return arc +} + +function present (output) { + document.querySelector('#console').value += output + '\n' +} + +present(clusteringFreeRecall( + [[2, 1], [4, 2], [4, 3], [3, 4], [2, 5], [3, 6], [1, 7], [4, 8], [4, 9]], true +)) +present(clusteringFreeRecall( + [[3, 1], [4, 2], [4, 3], [3, 4], [1, 5], [1, 6], [3, 7], [1, 8], [1, 9], [2, 10], [2, 11], [2, 12], [4, 13], [4, 14], [3, 15]], true +)) +present(clusteringFreeRecall( + [[2, 1], [2, 2], [3, 3], [1, 4], [1, 5], [1, 6], [1, 7], [2, 8], [3, 9], [3, 10], [2, 11], [1, 12], [4, 13], [4, 14], [4, 15], [4, 16], [2, 17], [2, 18], [3, 19], [1, 20]], true +)) +present(clusteringFreeRecall( + [[5, 135], [2, 100], [2, 71], [2, 96], [5, 130], [3, 98], [5, 180], [5, 120], [3, 140]], true)) +present(clusteringFreeRecall( + [[2, 71], [2, 96], [3, 98], [2, 98], [5, 98], [5, 130], [5, 135], [3, 140], [5, 180]], 
true)) +present(clusteringFreeRecall( + [[2, 71], [2, 96], [3, 98], [2, 98], [5, 98], [7,98], [5, 130], [5, 135], [3, 140], [5, 180]], true)) \ No newline at end of file diff --git a/src/adonisjs/public/report/category/cases/play/schemas/heart-robot-kolb.csv b/src/adonisjs/public/report/category/cases/play/schemas/heart-robot-kolb.csv new file mode 100644 index 00000000..da9f4889 --- /dev/null +++ b/src/adonisjs/public/report/category/cases/play/schemas/heart-robot-kolb.csv @@ -0,0 +1 @@ +Kolb1.kb1,Kolb2.kb2,Kolb3.kb3,Kolb4.kb4,Kolb5.kb5,Kolb6.kb6,Kolb7.kb7,Kolb8.kb8,Kolb9.kb9,Kolb10.kb10,Kolb11.kb11,Kolb12.kb12 \ No newline at end of file diff --git a/src/adonisjs/public/report/js/report-annotations.js b/src/adonisjs/public/report/js/report-annotations.js index e0716aff..f798548d 100644 --- a/src/adonisjs/public/report/js/report-annotations.js +++ b/src/adonisjs/public/report/js/report-annotations.js @@ -111,6 +111,8 @@ class ReportManager { } const selfOrder = AnnotationMetrics.i._selfOrderCount(catOrder) + const clustering = AnnotationMetrics.i._clusteringFreeRecall(catOrder) + let o1csv = '' let sep = '' for (const g of selfOrder.groups) { @@ -133,7 +135,10 @@ class ReportManager { const ctcategories = Object.keys(catIndex).length - return `${ctcategories},${ctright},${ctinfright},${ctideas},${ctrightencap},${ctinfrightencap},${ctwrong},${ctwrongencap},${ctcategories * ctideas},${(ctideas == 0) ? 0 : ctright / ctideas},${(ctideas == 0) ? 0 : ctinfright / ctideas},${(ctideas == 0) ? 0 : (ctrightencap + ctwrongencap) / ctideas},${selfOrder.score},${(ctideas == 0) ? 0 : selfOrder.score / ctideas}${countCat},"${o1csv}","${o2csv}"` + return `${ctcategories},${ctright},${ctinfright},${ctideas},${ctrightencap},${ctinfrightencap},${ctwrong},${ctwrongencap},` + + `${ctcategories * ctideas},${(ctideas == 0) ? 0 : ctright / ctideas},${(ctideas == 0) ? 0 : ctinfright / ctideas},` + + `${(ctideas == 0) ? 
0 : (ctrightencap + ctwrongencap) / ctideas},${selfOrder.score},` + + `${(ctideas == 0) ? 0 : selfOrder.score / ctideas},${clustering}${countCat},"${o1csv}","${o2csv}"` } async _download () { @@ -146,7 +151,7 @@ class ReportManager { '"used categories","right","right (inferred)","total ideas","right encapsulated",' + '"right encapsulated (inferred)","wrong","wrong encapsulated","coverage score",' + '"accuracy score","accuracy score (inferred)","encapsulated score","self order score",' + - '"normalized self order score"' + '"normalized self order score","clustering in free recall"' for (const m in ReportManager.catList) table += ',"' + ReportManager.catList[m] + '"'