Skip to content

Commit

Permalink
feat (annotation): new clustering metric
Browse files Browse the repository at this point in the history
  • Loading branch information
santanche committed Dec 16, 2023
1 parent 4162517 commit fc22216
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 31 deletions.
6 changes: 5 additions & 1 deletion src/adonisjs/public/editor/annotate/js/annotator.js
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,9 @@ class Annotator {
o2html += '<tr><td>' + catList[g[0]-1] + '</td><td>' + g[2] + '</td></tr>'
}
const ctcategories = Object.keys(catIndex).length

const clustering = Math.round(AnnotationMetrics.i._clusteringFreeRecall(catOrder)*100) / 100

if (isAnnotations) {
document.querySelector('#memory-scores').innerHTML =
`<ul>
Expand All @@ -556,7 +559,8 @@ class Annotator {
<li><b>accuracy score (inferred):</b> ${(ctideas == 0) ? '' : ctinfright / ctideas}</li>
<li><b>encapsulated score:</b> ${(ctideas == 0) ? '' : (ctrightencap + ctwrongencap) / ctideas}</li>
<li><b>self order score:</b> ${selfOrder.score}</li>
<li><b>normalized self order score:</b> ${(ctideas == 0) ? 0 : selfOrder.score / ctideas}</li>
<li><b>normalized self order score:</b> ${(ctideas == 0) ? 0 : Math.round(selfOrder.score / ctideas * 100) / 100}</li>
<li><b>clustering in free recall:</b> ${clustering}</li>
<li> ${o1html} </li>
<li> ${o2html} </li>
</ul>`
Expand Down
52 changes: 52 additions & 0 deletions src/adonisjs/public/editor/annotate/js/metrics.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,58 @@ class AnnotationMetrics {
score: subs
}
}

/*
* Category clustering calculator for free recall
* https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3665324/
*/
_clusteringFreeRecall (categoriesOrder) {
const n = categoriesOrder.length // number of recalled items

// sort by text position (second element)
const sortedL = categoriesOrder.sort((a, b) => a[1] - b[1])

const nc = {} // number of recalled items in each recalled category
let r = 0 // number of category repetition
for (let i = 0; i < sortedL.length; i++) {
const cat = sortedL[i][0]
if (!nc[cat])
nc[cat] = 1
else
nc[cat]++
let nextPos = i + 1
while (nextPos < sortedL.length && sortedL[nextPos][1] === sortedL[i][1])
nextPos++
if (nextPos < sortedL.length) {
let sp = nextPos
while (sp < sortedL.length && sortedL[sp][1] === sortedL[nextPos][1]) {
if (cat == sortedL[sp][0]) {
r++
break
}
sp++
}
}
}

const c = Object.keys(nc).length // number of recalled categories
const max = n - c // maximum possible number of category repetitions

let er = 0 // expected number of category repetitions
for (const cat in nc)
er += nc[cat] * nc[cat]
er = er / n - 1

const rr = r / (n - 1) // ratio of repetition

const mrr = r / max // modified ratio of repetition

const ds = r - er // deviation score

const arc = (r - er) / (max - er) // adjusted ratio of clustering

return arc
}
}

(function () {
Expand Down
1 change: 1 addition & 0 deletions src/adonisjs/public/editor/annotate/metrics/index.html
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<!DOCTYPE html>
<html>
<body>
<textarea id="console" rows="50" style="width:100%" readonly></textarea>
<script type="text/javascript" src="metrics.js">
</script>
</body>
Expand Down
148 changes: 120 additions & 28 deletions src/adonisjs/public/editor/annotate/metrics/metrics.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
function sortSubstitutionCount(numbersList) {
const sortedL = numbersList.slice().sort((a, b) => a - b)
console.log(numbersList)
console.log(sortedL)
present(numbersList)
present(sortedL)
let subs = 0
for (let i = 0; i < numbersList.length; i++) {
if (numbersList[i] !== sortedL[i]) {
Expand All @@ -11,18 +11,18 @@ function sortSubstitutionCount(numbersList) {
return Math.ceil(subs/2)
}

// console.log(sortSubstitutionCount([1, 1, 1, 2, 3, 3, 4]))
// console.log(sortSubstitutionCount([1, 1, 1, 3, 2, 3, 4]))
// console.log(sortSubstitutionCount([1, 1, 1, 3, 3, 2, 4]))
// console.log(sortSubstitutionCount([1, 1, 1, 4, 3, 2, 3]))
// console.log(sortSubstitutionCount([1, 4, 1, 3, 3, 2, 1]))
// console.log(sortSubstitutionCount([4, 3, 3, 2, 1, 1, 1]))
// present(sortSubstitutionCount([1, 1, 1, 2, 3, 3, 4]))
// present(sortSubstitutionCount([1, 1, 1, 3, 2, 3, 4]))
// present(sortSubstitutionCount([1, 1, 1, 3, 3, 2, 4]))
// present(sortSubstitutionCount([1, 1, 1, 4, 3, 2, 3]))
// present(sortSubstitutionCount([1, 4, 1, 3, 3, 2, 1]))
// present(sortSubstitutionCount([4, 3, 3, 2, 1, 1, 1]))

function selfOrderCount(categoriesOrder) {
// sort by text position (second element)
const sortedL = categoriesOrder.sort((a, b) => a[1] - b[1])
console.log('Sorted by position')
console.log(sortedL)
present('Sorted by position')
present(sortedL)

// group by category (first element)
// group = [category, position, count]
Expand All @@ -46,13 +46,13 @@ function selfOrderCount(categoriesOrder) {
prev = i
}
}
console.log('Grouped by category')
console.log(JSON.parse(JSON.stringify(grouped)))
present('Grouped by category')
present(JSON.parse(JSON.stringify(grouped)))

// sort groups by position (second element)
const sortedG = grouped.sort((a, b) => a[1] - b[1])
console.log('Group sorted by position')
console.log(JSON.parse(JSON.stringify(sortedG)))
present('Group sorted by position')
present(JSON.parse(JSON.stringify(sortedG)))

// count order change to group together categories
let subs = 0
Expand All @@ -75,21 +75,113 @@ function selfOrderCount(categoriesOrder) {
}
}
}
console.log('Final group after ordering')
console.log(sortedG)
present('Final group after ordering')
present(sortedG)

return subs
}

console.log(selfOrderCount(
[[1, 10], [2, 20], [1, 20], [2, 30]]))
console.log(selfOrderCount(
[[1, 10], [2, 20], [1, 25], [2, 30]]))
console.log(selfOrderCount(
[[1, 10], [2, 20], [1, 20], [3, 20], [1, 30], [2, 30], [3, 30]]))
console.log(selfOrderCount(
[[2, 71], [2, 96], [3, 98], [2, 100], [5, 120], [5, 130], [5, 135], [3, 140], [5, 180]]))
console.log(selfOrderCount(
[[5, 135], [2, 100], [2, 71], [2, 96], [5, 130], [3, 98], [5, 180], [5, 120], [3, 140]]))
console.log(selfOrderCount(
[[2, 71], [2, 96], [3, 98], [2, 98], [5, 98], [5, 130], [5, 135], [3, 140], [5, 180]]))
// present(selfOrderCount(
// [[1, 10], [2, 20], [1, 20], [2, 30]]))
// present(selfOrderCount(
// [[1, 10], [2, 20], [1, 25], [2, 30]]))
// present(selfOrderCount(
// [[1, 10], [2, 20], [1, 20], [3, 20], [1, 30], [2, 30], [3, 30]]))
// present(selfOrderCount(
// [[2, 71], [2, 96], [3, 98], [2, 100], [5, 120], [5, 130], [5, 135], [3, 140], [5, 180]]))
// present(selfOrderCount(
// [[5, 135], [2, 100], [2, 71], [2, 96], [5, 130], [3, 98], [5, 180], [5, 120], [3, 140]]))
// present(selfOrderCount(
// [[2, 71], [2, 96], [3, 98], [2, 98], [5, 98], [5, 130], [5, 135], [3, 140], [5, 180]]))

/**
 * Category clustering calculator for free recall: computes the adjusted
 * ratio of clustering (ARC) of a recall sequence.
 * https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3665324/
 * Tested here and transferred to /editor/annotate/js/metrics.js
 *
 * @param {Array<Array>} categoriesOrder - one [category, position] pair per
 *   recalled item, in any order; several items may share the same position.
 *   The array is left untouched (a sorted copy is used internally).
 * @param {boolean} [console] - when truthy, every intermediate value is
 *   written through present(). Note that this parameter shadows the global
 *   console object inside the function.
 * @returns {number} ARC = (r - E(r)) / (max - E(r)). NaN is returned when
 *   the ratio is undefined: empty input, or max equal to E(r) (for example a
 *   single recalled category, or every item in a different category).
 */
function clusteringFreeRecall(categoriesOrder, console) {
  const n = categoriesOrder.length // number of recalled items

  // sort a copy by text position (second element) so the caller's array
  // keeps its original order (and the debug output can show the raw input)
  const sortedL = categoriesOrder.slice().sort((a, b) => a[1] - b[1])

  const nc = {} // number of recalled items in each recalled category
  let r = 0 // number of category repetitions
  for (let i = 0; i < n; i++) {
    // categories are compared through their string form, the same form
    // under which they are counted as keys of nc
    const cat = String(sortedL[i][0])
    if (!nc[cat]) {
      nc[cat] = 1
    } else {
      nc[cat]++
    }

    // items sharing the current position are not "next" items: skip them
    let nextPos = i + 1
    while (nextPos < n && sortedL[nextPos][1] === sortedL[i][1]) {
      nextPos++
    }

    // a repetition is counted once if any item located at the next distinct
    // position belongs to the same category
    for (let sp = nextPos;
      sp < n && sortedL[sp][1] === sortedL[nextPos][1]; sp++) {
      if (String(sortedL[sp][0]) === cat) {
        r++
        break
      }
    }
  }

  const c = Object.keys(nc).length // number of recalled categories
  const max = n - c // maximum possible number of category repetitions

  let er = 0 // expected number of category repetitions
  for (const cat in nc) {
    er += nc[cat] * nc[cat]
  }
  er = er / n - 1

  // adjusted ratio of clustering; undefined (NaN) when the denominator is 0
  const arc = (n === 0 || max === er) ? Number.NaN : (r - er) / (max - er)

  if (console) {
    // secondary measures are only needed for the debug report
    const rr = r / (n - 1) // ratio of repetition
    const mrr = r / max // modified ratio of repetition
    const ds = r - er // deviation score

    present('\n\n=== Clustering Free Recall ===')
    present(JSON.stringify(categoriesOrder))
    present('--- n = ' + n)
    present('--- sorted by position')
    present(JSON.stringify(sortedL))
    present('--- c = ' + c)
    present('--- ni')
    // serialized explicitly: a bare object would be shown as [object Object]
    present(JSON.stringify(nc))
    present('--- r = ' + r)
    present('--- max = ' + max)
    present('--- E(r) = ' + Math.round(er * 100) / 100)
    present('--- RR = ' + Math.round(rr * 100) / 100)
    present('--- MRR = ' + Math.round(mrr * 100) / 100)
    present('--- DS = ' + Math.round(ds * 100) / 100)
    present('--- ARC = ' + Math.round(arc * 100) / 100)
  }

  return arc
}

/**
 * Appends one line of output to the #console textarea of the page.
 *
 * Arrays and other objects are serialized with JSON.stringify so that their
 * structure stays visible (plain string concatenation would flatten nested
 * arrays and show objects as "[object Object]"); every other value is
 * converted with String().
 *
 * @param {*} output - value to show
 */
function present (output) {
  const text = (output !== null && typeof output === 'object')
    ? JSON.stringify(output)
    : String(output)
  document.querySelector('#console').value += text + '\n'
}

// Sample recall sequences ([category, position] pairs) used to exercise
// clusteringFreeRecall; each one is evaluated in verbose mode and its
// resulting value is written to the page.
const recallSamples = [
  [[2, 1], [4, 2], [4, 3], [3, 4], [2, 5], [3, 6], [1, 7], [4, 8], [4, 9]],
  [[3, 1], [4, 2], [4, 3], [3, 4], [1, 5], [1, 6], [3, 7], [1, 8], [1, 9], [2, 10], [2, 11], [2, 12], [4, 13], [4, 14], [3, 15]],
  [[2, 1], [2, 2], [3, 3], [1, 4], [1, 5], [1, 6], [1, 7], [2, 8], [3, 9], [3, 10], [2, 11], [1, 12], [4, 13], [4, 14], [4, 15], [4, 16], [2, 17], [2, 18], [3, 19], [1, 20]],
  // positions given out of order
  [[5, 135], [2, 100], [2, 71], [2, 96], [5, 130], [3, 98], [5, 180], [5, 120], [3, 140]],
  // several items sharing position 98
  [[2, 71], [2, 96], [3, 98], [2, 98], [5, 98], [5, 130], [5, 135], [3, 140], [5, 180]],
  [[2, 71], [2, 96], [3, 98], [2, 98], [5, 98], [7, 98], [5, 130], [5, 135], [3, 140], [5, 180]]
]

for (const sample of recallSamples) {
  present(clusteringFreeRecall(sample, true))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Kolb1.kb1,Kolb2.kb2,Kolb3.kb3,Kolb4.kb4,Kolb5.kb5,Kolb6.kb6,Kolb7.kb7,Kolb8.kb8,Kolb9.kb9,Kolb10.kb10,Kolb11.kb11,Kolb12.kb12
9 changes: 7 additions & 2 deletions src/adonisjs/public/report/js/report-annotations.js
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ class ReportManager {
}
const selfOrder = AnnotationMetrics.i._selfOrderCount(catOrder)

const clustering = AnnotationMetrics.i._clusteringFreeRecall(catOrder)

let o1csv = ''
let sep = ''
for (const g of selfOrder.groups) {
Expand All @@ -133,7 +135,10 @@ class ReportManager {

const ctcategories = Object.keys(catIndex).length

return `${ctcategories},${ctright},${ctinfright},${ctideas},${ctrightencap},${ctinfrightencap},${ctwrong},${ctwrongencap},${ctcategories * ctideas},${(ctideas == 0) ? 0 : ctright / ctideas},${(ctideas == 0) ? 0 : ctinfright / ctideas},${(ctideas == 0) ? 0 : (ctrightencap + ctwrongencap) / ctideas},${selfOrder.score},${(ctideas == 0) ? 0 : selfOrder.score / ctideas}${countCat},"${o1csv}","${o2csv}"`
return `${ctcategories},${ctright},${ctinfright},${ctideas},${ctrightencap},${ctinfrightencap},${ctwrong},${ctwrongencap},` +
`${ctcategories * ctideas},${(ctideas == 0) ? 0 : ctright / ctideas},${(ctideas == 0) ? 0 : ctinfright / ctideas},` +
`${(ctideas == 0) ? 0 : (ctrightencap + ctwrongencap) / ctideas},${selfOrder.score},` +
`${(ctideas == 0) ? 0 : selfOrder.score / ctideas},${clustering}${countCat},"${o1csv}","${o2csv}"`
}

async _download () {
Expand All @@ -146,7 +151,7 @@ class ReportManager {
'"used categories","right","right (inferred)","total ideas","right encapsulated",' +
'"right encapsulated (inferred)","wrong","wrong encapsulated","coverage score",' +
'"accuracy score","accuracy score (inferred)","encapsulated score","self order score",' +
'"normalized self order score"'
'"normalized self order score","clustering in free recall"'

for (const m in ReportManager.catList)
table += ',"' + ReportManager.catList[m] + '"'
Expand Down

0 comments on commit fc22216

Please sign in to comment.