Skip to content

Commit

Permalink
Add removeDocument to TFIDF
Browse files Browse the repository at this point in the history
  • Loading branch information
Hugo-ter-Doest committed Jul 6, 2024
1 parent a37074e commit ab81f74
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
19 changes: 19 additions & 0 deletions lib/natural/tfidf/tfidf.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,25 @@ class TfIdf {
}
}

// Remove a document from the corpus
// Returns true if the document was found
// Returns false if the document was not found
removeDocument(key) {
// Find the document
const index = this.documents.findIndex(function (document) {
return document.__key === key
})
// If found, remove it
if (index > -1) {
this.documents.splice(index, 1)
// Invalidate the cache
this._idfCache = Object.create(null)
return true
}

return false
}

// If restoreCache is set to true, all terms idf scores currently cached will be recomputed.
// Otherwise, the cache will just be wiped clean
addFileSync (path, encoding, key, restoreCache) {
Expand Down
14 changes: 14 additions & 0 deletions spec/tfidf_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -260,4 +260,18 @@ describe('tfidf', function () {
expect(tfidf.setStopwords(stopwords)).toEqual(false)
})
})

describe('Remove documents', function () {
  // removeDocument reports whether a document with the given key was found,
  // so removing the same key twice must succeed once and then fail.
  it('should remove a document', function () {
    tfidf = new TfIdf()

    tfidf.addDocument('this document is about node.', 0)
    tfidf.addDocument('this document isn\'t about node.', 1)

    // First removal: the document with key 0 exists
    const result1 = tfidf.removeDocument(0)
    expect(result1).toEqual(true)
    // Second removal: key 0 is already gone, so nothing is found
    const result2 = tfidf.removeDocument(0)
    expect(result2).toEqual(false)
    // The other document must not have been affected by the removals above
    const result3 = tfidf.removeDocument(1)
    expect(result3).toEqual(true)
  })
})
})

0 comments on commit ab81f74

Please sign in to comment.