From 80458e24bd94357ccc6178ac4ce0ecde8c6d47c1 Mon Sep 17 00:00:00 2001 From: mikiher Date: Thu, 30 Nov 2023 21:15:25 +0200 Subject: [PATCH 1/5] "[un]abridged" in title candidate generation --- server/finders/BookFinder.js | 1 + test/server/finders/BookFinder.test.js | 2 ++ 2 files changed, 3 insertions(+) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index 7d26b6bfcf..0c5f32d263 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -167,6 +167,7 @@ class BookFinder { [/ (2nd|3rd|\d+th)\s+ed(\.|ition)?/g, ''], // Remove edition [/(^| |\.)(m4b|m4a|mp3)( |$)/g, ''], // Remove file-type [/ a novel.*$/g, ''], // Remove "a novel" + [/(^| )(un)?abridged( |$)/g, ' '], // Remove "unabridged/abridged" [/^\d+ | \d+$/g, ''], // Remove preceding/trailing numbers ] diff --git a/test/server/finders/BookFinder.test.js b/test/server/finders/BookFinder.test.js index 2728f174a6..ed2442c64f 100644 --- a/test/server/finders/BookFinder.test.js +++ b/test/server/finders/BookFinder.test.js @@ -35,6 +35,8 @@ describe('TitleCandidates', () => { ['adds candidate + variant, removing edition 2', 'anna karenina 4th ed.', ['anna karenina', 'anna karenina 4th ed.']], ['adds candidate + variant, removing fie type', 'anna karenina.mp3', ['anna karenina', 'anna karenina.mp3']], ['adds candidate + variant, removing "a novel"', 'anna karenina a novel', ['anna karenina', 'anna karenina a novel']], + ['adds candidate + variant, removing "abridged"', 'abridged anna karenina', ['anna karenina', 'abridged anna karenina']], + ['adds candidate + variant, removing "unabridged"', 'anna karenina unabridged', ['anna karenina', 'anna karenina unabridged']], ['adds candidate + variant, removing preceding/trailing numbers', '1 anna karenina 2', ['anna karenina', '1 anna karenina 2']], ['does not add empty candidate', '', []], ['does not add spaces-only candidate', ' ', []], From 8ac0ce399f8dbe5082fb933c2510423b1251ab8a Mon Sep 17 00:00:00 2001 From: mikiher Date: Thu, 30 Nov 2023 21:17:13 +0200 Subject: [PATCH 2/5] Remove "et al[.]" in author cleanup --- server/finders/BookFinder.js | 2 ++ test/server/finders/BookFinder.test.js | 1 + 2 files changed, 3 insertions(+) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index 0c5f32d263..4422fa9864 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -462,6 +462,8 @@ function cleanAuthorForCompares(author) { cleanAuthor = cleanAuthor.replace(/([a-z])\.([a-z])/g, '$1. $2') // remove middle initials cleanAuthor = cleanAuthor.replace(/(?<=\w\w)(\s+[a-z]\.?)+(?=\s+\w\w)/g, '') + // remove et al. + cleanAuthor = cleanAuthor.replace(/et al\.?/g, '') return cleanAuthor } diff --git a/test/server/finders/BookFinder.test.js b/test/server/finders/BookFinder.test.js index ed2442c64f..5d28bbea25 100644 --- a/test/server/finders/BookFinder.test.js +++ b/test/server/finders/BookFinder.test.js @@ -111,6 +111,7 @@ describe('AuthorCandidates', () => { ['adds recognized author if edit distance from candidate is small', 'nicolai gogol', ['nikolai gogol']], ['does not add candidate if edit distance from any recognized author is large', 'nikolai google', []], ['adds normalized recognized candidate (contains redundant spaces)', 'nikolai gogol', ['nikolai gogol']], + ['adds normalized recognized candidate (et al removed)', 'nikolai gogol et al.', ['nikolai gogol']], ['adds normalized recognized candidate (normalized initials)', 'j.k. rowling', ['j. k. rowling']], ].forEach(([name, author, expected]) => it(name, async () => { authorCandidates.add(author) From 281de48ed4b0bc67d48e7a8ec1f85e4dbdeaa247 Mon Sep 17 00:00:00 2001 From: mikiher Date: Thu, 30 Nov 2023 21:49:24 +0200 Subject: [PATCH 3/5] Fix "et al" cleanup --- server/finders/BookFinder.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index 4422fa9864..b76b8b1d51 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -463,7 +463,7 @@ function cleanAuthorForCompares(author) { // remove middle initials cleanAuthor = cleanAuthor.replace(/(?<=\w\w)(\s+[a-z]\.?)+(?=\s+\w\w)/g, '') // remove et al. - cleanAuthor = cleanAuthor.replace(/et al\.?/g, '') + cleanAuthor = cleanAuthor.replace(/ et al\.?(?= |$)/g, '') return cleanAuthor } From 0282a0521b8466c0af521f017c5a16dd8fcdfa8a Mon Sep 17 00:00:00 2001 From: mikiher Date: Sat, 9 Dec 2023 00:33:06 +0200 Subject: [PATCH 4/5] Sort audible match results by duration difference --- client/components/modals/item/tabs/Match.vue | 1 + server/controllers/SearchController.js | 5 +- server/finders/BookFinder.js | 26 ++++++++-- server/scanner/Scanner.js | 2 +- test/server/finders/BookFinder.test.js | 54 ++++++++++++++++---- 5 files changed, 70 insertions(+), 18 deletions(-) diff --git a/client/components/modals/item/tabs/Match.vue b/client/components/modals/item/tabs/Match.vue index 1c682919fa..b57e96122c 100644 --- a/client/components/modals/item/tabs/Match.vue +++ b/client/components/modals/item/tabs/Match.vue @@ -332,6 +332,7 @@ export default { if (this.isPodcast) return `term=${encodeURIComponent(this.searchTitle)}` var searchQuery = `provider=${this.provider}&fallbackTitleOnly=1&title=${encodeURIComponent(this.searchTitle)}` if (this.searchAuthor) searchQuery += `&author=${encodeURIComponent(this.searchAuthor)}` + if (this.libraryItemId) searchQuery += `&id=${this.libraryItemId}` return searchQuery }, submitSearch() { diff --git a/server/controllers/SearchController.js b/server/controllers/SearchController.js index 93587bc4fe..e52e697348 100644 --- a/server/controllers/SearchController.js +++ b/server/controllers/SearchController.js @@ -3,15 +3,18 @@ const BookFinder = require('../finders/BookFinder') const PodcastFinder = require('../finders/PodcastFinder') const AuthorFinder = require('../finders/AuthorFinder') const MusicFinder = require('../finders/MusicFinder') +const Database = require("../Database") class SearchController { constructor() { } async findBooks(req, res) { + const id = req.query.id + const libraryItem = await Database.libraryItemModel.getOldById(id) const provider = req.query.provider || 'google' const title = req.query.title || '' const author = req.query.author || '' - const results = await BookFinder.search(provider, title, author) + const results = await BookFinder.search(libraryItem, provider, title, author) res.json(results) } diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index b76b8b1d51..8704a96479 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -299,6 +299,7 @@ class BookFinder { /** * Search for books including fuzzy searches * + * @param {Object} libraryItem * @param {string} provider * @param {string} title * @param {string} author @@ -307,7 +308,7 @@ class BookFinder { * @param {{titleDistance:number, authorDistance:number, maxFuzzySearches:number}} options * @returns {Promise} */ - async search(provider, title, author, isbn, asin, options = {}) { + async search(libraryItem, provider, title, author, isbn, asin, options = {}) { let books = [] const maxTitleDistance = !isNaN(options.titleDistance) ? Number(options.titleDistance) : 4 const maxAuthorDistance = !isNaN(options.authorDistance) ? Number(options.authorDistance) : 4 @@ -336,6 +337,7 @@ class BookFinder { for (const titlePart of titleParts) authorCandidates.add(titlePart) authorCandidates = await authorCandidates.getCandidates() + loop_author: for (const authorCandidate of authorCandidates) { let titleCandidates = new BookFinder.TitleCandidates(authorCandidate) for (const titlePart of titleParts) @@ -343,13 +345,27 @@ class BookFinder { titleCandidates = titleCandidates.getCandidates() for (const titleCandidate of titleCandidates) { if (titleCandidate == title && authorCandidate == author) continue // We already tried this - if (++numFuzzySearches > maxFuzzySearches) return books + if (++numFuzzySearches > maxFuzzySearches) break loop_author books = await this.runSearch(titleCandidate, authorCandidate, provider, asin, maxTitleDistance, maxAuthorDistance) - if (books.length) return books + if (books.length) break loop_author } } } + if (books.length) { + const resultsHaveDuration = provider.startsWith('audible') + if (resultsHaveDuration && libraryItem && libraryItem.media?.duration) { + const libraryItemDurationMinutes = libraryItem.media.duration/60 + // If provider results have duration, sort by ascendinge duration difference from libraryItem + books.sort((a, b) => { + const aDuration = a.duration || Number.POSITIVE_INFINITY + const bDuration = b.duration || Number.POSITIVE_INFINITY + const aDurationDiff = Math.abs(aDuration - libraryItemDurationMinutes) + const bDurationDiff = Math.abs(bDuration - libraryItemDurationMinutes) + return aDurationDiff - bDurationDiff + }) + } + } return books } @@ -393,12 +409,12 @@ class BookFinder { if (provider === 'all') { for (const providerString of this.providers) { - const providerResults = await this.search(providerString, title, author, options) + const providerResults = await this.search(null, providerString, title, author, options) Logger.debug(`[BookFinder] Found ${providerResults.length} covers from ${providerString}`) searchResults.push(...providerResults) } } else { - searchResults = await this.search(provider, title, author, options) + searchResults = await this.search(null, provider, title, author, options) } Logger.debug(`[BookFinder] FindCovers search results: ${searchResults.length}`) diff --git a/server/scanner/Scanner.js b/server/scanner/Scanner.js index 616baf2953..040053e418 100644 --- a/server/scanner/Scanner.js +++ b/server/scanner/Scanner.js @@ -37,7 +37,7 @@ class Scanner { var searchISBN = options.isbn || libraryItem.media.metadata.isbn var searchASIN = options.asin || libraryItem.media.metadata.asin - var results = await BookFinder.search(provider, searchTitle, searchAuthor, searchISBN, searchASIN, { maxFuzzySearches: 2 }) + var results = await BookFinder.search(libraryItem, provider, searchTitle, searchAuthor, searchISBN, searchASIN, { maxFuzzySearches: 2 }) if (!results.length) { return { warning: `No ${provider} match found` diff --git a/test/server/finders/BookFinder.test.js b/test/server/finders/BookFinder.test.js index 5d28bbea25..03f81f124c 100644 --- a/test/server/finders/BookFinder.test.js +++ b/test/server/finders/BookFinder.test.js @@ -225,14 +225,14 @@ describe('search', () => { describe('search title is empty', () => { it('returns empty result', async () => { - expect(await bookFinder.search('', '', a)).to.deep.equal([]) + expect(await bookFinder.search(null, '', '', a)).to.deep.equal([]) sinon.assert.callCount(bookFinder.runSearch, 0) }) }) describe('search title is a recognized title and search author is a recognized author', () => { it('returns non-empty result (no fuzzy searches)', async () => { - expect(await bookFinder.search('', t, a)).to.deep.equal(r) + expect(await bookFinder.search(null, '', t, a)).to.deep.equal(r) sinon.assert.callCount(bookFinder.runSearch, 1) }) }) @@ -254,7 +254,7 @@ describe('search', () => { [`2022_${t}_HQ`], ].forEach(([searchTitle]) => { it(`search('${searchTitle}', '${a}') returns non-empty result (with 1 fuzzy search)`, async () => { - expect(await bookFinder.search('', searchTitle, a)).to.deep.equal(r) + expect(await bookFinder.search(null, '', searchTitle, a)).to.deep.equal(r) sinon.assert.callCount(bookFinder.runSearch, 2) }) }); @@ -264,7 +264,7 @@ describe('search', () => { [`${a} - series 01 - ${t}`], ].forEach(([searchTitle]) => { it(`search('${searchTitle}', '${a}') returns non-empty result (with 2 fuzzy searches)`, async () => { - expect(await bookFinder.search('', searchTitle, a)).to.deep.equal(r) + expect(await bookFinder.search(null, '', searchTitle, a)).to.deep.equal(r) sinon.assert.callCount(bookFinder.runSearch, 3) }) }); @@ -274,7 +274,7 @@ describe('search', () => { [`${t} junk`], ].forEach(([searchTitle]) => { it(`search('${searchTitle}', '${a}') returns an empty result`, async () => { - expect(await bookFinder.search('', searchTitle, a)).to.deep.equal([]) + expect(await bookFinder.search(null, '', searchTitle, a)).to.deep.equal([]) }) }) @@ -283,7 +283,7 @@ describe('search', () => { [`${t} - ${a}`], ].forEach(([searchTitle]) => { it(`search('${searchTitle}', '${a}') returns an empty result (with no fuzzy searches)`, async () => { - expect(await bookFinder.search('', searchTitle, a, null, null, { maxFuzzySearches: 0 })).to.deep.equal([]) + expect(await bookFinder.search(null, '', searchTitle, a, null, null, { maxFuzzySearches: 0 })).to.deep.equal([]) sinon.assert.callCount(bookFinder.runSearch, 1) }) }) @@ -295,7 +295,7 @@ describe('search', () => { [`${a} - series 01 - ${t}`], ].forEach(([searchTitle]) => { it(`search('${searchTitle}', '${a}') returns an empty result (1 fuzzy search)`, async () => { - expect(await bookFinder.search('', searchTitle, a, null, null, { maxFuzzySearches: 1 })).to.deep.equal([]) + expect(await bookFinder.search(null, '', searchTitle, a, null, null, { maxFuzzySearches: 1 })).to.deep.equal([]) sinon.assert.callCount(bookFinder.runSearch, 2) }) }) @@ -308,7 +308,7 @@ describe('search', () => { [`${a} - ${t}`], ].forEach(([searchTitle]) => { it(`search('${searchTitle}', '') returns a non-empty result (1 fuzzy search)`, async () => { - expect(await bookFinder.search('', searchTitle, '')).to.deep.equal(r) + expect(await bookFinder.search(null, '', searchTitle, '')).to.deep.equal(r) sinon.assert.callCount(bookFinder.runSearch, 2) }) }); @@ -319,7 +319,7 @@ describe('search', () => { [`${u} - ${t}`] ].forEach(([searchTitle]) => { it(`search('${searchTitle}', '') returns an empty result`, async () => { - expect(await bookFinder.search('', searchTitle, '')).to.deep.equal([]) + expect(await bookFinder.search(null, '', searchTitle, '')).to.deep.equal([]) }) }) }) @@ -330,7 +330,7 @@ describe('search', () => { [`${u} - ${t}`] ].forEach(([searchTitle]) => { it(`search('${searchTitle}', '${u}') returns a non-empty result (1 fuzzy search)`, async () => { - expect(await bookFinder.search('', searchTitle, u)).to.deep.equal(r) + expect(await bookFinder.search(null, '', searchTitle, u)).to.deep.equal(r) sinon.assert.callCount(bookFinder.runSearch, 2) }) }); @@ -339,9 +339,41 @@ describe('search', () => { [`${t}`] ].forEach(([searchTitle]) => { it(`search('${searchTitle}', '${u}') returns a non-empty result (no fuzzy search)`, async () => { - expect(await bookFinder.search('', searchTitle, u)).to.deep.equal(r) + expect(await bookFinder.search(null, '', searchTitle, u)).to.deep.equal(r) sinon.assert.callCount(bookFinder.runSearch, 1) }) }) }) + + describe('search provider results have duration', () => { + const libraryItem = { media: { duration: 60 * 1000 } } + const provider = 'audible' + const unsorted = [{ duration: 3000 }, { duration: 2000 }, { duration: 1000 }, { duration: 500 }] + const sorted = [{ duration: 1000 }, { duration: 500 }, { duration: 2000 }, { duration: 3000 }] + runSearchStub.withArgs(t, a, provider).resolves(unsorted) + + it('returns results sorted by library item duration diff', async () => { + expect(await bookFinder.search(libraryItem, provider, t, a)).to.deep.equal(sorted) + }) + + it('returns unsorted results if library item is null', async () => { + expect(await bookFinder.search(null, provider, t, a)).to.deep.equal(unsorted) + }) + + it('returns unsorted results if library item duration is undefined', async () => { + expect(await bookFinder.search({ media: {} }, provider, t, a)).to.deep.equal(unsorted) + }) + + it('returns unsorted results if library item media is undefined', async () => { + expect(await bookFinder.search({ }, provider, t, a)).to.deep.equal(unsorted) + }) + + it ('should return a result last if it has no duration', async () => { + const unsorted = [{}, { duration: 3000 }, { duration: 2000 }, { duration: 1000 }, { duration: 500 }] + const sorted = [{ duration: 1000 }, { duration: 500 }, { duration: 2000 }, { duration: 3000 }, {}] + runSearchStub.withArgs(t, a, provider).resolves(unsorted) + + expect(await bookFinder.search(libraryItem, provider, t, a)).to.deep.equal(sorted) + }) + }) }) From b580a23e7e05818a3d7ba38abf40f3450852eece Mon Sep 17 00:00:00 2001 From: advplyr Date: Sun, 10 Dec 2023 10:35:21 -0600 Subject: [PATCH 5/5] BookFinder formatting update --- server/finders/BookFinder.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index 8704a96479..466c8701f7 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -354,8 +354,8 @@ class BookFinder { if (books.length) { const resultsHaveDuration = provider.startsWith('audible') - if (resultsHaveDuration && libraryItem && libraryItem.media?.duration) { - const libraryItemDurationMinutes = libraryItem.media.duration/60 + if (resultsHaveDuration && libraryItem?.media?.duration) { + const libraryItemDurationMinutes = libraryItem.media.duration / 60 // If provider results have duration, sort by ascendinge duration difference from libraryItem books.sort((a, b) => { const aDuration = a.duration || Number.POSITIVE_INFINITY @@ -472,7 +472,7 @@ function cleanTitleForCompares(title) { function cleanAuthorForCompares(author) { if (!author) return '' author = stripRedundantSpaces(author) - + let cleanAuthor = replaceAccentedChars(author).toLowerCase() // separate initials cleanAuthor = cleanAuthor.replace(/([a-z])\.([a-z])/g, '$1. $2')