-
-
Notifications
You must be signed in to change notification settings - Fork 269
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3325 from Minnozz/author-search-vector
Rework author search
- Loading branch information
Showing
7 changed files
with
304 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Generated by Django 3.2.25 on 2024-03-20 15:15 | ||
|
||
import django.contrib.postgres.indexes | ||
from django.db import migrations | ||
import pgtrigger.compiler | ||
import pgtrigger.migrations | ||
|
||
|
||
class Migration(migrations.Migration):
    """Index the author search vector and keep it up to date with a trigger.

    Adds a GIN index on ``Author.search_vector``, installs a BEFORE trigger on
    ``bookwyrm_author`` that rebuilds the vector from ``name`` (weight A) and
    ``aliases`` (weight B), and finally touches every row so the trigger
    fills in the vector for authors that already exist.
    """

    dependencies = [
        ("bookwyrm", "0196_merge_pr3134_into_main"),
    ]

    operations = [
        # GIN index on the search vector column.
        migrations.AddIndex(
            model_name="author",
            index=django.contrib.postgres.indexes.GinIndex(
                name="bookwyrm_au_search__b050a8_gin",
                fields=["search_vector"],
            ),
        ),
        # Rebuild search_vector whenever a row is inserted, or when name,
        # aliases or search_vector itself is updated.
        pgtrigger.migrations.AddTrigger(
            model_name="author",
            trigger=pgtrigger.compiler.Trigger(
                name="update_search_vector_on_author_edit",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    table="bookwyrm_author",
                    when="BEFORE",
                    operation='INSERT OR UPDATE OF "name", "aliases", "search_vector"',
                    # The literal is only split for readability; the
                    # concatenated SQL is unchanged.
                    func=(
                        "new.search_vector := "
                        "setweight(to_tsvector('simple', new.name), 'A') || "
                        "setweight(to_tsvector('simple', "
                        "coalesce(array_to_string(new.aliases, ' '), '')), 'B');"
                        "RETURN NEW;"
                    ),
                    hash="b97919016236d74d0ade51a0769a173ea269da64",
                    pgid="pgtrigger_update_search_vector_on_author_edit_c61cb",
                ),
            ),
        ),
        # Calculate search vector for all Authors: writing to search_vector
        # is one of the updates the trigger above is declared on, so every
        # row gets its vector rebuilt.
        migrations.RunSQL(
            sql="UPDATE bookwyrm_author SET search_vector = NULL;",
            reverse_sql="UPDATE bookwyrm_author SET search_vector = NULL;",
        ),
    ]
57 changes: 57 additions & 0 deletions
57
bookwyrm/migrations/0198_book_search_vector_author_aliases.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# Generated by Django 3.2.25 on 2024-03-20 15:52 | ||
|
||
from django.db import migrations | ||
import pgtrigger.compiler | ||
import pgtrigger.migrations | ||
|
||
|
||
class Migration(migrations.Migration):
    """Include author aliases in the book search vector.

    Replaces the author-side and book-side triggers so that a book's search
    vector is built from title (A), subtitle (B), author names plus aliases
    (C) and series (D), then touches every book so the new trigger
    recalculates the stored vectors.
    """

    dependencies = [
        ("bookwyrm", "0197_author_search_vector"),
    ]

    operations = [
        # Drop the triggers that are replaced further down.
        pgtrigger.migrations.RemoveTrigger(
            model_name="author",
            name="reset_search_vector_on_author_edit",
        ),
        pgtrigger.migrations.RemoveTrigger(
            model_name="book",
            name="update_search_vector_on_book_edit",
        ),
        # After an author's name or aliases change, blank the search vector
        # of every book linked to that author. That write is an update of
        # search_vector, which the book trigger below is declared on.
        pgtrigger.migrations.AddTrigger(
            model_name="author",
            trigger=pgtrigger.compiler.Trigger(
                name="reset_book_search_vector_on_author_edit",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    table="bookwyrm_author",
                    when="AFTER",
                    operation='UPDATE OF "name", "aliases"',
                    # Split for readability only; the concatenated SQL is
                    # unchanged.
                    func=(
                        "WITH updated_books AS ("
                        "SELECT book_id FROM bookwyrm_book_authors WHERE author_id = new.id "
                        ") UPDATE bookwyrm_book SET search_vector = '' "
                        "FROM updated_books WHERE id = updated_books.book_id;"
                        "RETURN NEW;"
                    ),
                    hash="68422c0f29879c5802b82159dde45297eff53e73",
                    pgid="pgtrigger_reset_book_search_vector_on_author_edit_a50c7",
                ),
            ),
        ),
        # Build the book search vector before insert, or before an update of
        # title, subtitle, series or search_vector.
        pgtrigger.migrations.AddTrigger(
            model_name="book",
            trigger=pgtrigger.compiler.Trigger(
                name="update_search_vector_on_book_edit",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    table="bookwyrm_book",
                    when="BEFORE",
                    operation='INSERT OR UPDATE OF "title", "subtitle", "series", "search_vector"',
                    # Split for readability only; the concatenated SQL is
                    # unchanged.
                    func=(
                        "WITH author_names AS ("
                        "SELECT array_to_string(bookwyrm_author.name || bookwyrm_author.aliases, ' ') "
                        "AS name_and_aliases FROM bookwyrm_author "
                        "LEFT JOIN bookwyrm_book_authors "
                        "ON bookwyrm_author.id = bookwyrm_book_authors.author_id "
                        "WHERE bookwyrm_book_authors.book_id = new.id "
                        ") SELECT "
                        "setweight(coalesce(nullif(to_tsvector('english', new.title), ''), "
                        "to_tsvector('simple', new.title)), 'A') || "
                        "setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') || "
                        "(SELECT setweight(to_tsvector('simple', "
                        "coalesce(array_to_string(array_agg(name_and_aliases), ' '), '')), 'C') "
                        "FROM author_names) || "
                        "setweight(to_tsvector('english', coalesce(new.series, '')), 'D') "
                        "INTO new.search_vector;"
                        "RETURN NEW;"
                    ),
                    hash="9324f5ca76a6f5e63931881d62d11da11f595b2c",
                    pgid="pgtrigger_update_search_vector_on_book_edit_bec58",
                ),
            ),
        ),
        # Recalculate search vector for all Books because it now includes
        # Author aliases.
        migrations.RunSQL(
            sql="UPDATE bookwyrm_book SET search_vector = NULL;",
            reverse_sql="UPDATE bookwyrm_book SET search_vector = NULL;",
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
""" test searching for authors """ | ||
from django.test import TestCase | ||
|
||
from django.contrib.postgres.search import SearchRank, SearchQuery | ||
from django.db.models import F | ||
|
||
from bookwyrm import models | ||
|
||
|
||
class AuthorSearch(TestCase):
    """look for some authors"""

    @classmethod
    def setUpTestData(cls):
        """create two authors; "Alice" is a name for one and an alias for the other"""
        cls.bob = models.Author.objects.create(
            name="Bob", aliases=["Robertus", "Alice"]
        )
        cls.alice = models.Author.objects.create(name="Alice")

    def test_search(self):
        """search for an author in the db"""
        results = self._search("Bob")
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0], self.bob)

    def test_alias_priority(self):
        """aliases should be included, with lower priority than name"""
        results = self._search("Alice")
        self.assertEqual(len(results), 2)
        # The author actually named Alice outranks the one with the alias...
        self.assertEqual(results[0], self.alice)
        # ...and the alias match is still returned, ranked below it.
        self.assertEqual(results[1], self.bob)

    def _search_first(self, query):
        """wrapper around _search that returns only the best match (or None)"""
        return self._search(query, return_first=True)

    @staticmethod
    def _search(query, *, return_first=False):
        """author search

        Matches ``query`` against ``Author.search_vector`` using the "simple"
        text search config and orders the hits by descending rank.

        Returns the ordered queryset, or only its first element (``None``
        when nothing matches) if ``return_first`` is true.
        """
        search_query = SearchQuery(query, config="simple")
        min_confidence = 0

        results = (
            models.Author.objects.filter(search_vector=search_query)
            .annotate(rank=SearchRank(F("search_vector"), search_query))
            .filter(rank__gt=min_confidence)
            .order_by("-rank")
        )
        if return_first:
            return results.first()
        return results
|
||
|
||
class SearchVectorTest(TestCase):
    """verify the search_vector stored for an author"""

    def test_search_vector_simple(self):
        """a name alone yields a single weight-A lexeme"""
        mary = self._create_author("Mary")
        self.assertEqual(mary.search_vector, "'mary':1A")

    def test_search_vector_aliases(self):
        """aliases are added after the name, with weight B"""
        mary = self._create_author("Mary", aliases=["Maria", "Example"])
        expected = "'example':3B 'maria':2B 'mary':1A"
        self.assertEqual(mary.search_vector, expected)

    def test_search_vector_parse_author(self):
        """name and aliases are kept verbatim: no stemming, no stop words"""
        writer = self._create_author("Writes", aliases=["Reads"])
        expected = "'reads':2B 'writes':1A"
        self.assertEqual(writer.search_vector, expected)

    def test_search_vector_on_update(self):
        """renaming an author must refresh the stored search_vector"""
        renamed = self._create_author("Mary")
        self.assertEqual(renamed.search_vector, "'mary':1A")

        renamed.name = "Example"
        renamed.save(broadcast=False)
        # Reload so the value set in the database is visible on the instance.
        renamed.refresh_from_db()
        self.assertEqual(renamed.search_vector, "'example':1A")

    @staticmethod
    def _create_author(name, /, *, aliases=None):
        """create an author and return it as reloaded from the database"""
        created = models.Author.objects.create(name=name, aliases=aliases or [])
        # Reload so search_vector reflects what the database stored.
        created.refresh_from_db()
        return created
Oops, something went wrong.