Skip to content

Commit

Permalink
Improved search results by taking into account indexed languages
Browse files Browse the repository at this point in the history
  • Loading branch information
svera authored Feb 18, 2024
1 parent 052fb4a commit 60e37be
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 32 deletions.
2 changes: 1 addition & 1 deletion internal/index/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (

// Version identifies the mapping used for indexing. Any change to the mapping requires a version
// increase, to signal that a new index needs to be created.
const Version = "v1"
const Version = "v2"

var noStopWordsFilters = map[string][]string{
es.AnalyzerName: {es.NormalizeName, lowercase.Name, es.LightStemmerName},
Expand Down
89 changes: 60 additions & 29 deletions internal/index/bleve_read.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,33 +43,42 @@ func (b *BleveIndexer) Search(keywords string, page, resultsPerPage int) (result
return b.runPaginatedQuery(qb, page, resultsPerPage)
}

compound := composeQuery(keywords)
analyzers, err := b.analyzers()
if err != nil {
return result.Paginated[[]Document]{}, err
}
compound := composeQuery(keywords, analyzers)
return b.runPaginatedQuery(compound, page, resultsPerPage)
}

func composeQuery(keywords string) *query.DisjunctionQuery {
func composeQuery(keywords string, analyzers []string) *query.DisjunctionQuery {
langCompoundQuery := bleve.NewDisjunctionQuery()

for lang := range noStopWordsFilters {
for _, analyzer := range analyzers {
noStopWordsAnalyzer := analyzer
if analyzer != defaultAnalyzer {
noStopWordsAnalyzer = analyzer + "_no_stop_words"
}

qt := bleve.NewMatchPhraseQuery(keywords)
qt.Analyzer = lang + "_no_stop_words"
qt.Analyzer = noStopWordsAnalyzer
qt.SetField("Title")
langCompoundQuery.AddQuery(qt)

qs := bleve.NewMatchQuery(keywords)
qs.Analyzer = lang + "_no_stop_words"
qs.Analyzer = noStopWordsAnalyzer
qs.SetField("Series")
qs.Operator = query.MatchQueryOperatorAnd
langCompoundQuery.AddQuery(qs)

qu := bleve.NewMatchQuery(keywords)
qu.Analyzer = lang
qu.Analyzer = analyzer
qu.SetField("Subjects")
qu.Operator = query.MatchQueryOperatorAnd
langCompoundQuery.AddQuery(qu)

qd := bleve.NewMatchQuery(keywords)
qd.Analyzer = lang
qd.Analyzer = analyzer
qd.SetField("Description")
qd.Operator = query.MatchQueryOperatorAnd
langCompoundQuery.AddQuery(qd)
Expand Down Expand Up @@ -211,28 +220,35 @@ func (b *BleveIndexer) Documents(IDs []string) (map[string]Document, error) {

// SameSubjects returns an array of metadata of documents by other authors, no two sharing an author,
// which have subjects similar to those of the passed one and do not belong to the same collection
func (b *BleveIndexer) SameSubjects(slug string, quantity int) ([]Document, error) {
doc, err := b.Document(slug)
func (b *BleveIndexer) SameSubjects(slugID string, quantity int) ([]Document, error) {
doc, err := b.Document(slugID)
if err != nil {
return []Document{}, err
}

bq := bleve.NewBooleanQuery()
subjectsCompoundQuery := bleve.NewDisjunctionQuery()

for _, subject := range doc.Subjects {
qu := bleve.NewMatchPhraseQuery(subject)
qu.SetField("Subjects")
subject = strings.ReplaceAll(slug.Make(subject), "-", "")
qu := bleve.NewTermQuery(subject)
qu.SetField("SubjectsEq")
subjectsCompoundQuery.AddQuery(qu)
}
bq := bleve.NewBooleanQuery()

series := strings.ReplaceAll(slug.Make(doc.Series), "-", "")
sq := bleve.NewTermQuery(series)
sq.SetField("SeriesEq")
bq.AddMustNot(sq)

bq.AddMust(subjectsCompoundQuery)
bq.AddMustNot(bleve.NewDocIDQuery([]string{doc.ID}))
sq := bleve.NewMatchPhraseQuery(doc.Series)
sq.SetField("Series")
bq.AddMustNot(sq)

authorsCompoundQuery := bleve.NewDisjunctionQuery()
for _, author := range doc.Authors {
qa := bleve.NewMatchPhraseQuery(author)
qa.SetField("Authors")
author = strings.ReplaceAll(slug.Make(author), "-", "")
qa := bleve.NewTermQuery(author)
qa.SetField("AuthorsEq")
authorsCompoundQuery.AddQuery(qa)
}
bq.AddMustNot(authorsCompoundQuery)
Expand All @@ -248,8 +264,9 @@ func (b *BleveIndexer) SameSubjects(slug string, quantity int) ([]Document, erro
}
res = append(res, doc[0])
for _, author := range doc[0].Authors {
qa := bleve.NewMatchPhraseQuery(author)
qa.SetField("Authors")
author = strings.ReplaceAll(slug.Make(author), "-", "")
qa := bleve.NewTermQuery(author)
qa.SetField("AuthorsEq")
authorsCompoundQuery.AddQuery(qa)
}
bq.AddMustNot(authorsCompoundQuery)
Expand All @@ -260,44 +277,58 @@ func (b *BleveIndexer) SameSubjects(slug string, quantity int) ([]Document, erro

// SameAuthors returns an array of metadata of documents by the same authors that
// do not belong to the same collection
func (b *BleveIndexer) SameAuthors(slug string, quantity int) ([]Document, error) {
doc, err := b.Document(slug)
func (b *BleveIndexer) SameAuthors(slugID string, quantity int) ([]Document, error) {
doc, err := b.Document(slugID)
if err != nil {
return []Document{}, err
}

authorsCompoundQuery := bleve.NewDisjunctionQuery()
for _, author := range doc.Authors {
qu := bleve.NewMatchPhraseQuery(author)
qu.SetField("Authors")
author = strings.ReplaceAll(slug.Make(author), "-", "")
qu := bleve.NewTermQuery(author)
qu.SetField("AuthorsEq")
authorsCompoundQuery.AddQuery(qu)
}
bq := bleve.NewBooleanQuery()
bq.AddMust(authorsCompoundQuery)
bq.AddMustNot(bleve.NewDocIDQuery([]string{doc.ID}))
sq := bleve.NewMatchPhraseQuery(doc.Series)
sq.SetField("Series")

series := strings.ReplaceAll(slug.Make(doc.Series), "-", "")
sq := bleve.NewTermQuery(series)
sq.SetField("SeriesEq")

bq.AddMustNot(sq)

return b.runQuery(bq, quantity)
}

// SameSeries returns an array of metadata of documents in the same series
func (b *BleveIndexer) SameSeries(slug string, quantity int) ([]Document, error) {
doc, err := b.Document(slug)
func (b *BleveIndexer) SameSeries(slugID string, quantity int) ([]Document, error) {
doc, err := b.Document(slugID)
if err != nil {
return []Document{}, err
}

bq := bleve.NewBooleanQuery()
bq.AddMustNot(bleve.NewDocIDQuery([]string{doc.ID}))
sq := bleve.NewMatchPhraseQuery(doc.Series)
sq.SetField("Series")
series := strings.ReplaceAll(slug.Make(doc.Series), "-", "")

sq := bleve.NewMatchPhraseQuery(series)
sq.SetField("SeriesEq")
bq.AddMust(sq)

return b.runQuery(bq, quantity)
}

// analyzers reads the value stored in the index under the internal "languages"
// key and returns its comma-separated entries.
//
// It returns an empty slice when the key holds no data, and an empty slice plus
// the underlying error when the lookup itself fails.
func (b *BleveIndexer) analyzers() ([]string, error) {
	languages, err := b.idx.GetInternal([]byte("languages"))
	if err != nil {
		return []string{}, err
	}
	if len(languages) == 0 {
		// strings.Split("", ",") yields [""], i.e. one entry with an empty name,
		// which callers would treat as a real analyzer. Report "none" instead.
		return []string{}, nil
	}
	return strings.Split(string(languages), ","), nil
}

func slicer(val interface{}) []string {
var (
terms []interface{}
Expand Down
24 changes: 24 additions & 0 deletions internal/index/bleve_write.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"log"
"os"
"path/filepath"
"slices"
"strings"

"github.com/gosimple/slug"
Expand Down Expand Up @@ -46,6 +47,7 @@ func (b *BleveIndexer) RemoveFile(file string) error {
func (b *BleveIndexer) AddLibrary(fs afero.Fs, batchSize int) error {
batch := b.idx.NewBatch()
batchSlugs := make(map[string]struct{}, batchSize)
languages := []string{}
e := afero.Walk(fs, b.libraryPath, func(fullPath string, f os.FileInfo, err error) error {
ext := strings.ToLower(filepath.Ext(fullPath))
if _, ok := b.reader[ext]; !ok {
Expand All @@ -59,6 +61,7 @@ func (b *BleveIndexer) AddLibrary(fs afero.Fs, batchSize int) error {

document := b.createDocument(meta, fullPath, batchSlugs)
batchSlugs[document.Slug] = struct{}{}
languages = addLanguage(meta.Language, languages)

err = batch.Index(document.ID, document)
if err != nil {
Expand All @@ -74,10 +77,31 @@ func (b *BleveIndexer) AddLibrary(fs afero.Fs, batchSize int) error {
return nil
})

b.idx.SetInternal([]byte("languages"), []byte(strings.Join(languages, ",")))
b.idx.Batch(batch)
return e
}

// addLanguage returns languages, extended when lang introduces an entry that is
// not recorded yet.
//
// An empty lang appends defaultAnalyzer unless it is already present. A lang
// that has an entry in noStopWordsFilters is appended unless it is already
// present. Any other lang leaves the slice unchanged.
func addLanguage(lang string, languages []string) []string {
	if lang == "" && !slices.Contains(languages, defaultAnalyzer) {
		return append(languages, defaultAnalyzer)
	}

	// Only languages with a registered filter set are tracked, and each one once.
	if _, ok := noStopWordsFilters[lang]; ok && !slices.Contains(languages, lang) {
		languages = append(languages, lang)
	}
	return languages
}

func (b *BleveIndexer) createDocument(meta metadata.Metadata, fullPath string, batchSlugs map[string]struct{}) DocumentWrite {
document := DocumentWrite{
Document: Document{
Expand Down
3 changes: 1 addition & 2 deletions internal/webserver/controller/document/detail.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ func (d *Controller) Detail(c *fiber.Ctx) error {

document, err := d.idx.Document(c.Params("slug"))
if err != nil {
fmt.Println(err)
return fiber.ErrBadRequest
return fiber.ErrNotFound
}

if _, err := os.Stat(filepath.Join(d.config.LibraryPath, document.ID)); err != nil {
Expand Down

0 comments on commit 60e37be

Please sign in to comment.