Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a weighted search #122

Open
wants to merge 27 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
be3ee4f
#37 Adds a test that searches Solr using the eDisMax query parser (WIP)
extracts Nov 16, 2023
214c501
#37 Fix "call to undefined method" error
extracts Nov 16, 2023
0f387b9
#37 Keep Solr-specific terminology & syntax out of Opus\Search\Query
extracts Nov 17, 2023
105ca56
#37 Completes search test which verifies that a weighted Solr search …
extracts Nov 17, 2023
b0b22a4
#37 Default values for getters that return a boolean value must be tr…
extracts Nov 22, 2023
b290e62
#37 The weighted search test now shows that searching with boosted fi…
extracts Nov 22, 2023
abe0045
#37 Adapt tests to Opus\Search\Query->getUnion() returning either tru…
extracts Nov 22, 2023
527e42c
#37 The weighted search test now also verifies that swapping field we…
extracts Nov 23, 2023
541bc43
#37 Default to the "search.weightedSearch" & "search.simple" configur…
extracts Nov 23, 2023
c78afc7
#37 Now checks the sort order of weighted search results; moves testi…
extracts Nov 23, 2023
66c06a1
#37 Reuse test documents between weighted search tests
extracts Nov 23, 2023
c622821
#37 Fixes a namespace conflict
extracts Nov 23, 2023
42b9eb0
#37 Verify the sort order of weighted search results via the document…
extracts Nov 24, 2023
f6d7469
#37 Implements explicit getters getWeightedSearch() & getWeightedFiel…
extracts Nov 24, 2023
ad067cb
#37 Removes the weightedsearch key from the initial data array so tha…
extracts Nov 29, 2023
e0593c0
#37 Fix missing return statement in setWeightedSearch() which uses a …
extracts Nov 29, 2023
357b053
#37 Adds a weight multiplier to generate a value for the Solr "pf" re…
extracts Dec 1, 2023
54d1760
#37 Adds a test that compares the search behaviour of the standard & …
extracts Dec 1, 2023
a47e997
#37 Fix coding style
extracts Dec 1, 2023
efbba3b
#37 Replaces redundant boiler plate code with separate helper methods
extracts Dec 1, 2023
e635d6f
#37 More (and more granular) tests that test weighted search behavior
extracts Dec 1, 2023
0d51ed8
#37 Removes the catchall fields "text" & "simple" from the Solr schem…
extracts Dec 3, 2023
4577fcf
#37 When searching Solr, matches with a score of 0 are now ignored by…
extracts Dec 4, 2023
9e165b1
#37 Adapts a test that searches the author field so that it uses a we…
extracts Dec 4, 2023
8a08dff
Merge branch '4.8.1' into weightedSearch37
j3nsch May 17, 2024
6066e6a
Merge pull request #130 from OPUS4/weightedSearch37tmp
j3nsch May 17, 2024
cf8cb36
#131 Added test for advanced search
j3nsch May 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 70 additions & 10 deletions src/Query.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
namespace Opus\Search;

use InvalidArgumentException;
use Opus\Common\Config;
use Opus\Search\Config as SearchConfig;
use Opus\Search\Facet\Set;
use Opus\Search\Filter\AbstractFilterBase;
use RuntimeException;
Expand All @@ -42,6 +44,7 @@
use function array_merge;
use function array_shift;
use function array_unique;
use function boolval;
use function count;
use function ctype_digit;
use function intval;
Expand Down Expand Up @@ -74,7 +77,7 @@
* @method int getRows( int $default = null )
* @method string[] getFields( array $default = null )
* @method array getSort( array $default = null )
* @method bool getUnion( bool $default = null )
* @method bool getUnion( bool $default = false )
* @method AbstractFilterBase getFilter(AbstractFilterBase $default = null ) retrieves condition to be met by resulting documents
* @method Set getFacet( Set $default = null )
* @method $this setStart( int $offset )
Expand All @@ -86,6 +89,8 @@
* @method $this setFacet( Set $facet )
* @method $this addFields( string $fields )
* @method $this addSort( $sorting )
* @method $this setWeightedSearch( bool $isWeightedSearch )
* @method $this setWeightedFields( (int|float)[] $weightedFields ) assigns boost factors keyed by field name (e.g. [ 'title' => 10, 'abstract' => 0.5 ])
*/
class Query
{
Expand All @@ -95,14 +100,16 @@ class Query
public function reset()
{
$this->data = [
'start' => null,
'rows' => null,
'fields' => null,
'sort' => null,
'union' => null,
'filter' => null,
'facet' => null,
'subfilters' => null,
'start' => null,
'rows' => null,
'fields' => null,
'sort' => null,
'union' => false,
'filter' => null,
'facet' => null,
'subfilters' => null,
'weightedsearch' => null, // first getWeightedSearch() call will set this to true or false
extracts marked this conversation as resolved.
Show resolved Hide resolved
'weightedfields' => null,
];
}

Expand Down Expand Up @@ -184,6 +191,46 @@ protected function normalizeDirection($ascending)
return $ascending;
}

/**
 * Tells whether this query shall be run as a weighted search.
 *
 * A value that has already been assigned or resolved is returned unchanged. Otherwise the flag is
 * looked up once in the "search.weightedSearch" configuration option (converted with boolval()),
 * falls back to false when that option is not set, and is stored for subsequent calls.
 *
 * @return bool true if a weighted search shall be used, otherwise false
 */
public function getWeightedSearch()
{
    // Guard clause: nothing to resolve once a value is present.
    if ($this->data['weightedsearch'] !== null) {
        return $this->data['weightedsearch'];
    }

    $config = Config::get();

    $useWeightedSearch = isset($config->search->weightedSearch)
        ? boolval($config->search->weightedSearch)
        : false;

    // Remember the resolved value so the configuration is consulted only once per query state.
    $this->data['weightedsearch'] = $useWeightedSearch;

    return $useWeightedSearch;
}

/**
 * Returns boost factors keyed by field name (e.g. [ 'title' => 10, 'abstract' => 0.5 ]).
 *
 * A value that has already been assigned or resolved is returned unchanged. Otherwise the boost
 * factors are read once from the "search.simple" configuration section and stored for subsequent
 * calls. An empty array is used when that section is missing, or when "search.simple" is set to
 * something other than a configuration section (e.g. a plain scalar), which cannot be converted
 * into a field => weight map.
 *
 * @return array<string, int|float> boost factors keyed by field name; values read from the
 *                                  configuration are passed through as the configuration delivers them
 */
public function getWeightedFields()
{
    if ($this->data['weightedfields'] === null) {
        $config = Config::get();

        $simple = isset($config->search->simple) ? $config->search->simple : null;

        // Only a configuration section offers toArray(); calling it on a scalar option value
        // would abort with a fatal "call to a member function on string" error.
        if ($simple instanceof Zend_Config) {
            $this->data['weightedfields'] = $simple->toArray();
        } else {
            $this->data['weightedfields'] = [];
        }
    }

    return $this->data['weightedfields'];
}

/**
* Retrieves value of selected query parameter.
*
Expand Down Expand Up @@ -267,6 +314,7 @@ public function set($name, $value, $adding = false)
break;

case 'union':
case 'weightedsearch':
if ($adding) {
throw new InvalidArgumentException('invalid parameter access on ' . $name);
}
Expand Down Expand Up @@ -300,6 +348,18 @@ public function set($name, $value, $adding = false)

case 'subfilters':
throw new RuntimeException('invalid access on sub filters');

case 'weightedfields':
if ($adding) {
throw new InvalidArgumentException('invalid parameter access on ' . $name);
}

if (! is_array($value)) {
throw new InvalidArgumentException('invalid query fields option');
}

$this->data[$name] = $value;
break;
}

return $this;
Expand Down Expand Up @@ -469,7 +529,7 @@ public function getSubFilters()
*/
public static function getParameterDefault($name, $fallbackIfMissing, $oldName = null)
{
$config = Config::getDomainConfiguration();
$config = SearchConfig::getDomainConfiguration();
$defaults = $config->parameterDefaults;

if ($defaults instanceof Zend_Config) {
Expand Down
31 changes: 31 additions & 0 deletions src/Solr/Solarium/Adapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
use function file_exists;
use function filesize;
use function filter_var;
use function implode;
use function in_array;
use function intval;
use function is_array;
Expand Down Expand Up @@ -615,6 +616,19 @@ protected function applyParametersOnQuery(
$query->setSorts($sortings);
}

$isWeightedSearch = $parameters->getWeightedSearch();
if ($isWeightedSearch === true) {
// get the edismax component
$edismax = $query->getEDisMax();

// NOTE: query is now an edismax query
$weightedFields = $parameters->getWeightedFields();
if (! empty($weightedFields)) {
$queryFields = $this->getQueryFieldsString($weightedFields);
$edismax->setQueryFields($queryFields);
extracts marked this conversation as resolved.
Show resolved Hide resolved
}
}

$facet = $parameters->getFacet();
if ($facet !== null) {
$facetSet = $query->getFacetSet();
Expand Down Expand Up @@ -880,4 +894,21 @@ public function setTimeout($timeout)
$this->client->setOptions($options, true);
}
}

/**
 * Builds the value for the Solr `qf` request parameter from a map of per-field boost factors.
 *
 * Every entry becomes a "<field>^<boost>" clause; the clauses are joined by single spaces in the
 * iteration order of the given array. An empty array yields an empty string.
 *
 * @param array<string, int|float> $weightedFields boost factors keyed by field name,
 *                                                 e.g.: [ 'title' => 10, 'abstract' => 0.5 ]
 * @return string query fields string, e.g.: "title^10 abstract^0.5"
 */
protected function getQueryFieldsString($weightedFields)
{
    $clauses = [];

    foreach ($weightedFields as $fieldName => $weight) {
        $clauses[] = $fieldName . '^' . $weight;
    }

    return implode(' ', $clauses);
}
}
8 changes: 4 additions & 4 deletions test/QueryTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public function testInitiallyEmpty()
$this->assertFalse(isset($query->rows));
$this->assertFalse(isset($query->fields));
$this->assertFalse(isset($query->sort));
$this->assertFalse(isset($query->union));
$this->assertFalse($query->union);
}

public function testSupportingExplicitGetter()
Expand All @@ -60,7 +60,7 @@ public function testSupportingExplicitGetter()
$this->assertNull($query->get('rows'));
$this->assertNull($query->get('fields'));
$this->assertNull($query->get('sort'));
$this->assertNull($query->get('union'));
$this->assertFalse($query->get('union'));
}

public function testSupportingImplicitGetter()
Expand All @@ -71,7 +71,7 @@ public function testSupportingImplicitGetter()
$this->assertNull($query->rows);
$this->assertNull($query->fields);
$this->assertNull($query->sort);
$this->assertNull($query->union);
$this->assertFalse($query->union);
}

public function testSupportingGetterMethods()
Expand All @@ -82,7 +82,7 @@ public function testSupportingGetterMethods()
$this->assertNull($query->getRows());
$this->assertNull($query->getFields());
$this->assertNull($query->getSort());
$this->assertNull($query->getUnion());
$this->assertFalse($query->getUnion());
}

/**
Expand Down
113 changes: 113 additions & 0 deletions test/Solr/Solarium/AdapterSearchingTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,35 @@
use Opus\Search\Util\Searcher;
use OpusTest\Search\TestAsset\DocumentBasedTestCase;

use function abs;
use function count;

class AdapterSearchingTest extends DocumentBasedTestCase
{
/**
 * Property sets for the documents used by the weighted search tests, keyed by the name that is
 * passed to createDocument().
 *
 * The phrase "test document" occurs in the abstract of document A and in the title of document B,
 * so boosting either field decides which of the two documents ranks first.
 *
 * The abstracts are double-quoted so that "\n" is an actual line break; in a single-quoted PHP
 * string it would be stored as a literal backslash followed by the letter "n".
 *
 * @var array[]
 */
protected static $additionalDocumentPropertySets = [
    'weightedTestDocA' => [
        'TitleMain'     => [
            'Value'    => 'Some Document',
            'Language' => 'eng',
        ],
        'TitleAbstract' => [
            'Value'    => "Abstract of test document A.\nSome more text.",
            'Language' => 'eng',
        ],
    ],
    'weightedTestDocB' => [
        'TitleMain'     => [
            'Value'    => 'Another Test Document',
            'Language' => 'eng',
        ],
        'TitleAbstract' => [
            'Value'    => "Abstract of document B.\nSome blah blah text.",
            'Language' => 'eng',
        ],
    ],
];

public function testService()
{
$search = Service::selectSearchingService(null, 'solr');
Expand Down Expand Up @@ -248,4 +273,92 @@ public function testMapYearFacetIndexFieldsToYearAsset()

$this->assertEquals(1, $result->getAllMatchesCount());
}

/**
 * Verifies that the boost factors of a weighted search determine the ranking of the matches:
 * first with the weights taken from the configuration, then with swapped weights that are set
 * on the query itself.
 */
public function testWeightedSearch()
{
    $docA = $this->createDocument('weightedTestDocA');
    $docB = $this->createDocument('weightedTestDocB');

    Service::selectIndexingService(null, 'solr')->addDocumentsToIndex([$docA, $docB]);

    $searcher = Service::selectSearchingService(null, 'solr');

    $query = new Query();
    $query->addSorting('score', false);

    $termFilter = $searcher->createFilter();
    $termFilter->createSimpleEqualityFilter('*')->addValue('test document');
    $query->setFilter($termFilter);

    // Phase 1: different boost factors per field (title boosted), taken from the configuration.
    // Expect clearly different scores with document B ranked first.
    $this->adjustConfiguration([
        'search' => [
            'weightedSearch' => true, // use the Solr eDisMax query parser
            'simple'         => [
                'abstract' => 0.5,
                'title'    => 10,
            ],
        ],
    ]);

    $hits = $searcher->customSearch($query)->getReturnedMatches();

    $this->assertEquals(2, count($hits));

    $scoreGap = abs($hits[0]->getScore() - $hits[1]->getScore());
    $this->assertTrue($scoreGap > 1.0);

    $topDocumentId = $hits[0]->getDocument()->getId();
    $this->assertEquals($docB->getId(), $topDocumentId);

    // Phase 2: swapped boost factors (abstract boosted), set directly on the query.
    // Expect the sort order to be swapped as well, i.e. document A ranked first.
    $query->setWeightedFields(['abstract' => 10.0, 'title' => 0.5]);

    $hits = $searcher->customSearch($query)->getReturnedMatches();

    $this->assertEquals(2, count($hits));

    $scoreGap = abs($hits[0]->getScore() - $hits[1]->getScore());
    $this->assertTrue($scoreGap > 1.0);

    $topDocumentId = $hits[0]->getDocument()->getId();
    $this->assertEquals($docA->getId(), $topDocumentId);
}

/**
 * Verifies that a weighted search which does not favour any field (no boost factors at all, or
 * the same boost factor for every field) scores both test documents roughly the same.
 */
public function testWeightedSearchWithEqualWeights()
{
    $docA = $this->createDocument('weightedTestDocA');
    $docB = $this->createDocument('weightedTestDocB');

    Service::selectIndexingService(null, 'solr')->addDocumentsToIndex([$docA, $docB]);

    $searcher = Service::selectSearchingService(null, 'solr');

    // Weighted search is enabled on the query itself here rather than via the configuration.
    $query = new Query();
    $query->setWeightedSearch(true);

    $termFilter = $searcher->createFilter();
    $termFilter->createSimpleEqualityFilter('*')->addValue('test document');
    $query->setFilter($termFilter);

    // Phase 1: no boost factors assigned to any field, expect roughly equal scores.
    $query->setWeightedFields([]);

    $hits = $searcher->customSearch($query)->getReturnedMatches();

    $this->assertEquals(2, count($hits));

    $scoreGap = abs($hits[0]->getScore() - $hits[1]->getScore());
    $this->assertTrue($scoreGap < 1.0);

    // Phase 2: identical boost factors for both fields, expect roughly equal scores again.
    $query->setWeightedFields(['abstract' => 1.0, 'title' => 1.0]);

    $hits = $searcher->customSearch($query)->getReturnedMatches();

    $this->assertEquals(2, count($hits));

    $scoreGap = abs($hits[0]->getScore() - $hits[1]->getScore());
    $this->assertTrue($scoreGap < 1.0);
}
}
Loading