Skip to content

Commit

Permalink
Comment search process
Browse files Browse the repository at this point in the history
  • Loading branch information
Hlavtox committed Sep 10, 2024
1 parent 2937cae commit ad29ac3
Showing 1 changed file with 96 additions and 18 deletions.
114 changes: 96 additions & 18 deletions classes/Search.php
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,21 @@ public static function sanitize($string, $id_lang, $indexation = false, $iso_cod
return $string;
}

/**
* The holy method to search for products.
*
* @param int $id_lang Language identifier
* @param string $expr Search expression
* @param int $page_number Start from page
* @param int $page_size Number of products to return
* @param $order_by
* @param $order_way
* @param bool $ajax Specifies the return structure of data
* @param bool $use_cookie unused
* @param Context $context Context to use when searching data. Current context will be used if missing.
*
* @return array|bool search results returned in certain structure, depending on $ajax parameter
*/
public static function find(
$id_lang,
$expr,
Expand All @@ -268,38 +283,62 @@ public static function find(
$context = Context::getContext();
}

// Get database instance to use
$db = Db::getInstance(_PS_USE_SQL_SLAVE_);

// TODO : smart page management
if ($page_number < 1) {
// Initialize pagination if nonsense was passed
if (empty($page_number)) {
$page_number = 1;
}
if ($page_size < 1) {
if (empty($page_size)) {
$page_size = 1;
}

// Initialize and validate sorting
if (!Validate::isOrderBy($order_by) || !Validate::isOrderWay($order_way)) {
return false;
}

$scoreArray = [];
/*
* Variables related to fuzzy search.
*
* $psFuzzySearch to see if fuzzy search is enabled.
* $fuzzyMaxLoop configuration to limit how many times we try to fuzzy search for each word.
* $fuzzyLoop to track how many times we tried to fuzzy search, so we can break the loop.
*/
$fuzzyLoop = 0;
$wordCnt = 0;
$eligibleProducts2Full = [];
$expressions = explode(';', $expr);
$fuzzyMaxLoop = (int) Configuration::get('PS_SEARCH_FUZZY_MAX_LOOP');
$psFuzzySearch = (int) Configuration::get('PS_SEARCH_FUZZY');

// Score array to keep track of words we will get weights for (for relevance)
$scoreArray = [];

// Word count to track how many words we got for given expression
$wordCnt = 0;

// Final resulting array with product IDs found
$foundProductIds = [];

// Expressions to search for. If user passes search expressions separated with semicolon, they will be treated separately
$expressions = explode(';', $expr);

// Minimal word length configuration, so we don't search for extremely short words
$psSearchMinWordLength = (int) Configuration::get('PS_SEARCH_MINWORDLEN');

// Ok, now let's go through each expression. It's usually only one.
foreach ($expressions as $expression) {
$eligibleProducts2 = null;
$productIdsFoundForCurrentExpression = null;

// Get all words from current expression
$words = Search::extractKeyWords($expression, $id_lang, false, $context->language->iso_code);
foreach ($words as $key => $word) {
// Skip all empty words or shorter than our limit
if (empty($word) || strlen($word) < $psSearchMinWordLength) {
unset($words[$key]);
continue;
}

$sql_param_search = self::getSearchParamFromWord($word);
// We prepare a basic part of SQL query that we will be searching
$sql = 'SELECT DISTINCT si.id_product ' .
'FROM ' . _DB_PREFIX_ . 'search_word sw ' .
'LEFT JOIN ' . _DB_PREFIX_ . 'search_index si ON sw.id_word = si.id_word ' .
Expand All @@ -311,6 +350,17 @@ public static function find(
'AND product_shop.indexed = 1 ' .
'AND sw.word LIKE ';

/*
* Now, find all products from the index, that have this keyword.
* We start with the word itself wrapped in %%, coming from getSearchParamFromWord.
*
* If we don't find anything, we will leverage the Levenshtein algorithm to find the closest keyword
* via findClosestWeightestWord method.
*
* We will keep searching with different expressions, until we find something
* or we exceed our fuzzy search limit.
*/
$sql_param_search = self::getSearchParamFromWord($word);
while (!($result = $db->executeS($sql . "'" . $sql_param_search . "';", true, false))) {
if (!$psFuzzySearch
|| $fuzzyLoop++ > $fuzzyMaxLoop
Expand All @@ -320,32 +370,52 @@ public static function find(
}
}

// If nothing was found after X retries, skip this keyword
if (!$result) {
unset($words[$key]);
continue;
}

$productIds = array_column($result, 'id_product');
if ($eligibleProducts2 === null) {
$eligibleProducts2 = $productIds;
/*
* Extremely important step that someone broke in the past.
* Now, if we found something, we need to intersect it with the previously found products.
* If we search for "Red car", we want to get products that contain "red" AND contain "car".
* Somebody broke it before and it found all things "car" and all things "red".
*/
$productIdsFoundForCurrentWord = array_column($result, 'id_product');
if ($productIdsFoundForCurrentExpression === null) {
$productIdsFoundForCurrentExpression = $productIdsFoundForCurrentWord;
} else {
$eligibleProducts2 = array_intersect($eligibleProducts2, $productIds);
$productIdsFoundForCurrentExpression = array_intersect($productIdsFoundForCurrentExpression, $productIdsFoundForCurrentWord);
}

// Add the expression to our score array, so we can later calculate the relevance
$scoreArray[] = 'sw.word LIKE \'' . $sql_param_search . '\'';
}
$wordCnt += count($words);
if ($eligibleProducts2) {
$eligibleProducts2Full = array_merge($eligibleProducts2Full, $eligibleProducts2);
if ($productIdsFoundForCurrentExpression) {
$foundProductIds = array_merge($foundProductIds, $productIdsFoundForCurrentExpression);
}
}

$eligibleProducts2Full = array_unique($eligibleProducts2Full);
// Remove all duplicates from product IDs
$foundProductIds = array_unique($foundProductIds);

if (!$wordCnt || !count($eligibleProducts2Full)) {
// If we didn't end up with anything, we can immediately return an empty response.
// No sense in calculating weights of nothing.
if (!$wordCnt || !count($foundProductIds)) {
return $ajax ? [] : ['total' => 0, 'result' => []];
}

/*
* Now we have a list of randomly ordered product IDs for our search,
* but we don't yet know whether they are active or should be displayed at all.
*/

/*
* This is a subquery that selects weight for each keyword.
* This is used as "relevance" sort order.
*/
$sqlScore = '';
if (!empty($scoreArray) && is_array($scoreArray)) {
$sqlScore = ',( ' .
Expand All @@ -365,6 +435,7 @@ public static function find(
$sqlGroups = 'AND cg.`id_group` ' . (count($groups) ? 'IN (' . implode(',', $groups) . ')' : '=' . (int) Group::getCurrent()->id);
}

// Select products from the list of IDs that should be displayed and can be returned.
$results = $db->executeS(
'SELECT DISTINCT cp.`id_product` ' .
'FROM `' . _DB_PREFIX_ . 'category_product` cp ' .
Expand All @@ -376,20 +447,27 @@ public static function find(
'AND product_shop.`active` = 1 ' .
'AND product_shop.`visibility` IN ("both", "search") ' .
'AND product_shop.indexed = 1 ' .
'AND cp.id_product IN (' . implode(',', $eligibleProducts2Full) . ')' . $sqlGroups,
'AND cp.id_product IN (' . implode(',', $foundProductIds) . ')' . $sqlGroups,
true,
false
);

// And again, extract their IDs
$eligibleProducts = [];
foreach ($results as $row) {
$eligibleProducts[] = $row['id_product'];
}

// If we didn't end up with anything, we can immediately return an empty response.
// No sense in getting more data for nothing.
if (!count($eligibleProducts)) {
return $ajax ? [] : ['total' => 0, 'result' => []];
}

/*
* Now, we have a list of (also) randomly ordered product IDs for our search,
* but we know that they are real, active products that should be returned.
*/
$product_pool = ' IN (' . implode(',', $eligibleProducts) . ') ';

if ($ajax) {
Expand Down

0 comments on commit ad29ac3

Please sign in to comment.