diff --git a/classes/Search.php b/classes/Search.php index 5c4970ea72921..aa2c1cbe168ab 100644 --- a/classes/Search.php +++ b/classes/Search.php @@ -253,6 +253,21 @@ public static function sanitize($string, $id_lang, $indexation = false, $iso_cod return $string; } + /** + * The holy method to search for products. + * + * @param int $id_lang Language identifier + * @param string $expr Search expression + * @param int $page_number Start from page + * @param int $page_size Number of products to return + * @param $order_by + * @param $order_way + * @param bool $ajax Specifies the return structure of data + * @param bool $use_cookie unused + * @param Context $context Context to use when searching data. Current context will be used if missing. + * + * @return array|bool search results returned in certain structure, depending on $ajax parameter + */ public static function find( $id_lang, $expr, @@ -268,38 +283,62 @@ public static function find( $context = Context::getContext(); } + // Get database instance to use $db = Db::getInstance(_PS_USE_SQL_SLAVE_); - // TODO : smart page management - if ($page_number < 1) { + // Initialize pagination if nonsense was passed + if (empty($page_number)) { $page_number = 1; } - if ($page_size < 1) { + if (empty($page_size)) { $page_size = 1; } + // Initialize and validate sorting if (!Validate::isOrderBy($order_by) || !Validate::isOrderWay($order_way)) { return false; } - $scoreArray = []; + /* + * Variables related to fuzzy search. + * + * $psFuzzySearch to see if fuzzy search is enabled. + * $fuzzyMaxLoop configuration to limit how many times we try to fuzzy search for each word. + * $fuzzyLoop to track how many times we tried to fuzzy search, so we can break the loop. + */ $fuzzyLoop = 0; - $wordCnt = 0; - $eligibleProducts2Full = []; - $expressions = explode(';', $expr); $fuzzyMaxLoop = (int) Configuration::get('PS_SEARCH_FUZZY_MAX_LOOP'); $psFuzzySearch = (int) Configuration::get('PS_SEARCH_FUZZY'); + + // Score array to keep track of words we will get weights for (for relevance) + $scoreArray = []; + + // Word count to track how many words we got for given expression + $wordCnt = 0; + + // Final resulting array with product IDs found + $foundProductIds = []; + + // Expressions to search for. If user passes search expressions separated with semicolon, they will be treated separately + $expressions = explode(';', $expr); + + // Minimal word length configuration, so we don't search for extremely short words $psSearchMinWordLength = (int) Configuration::get('PS_SEARCH_MINWORDLEN'); + + // Ok, now let's go through each expression. It's usually only one. foreach ($expressions as $expression) { - $eligibleProducts2 = null; + $productIdsFoundForCurrentExpression = null; + + // Get all words from current expression $words = Search::extractKeyWords($expression, $id_lang, false, $context->language->iso_code); foreach ($words as $key => $word) { + // Skip all empty words or shorter than our limit if (empty($word) || strlen($word) < $psSearchMinWordLength) { unset($words[$key]); continue; } - $sql_param_search = self::getSearchParamFromWord($word); + // We prepare a basic part of SQL query that we will be searching $sql = 'SELECT DISTINCT si.id_product ' . 'FROM ' . _DB_PREFIX_ . 'search_word sw ' . 'LEFT JOIN ' . _DB_PREFIX_ . 'search_index si ON sw.id_word = si.id_word ' . @@ -311,6 +350,17 @@ public static function find( 'AND product_shop.indexed = 1 ' . 'AND sw.word LIKE '; + /* + * Now, find all products from the index, that have this keyword. + * We start with the word itself wrapped in %%, coming from getSearchParamFromWord. + * + * If we don't find anything, we will leverage levenshtein algorithm to find a closest keyword + * via findClosestWeightestWord method. + * + * We will keep searching with different expressions, until we find something + * or we exceed our fuzzy search limit. + */ + $sql_param_search = self::getSearchParamFromWord($word); while (!($result = $db->executeS($sql . "'" . $sql_param_search . "';", true, false))) { if (!$psFuzzySearch || $fuzzyLoop++ > $fuzzyMaxLoop @@ -320,32 +370,52 @@ public static function find( } } + // If nothing was found after X retries, skip this keyword if (!$result) { unset($words[$key]); continue; } - $productIds = array_column($result, 'id_product'); - if ($eligibleProducts2 === null) { - $eligibleProducts2 = $productIds; + /* + * Extremely important step that someone broke in the past. + * Now if we found something, we need to intersect it with the the previously found products. + * If we search for "Red car", we want to get products that contain "red" AND contain "car". + * Somebody broke it before and it found all things "car" and all things "red". + */ + $productIdsFoundForCurrentWord = array_column($result, 'id_product'); + if ($productIdsFoundForCurrentExpression === null) { + $productIdsFoundForCurrentExpression = $productIdsFoundForCurrentWord; } else { - $eligibleProducts2 = array_intersect($eligibleProducts2, $productIds); + $productIdsFoundForCurrentExpression = array_intersect($productIdsFoundForCurrentExpression, $productIdsFoundForCurrentWord); } + // Add the expresion to our score array, so we can later calculate the relevance $scoreArray[] = 'sw.word LIKE \'' . $sql_param_search . '\''; } $wordCnt += count($words); - if ($eligibleProducts2) { - $eligibleProducts2Full = array_merge($eligibleProducts2Full, $eligibleProducts2); + if ($productIdsFoundForCurrentExpression) { + $foundProductIds = array_merge($foundProductIds, $productIdsFoundForCurrentExpression); } } - $eligibleProducts2Full = array_unique($eligibleProducts2Full); + // Remove all duplicates from product IDs + $foundProductIds = array_unique($foundProductIds); - if (!$wordCnt || !count($eligibleProducts2Full)) { + // If we didn't end up anything now, we can immediately return empty response. + // No sense in calculating weights of nothing. + if (!$wordCnt || !count($foundProductIds)) { return $ajax ? [] : ['total' => 0, 'result' => []]; } + /* + * Now, we have a list of randomly ordered product IDs for our search, + * but we don't know if they are active, should be displayed, nothing. + */ + + /* + * This is a subquery that selects weight for each keyword. + * This is used as "relevance" sort order. + */ $sqlScore = ''; if (!empty($scoreArray) && is_array($scoreArray)) { $sqlScore = ',( ' . @@ -365,6 +435,7 @@ public static function find( $sqlGroups = 'AND cg.`id_group` ' . (count($groups) ? 'IN (' . implode(',', $groups) . ')' : '=' . (int) Group::getCurrent()->id); } + // Select products from the list of IDs that should be displayed and can be returned. $results = $db->executeS( 'SELECT DISTINCT cp.`id_product` ' . 'FROM `' . _DB_PREFIX_ . 'category_product` cp ' . @@ -376,20 +447,27 @@ public static function find( 'AND product_shop.`active` = 1 ' . 'AND product_shop.`visibility` IN ("both", "search") ' . 'AND product_shop.indexed = 1 ' . - 'AND cp.id_product IN (' . implode(',', $eligibleProducts2Full) . ')' . $sqlGroups, + 'AND cp.id_product IN (' . implode(',', $foundProductIds) . ')' . $sqlGroups, true, false ); + // And again, extract their IDs $eligibleProducts = []; foreach ($results as $row) { $eligibleProducts[] = $row['id_product']; } + // If we didn't end up anything now, we can immediately return empty response. + // No sense in getting more data for nothing. if (!count($eligibleProducts)) { return $ajax ? [] : ['total' => 0, 'result' => []]; } + /* + * Now, we have a list of (also) randomly ordered product IDs for our search, + * but we know that they are real, active products that should be returned. + */ $product_pool = ' IN (' . implode(',', $eligibleProducts) . ') '; if ($ajax) {