Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
Phrase fix and other improvements (#584)
Browse files Browse the repository at this point in the history
* Fix phrase Levenshtein part

* Fix std::swap UB and remove bad hash

* Make filter/prepared smaller, important for removes
  • Loading branch information
MBkkt authored Jan 25, 2024
1 parent 1e8a650 commit df481fa
Show file tree
Hide file tree
Showing 61 changed files with 432 additions and 667 deletions.
5 changes: 1 addition & 4 deletions core/analysis/analyzers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,14 @@ struct value {

} // namespace

namespace std {

template<>
struct hash<::key> {
struct std::hash<::key> {
size_t operator()(const ::key& value) const noexcept {
return irs::hash_combine(
std::hash<irs::type_info::type_id>()(value.args_format.id()), value.type);
}
};

} // namespace std
namespace irs::analysis {
namespace {

Expand Down
19 changes: 6 additions & 13 deletions core/analysis/pipeline_token_stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,22 +70,15 @@ class pipeline_token_stream final : public TypedAnalyzer<pipeline_token_stream>,
template<typename Visitor>
bool visit_members(Visitor&& visitor) const {
for (const auto& sub : pipeline_) {
if (sub.get_stream().type() ==
type()) { // pipe inside pipe - forward visiting
#if IRESEARCH_DEBUG
const auto& sub_pipe =
dynamic_cast<const pipeline_token_stream&>(sub.get_stream());
#else
const auto& sub_pipe =
static_cast<const pipeline_token_stream&>(sub.get_stream());
#endif
const auto& stream = sub.get_stream();
if (stream.type() == type()) {
// pipe inside pipe - forward visiting
const auto& sub_pipe = DownCast<pipeline_token_stream>(stream);
if (!sub_pipe.visit_members(visitor)) {
return false;
}
} else {
if (!visitor(sub.get_stream())) {
return false;
}
} else if (!visitor(sub.get_stream())) {
return false;
}
}
return true;
Expand Down
2 changes: 1 addition & 1 deletion core/search/all_docs_provider.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

namespace irs {

filter::ptr AllDocsProvider::Default(score_t boost) {
AllDocsProvider::Ptr AllDocsProvider::Default(score_t boost) {
auto filter = std::make_unique<all>();
filter->boost(boost);
return filter;
Expand Down
9 changes: 4 additions & 5 deletions core/search/all_docs_provider.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,12 @@ namespace irs {

class AllDocsProvider {
public:
using ProviderFunc = std::function<filter::ptr(score_t)>;
using Ptr = std::unique_ptr<FilterWithBoost>;
using ProviderFunc = std::function<Ptr(score_t)>;

static filter::ptr Default(score_t boost);
static Ptr Default(score_t boost);

filter::ptr MakeAllDocsFilter(score_t boost) const {
return all_docs_(boost);
}
Ptr MakeAllDocsFilter(score_t boost) const { return all_docs_(boost); }

void SetProvider(ProviderFunc&& provider) {
all_docs_ = provider ? std::move(provider) : ProviderFunc{&Default};
Expand Down
7 changes: 5 additions & 2 deletions core/search/all_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,24 @@ namespace irs {
class all_query : public filter::prepared {
public:
explicit all_query(bstring&& stats, score_t boost)
: filter::prepared(boost), stats_(std::move(stats)) {}
: stats_{std::move(stats)}, boost_{boost} {}

doc_iterator::ptr execute(const ExecutionContext& ctx) const final {
auto& rdr = ctx.segment;

return memory::make_managed<AllIterator>(rdr, stats_.c_str(), ctx.scorers,
rdr.docs_count(), boost());
rdr.docs_count(), boost_);
}

void visit(const SubReader&, PreparedStateVisitor&, score_t) const final {
// No terms to visit
}

score_t boost() const noexcept final { return boost_; }

private:
bstring stats_;
score_t boost_;
};

filter::prepared::ptr all::prepare(const PrepareContext& ctx) const {
Expand Down
4 changes: 2 additions & 2 deletions core/search/all_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
namespace irs {

// Filter returning all documents
class all : public filter {
class all : public FilterWithBoost {
public:
filter::prepared::ptr prepare(const PrepareContext& ctx) const final;
prepared::ptr prepare(const PrepareContext& ctx) const final;

irs::type_info::type_id type() const noexcept final {
return irs::type<all>::id();
Expand Down
78 changes: 25 additions & 53 deletions core/search/boolean_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,55 +22,36 @@

#include "boolean_filter.hpp"

#include <boost/container_hash/hash.hpp>

#include "conjunction.hpp"
#include "disjunction.hpp"
#include "exclusion.hpp"
#include "min_match_disjunction.hpp"
#include "prepared_state_visitor.hpp"
#include "search/boolean_query.hpp"

namespace irs {
namespace {

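// first - pointer to the innermost filter that is not itself a `Not` node
//         (may be nullptr)
// second - collapsed negation flag: true if the chain of nested `Not` nodes
//          amounts to a negation, false if the negations cancel out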
std::pair<const irs::filter*, bool> optimize_not(const irs::Not& node) {
std::pair<const filter*, bool> optimize_not(const Not& node) {
bool neg = true;
const irs::filter* inner = node.filter();
while (inner && inner->type() == irs::type<irs::Not>::id()) {
const auto* inner = node.filter();
while (inner != nullptr && inner->type() == type<Not>::id()) {
neg = !neg;
inner = static_cast<const irs::Not*>(inner)->filter();
inner = DownCast<Not>(inner)->filter();
}

return std::make_pair(inner, neg);
return std::pair{inner, neg};
}

} // namespace

namespace irs {

size_t boolean_filter::hash() const noexcept {
size_t seed = 0;

::boost::hash_combine(seed, filter::hash());
std::for_each(
filters_.begin(), filters_.end(),
[&seed](const filter::ptr& f) { ::boost::hash_combine(seed, *f); });

return seed;
}

bool boolean_filter::equals(const filter& rhs) const noexcept {
if (!filter::equals(rhs)) {
return false;
}
const auto& typed_rhs = DownCast<boolean_filter>(rhs);
return filters_.size() == typed_rhs.size() &&
std::equal(begin(), end(), typed_rhs.begin(),
[](const filter::ptr& lhs, const filter::ptr& rhs) {
return *lhs == *rhs;
});
return std::equal(
begin(), end(), typed_rhs.begin(), typed_rhs.end(),
[](const auto& lhs, const auto& rhs) { return *lhs == *rhs; });
}

filter::prepared::ptr boolean_filter::prepare(const PrepareContext& ctx) const {
Expand All @@ -94,8 +75,8 @@ filter::prepared::ptr boolean_filter::prepare(const PrepareContext& ctx) const {
std::vector<const filter*> incl;
std::vector<const filter*> excl;

irs::filter::ptr all_docs_zero_boost;
irs::filter::ptr all_docs_no_boost;
AllDocsProvider::Ptr all_docs_zero_boost;
AllDocsProvider::Ptr all_docs_no_boost;

group_filters(all_docs_zero_boost, incl, excl);

Expand All @@ -108,29 +89,29 @@ filter::prepared::ptr boolean_filter::prepare(const PrepareContext& ctx) const {
return PrepareBoolean(incl, excl, ctx);
}

void boolean_filter::group_filters(filter::ptr& all_docs_zero_boost,
void boolean_filter::group_filters(AllDocsProvider::Ptr& all_docs_zero_boost,
std::vector<const filter*>& incl,
std::vector<const filter*>& excl) const {
incl.reserve(size() / 2);
excl.reserve(incl.capacity());

const irs::filter* empty_filter{nullptr};
const filter* empty_filter = nullptr;
const auto is_or = type() == irs::type<Or>::id();
for (auto begin = this->begin(), end = this->end(); begin != end; ++begin) {
if (irs::type<irs::empty>::id() == (*begin)->type()) {
empty_filter = begin->get();
for (const auto& filter : *this) {
if (irs::type<Empty>::id() == filter->type()) {
empty_filter = filter.get();
continue;
}
if (irs::type<Not>::id() == (*begin)->type()) {
const auto res = optimize_not(DownCast<Not>(**begin));
if (irs::type<Not>::id() == filter->type()) {
const auto res = optimize_not(DownCast<Not>(*filter));

if (!res.first) {
continue;
}

if (res.second) {
if (!all_docs_zero_boost) {
all_docs_zero_boost = MakeAllDocsFilter(0.f);
all_docs_zero_boost = MakeAllDocsFilter(0.F);
}

if (*all_docs_zero_boost == *res.first) {
Expand All @@ -148,7 +129,7 @@ void boolean_filter::group_filters(filter::ptr& all_docs_zero_boost,
incl.push_back(res.first);
}
} else {
incl.push_back(begin->get());
incl.push_back(filter.get());
}
}
if (empty_filter != nullptr) {
Expand All @@ -162,7 +143,7 @@ filter::prepared::ptr And::PrepareBoolean(std::vector<const filter*>& incl,
// Optimization step:
// if the include group is empty, or its last entry is an 'empty' filter,
// the whole conjunction is empty
if (incl.empty() || incl.back()->type() == irs::type<irs::empty>::id()) {
if (incl.empty() || incl.back()->type() == irs::type<Empty>::id()) {
return prepared::empty();
}

Expand All @@ -180,7 +161,7 @@ filter::prepared::ptr And::PrepareBoolean(std::vector<const filter*>& incl,
for (auto filter : incl) {
if (*filter == *cumulative_all) {
all_count++;
all_boost += filter->boost();
all_boost += DownCast<FilterWithBoost>(*filter).boost();
}
}
if (all_count != 0) {
Expand All @@ -204,7 +185,7 @@ filter::prepared::ptr And::PrepareBoolean(std::vector<const filter*>& incl,
// resulting boost will be: new_boost * OR_BOOST * LEFT_BOOST. If we
// substitute new_boost back we will get ( boost * OR_BOOST * ALL_BOOST +
// boost * OR_BOOST * LEFT_BOOST) - original non-optimized boost value
auto left_boost = (*incl.begin())->boost();
auto left_boost = (*incl.begin())->BoostImpl();
if (boost() != 0 && left_boost != 0 && !sub_ctx.scorers.empty()) {
sub_ctx.boost = (sub_ctx.boost * boost() * all_boost +
sub_ctx.boost * boost() * left_boost) /
Expand Down Expand Up @@ -247,7 +228,7 @@ filter::prepared::ptr Or::PrepareBoolean(std::vector<const filter*>& incl,
return MakeAllDocsFilter(kNoBoost)->prepare(sub_ctx);
}

if (!incl.empty() && incl.back()->type() == irs::type<irs::empty>::id()) {
if (!incl.empty() && incl.back()->type() == irs::type<Empty>::id()) {
incl.pop_back();
}

Expand All @@ -270,7 +251,7 @@ filter::prepared::ptr Or::PrepareBoolean(std::vector<const filter*>& incl,
for (auto filter : incl) {
if (*filter == *cumulative_all) {
all_count++;
all_boost += filter->boost();
all_boost += DownCast<FilterWithBoost>(*filter).boost();
incl_all = filter;
}
}
Expand Down Expand Up @@ -353,15 +334,6 @@ filter::prepared::ptr Not::prepare(const PrepareContext& ctx) const {
return res.first->prepare(sub_ctx);
}

size_t Not::hash() const noexcept {
size_t seed = 0;
::boost::hash_combine(seed, filter::hash());
if (filter_) {
::boost::hash_combine<const irs::filter&>(seed, *filter_);
}
return seed;
}

bool Not::equals(const irs::filter& rhs) const noexcept {
if (!filter::equals(rhs)) {
return false;
Expand Down
Loading

0 comments on commit df481fa

Please sign in to comment.