This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Merge branch 'master' into chore/enable-tests
MBkkt authored Mar 20, 2024
2 parents f7f6c26 + 932b08b commit fdb9ca7
Showing 3 changed files with 119 additions and 72 deletions.
62 changes: 29 additions & 33 deletions core/analysis/multi_delimited_token_stream.cpp
@@ -85,7 +85,7 @@ class MultiDelimitedTokenStreamSingleCharsBase
  public:
   auto FindNextDelim() {
     auto where = static_cast<Derived*>(this)->FindNextDelim();
-    return std::make_pair(where, 1);
+    return std::make_pair(where, size_t{1});
   }
 };
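The only change in this hunk is the explicit `size_t{1}`: with a bare `1`, `std::make_pair` deduces `std::pair<Iterator, int>`, whereas the multi-character delimiter variants presumably report the match length as `size_t`, so the explicit type keeps the (position, length) pair consistent across the CRTP implementations. A standalone illustration of the deduction difference (standard C++ only, not code from this repository):

#include <cstddef>
#include <type_traits>
#include <utility>

// make_pair deduces the second member's type from the argument:
static_assert(std::is_same_v<
    decltype(std::make_pair(static_cast<const char*>(nullptr), 1)),
    std::pair<const char*, int>>);
static_assert(std::is_same_v<
    decltype(std::make_pair(static_cast<const char*>(nullptr), std::size_t{1})),
    std::pair<const char*, std::size_t>>);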

@@ -409,7 +409,7 @@ irs::analysis::analyzer::ptr Make(MultiDelimitedAnalyser::Options&& opts) {
   return std::make_unique<MultiDelimitedTokenStreamGeneric>(std::move(opts));
 }
 
-constexpr std::string_view kDelimiterParamName{"delimiter"};
+constexpr std::string_view kDelimiterParamName{"delimiters"};
 
 bool ParseVpackOptions(VPackSlice slice,
                        MultiDelimitedAnalyser::Options& options) {
@@ -418,46 +418,42 @@ bool ParseVpackOptions(VPackSlice slice,
       "Slice for multi_delimited_token_stream is not an object or string");
     return false;
   }
-
-  if (auto delim_array_slice = slice.get(kDelimiterParamName);
-      !delim_array_slice.isNone()) {
-    if (!delim_array_slice.isArray()) {
-      IRS_LOG_WARN(
-        absl::StrCat("Invalid type '", kDelimiterParamName,
-                     "' (array expected) for multi_delimited_token_stream from "
-                     "VPack arguments"));
-      return false;
-    }
-
-    for (auto delim : VPackArrayIterator(delim_array_slice)) {
-      if (!delim.isString()) {
-        IRS_LOG_WARN(absl::StrCat(
-          "Invalid type in '", kDelimiterParamName,
-          "' (string expected) for multi_delimited_token_stream from "
-          "VPack arguments"));
-        return false;
-      }
-      auto view = ViewCast<byte_type>(delim.stringView());
-
-      if (view.empty()) {
-        IRS_LOG_ERROR("Delimiter list contains an empty string.");
-        return false;
-      }
-
-      for (const auto& known : options.delimiters) {
-        if (view.starts_with(known) || known.starts_with(view)) {
-          IRS_LOG_ERROR(
-            absl::StrCat("Some delimiters are a prefix of others. See `",
-                         ViewCast<char>(bytes_view{known}), "` and `",
-                         delim.stringView(), "`"));
-          return false;
-        }
-      }
-
-      options.delimiters.emplace_back(view);
-    }
-  }
+  auto delim_array_slice = slice.get(kDelimiterParamName);
+  if (!delim_array_slice.isArray()) {
+    IRS_LOG_WARN(
+      absl::StrCat("Invalid type or missing '", kDelimiterParamName,
+                   "' (array expected) for multi_delimited_token_stream from "
+                   "VPack arguments"));
+    return false;
+  }
+
+  for (auto delim : VPackArrayIterator(delim_array_slice)) {
+    if (!delim.isString()) {
+      IRS_LOG_WARN(absl::StrCat(
+        "Invalid type in '", kDelimiterParamName,
+        "' (string expected) for multi_delimited_token_stream from "
+        "VPack arguments"));
+      return false;
+    }
+    auto view = ViewCast<byte_type>(delim.stringView());
+
+    if (view.empty()) {
+      IRS_LOG_ERROR("Delimiter list contains an empty string.");
+      return false;
+    }
+
+    for (const auto& known : options.delimiters) {
+      if (view.starts_with(known) || known.starts_with(view)) {
+        IRS_LOG_ERROR(
+          absl::StrCat("Some delimiters are a prefix of others. See `",
+                       ViewCast<char>(bytes_view{known}), "` and `",
+                       delim.stringView(), "`"));
+        return false;
+      }
+    }
+
+    options.delimiters.emplace_back(view);
+  }
 
   return true;
 }
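Net effect of the parsing change above: the option key is now `delimiters` (see the `kDelimiterParamName` rename), it is required, and it must be an array of non-empty strings in which no delimiter is a prefix of another. A minimal usage sketch, mirroring the VPack-based construction used in the tests at the bottom of this commit (not code added by the commit itself; the registration name `multi_delimiter` is taken from those tests):

// Sketch only: builds analyzer options the same way the new tests do.
auto builder = arangodb::velocypack::Parser::fromJson(
    R"({"delimiters": [";", ",", "|"]})");   // accepted
// R"({"delimiter": [";", ","]})"            // rejected: key renamed to "delimiters"
// R"({"delimiters": ["a", "ab"]})"          // rejected: "a" is a prefix of "ab"
std::string in_str(builder->slice().startAs<char>(), builder->slice().byteSize());
auto analyzer = irs::analysis::analyzers::get(
    "multi_delimiter", irs::type<irs::text_format::vpack>::get(), in_str);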

8 changes: 5 additions & 3 deletions core/formats/columnstore2.cpp
@@ -393,9 +393,11 @@ class column_base : public column_reader, private util::noncopyable {
     if (irs::IsNull(column_name)) {
       column_name = "<anonymous>";
     }
-    IRS_LOG_WARN(
-        absl::StrCat("Failed to allocate memory for buffered column id ",
-                     header().id, " name: ", column_name, " of size ", size));
+    IRS_LOG_INFO(absl::StrCat(
+        "Failed to allocate memory for buffered column id ", header().id,
+        " name: ", column_name, " of size ", (size + mappings),
+        ". This can happen if no columns cache was configured or the "
+        "column data size exceeds the columns cache size."));
     return false;
   }

121 changes: 85 additions & 36 deletions tests/analysis/multi_delimited_token_stream_tests.cpp
@@ -21,6 +21,10 @@
 #include "analysis/multi_delimited_token_stream.hpp"
 #include "gtest/gtest.h"
 #include "tests_config.hpp"
+#include "velocypack/Parser.h"
 
+using namespace arangodb::velocypack;
+using namespace irs::analysis;
+
 namespace {
 
@@ -30,6 +34,9 @@ irs::bstring operator""_b(const char* ptr, std::size_t size) {
 }
 
 class multi_delimited_token_stream_tests : public ::testing::Test {
+ public:
+  static void SetUpTestCase() { MultiDelimitedAnalyser::init(); }
+
   virtual void SetUp() {
     // Code here will be called immediately after the constructor (right before
     // each test).
@@ -48,15 +55,12 @@ class multi_delimited_token_stream_tests : public ::testing::Test {
 // -----------------------------------------------------------------------------
 
 TEST_F(multi_delimited_token_stream_tests, consts) {
-  static_assert("multi_delimiter" ==
-                irs::type<irs::analysis::MultiDelimitedAnalyser>::name());
+  static_assert("multi_delimiter" == irs::type<MultiDelimitedAnalyser>::name());
 }
 
 TEST_F(multi_delimited_token_stream_tests, test_delimiter) {
-  auto stream =
-      irs::analysis::MultiDelimitedAnalyser::Make({.delimiters = {"a"_b}});
-  ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
-            stream->type());
+  auto stream = MultiDelimitedAnalyser::Make({.delimiters = {"a"_b}});
+  ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
 
   ASSERT_TRUE(stream->reset("baccaad"));
 
@@ -83,10 +87,8 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter) {
 }
 
 TEST_F(multi_delimited_token_stream_tests, test_delimiter_empty_match) {
-  auto stream =
-      irs::analysis::MultiDelimitedAnalyser::Make({.delimiters = {"."_b}});
-  ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
-            stream->type());
+  auto stream = MultiDelimitedAnalyser::Make({.delimiters = {"."_b}});
+  ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
 
   ASSERT_TRUE(stream->reset(".."));
 
@@ -97,10 +99,9 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter_empty_match) {
 }
 
 TEST_F(multi_delimited_token_stream_tests, test_delimiter_3) {
-  auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
-      {.delimiters = {";"_b, ","_b, "|"_b}});
-  ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
-            stream->type());
+  auto stream =
+      MultiDelimitedAnalyser::Make({.delimiters = {";"_b, ","_b, "|"_b}});
+  ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
 
   ASSERT_TRUE(stream->reset("a;b||c|d,ff"));
 
@@ -133,10 +134,9 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter_3) {
 }
 
 TEST_F(multi_delimited_token_stream_tests, test_delimiter_5) {
-  auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
+  auto stream = MultiDelimitedAnalyser::Make(
       {.delimiters = {";"_b, ","_b, "|"_b, "."_b, ":"_b}});
-  ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
-            stream->type());
+  ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
 
   ASSERT_TRUE(stream->reset("a:b||c.d,ff."));
 
@@ -169,10 +169,8 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter_5) {
 }
 
 TEST_F(multi_delimited_token_stream_tests, test_delimiter_single_long) {
-  auto stream =
-      irs::analysis::MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b}});
-  ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
-            stream->type());
+  auto stream = MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b}});
+  ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
 
   ASSERT_TRUE(stream->reset("foobarfoobazbarfoobar"));
 
@@ -197,9 +195,8 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter_single_long) {
 }
 
 TEST_F(multi_delimited_token_stream_tests, no_delimiter) {
-  auto stream = irs::analysis::MultiDelimitedAnalyser::Make({.delimiters = {}});
-  ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
-            stream->type());
+  auto stream = MultiDelimitedAnalyser::Make({.delimiters = {}});
+  ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
 
   ASSERT_TRUE(stream->reset("foobar"));
 
@@ -216,10 +213,9 @@ TEST_F(multi_delimited_token_stream_tests, no_delimiter) {
 }
 
 TEST_F(multi_delimited_token_stream_tests, multi_words) {
-  auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
-      {.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
-  ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
-            stream->type());
+  auto stream =
+      MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
+  ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
 
   ASSERT_TRUE(stream->reset("fooxyzbarbazz"));
 
@@ -240,10 +236,9 @@ TEST_F(multi_delimited_token_stream_tests, multi_words) {
 }
 
 TEST_F(multi_delimited_token_stream_tests, multi_words_2) {
-  auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
-      {.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
-  ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
-            stream->type());
+  auto stream =
+      MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
+  ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
 
   ASSERT_TRUE(stream->reset("foobarbaz"));
 
@@ -254,10 +249,9 @@ TEST_F(multi_delimited_token_stream_tests, multi_words_2) {
 }
 
 TEST_F(multi_delimited_token_stream_tests, trick_matching_1) {
-  auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
-      {.delimiters = {"foo"_b, "ffa"_b}});
-  ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
-            stream->type());
+  auto stream =
+      MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b, "ffa"_b}});
+  ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
 
   ASSERT_TRUE(stream->reset("abcffoobar"));
 
@@ -276,3 +270,58 @@ TEST_F(multi_delimited_token_stream_tests, trick_matching_1) {
   ASSERT_EQ(offset->end, 10);
   ASSERT_FALSE(stream->next());
 }
+
+TEST_F(multi_delimited_token_stream_tests, construct) {
+  // wrong name
+  {
+    auto builder = Parser::fromJson(R"({"delimiter":["a", "b"]})");
+    std::string in_str;
+    in_str.assign(builder->slice().startAs<char>(),
+                  builder->slice().byteSize());
+    auto stream = analyzers::get(
+        "multi_delimiter", irs::type<irs::text_format::vpack>::get(), in_str);
+    ASSERT_EQ(nullptr, stream);
+  }
+
+  // wrong type
+  {
+    auto builder = Parser::fromJson(R"({"delimiters":1})");
+    std::string in_str;
+    in_str.assign(builder->slice().startAs<char>(),
+                  builder->slice().byteSize());
+    auto stream = analyzers::get(
+        "multi_delimiter", irs::type<irs::text_format::vpack>::get(), in_str);
+    ASSERT_EQ(nullptr, stream);
+  }
+
+  {
+    auto builder = Parser::fromJson(R"({"delimiters":["a", "b"]})");
+    std::string in_str;
+    in_str.assign(builder->slice().startAs<char>(),
+                  builder->slice().byteSize());
+    auto stream = analyzers::get(
+        "multi_delimiter", irs::type<irs::text_format::vpack>::get(), in_str);
+    ASSERT_NE(nullptr, stream);
+    ASSERT_TRUE(stream->reset("aib"));
+    ASSERT_TRUE(stream->next());
+    auto* term = irs::get<irs::term_attribute>(*stream);
+    ASSERT_EQ("i", irs::ViewCast<char>(term->value));
+    ASSERT_FALSE(stream->next());
+  }
+  {
+    auto builder = Parser::fromJson(R"({"delimiters":["a", "b", "c", "d"]})");
+    std::string in_str;
+    in_str.assign(builder->slice().startAs<char>(),
+                  builder->slice().byteSize());
+    std::string actual;
+    auto stream =
+        analyzers::normalize(actual, "multi_delimiter",
+                             irs::type<irs::text_format::vpack>::get(), in_str);
+
+    auto slice = Slice(reinterpret_cast<uint8_t*>(actual.data()));
+    ASSERT_TRUE(slice.isObject());
+    auto delimiters = slice.get("delimiters");
+    ASSERT_TRUE(delimiters.isArray());
+    ASSERT_EQ(4, delimiters.length());
+  }
+}
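Not part of this commit, but a natural follow-up sketch: the prefix check added in ParseVpackOptions above could be exercised through the same VPack entry point, assuming rejection surfaces as a null analyzer exactly as in the failure cases of the construct test (the test name and expectation below are hypothetical):

// Hypothetical extra case: delimiters where one is a prefix of another
// ("a" and "ab") should be refused by ParseVpackOptions.
TEST_F(multi_delimited_token_stream_tests, construct_rejects_prefix_delimiters) {
  auto builder = Parser::fromJson(R"({"delimiters":["a", "ab"]})");
  std::string in_str;
  in_str.assign(builder->slice().startAs<char>(), builder->slice().byteSize());
  auto stream = analyzers::get(
      "multi_delimiter", irs::type<irs::text_format::vpack>::get(), in_str);
  ASSERT_EQ(nullptr, stream);
}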
