Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Wildcard analyzer helpers #578

Merged
merged 2 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 85 additions & 6 deletions core/analysis/analyzers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,18 @@

#include "analysis/analyzers.hpp"

#include <velocypack/Builder.h>
#include <velocypack/Parser.h>

#include "analysis/token_streams.hpp"
#include "utils/hash_utils.hpp"
#include "utils/register.hpp"

namespace {
#include "utils/vpack_utils.hpp"

using namespace irs;
using namespace arangodb;

namespace {

struct key {
key(std::string_view type, const irs::type_info& args_format)
Expand Down Expand Up @@ -70,10 +76,10 @@ struct hash<::key> {
};

} // namespace std

namespace irs::analysis {
namespace {

constexpr std::string_view kFileNamePrefix{"libanalyzer-"};
constexpr std::string_view kFileNamePrefix = "libanalyzer-";

class analyzer_register final
: public irs::tagged_generic_register<::key, ::value, std::string_view,
Expand All @@ -93,9 +99,27 @@ class analyzer_register final
}
};

} // namespace
// Attribute of the analyzer definition object that holds the analyzer type
// name (must be a string to be accepted).
constexpr std::string_view kTypeParam = "type";
// Attribute of the analyzer definition object that holds the analyzer
// arguments; when absent an empty object is used instead.
constexpr std::string_view kPropertiesParam = "properties";
// Attribute of the input object that holds the analyzer definition; also the
// attribute name under which the normalized definition is written.
constexpr std::string_view kAnalyzerParam = "analyzer";

namespace irs::analysis {
// Extracts the analyzer type name from `input`, which must be an object.
//
// As a side effect `input` is replaced by the value of its "analyzer"
// attribute, so callers can keep reading the definition (e.g. "properties")
// from it afterwards.
//
// Returns:
//  - the type name of `string_token_stream` when the "analyzer" attribute is
//    missing, null or an empty object;
//  - an empty view when the definition is not an object or its "type"
//    attribute is not a string;
//  - the value of the "type" attribute otherwise.
std::string_view GetType(velocypack::Slice& input) {
  IRS_ASSERT(input.isObject());
  input = input.get(kAnalyzerParam);

  const bool definition_missing =
    input.isNone() || input.isNull() || input.isEmptyObject();
  if (definition_missing) {
    return irs::string_token_stream::type_name();
  }

  if (input.isObject()) {
    const auto type_slice = input.get(kTypeParam);
    if (type_slice.isString()) {
      return type_slice.stringView();
    }
  }
  // malformed definition: not an object or no string "type"
  return {};
}

} // namespace

analyzer_registrar::analyzer_registrar(
const type_info& type, const type_info& args_format,
Expand Down Expand Up @@ -195,5 +219,60 @@ bool visit(
return analyzer_register::instance().visit(wrapper);
}

// Instantiates the analyzer described by the "analyzer" attribute of `input`.
//
// `output` is reset to an empty pointer (and `true` is returned) when the
// resolved type is the `string_token_stream` type name, which is also what a
// missing, null or empty definition resolves to. Otherwise the analyzer is
// created from the "properties" attribute (an empty object when absent),
// trying the vpack argument format first and json second.
//
// Returns false when the definition is malformed or no analyzer could be
// created in either format.
bool MakeAnalyzer(velocypack::Slice input, analyzer::ptr& output) {
  const auto type = GetType(input);  // `input` now refers to the definition
  if (type.empty()) {
    return false;
  }
  if (type == irs::string_token_stream::type_name()) {
    output = {};
    return true;
  }

  auto properties = input.get(kPropertiesParam);
  if (properties.isNone()) {
    properties = velocypack::Slice::emptyObjectSlice();
  }

  output = get(type, irs::type<irs::text_format::vpack>::get(),
               {properties.startAs<char>(), properties.byteSize()});
  if (output) {
    return true;
  }

  // fallback to json format if vpack isn't available
  output = get(type, irs::type<irs::text_format::json>::get(),
               irs::slice_to_string(properties));
  return output != nullptr;
}

// Writes a normalized copy of the analyzer definition found in the "analyzer"
// attribute of `input` into `output`, as an object added under the
// "analyzer" attribute name.
//
// When the resolved type is the `string_token_stream` type name nothing is
// added inside that object. Otherwise "type" is added, followed by
// "properties" as normalized by the analyzer itself (vpack argument format
// first, json second).
//
// Returns false when the definition is malformed (nothing is written) or
// when normalization fails in both formats.
// NOTE(review): in the latter case "type" has already been added to
// `output` — confirm callers discard the builder on failure.
bool NormalizeAnalyzer(velocypack::Slice input, velocypack::Builder& output) {
  const auto type = GetType(input);  // `input` now refers to the definition
  if (type.empty()) {
    return false;
  }

  velocypack::ObjectBuilder scope{&output, kAnalyzerParam};
  if (type == irs::string_token_stream::type_name()) {
    return true;
  }
  output.add(kTypeParam, velocypack::Value{type});

  auto properties = input.get(kPropertiesParam);
  if (properties.isNone()) {
    properties = velocypack::Slice::emptyObjectSlice();
  }

  std::string normalized;
  if (normalize(normalized, type, irs::type<text_format::vpack>::get(),
                {properties.startAs<char>(), properties.byteSize()})) {
    // `normalized` holds raw vpack bytes, view them as a slice
    const auto* raw = reinterpret_cast<const uint8_t*>(normalized.data());
    output.add(kPropertiesParam, velocypack::Slice{raw});
    return true;
  }

  // fallback to json format if vpack isn't available
  if (!normalize(normalized, type, irs::type<text_format::json>::get(),
                 slice_to_string(properties))) {
    return false;
  }
  const auto parsed = velocypack::Parser::fromJson(normalized);
  output.add(kPropertiesParam, parsed->slice());
  return true;
}

} // namespace analyzers
} // namespace irs::analysis
9 changes: 9 additions & 0 deletions core/analysis/analyzers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,17 @@

#pragma once

#include <velocypack/Slice.h>

#include <functional>

#include "analyzer.hpp"
#include "shared.hpp"
#include "utils/text_format.hpp"

namespace arangodb::velocypack {
class Builder;
} // namespace arangodb::velocypack
namespace irs::analysis {

using factory_f = analysis::analyzer::ptr (*)(std::string_view args);
Expand Down Expand Up @@ -71,6 +76,10 @@ void load_all(std::string_view path);
bool visit(
const std::function<bool(std::string_view, const type_info&)>& visitor);

// Creates an analyzer from the "analyzer" attribute of the object `input`.
// The definition is expected to hold a string "type" and optional
// "properties". A missing, null or empty definition leaves `output` empty
// and is reported as success.
// Returns true on success, false if the definition is malformed or the
// analyzer could not be created.
bool MakeAnalyzer(arangodb::velocypack::Slice input, analyzer::ptr& output);
// Writes the normalized form of the "analyzer" attribute of the object
// `input` into `output` under the "analyzer" attribute name.
// Returns true on success, false if the definition is malformed or its
// properties could not be normalized.
bool NormalizeAnalyzer(arangodb::velocypack::Slice input,
arangodb::velocypack::Builder& output);

} // namespace analyzers
} // namespace irs::analysis

Expand Down
Loading