diff --git a/CMakeLists.txt b/CMakeLists.txt index 99953828..5dd22b94 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,13 +79,13 @@ sourcemeta_target_shellcheck(SOURCES test/*.sh docker/*.sh) set(SOURCEMETA_SCHEMAS "${PROJECT_SOURCE_DIR}/collections/sourcemeta/one/schemas") add_custom_target(jsonschema_fmt_test - COMMAND "$" fmt --check --verbose ${SOURCEMETA_SCHEMAS}) + COMMAND "$" fmt --check ${SOURCEMETA_SCHEMAS}) add_custom_target(jsonschema_fmt - COMMAND "$" fmt --verbose ${SOURCEMETA_SCHEMAS}) + COMMAND "$" fmt ${SOURCEMETA_SCHEMAS}) add_custom_target(jsonschema_metaschema - COMMAND "$" metaschema --verbose ${SOURCEMETA_SCHEMAS}) + COMMAND "$" metaschema ${SOURCEMETA_SCHEMAS}) add_custom_target(jsonschema_lint - COMMAND "$" lint --verbose + COMMAND "$" lint --exclude simple_properties_identifiers ${SOURCEMETA_SCHEMAS}) if(ONE_TESTS) diff --git a/DEPENDENCIES b/DEPENDENCIES index 3224a36e..38445da0 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,10 +1,10 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 uwebsockets https://github.com/uNetworking/uWebSockets v20.74.0 -core https://github.com/sourcemeta/core d43595afeb1849fb649f7257048b488fcd6ac3e8 -blaze https://github.com/sourcemeta/blaze 9c95799ec015db97a7bfaf44d390ae8d67932a60 -jsonbinpack https://github.com/sourcemeta/jsonbinpack 2f865f340f58dc0e00b4892eb7334739c5d6ec67 +core https://github.com/sourcemeta/core 550fcd3596d401148148196f2b8ffa5232e93bd7 +blaze https://github.com/sourcemeta/blaze cd5ebdb49d7d5fac151f7ed89810908d22198e42 +jsonbinpack https://github.com/sourcemeta/jsonbinpack 8fae212dc7ec02af4bb0cd4e7fccd42a2471f1c1 hydra https://github.com/sourcemeta/hydra af9f2c54709d620872ead0c3f8f683c15a0fa702 -jsonschema https://github.com/sourcemeta/jsonschema v13.2.0 +jsonschema https://github.com/sourcemeta/jsonschema v13.5.0 bootstrap https://github.com/twbs/bootstrap v5.3.3 bootstrap-icons https://github.com/twbs/icons v1.11.3 
collections/sourcemeta/std/v0 https://github.com/sourcemeta/std 3f8746fb22ac70494ea3e3075cc19ee5d770ced7 diff --git a/src/index/generators.h b/src/index/generators.h index efa72e00..20d432fb 100644 --- a/src/index/generators.h +++ b/src/index/generators.h @@ -220,8 +220,7 @@ struct GENERATE_HEALTH { const auto contents{sourcemeta::one::read_json(dependencies.front())}; sourcemeta::core::SchemaTransformer bundle; - sourcemeta::core::add(bundle, - sourcemeta::core::AlterSchemaMode::Readability); + sourcemeta::core::add(bundle, sourcemeta::core::AlterSchemaMode::Linter); bundle.add( sourcemeta::blaze::default_schema_compiler); bundle.add( diff --git a/test/e2e/populated/api/schemas-health.hurl b/test/e2e/populated/api/schemas-health.hurl index d056330f..2f0f56a7 100644 --- a/test/e2e/populated/api/schemas-health.hurl +++ b/test/e2e/populated/api/schemas-health.hurl @@ -41,16 +41,16 @@ jsonpath "$.errors[1].pointers[0]" == "" jsonpath "$.errors[1].name" == "top_level_examples" jsonpath "$.errors[1].message" == "Set a non-empty examples array at the top level of the schema to illustrate the expected data" jsonpath "$.errors[1].description" == null -jsonpath "$.errors[2].pointers" count == 1 +jsonpath "$.errors[2].pointers" count == 2 jsonpath "$.errors[2].pointers[0]" == "/properties/type/enum" -jsonpath "$.errors[2].name" == "enum_to_const" -jsonpath "$.errors[2].message" == "An `enum` of a single value can be expressed as `const`" +jsonpath "$.errors[2].pointers[1]" == "/properties/type/type" +jsonpath "$.errors[2].name" == "enum_with_type" +jsonpath "$.errors[2].message" == "Setting `type` alongside `enum` is considered an anti-pattern, as the enumeration choices already imply their respective types" jsonpath "$.errors[2].description" == null -jsonpath "$.errors[3].pointers" count == 2 +jsonpath "$.errors[3].pointers" count == 1 jsonpath "$.errors[3].pointers[0]" == "/properties/type/enum" -jsonpath "$.errors[3].pointers[1]" == "/properties/type/type" -jsonpath 
"$.errors[3].name" == "enum_with_type" -jsonpath "$.errors[3].message" == "Setting `type` alongside `enum` is considered an anti-pattern, as the enumeration choices already imply their respective types" +jsonpath "$.errors[3].name" == "enum_to_const" +jsonpath "$.errors[3].message" == "An `enum` of a single value can be expressed as `const`" jsonpath "$.errors[3].description" == null POST {{base}}/self/api/schemas/evaluate/sourcemeta/one/api/schemas/health/response diff --git a/vendor/blaze/src/compiler/compile.cc b/vendor/blaze/src/compiler/compile.cc index d750c099..1ef46676 100644 --- a/vendor/blaze/src/compiler/compile.cc +++ b/vendor/blaze/src/compiler/compile.cc @@ -27,8 +27,14 @@ auto compile_subschema(const sourcemeta::blaze::Context &context, if (schema_context.schema.to_boolean()) { return {}; } else { - return {make(sourcemeta::blaze::InstructionIndex::AssertionFail, context, - schema_context, dynamic_context, ValueNone{})}; + return {make( + sourcemeta::blaze::InstructionIndex::AssertionFail, context, + schema_context, + {.keyword = "", + .base_schema_location = dynamic_context.base_schema_location, + .base_instance_location = dynamic_context.base_instance_location, + .property_as_target = dynamic_context.property_as_target}, + ValueNone{})}; } } diff --git a/vendor/blaze/src/evaluator/evaluator_describe.cc b/vendor/blaze/src/evaluator/evaluator_describe.cc index 96355fd2..06c354d7 100644 --- a/vendor/blaze/src/evaluator/evaluator_describe.cc +++ b/vendor/blaze/src/evaluator/evaluator_describe.cc @@ -191,9 +191,12 @@ auto describe(const bool valid, const Instruction &step, const sourcemeta::core::WeakPointer &instance_location, const sourcemeta::core::JSON &instance, const sourcemeta::core::JSON &annotation) -> std::string { - assert(evaluate_path.empty() || evaluate_path.back().is_property()); - const std::string keyword{ - evaluate_path.empty() ? 
"" : evaluate_path.back().to_property()}; + const std::string keyword{evaluate_path.empty() || + // The last token can be an index for + // boolean schemas inside array applicators + !evaluate_path.back().is_property() + ? "" + : evaluate_path.back().to_property()}; const sourcemeta::core::JSON &target{get(instance, instance_location)}; if (step.type == sourcemeta::blaze::InstructionIndex::AssertionFail) { diff --git a/vendor/core/config.cmake.in b/vendor/core/config.cmake.in index 2db4b0aa..2a663c02 100644 --- a/vendor/core/config.cmake.in +++ b/vendor/core/config.cmake.in @@ -92,6 +92,8 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) find_dependency(mpdecimal CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + find_dependency(PCRE2 CONFIG) + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonschema.cmake") diff --git a/vendor/core/src/core/jsonpointer/CMakeLists.txt b/vendor/core/src/core/jsonpointer/CMakeLists.txt index 20d4291a..2cb95a38 100644 --- a/vendor/core/src/core/jsonpointer/CMakeLists.txt +++ b/vendor/core/src/core/jsonpointer/CMakeLists.txt @@ -1,7 +1,7 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME jsonpointer PRIVATE_HEADERS pointer.h position.h error.h token.h walker.h template.h - SOURCES jsonpointer.cc stringify.h parser.h grammar.h position.cc) + SOURCES jsonpointer.cc stringify.h parser.h grammar.h position.cc mangle.cc) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME jsonpointer) diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h index 4fff956a..7354ca70 100644 --- 
a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h @@ -21,6 +21,7 @@ #include // std::allocator #include // std::basic_ostream #include // std::basic_string +#include // std::string_view #include // std::is_same_v /// @defgroup jsonpointer JSON Pointer @@ -524,6 +525,63 @@ auto to_string(const WeakPointer &pointer) -> std::basic_string>; +/// @ingroup jsonpointer +/// +/// Stringify the input JSON Pointer template into a C++ standard string. For +/// example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// sourcemeta::core::PointerTemplate pointer; +/// pointer.emplace_back(sourcemeta::core::Pointer::Token{"foo"}); +/// pointer.emplace_back(sourcemeta::core::PointerTemplate::Wildcard::Property); +/// const std::string result{sourcemeta::core::to_string(pointer)}; +/// std::cout << result << '\n'; +/// ``` +SOURCEMETA_CORE_JSONPOINTER_EXPORT +auto to_string(const PointerTemplate &pointer) + -> std::basic_string>; + +/// @ingroup jsonpointer +/// +/// Mangle a JSON Pointer template and prefix into a collision-free identifier. +/// +/// The encoding rules for ASCII characters (0x00-0x7F) are: +/// +/// - Lowercase at segment start (except x, u, z): capitalize (no marker) +/// - Lowercase x, u, z at segment start: hex escape (reserved characters) +/// - Uppercase at segment start (except X, U, Z): U + letter +/// - Uppercase X, U, Z at segment start: hex escape (reserved characters) +/// - Non-segment-start lowercase: as-is +/// - Non-segment-start uppercase (except X, U): as-is +/// - Non-segment-start X: X58, Non-segment-start U: X55 +/// - ASCII digits (0-9): as-is +/// - Other ASCII (space, punctuation, control): hex escape, starts new segment +/// - Z/z reserved for special token prefixes +/// +/// For non-ASCII bytes (0x80-0xFF, e.g. 
UTF-8 sequences): +/// +/// - Always hex escaped +/// - Do NOT start a new segment (preserves UTF-8 multi-byte sequences) +/// +/// For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// const sourcemeta::core::PointerTemplate pointer{"foo", "bar"}; +/// const auto result{sourcemeta::core::mangle(pointer, "schema")}; +/// assert(result == "Schema_Foo_Bar"); +/// ``` +SOURCEMETA_CORE_JSONPOINTER_EXPORT +auto mangle(const PointerTemplate &pointer, std::string_view prefix) + -> std::string; + /// @ingroup jsonpointer /// /// Stringify the input JSON Pointer into a properly escaped URI fragment. For @@ -659,6 +717,41 @@ struct hash +struct hash> { + auto operator()(const sourcemeta::core::GenericPointerTemplate + &pointer) const noexcept -> std::size_t { + const auto size{pointer.size()}; + if (size == 0) { + return size; + } + + auto hash_element = + [](const typename sourcemeta::core::GenericPointerTemplate< + PointerT>::value_type &element) -> std::size_t { + using Template = sourcemeta::core::GenericPointerTemplate; + const auto *token{std::get_if(&element)}; + if (token) { + return token->is_property() + ? 
static_cast(token->property_hash().a) + : token->to_index(); + } else { + return element.index(); + } + }; + + const auto &first{*pointer.cbegin()}; + const auto &middle{ + *(pointer.cbegin() + + static_cast::difference_type>(size / 2))}; + const auto &last{*(pointer.cend() - 1)}; + + return size + hash_element(first) + hash_element(middle) + + hash_element(last); + } +}; } // namespace std #endif diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_template.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_template.h index b0d70fa6..0fdfcc7f 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_template.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_template.h @@ -9,6 +9,8 @@ #include // std::initializer_list #include // std::back_inserter #include // std::optional, std::nullopt +#include // std::is_convertible_v, std::is_null_pointer_v +#include // std::forward #include // std::variant, std::holds_alternative, std::get #include // std::vector @@ -56,6 +58,27 @@ template class GenericPointerTemplate { std::initializer_list tokens) : data{std::move(tokens)} {} + /// This constructor creates a JSON Pointer template from properties or + /// indexes. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const sourcemeta::core::PointerTemplate pointer_1{"foo", "bar", "baz"}; + /// assert(pointer_1.size() == 3); + /// const sourcemeta::core::PointerTemplate pointer_2{"foo", 1, "bar"}; + /// assert(pointer_2.size() == 3); + /// ``` + template + requires(sizeof...(Args) > 0 && + ((!std::is_null_pointer_v> && + (std::is_convertible_v || + std::is_integral_v>)) && + ...)) + GenericPointerTemplate(Args &&...args) + : data{Token{std::forward(args)}...} {} + /// This constructor creates a JSON Pointer template from an existing JSON /// Pointer. 
For example: /// @@ -207,6 +230,20 @@ template class GenericPointerTemplate { return this->data.empty(); } + /// Get the size of the JSON Pointer template. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const sourcemeta::core::Pointer base{"foo", "bar"}; + /// const sourcemeta::core::PointerTemplate pointer{base}; + /// assert(pointer.size() == 2); + /// ``` + [[nodiscard]] auto size() const noexcept -> size_type { + return this->data.size(); + } + /// Check if a JSON Pointer template only consists in normal non-templated /// tokens. For example: /// diff --git a/vendor/core/src/core/jsonpointer/jsonpointer.cc b/vendor/core/src/core/jsonpointer/jsonpointer.cc index d480c35e..330afee4 100644 --- a/vendor/core/src/core/jsonpointer/jsonpointer.cc +++ b/vendor/core/src/core/jsonpointer/jsonpointer.cc @@ -373,6 +373,16 @@ auto to_string(const WeakPointer &pointer) return result.str(); } +auto to_string(const PointerTemplate &pointer) + -> std::basic_string> { + std::basic_ostringstream> + result; + stringify(pointer, result); + return result.str(); +} + auto to_uri(const Pointer &pointer) -> URI { std::basic_ostringstream> diff --git a/vendor/core/src/core/jsonpointer/mangle.cc b/vendor/core/src/core/jsonpointer/mangle.cc new file mode 100644 index 00000000..eaab7be9 --- /dev/null +++ b/vendor/core/src/core/jsonpointer/mangle.cc @@ -0,0 +1,222 @@ +#include + +#include // assert +#include // std::setfill, std::setw +#include // std::ostringstream +#include // std::string_view +#include // std::visit + +namespace { + +// Special characters +constexpr auto ESCAPE_PREFIX = 'X'; +constexpr auto UPPERCASE_PREFIX = 'U'; +constexpr auto SEPARATOR = '_'; +constexpr auto HYPHEN = '-'; + +// Reserved characters that need escaping +constexpr auto RESERVED_X_UPPER = 'X'; +constexpr auto RESERVED_X_LOWER = 'x'; +constexpr auto RESERVED_U_UPPER = 'U'; +constexpr auto RESERVED_U_LOWER = 'u'; +constexpr auto RESERVED_Z_UPPER = 'Z'; +constexpr auto 
RESERVED_Z_LOWER = 'z'; + +// Special token markers +constexpr std::string_view TOKEN_EMPTY = "ZEmpty"; +constexpr std::string_view TOKEN_WILDCARD_PROPERTY = "ZAnyProperty"; +constexpr std::string_view TOKEN_WILDCARD_ITEM = "ZAnyItem"; +constexpr std::string_view TOKEN_WILDCARD_KEY = "ZAnyKey"; +constexpr std::string_view TOKEN_CONDITION = "ZMaybe"; +constexpr std::string_view TOKEN_NEGATION = "ZNot"; +constexpr std::string_view TOKEN_REGEX = "ZRegex"; + +constexpr auto ASCII_MAX = static_cast(0x80); + +// Locale-independent ASCII character classification +inline auto is_ascii_alpha(unsigned char character) noexcept -> bool { + return (character >= 'A' && character <= 'Z') || + (character >= 'a' && character <= 'z'); +} + +inline auto is_ascii_digit(unsigned char character) noexcept -> bool { + return character >= '0' && character <= '9'; +} + +inline auto is_ascii_lower(unsigned char character) noexcept -> bool { + return character >= 'a' && character <= 'z'; +} + +inline auto to_ascii_upper(unsigned char character) noexcept -> char { + if (character >= 'a' && character <= 'z') { + return static_cast(character - 'a' + 'A'); + } + return static_cast(character); +} + +inline auto hex_escape(std::ostringstream &output, char character) noexcept + -> void { + output << ESCAPE_PREFIX << std::uppercase << std::hex << std::setfill('0') + << std::setw(2) + << static_cast(static_cast(character)); +} + +inline auto is_reserved_at_start(char character) noexcept -> bool { + switch (character) { + case RESERVED_X_UPPER: + case RESERVED_X_LOWER: + case RESERVED_U_UPPER: + case RESERVED_U_LOWER: + case RESERVED_Z_UPPER: + case RESERVED_Z_LOWER: + return true; + default: + return false; + } +} + +inline auto encode_prefix(std::ostringstream &output, + std::string_view input) noexcept -> void { + bool capitalize_next{true}; + bool first{true}; + + for (const auto character : input) { + const auto unsigned_character{static_cast(character)}; + + if 
(is_ascii_alpha(unsigned_character)) { + if (capitalize_next && is_ascii_lower(unsigned_character)) { + output << to_ascii_upper(unsigned_character); + } else { + output << character; + } + capitalize_next = false; + } else if (is_ascii_digit(unsigned_character)) { + if (first) { + output << SEPARATOR; + } + output << character; + capitalize_next = false; + } else if (character == SEPARATOR || character == HYPHEN) { + capitalize_next = true; + } else { + hex_escape(output, character); + capitalize_next = true; + } + + first = false; + } +} + +inline auto encode_string(std::ostringstream &output, + const std::string &input) noexcept -> void { + bool segment_start{true}; + + for (const auto character : input) { + const auto unsigned_character{static_cast(character)}; + + if (is_ascii_alpha(unsigned_character)) { + const bool is_lower{is_ascii_lower(unsigned_character)}; + if (segment_start) { + if (is_reserved_at_start(character)) { + hex_escape(output, character); + } else if (is_lower) { + output << to_ascii_upper(unsigned_character); + } else { + output << UPPERCASE_PREFIX << character; + } + } else if (character == RESERVED_X_UPPER || + character == RESERVED_U_UPPER) { + hex_escape(output, character); + } else { + output << character; + } + segment_start = false; + } else if (is_ascii_digit(unsigned_character)) { + output << character; + segment_start = false; + } else { + hex_escape(output, character); + // Only ASCII non-alphanumeric starts a new segment + // Non-ASCII bytes (>= 0x80) do not start new segments (UTF-8 handling) + segment_start = (unsigned_character < ASCII_MAX); + } + } +} + +inline auto encode_string_or_empty(std::ostringstream &output, + const std::string &input) noexcept -> void { + if (input.empty()) { + output << TOKEN_EMPTY; + } else { + encode_string(output, input); + } +} + +class TokenVisitor { +public: + explicit TokenVisitor(std::ostringstream &output) noexcept + : output_{output} {} + + auto operator()(const 
sourcemeta::core::Pointer::Token &token) const noexcept + -> void { + this->output_ << SEPARATOR; + encode_string_or_empty(this->output_, token.to_property()); + } + + auto operator()(const sourcemeta::core::PointerTemplate::Wildcard &wildcard) + const noexcept -> void { + this->output_ << SEPARATOR; + switch (wildcard) { + case sourcemeta::core::PointerTemplate::Wildcard::Property: + this->output_ << TOKEN_WILDCARD_PROPERTY; + break; + case sourcemeta::core::PointerTemplate::Wildcard::Item: + this->output_ << TOKEN_WILDCARD_ITEM; + break; + case sourcemeta::core::PointerTemplate::Wildcard::Key: + this->output_ << TOKEN_WILDCARD_KEY; + break; + } + } + + auto operator()(const sourcemeta::core::PointerTemplate::Condition &condition) + const noexcept -> void { + this->output_ << SEPARATOR << TOKEN_CONDITION; + if (condition.suffix.has_value()) { + encode_string_or_empty(this->output_, condition.suffix.value()); + } + } + + auto + operator()(const sourcemeta::core::PointerTemplate::Negation &) const noexcept + -> void { + this->output_ << SEPARATOR << TOKEN_NEGATION; + } + + auto operator()(const sourcemeta::core::PointerTemplate::Regex ®ex) + const noexcept -> void { + this->output_ << SEPARATOR << TOKEN_REGEX; + encode_string_or_empty(this->output_, regex); + } + +private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + std::ostringstream &output_; +}; + +} // namespace + +namespace sourcemeta::core { + +auto mangle(const PointerTemplate &pointer, const std::string_view prefix) + -> std::string { + assert(!prefix.empty()); + std::ostringstream output; + encode_prefix(output, prefix); + for (const auto &token : pointer) { + std::visit(TokenVisitor{output}, token); + } + return output.str(); +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonschema/frame.cc b/vendor/core/src/core/jsonschema/frame.cc index 180aaf01..9f9bf66c 100644 --- a/vendor/core/src/core/jsonschema/frame.cc +++ 
b/vendor/core/src/core/jsonschema/frame.cc @@ -5,9 +5,9 @@ #include // std::less #include // std::map #include // std::optional -#include // std::set #include // std::ostringstream #include // std::unordered_map +#include // std::unordered_set #include // std::pair, std::move #include // std::vector @@ -268,7 +268,9 @@ auto traverse_origin_instance_locations( const sourcemeta::core::SchemaFrame::Instances &instances, const sourcemeta::core::Pointer ¤t, const std::optional &accumulator, - sourcemeta::core::SchemaFrame::Instances::mapped_type &destination) + sourcemeta::core::SchemaFrame::Instances::mapped_type &destination, + std::unordered_set< + const sourcemeta::core::SchemaFrame::References::value_type *> &visited) -> void { if (accumulator.has_value() && std::ranges::find(destination, accumulator.value()) == @@ -277,18 +279,25 @@ auto traverse_origin_instance_locations( } for (const auto &reference : frame.references_to(current)) { - const auto subschema_pointer{reference.get().first.second.initial()}; - // Avoid recursing to itself, in the case of circular subschemas - if (subschema_pointer == current) { + if (visited.contains(&reference.get())) { continue; } + visited.insert(&reference.get()); + + const auto subschema_pointer{reference.get().first.second.initial()}; const auto match{instances.find(subschema_pointer)}; if (match != instances.cend()) { for (const auto &instance_location : match->second) { traverse_origin_instance_locations(frame, instances, subschema_pointer, - instance_location, destination); + instance_location, destination, + visited); } + } else { + // Even if the parent doesn't have instance locations yet, + // recurse to find the origin of the reference chain + traverse_origin_instance_locations(frame, instances, subschema_pointer, + std::nullopt, destination, visited); } } } @@ -302,16 +311,32 @@ struct CacheSubschema { std::optional parent{}; }; +auto is_definition_entry(const sourcemeta::core::Pointer &pointer) -> bool { + if 
(pointer.size() < 2) { + return false; + } + + const auto &container{pointer.at(pointer.size() - 2)}; + return container.is_property() && (container.to_property() == "$defs" || + container.to_property() == "definitions"); +} + auto repopulate_instance_locations( const sourcemeta::core::SchemaFrame &frame, const sourcemeta::core::SchemaFrame::Instances &instances, const std::unordered_map &cache, - const sourcemeta::core::Pointer &, const CacheSubschema &cache_entry, + const sourcemeta::core::Pointer &pointer, const CacheSubschema &cache_entry, sourcemeta::core::SchemaFrame::Instances::mapped_type &destination, const std::optional &accumulator) -> void { - // Check parent first as even orphan schemas can inherit instance locations - // from their parents if the parent is in the evaluation flow + // Definition entries should not inherit instance locations from their parent + // container. They only get instance locations if something references them. + // However, children of definitions should still inherit from their definition + // parent + if (cache_entry.orphan && is_definition_entry(pointer)) { + return; + } + if (cache_entry.parent.has_value() && // Don't consider bases from the root subschema, as if that // subschema has any instance location other than "", then it @@ -343,9 +368,6 @@ auto repopulate_instance_locations( frame, instances, cache, cache_entry.parent.value(), cache.at(cache_entry.parent.value()), destination, new_accumulator); } - } else if (cache_entry.orphan && cache_entry.instance_location.empty()) { - // Only return early for orphan schemas if they don't have a parent - return; } } @@ -1035,18 +1057,21 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, } if (this->mode_ == sourcemeta::core::SchemaFrame::Mode::Instances) { - // Calculate alternative unresolved instance locations + // First pass: trace through references to find instance locations. 
+ // This handles definitions that are referenced for (auto &entry : this->locations_) { if (entry.second.type == SchemaFrame::LocationType::Pointer) { continue; } + std::unordered_set visited; traverse_origin_instance_locations( *this, this->instances_, entry.second.pointer, std::nullopt, - this->instances_[entry.second.pointer]); + this->instances_[entry.second.pointer], visited); } - // This is guaranteed to be top-down + // Second pass: inherit instance locations from parents (top-down). + // This handles applicator children inheriting from their parent schema for (auto &entry : this->locations_) { if (entry.second.type == SchemaFrame::LocationType::Pointer) { continue; @@ -1058,6 +1083,20 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, this->instances_[entry.second.pointer], std::nullopt); } + + // Third pass: trace references again. Now that inheritance has run, + // schemas from definitions can trace to applicator children that now have + // instance locations from inheritance + for (auto &entry : this->locations_) { + if (entry.second.type == SchemaFrame::LocationType::Pointer) { + continue; + } + + std::unordered_set visited; + traverse_origin_instance_locations( + *this, this->instances_, entry.second.pointer, std::nullopt, + this->instances_[entry.second.pointer], visited); + } } } diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h index eb1d3a89..4bd7284b 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h @@ -435,27 +435,23 @@ auto wrap(const JSON &schema, const Pointer &pointer, /// @ingroup jsonschema /// -/// Parse a JSON Schema `type` string into one or more native JSON type -/// definition. 
For example: +/// Parse the value of a JSON Schema `type` keyword (which can be a string or +/// an array of strings) into a set of native JSON types. For example: /// /// ```cpp /// #include /// #include /// #include -/// #include /// -/// std::set types; -/// sourcemeta::core::parse_schema_type("number", -/// [&types](const auto type) { types.emplace(type); }); -/// -/// assert(types.size() == 2); -/// assert(types.contains(sourcemeta::core::JSON::Type::Integer)); -/// assert(types.contains(sourcemeta::core::JSON::Type::Real)); +/// const auto type{sourcemeta::core::parse_json(R"JSON([ "string", "null" +/// ])JSON")}; const auto types{sourcemeta::core::parse_schema_type(type)}; +/// assert(types.test( +/// static_cast(sourcemeta::core::JSON::Type::String))); +/// assert(types.test( +/// static_cast(sourcemeta::core::JSON::Type::Null))); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto parse_schema_type(const JSON::String &type, - const std::function &callback) - -> void; +auto parse_schema_type(const JSON &type) -> JSON::TypeSet; } // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h index 2ce3eac8..eb609061 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h @@ -13,7 +13,6 @@ #include // std::uint8_t #include // std::function #include // std::make_move_iterator, std::begin, std::end -#include // std::map #include // std::make_unique, std::unique_ptr #include // std::optional, std::nullopt #include // std::set @@ -230,13 +229,11 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformer { auto operator=(SchemaTransformer &&) -> SchemaTransformer & = default; #endif - /// Add a rule to the bundle + /// Add a rule to the bundle. Rules are evaluated in the order they are added. 
+ /// It is the caller's responsibility to not add duplicate rules. template T, typename... Args> auto add(Args &&...args) -> void { - auto rule{std::make_unique(std::forward(args)...)}; - // Rules must only be defined once - assert(!this->rules.contains(rule->name())); - this->rules.emplace(rule->name(), std::move(rule)); + this->rules.push_back(std::make_unique(std::forward(args)...)); } /// Remove a rule from the bundle @@ -254,10 +251,11 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformer { const SchemaTransformRule::Result &)>; /// Apply the bundle of rules to a schema - auto apply(JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, const Callback &callback, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt) const + [[nodiscard]] auto + apply(JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, const Callback &callback, + const std::optional &default_dialect = std::nullopt, + const std::optional &default_id = std::nullopt) const -> std::pair; /// Report back the rules from the bundle that need to be applied to a schema @@ -278,7 +276,7 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformer { #if defined(_MSC_VER) #pragma warning(disable : 4251) #endif - std::map> rules; + std::vector> rules; #if defined(_MSC_VER) #pragma warning(default : 4251) #endif diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h index b0c269f3..8f2845bc 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h @@ -161,6 +161,9 @@ struct SchemaWalkerResult { /// The keywords a given keyword depends on (if any) during the evaluation /// process std::unordered_set dependencies; + /// The keywords a given keyword depends on for evaluation ordering purposes 
+ /// only (not semantic dependencies) + std::unordered_set order_dependencies; /// The JSON instance types that this keyword applies to (empty means all) JSON::TypeSet instances; @@ -175,9 +178,12 @@ struct SchemaWalkerResult { SchemaWalkerResult(SchemaKeywordType type_, std::optional vocabulary_, std::unordered_set dependencies_, + std::unordered_set order_dependencies_, JSON::TypeSet instances_) : type{type_}, vocabulary{std::move(vocabulary_)}, - dependencies{std::move(dependencies_)}, instances{instances_} {} + dependencies{std::move(dependencies_)}, + order_dependencies{std::move(order_dependencies_)}, + instances{instances_} {} }; /// @ingroup jsonschema diff --git a/vendor/core/src/core/jsonschema/jsonschema.cc b/vendor/core/src/core/jsonschema/jsonschema.cc index 6f2de2d7..b7be25a5 100644 --- a/vendor/core/src/core/jsonschema/jsonschema.cc +++ b/vendor/core/src/core/jsonschema/jsonschema.cc @@ -474,14 +474,23 @@ auto sourcemeta::core::schema_keyword_priority( const sourcemeta::core::Vocabularies &vocabularies, const sourcemeta::core::SchemaWalker &walker) -> std::uint64_t { const auto &result{walker(keyword, vocabularies)}; - return std::accumulate( + const auto priority_from_dependencies{std::accumulate( result.dependencies.cbegin(), result.dependencies.cend(), static_cast(0), [&vocabularies, &walker](const auto accumulator, const auto &dependency) { return std::max( accumulator, schema_keyword_priority(dependency, vocabularies, walker) + 1); - }); + })}; + const auto priority_from_order_dependencies{std::accumulate( + result.order_dependencies.cbegin(), result.order_dependencies.cend(), + static_cast(0), + [&vocabularies, &walker](const auto accumulator, const auto &dependency) { + return std::max( + accumulator, + schema_keyword_priority(dependency, vocabularies, walker) + 1); + })}; + return std::max(priority_from_dependencies, priority_from_order_dependencies); } auto sourcemeta::core::wrap(const sourcemeta::core::JSON::String &identifier) @@ -559,24 
+568,39 @@ auto sourcemeta::core::wrap(const sourcemeta::core::JSON &schema, return result; } -auto sourcemeta::core::parse_schema_type( - const sourcemeta::core::JSON::String &type, - const std::function &callback) +static auto parse_schema_type_string(const sourcemeta::core::JSON::String &type, + sourcemeta::core::JSON::TypeSet &result) -> void { if (type == "null") { - callback(sourcemeta::core::JSON::Type::Null); + result.set(static_cast(sourcemeta::core::JSON::Type::Null)); } else if (type == "boolean") { - callback(sourcemeta::core::JSON::Type::Boolean); + result.set(static_cast(sourcemeta::core::JSON::Type::Boolean)); } else if (type == "object") { - callback(sourcemeta::core::JSON::Type::Object); + result.set(static_cast(sourcemeta::core::JSON::Type::Object)); } else if (type == "array") { - callback(sourcemeta::core::JSON::Type::Array); + result.set(static_cast(sourcemeta::core::JSON::Type::Array)); } else if (type == "number") { - callback(sourcemeta::core::JSON::Type::Integer); - callback(sourcemeta::core::JSON::Type::Real); + result.set(static_cast(sourcemeta::core::JSON::Type::Integer)); + result.set(static_cast(sourcemeta::core::JSON::Type::Real)); } else if (type == "integer") { - callback(sourcemeta::core::JSON::Type::Integer); + result.set(static_cast(sourcemeta::core::JSON::Type::Integer)); } else if (type == "string") { - callback(sourcemeta::core::JSON::Type::String); + result.set(static_cast(sourcemeta::core::JSON::Type::String)); } } + +auto sourcemeta::core::parse_schema_type(const sourcemeta::core::JSON &type) + -> sourcemeta::core::JSON::TypeSet { + sourcemeta::core::JSON::TypeSet result; + if (type.is_string()) { + parse_schema_type_string(type.to_string(), result); + } else if (type.is_array()) { + for (const auto &item : type.as_array()) { + if (item.is_string()) { + parse_schema_type_string(item.to_string(), result); + } + } + } + + return result; +} diff --git a/vendor/core/src/core/jsonschema/known_walker.cc 
b/vendor/core/src/core/jsonschema/known_walker.cc index 56e199f6..88e19770 100644 --- a/vendor/core/src/core/jsonschema/known_walker.cc +++ b/vendor/core/src/core/jsonschema/known_walker.cc @@ -11,10 +11,10 @@ using KeywordHandler = const SchemaWalkerResult &(*)(const Vocabularies &vocabularies); static const SchemaWalkerResult UNKNOWN_RESULT{ - SchemaKeywordType::Unknown, std::nullopt, {}, {}}; + SchemaKeywordType::Unknown, std::nullopt, {}, {}, {}}; static const SchemaWalkerResult UNKNOWN_WITH_REF_RESULT{ - SchemaKeywordType::Unknown, std::nullopt, {"$ref"}, {}}; + SchemaKeywordType::Unknown, std::nullopt, {"$ref"}, {}, {}}; auto has_draft3_to_7(const Vocabularies &vocabularies) -> bool { return vocabularies.contains(Known::JSON_Schema_Draft_7) || @@ -30,14 +30,21 @@ auto has_draft3_to_7(const Vocabularies &vocabularies) -> bool { #define RETURN_WITH_DEPENDENCIES(_vocabulary, _types, _strategy, ...) \ { \ static const SchemaWalkerResult result{ \ - SchemaKeywordType::_strategy, _vocabulary, {__VA_ARGS__}, _types}; \ + SchemaKeywordType::_strategy, _vocabulary, {__VA_ARGS__}, {}, _types}; \ + return result; \ + } + +#define RETURN_WITH_ORDER_DEPENDENCIES(_vocabulary, _types, _strategy, ...) \ + { \ + static const SchemaWalkerResult result{ \ + SchemaKeywordType::_strategy, _vocabulary, {}, {__VA_ARGS__}, _types}; \ return result; \ } #define RETURN(_vocabulary, _types, _strategy) \ { \ static const SchemaWalkerResult result{ \ - SchemaKeywordType::_strategy, _vocabulary, {}, _types}; \ + SchemaKeywordType::_strategy, _vocabulary, {}, {}, _types}; \ return result; \ } @@ -47,6 +54,13 @@ auto has_draft3_to_7(const Vocabularies &vocabularies) -> bool { RETURN_WITH_DEPENDENCIES(_vocabulary, _types, _strategy, __VA_ARGS__) \ } +#define CHECK_VOCABULARY_WITH_ORDER_DEPENDENCIES(_vocabulary, _types, \ + _strategy, ...) 
\ + if (vocabularies.contains(_vocabulary)) { \ + RETURN_WITH_ORDER_DEPENDENCIES(_vocabulary, _types, _strategy, \ + __VA_ARGS__) \ + } + #define CHECK_VOCABULARY(_vocabulary, _types, _strategy) \ if (vocabularies.contains(_vocabulary)) { \ RETURN(_vocabulary, _types, _strategy) \ @@ -340,7 +354,7 @@ auto handle_properties(const Vocabularies &vocabularies) -> const SchemaWalkerResult & { if (vocabularies.contains(Known::JSON_Schema_2020_12_Applicator)) { if (vocabularies.contains(Known::JSON_Schema_2020_12_Validation)) { - RETURN_WITH_DEPENDENCIES( + RETURN_WITH_ORDER_DEPENDENCIES( Known::JSON_Schema_2020_12_Applicator, make_set({JSON::Type::Object}), ApplicatorMembersTraversePropertyStatic, "required") } @@ -350,7 +364,7 @@ auto handle_properties(const Vocabularies &vocabularies) } if (vocabularies.contains(Known::JSON_Schema_2019_09_Applicator)) { if (vocabularies.contains(Known::JSON_Schema_2019_09_Validation)) { - RETURN_WITH_DEPENDENCIES( + RETURN_WITH_ORDER_DEPENDENCIES( Known::JSON_Schema_2019_09_Applicator, make_set({JSON::Type::Object}), ApplicatorMembersTraversePropertyStatic, "required") } @@ -358,24 +372,60 @@ auto handle_properties(const Vocabularies &vocabularies) make_set({JSON::Type::Object}), ApplicatorMembersTraversePropertyStatic) } - CHECK_VOCABULARY_WITH_DEPENDENCIES( - Known::JSON_Schema_Draft_7, make_set({JSON::Type::Object}), - ApplicatorMembersTraversePropertyStatic, "$ref", "required") - CHECK_VOCABULARY_WITH_DEPENDENCIES( - Known::JSON_Schema_Draft_7_Hyper, make_set({JSON::Type::Object}), - ApplicatorMembersTraversePropertyStatic, "$ref", "required") - CHECK_VOCABULARY_WITH_DEPENDENCIES( - Known::JSON_Schema_Draft_6, make_set({JSON::Type::Object}), - ApplicatorMembersTraversePropertyStatic, "$ref", "required") - CHECK_VOCABULARY_WITH_DEPENDENCIES( - Known::JSON_Schema_Draft_6_Hyper, make_set({JSON::Type::Object}), - ApplicatorMembersTraversePropertyStatic, "$ref", "required") - CHECK_VOCABULARY_WITH_DEPENDENCIES( - 
Known::JSON_Schema_Draft_4, make_set({JSON::Type::Object}), - ApplicatorMembersTraversePropertyStatic, "$ref", "required") - CHECK_VOCABULARY_WITH_DEPENDENCIES( - Known::JSON_Schema_Draft_4_Hyper, make_set({JSON::Type::Object}), - ApplicatorMembersTraversePropertyStatic, "$ref", "required") + if (vocabularies.contains(Known::JSON_Schema_Draft_7)) { + static const SchemaWalkerResult result{ + SchemaKeywordType::ApplicatorMembersTraversePropertyStatic, + Known::JSON_Schema_Draft_7, + {"$ref"}, + {"required"}, + make_set({JSON::Type::Object})}; + return result; + } + if (vocabularies.contains(Known::JSON_Schema_Draft_7_Hyper)) { + static const SchemaWalkerResult result{ + SchemaKeywordType::ApplicatorMembersTraversePropertyStatic, + Known::JSON_Schema_Draft_7_Hyper, + {"$ref"}, + {"required"}, + make_set({JSON::Type::Object})}; + return result; + } + if (vocabularies.contains(Known::JSON_Schema_Draft_6)) { + static const SchemaWalkerResult result{ + SchemaKeywordType::ApplicatorMembersTraversePropertyStatic, + Known::JSON_Schema_Draft_6, + {"$ref"}, + {"required"}, + make_set({JSON::Type::Object})}; + return result; + } + if (vocabularies.contains(Known::JSON_Schema_Draft_6_Hyper)) { + static const SchemaWalkerResult result{ + SchemaKeywordType::ApplicatorMembersTraversePropertyStatic, + Known::JSON_Schema_Draft_6_Hyper, + {"$ref"}, + {"required"}, + make_set({JSON::Type::Object})}; + return result; + } + if (vocabularies.contains(Known::JSON_Schema_Draft_4)) { + static const SchemaWalkerResult result{ + SchemaKeywordType::ApplicatorMembersTraversePropertyStatic, + Known::JSON_Schema_Draft_4, + {"$ref"}, + {"required"}, + make_set({JSON::Type::Object})}; + return result; + } + if (vocabularies.contains(Known::JSON_Schema_Draft_4_Hyper)) { + static const SchemaWalkerResult result{ + SchemaKeywordType::ApplicatorMembersTraversePropertyStatic, + Known::JSON_Schema_Draft_4_Hyper, + {"$ref"}, + {"required"}, + make_set({JSON::Type::Object})}; + return result; + } 
CHECK_VOCABULARY_WITH_DEPENDENCIES( Known::JSON_Schema_Draft_3, make_set({JSON::Type::Object}), ApplicatorMembersTraversePropertyStatic, "$ref") @@ -718,30 +768,30 @@ auto handle_type(const Vocabularies &vocabularies) -> const SchemaWalkerResult & { if (vocabularies.contains(Known::JSON_Schema_2020_12_Validation)) { if (vocabularies.contains(Known::JSON_Schema_2020_12_Applicator)) { - RETURN_WITH_DEPENDENCIES(Known::JSON_Schema_2020_12_Validation, {}, - Assertion, "properties") + RETURN_WITH_ORDER_DEPENDENCIES(Known::JSON_Schema_2020_12_Validation, {}, + Assertion, "properties") } RETURN(Known::JSON_Schema_2020_12_Validation, {}, Assertion) } if (vocabularies.contains(Known::JSON_Schema_2019_09_Validation)) { if (vocabularies.contains(Known::JSON_Schema_2019_09_Applicator)) { - RETURN_WITH_DEPENDENCIES(Known::JSON_Schema_2019_09_Validation, {}, - Assertion, "properties") + RETURN_WITH_ORDER_DEPENDENCIES(Known::JSON_Schema_2019_09_Validation, {}, + Assertion, "properties") } RETURN(Known::JSON_Schema_2019_09_Validation, {}, Assertion) } - CHECK_VOCABULARY_WITH_DEPENDENCIES(Known::JSON_Schema_Draft_7, {}, Assertion, - ("properties")) - CHECK_VOCABULARY_WITH_DEPENDENCIES(Known::JSON_Schema_Draft_7_Hyper, {}, - Assertion, "properties") - CHECK_VOCABULARY_WITH_DEPENDENCIES(Known::JSON_Schema_Draft_6, {}, Assertion, - ("properties")) - CHECK_VOCABULARY_WITH_DEPENDENCIES(Known::JSON_Schema_Draft_6_Hyper, {}, - Assertion, "properties") - CHECK_VOCABULARY_WITH_DEPENDENCIES(Known::JSON_Schema_Draft_4, {}, Assertion, - ("properties")) - CHECK_VOCABULARY_WITH_DEPENDENCIES(Known::JSON_Schema_Draft_4_Hyper, {}, - Assertion, "properties") + CHECK_VOCABULARY_WITH_ORDER_DEPENDENCIES(Known::JSON_Schema_Draft_7, {}, + Assertion, "properties") + CHECK_VOCABULARY_WITH_ORDER_DEPENDENCIES(Known::JSON_Schema_Draft_7_Hyper, {}, + Assertion, "properties") + CHECK_VOCABULARY_WITH_ORDER_DEPENDENCIES(Known::JSON_Schema_Draft_6, {}, + Assertion, "properties") + 
CHECK_VOCABULARY_WITH_ORDER_DEPENDENCIES(Known::JSON_Schema_Draft_6_Hyper, {}, + Assertion, "properties") + CHECK_VOCABULARY_WITH_ORDER_DEPENDENCIES(Known::JSON_Schema_Draft_4, {}, + Assertion, "properties") + CHECK_VOCABULARY_WITH_ORDER_DEPENDENCIES(Known::JSON_Schema_Draft_4_Hyper, {}, + Assertion, "properties") CHECK_VOCABULARY_WITH_DEPENDENCIES(Known::JSON_Schema_Draft_3, {}, ApplicatorElementsInPlaceSome, "$ref") CHECK_VOCABULARY_WITH_DEPENDENCIES(Known::JSON_Schema_Draft_3_Hyper, {}, @@ -834,10 +884,10 @@ auto handle_multipleOf(const Vocabularies &vocabularies) auto handle_maximum(const Vocabularies &vocabularies) -> const SchemaWalkerResult & { - CHECK_VOCABULARY_WITH_DEPENDENCIES( + CHECK_VOCABULARY_WITH_ORDER_DEPENDENCIES( Known::JSON_Schema_2020_12_Validation, make_set({JSON::Type::Integer, JSON::Type::Real}), Assertion, "type") - CHECK_VOCABULARY_WITH_DEPENDENCIES( + CHECK_VOCABULARY_WITH_ORDER_DEPENDENCIES( Known::JSON_Schema_2019_09_Validation, make_set({JSON::Type::Integer, JSON::Type::Real}), Assertion, "type") CHECK_VOCABULARY_WITH_DEPENDENCIES( @@ -881,10 +931,10 @@ auto handle_maximum(const Vocabularies &vocabularies) auto handle_minimum(const Vocabularies &vocabularies) -> const SchemaWalkerResult & { - CHECK_VOCABULARY_WITH_DEPENDENCIES( + CHECK_VOCABULARY_WITH_ORDER_DEPENDENCIES( Known::JSON_Schema_2020_12_Validation, make_set({JSON::Type::Integer, JSON::Type::Real}), Assertion, "type") - CHECK_VOCABULARY_WITH_DEPENDENCIES( + CHECK_VOCABULARY_WITH_ORDER_DEPENDENCIES( Known::JSON_Schema_2019_09_Validation, make_set({JSON::Type::Integer, JSON::Type::Real}), Assertion, "type") CHECK_VOCABULARY_WITH_DEPENDENCIES( diff --git a/vendor/core/src/core/jsonschema/transformer.cc b/vendor/core/src/core/jsonschema/transformer.cc index fc0ab1e6..32287b52 100644 --- a/vendor/core/src/core/jsonschema/transformer.cc +++ b/vendor/core/src/core/jsonschema/transformer.cc @@ -1,9 +1,11 @@ #include #include +#include // std::erase_if #include // assert #include // 
std::set #include // std::ostringstream +#include // std::tuple #include // std::unordered_set #include // std::move, std::pair @@ -105,7 +107,7 @@ auto SchemaTransformer::check( const std::optional &default_dialect, const std::optional &default_id) const -> std::pair { - SchemaFrame frame{SchemaFrame::Mode::Locations}; + SchemaFrame frame{SchemaFrame::Mode::Instances}; // If we use the default id when there is already one, framing will duplicate // the locations leading to duplicate check reports @@ -137,12 +139,12 @@ auto SchemaTransformer::check( const auto ¤t{get(schema, entry.second.pointer)}; const auto current_vocabularies{frame.vocabularies(entry.second, resolver)}; bool subresult{true}; - for (const auto &[name, rule] : this->rules) { + for (const auto &rule : this->rules) { const auto outcome{rule->check(current, schema, current_vocabularies, walker, resolver, frame, entry.second)}; if (outcome.applies) { subresult = false; - callback(entry.second.pointer, name, rule->message(), outcome); + callback(entry.second.pointer, rule->name(), rule->message(), outcome); } } @@ -164,13 +166,14 @@ auto SchemaTransformer::apply( -> std::pair { // There is no point in applying an empty bundle assert(!this->rules.empty()); - std::set> processed_rules; + std::set> + processed_rules; bool result{true}; std::size_t subschema_count{0}; std::size_t subschema_failures{0}; while (true) { - SchemaFrame frame{SchemaFrame::Mode::References}; + SchemaFrame frame{SchemaFrame::Mode::Instances}; frame.analyse(schema, walker, resolver, default_dialect, default_id); std::unordered_set visited; @@ -196,7 +199,7 @@ auto SchemaTransformer::apply( frame.vocabularies(entry.second, resolver)}; bool subschema_failed{false}; - for (const auto &[name, rule] : this->rules) { + for (const auto &rule : this->rules) { const auto subresult{rule->apply(current, schema, current_vocabularies, walker, resolver, frame, entry.second)}; @@ -206,7 +209,7 @@ auto SchemaTransformer::apply( } else { result = 
false; subschema_failed = true; - callback(entry.second.pointer, name, rule->message(), + callback(entry.second.pointer, rule->name(), rule->message(), subresult.second); } @@ -214,9 +217,14 @@ auto SchemaTransformer::apply( continue; } - std::pair mark{¤t, &name}; + std::tuple mark{ + ¤t, &rule->name(), + // Allow applying the same rule to the same location if the schema + // has changed, which means we are still "making progress". The + // hashing is not perfect, but its enough + current.fast_hash()}; if (processed_rules.contains(mark)) { - throw SchemaTransformRuleProcessedTwiceError(name, + throw SchemaTransformRuleProcessedTwiceError(rule->name(), entry.second.pointer); } @@ -238,6 +246,11 @@ auto SchemaTransformer::apply( continue; } + // If the source no longer exists, we don't need to fix the reference + if (!try_get(schema, reference.first.second.initial())) { + continue; + } + const auto new_fragment{rule->rereference( reference.second.destination, reference.first.second, target.relative_pointer, entry.second.relative_pointer)}; @@ -270,7 +283,9 @@ auto SchemaTransformer::apply( } auto SchemaTransformer::remove(const std::string &name) -> bool { - return this->rules.erase(name) > 0; + return std::erase_if(this->rules, [&name](const auto &rule) { + return rule->name() == name; + }) > 0; } } // namespace sourcemeta::core diff --git a/vendor/core/src/core/yaml/yaml.cc b/vendor/core/src/core/yaml/yaml.cc index b5b35f58..d8a1fcce 100644 --- a/vendor/core/src/core/yaml/yaml.cc +++ b/vendor/core/src/core/yaml/yaml.cc @@ -515,9 +515,18 @@ auto read_yaml(const std::filesystem::path &path, auto read_yaml_or_json(const std::filesystem::path &path, const JSON::ParseCallback &callback) -> JSON { - return path.extension() == ".yaml" || path.extension() == ".yml" - ? 
read_yaml(path, callback) - : read_json(path, callback); + const auto extension{path.extension()}; + if (extension == ".yaml" || extension == ".yml") { + return read_yaml(path, callback); + } else if (extension == ".json") { + return read_json(path, callback); + } + + try { + return read_json(path, callback); + } catch (const JSONParseError &) { + return read_yaml(path, callback); + } } } // namespace sourcemeta::core diff --git a/vendor/core/src/extension/alterschema/CMakeLists.txt b/vendor/core/src/extension/alterschema/CMakeLists.txt index b93ab209..11ca6a60 100644 --- a/vendor/core/src/extension/alterschema/CMakeLists.txt +++ b/vendor/core/src/extension/alterschema/CMakeLists.txt @@ -1,6 +1,10 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME alterschema SOURCES alterschema.cc # Canonicalizer + canonicalizer/boolean_true.h + canonicalizer/const_as_enum.h + canonicalizer/exclusive_maximum_integer_to_maximum.h + canonicalizer/exclusive_minimum_integer_to_minimum.h canonicalizer/max_contains_covered_by_max_items.h canonicalizer/min_items_given_min_contains.h canonicalizer/min_items_implicit.h @@ -8,79 +12,84 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME alterschema canonicalizer/min_properties_covered_by_required.h canonicalizer/min_properties_implicit.h canonicalizer/multiple_of_implicit.h + canonicalizer/no_metadata.h canonicalizer/properties_implicit.h - canonicalizer/type_union_implicit.h - canonicalizer/boolean_true.h - canonicalizer/const_as_enum.h - canonicalizer/exclusive_maximum_integer_to_maximum.h - canonicalizer/exclusive_minimum_integer_to_minimum.h - canonicalizer/type_array_to_any_of_2020_12.h + canonicalizer/type_array_to_any_of.h canonicalizer/type_boolean_as_enum.h canonicalizer/type_null_as_enum.h + canonicalizer/type_union_implicit.h + + # Common + common/const_with_type.h + common/orphan_definitions.h + common/content_media_type_without_encoding.h + common/content_schema_without_media_type.h + 
common/dependencies_property_tautology.h + common/dependent_required_tautology.h + common/draft_official_dialect_without_empty_fragment.h + common/draft_ref_siblings.h + common/drop_allof_empty_schemas.h + common/duplicate_allof_branches.h + common/duplicate_anyof_branches.h + common/duplicate_enum_values.h + common/duplicate_required_values.h + common/else_empty.h + common/else_without_if.h + common/enum_with_type.h + common/equal_numeric_bounds_to_enum.h + common/exclusive_maximum_number_and_maximum.h + common/exclusive_minimum_number_and_minimum.h + common/if_without_then_else.h + common/ignored_metaschema.h + common/max_contains_without_contains.h + common/maximum_real_for_integer.h + common/min_contains_without_contains.h + common/minimum_real_for_integer.h + common/modern_official_dialect_with_empty_fragment.h + common/non_applicable_additional_items.h + common/non_applicable_enum_validation_keywords.h + common/non_applicable_type_specific_keywords.h + common/not_false.h + common/required_properties_in_properties.h + common/single_type_array.h + common/then_empty.h + common/then_without_if.h + common/unknown_keywords_prefix.h + common/unknown_local_ref.h + common/unnecessary_allof_ref_wrapper_draft.h + common/unnecessary_allof_ref_wrapper_modern.h + common/unnecessary_allof_wrapper.h # Linter - linter/const_with_type.h - linter/duplicate_enum_values.h - linter/duplicate_required_values.h - linter/exclusive_maximum_number_and_maximum.h - linter/exclusive_minimum_number_and_minimum.h - linter/enum_with_type.h - linter/dependencies_property_tautology.h - linter/dependent_required_tautology.h - linter/equal_numeric_bounds_to_enum.h - linter/maximum_real_for_integer.h - linter/draft_official_dialect_without_empty_fragment.h - linter/minimum_real_for_integer.h - linter/required_properties_in_properties.h - linter/single_type_array.h - linter/enum_to_const.h linter/additional_properties_default.h + linter/comment_trim.h linter/content_schema_default.h + 
linter/definitions_to_defs.h linter/dependencies_default.h linter/dependent_required_default.h + linter/description_trailing_period.h + linter/description_trim.h + linter/duplicate_examples.h + linter/enum_to_const.h + linter/equal_numeric_bounds_to_const.h linter/items_array_default.h linter/items_schema_default.h linter/multiple_of_default.h - linter/non_applicable_additional_items.h linter/pattern_properties_default.h linter/properties_default.h - linter/unevaluated_items_default.h - linter/unevaluated_properties_default.h - linter/unsatisfiable_max_contains.h - linter/unsatisfiable_min_properties.h - linter/content_media_type_without_encoding.h - linter/content_schema_without_media_type.h - linter/non_applicable_type_specific_keywords.h - linter/unnecessary_allof_ref_wrapper_modern.h - linter/unnecessary_allof_ref_wrapper_draft.h - linter/duplicate_allof_branches.h - linter/duplicate_anyof_branches.h - linter/else_without_if.h - linter/if_without_then_else.h - linter/ignored_metaschema.h - linter/max_contains_without_contains.h - linter/min_contains_without_contains.h - linter/modern_official_dialect_with_empty_fragment.h - linter/then_empty.h - linter/else_empty.h - linter/then_without_if.h - linter/top_level_title.h - linter/top_level_description.h + linter/property_names_default.h + linter/property_names_type_default.h + linter/simple_properties_identifiers.h linter/title_description_equal.h linter/title_trailing_period.h - linter/description_trailing_period.h linter/title_trim.h - linter/description_trim.h - linter/comment_trim.h + linter/top_level_description.h linter/top_level_examples.h - linter/duplicate_examples.h - linter/property_names_type_default.h - linter/property_names_default.h - linter/draft_ref_siblings.h - linter/definitions_to_defs.h - linter/unknown_keywords_prefix.h - linter/unknown_local_ref.h - linter/non_applicable_enum_validation_keywords.h) + linter/top_level_title.h + linter/unevaluated_items_default.h + 
linter/unevaluated_properties_default.h + linter/unsatisfiable_max_contains.h + linter/unsatisfiable_min_properties.h) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME alterschema) @@ -88,3 +97,5 @@ endif() target_link_libraries(sourcemeta_core_alterschema PUBLIC sourcemeta::core::jsonschema) +target_link_libraries(sourcemeta_core_alterschema PRIVATE + sourcemeta::core::regex) diff --git a/vendor/core/src/extension/alterschema/alterschema.cc b/vendor/core/src/extension/alterschema/alterschema.cc index ba7b71c9..4e87f439 100644 --- a/vendor/core/src/extension/alterschema/alterschema.cc +++ b/vendor/core/src/extension/alterschema/alterschema.cc @@ -1,9 +1,11 @@ #include +#include // For built-in rules #include // std::sort, std::unique #include // std::floor #include // std::back_inserter +#include // std::unordered_map #include // std::unordered_set #include // std::move namespace sourcemeta::core { @@ -38,63 +40,74 @@ inline auto APPLIES_TO_POINTERS(std::vector &&keywords) #include "canonicalizer/min_properties_covered_by_required.h" #include "canonicalizer/min_properties_implicit.h" #include "canonicalizer/multiple_of_implicit.h" +#include "canonicalizer/no_metadata.h" #include "canonicalizer/properties_implicit.h" -#include "canonicalizer/type_array_to_any_of_2020_12.h" +#include "canonicalizer/type_array_to_any_of.h" #include "canonicalizer/type_boolean_as_enum.h" #include "canonicalizer/type_null_as_enum.h" #include "canonicalizer/type_union_implicit.h" +// Common +#include "common/const_with_type.h" +#include "common/content_media_type_without_encoding.h" +#include "common/content_schema_without_media_type.h" +#include "common/dependencies_property_tautology.h" +#include "common/dependent_required_tautology.h" +#include "common/draft_official_dialect_without_empty_fragment.h" +#include "common/draft_ref_siblings.h" +#include "common/drop_allof_empty_schemas.h" +#include "common/duplicate_allof_branches.h" +#include 
"common/duplicate_anyof_branches.h" +#include "common/duplicate_enum_values.h" +#include "common/duplicate_required_values.h" +#include "common/else_empty.h" +#include "common/else_without_if.h" +#include "common/enum_with_type.h" +#include "common/equal_numeric_bounds_to_enum.h" +#include "common/exclusive_maximum_number_and_maximum.h" +#include "common/exclusive_minimum_number_and_minimum.h" +#include "common/if_without_then_else.h" +#include "common/ignored_metaschema.h" +#include "common/max_contains_without_contains.h" +#include "common/maximum_real_for_integer.h" +#include "common/min_contains_without_contains.h" +#include "common/minimum_real_for_integer.h" +#include "common/modern_official_dialect_with_empty_fragment.h" +#include "common/non_applicable_additional_items.h" +#include "common/non_applicable_enum_validation_keywords.h" +#include "common/non_applicable_type_specific_keywords.h" +#include "common/not_false.h" +#include "common/orphan_definitions.h" +#include "common/required_properties_in_properties.h" +#include "common/single_type_array.h" +#include "common/then_empty.h" +#include "common/then_without_if.h" +#include "common/unknown_keywords_prefix.h" +#include "common/unknown_local_ref.h" +#include "common/unnecessary_allof_ref_wrapper_draft.h" +#include "common/unnecessary_allof_ref_wrapper_modern.h" +#include "common/unnecessary_allof_wrapper.h" + // Linter #include "linter/additional_properties_default.h" #include "linter/comment_trim.h" -#include "linter/const_with_type.h" -#include "linter/content_media_type_without_encoding.h" #include "linter/content_schema_default.h" -#include "linter/content_schema_without_media_type.h" #include "linter/definitions_to_defs.h" #include "linter/dependencies_default.h" -#include "linter/dependencies_property_tautology.h" #include "linter/dependent_required_default.h" -#include "linter/dependent_required_tautology.h" #include "linter/description_trailing_period.h" #include "linter/description_trim.h" 
-#include "linter/draft_official_dialect_without_empty_fragment.h" -#include "linter/draft_ref_siblings.h" -#include "linter/duplicate_allof_branches.h" -#include "linter/duplicate_anyof_branches.h" -#include "linter/duplicate_enum_values.h" #include "linter/duplicate_examples.h" -#include "linter/duplicate_required_values.h" -#include "linter/else_empty.h" -#include "linter/else_without_if.h" #include "linter/enum_to_const.h" -#include "linter/enum_with_type.h" #include "linter/equal_numeric_bounds_to_const.h" -#include "linter/equal_numeric_bounds_to_enum.h" -#include "linter/exclusive_maximum_number_and_maximum.h" -#include "linter/exclusive_minimum_number_and_minimum.h" -#include "linter/if_without_then_else.h" -#include "linter/ignored_metaschema.h" #include "linter/items_array_default.h" #include "linter/items_schema_default.h" -#include "linter/max_contains_without_contains.h" -#include "linter/maximum_real_for_integer.h" -#include "linter/min_contains_without_contains.h" -#include "linter/minimum_real_for_integer.h" -#include "linter/modern_official_dialect_with_empty_fragment.h" #include "linter/multiple_of_default.h" -#include "linter/non_applicable_additional_items.h" -#include "linter/non_applicable_enum_validation_keywords.h" -#include "linter/non_applicable_type_specific_keywords.h" -#include "linter/not_false.h" #include "linter/pattern_properties_default.h" #include "linter/properties_default.h" #include "linter/property_names_default.h" #include "linter/property_names_type_default.h" -#include "linter/required_properties_in_properties.h" -#include "linter/single_type_array.h" -#include "linter/then_empty.h" -#include "linter/then_without_if.h" +#include "linter/simple_properties_identifiers.h" #include "linter/title_description_equal.h" #include "linter/title_trailing_period.h" #include "linter/title_trim.h" @@ -103,10 +116,6 @@ inline auto APPLIES_TO_POINTERS(std::vector &&keywords) #include "linter/top_level_title.h" #include 
"linter/unevaluated_items_default.h" #include "linter/unevaluated_properties_default.h" -#include "linter/unknown_keywords_prefix.h" -#include "linter/unknown_local_ref.h" -#include "linter/unnecessary_allof_ref_wrapper_draft.h" -#include "linter/unnecessary_allof_ref_wrapper_modern.h" #include "linter/unsatisfiable_max_contains.h" #include "linter/unsatisfiable_min_properties.h" @@ -116,13 +125,19 @@ inline auto APPLIES_TO_POINTERS(std::vector &&keywords) namespace sourcemeta::core { auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void { - // Common rules that apply to all modes + if (mode == AlterSchemaMode::Canonicalizer) { + bundle.add(); + bundle.add(); + } + + if (mode == AlterSchemaMode::Linter) { + bundle.add(); + } + bundle.add(); bundle.add(); bundle.add(); bundle.add(); - bundle.add(); - bundle.add(); bundle.add(); bundle.add(); bundle.add(); @@ -143,7 +158,6 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void { bundle.add(); bundle.add(); bundle.add(); - bundle.add(); bundle.add(); bundle.add(); bundle.add(); @@ -154,38 +168,31 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void { bundle.add(); bundle.add(); bundle.add(); - bundle.add(); - bundle.add(); - bundle.add(); - bundle.add(); - bundle.add(); - bundle.add(); - - if (mode == AlterSchemaMode::StaticAnalysis) { + bundle.add(); + + if (mode == AlterSchemaMode::Canonicalizer) { bundle.add(); bundle.add(); bundle.add(); bundle.add(); bundle.add(); - bundle.add(); bundle.add(); bundle.add(); bundle.add(); bundle.add(); + bundle.add(); + bundle.add(); bundle.add(); bundle.add(); - bundle.add(); bundle.add(); bundle.add(); bundle.add(); - bundle.add(); } - if (mode == AlterSchemaMode::Readability) { + if (mode == AlterSchemaMode::Linter) { bundle.add(); bundle.add(); bundle.add(); - bundle.add(); bundle.add(); bundle.add(); bundle.add(); @@ -203,7 +210,20 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void { 
bundle.add(); bundle.add(); bundle.add(); + bundle.add(); + bundle.add(); + bundle.add(); + bundle.add(); + bundle.add(); + bundle.add(); + bundle.add(); + bundle.add(); } + + bundle.add(); + bundle.add(); + bundle.add(); + bundle.add(); } } // namespace sourcemeta::core diff --git a/vendor/core/src/extension/alterschema/canonicalizer/multiple_of_implicit.h b/vendor/core/src/extension/alterschema/canonicalizer/multiple_of_implicit.h index d466165f..32a2c864 100644 --- a/vendor/core/src/extension/alterschema/canonicalizer/multiple_of_implicit.h +++ b/vendor/core/src/extension/alterschema/canonicalizer/multiple_of_implicit.h @@ -21,8 +21,8 @@ class MultipleOfImplicit final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_4}) && schema.is_object() && schema.defines("type") && schema.at("type").is_string() && - (schema.at("type").to_string() == "integer" || - schema.at("type").to_string() == "number") && + // Applying this to numbers would be a semantic problem + schema.at("type").to_string() == "integer" && !schema.defines("multipleOf")); return true; } diff --git a/vendor/core/src/extension/alterschema/canonicalizer/no_metadata.h b/vendor/core/src/extension/alterschema/canonicalizer/no_metadata.h new file mode 100644 index 00000000..80b53c25 --- /dev/null +++ b/vendor/core/src/extension/alterschema/canonicalizer/no_metadata.h @@ -0,0 +1,34 @@ +class NoMetadata final : public SchemaTransformRule { +public: + NoMetadata() + : SchemaTransformRule{"no_metadata", + "Annotations, comments, and unknown keywords have " + "no effect on validation"} {}; + + [[nodiscard]] auto + condition(const JSON &schema, const JSON &, const Vocabularies &vocabularies, + const SchemaFrame &, const SchemaFrame::Location &, + const SchemaWalker &walker, const SchemaResolver &) const + -> SchemaTransformRule::Result override { + ONLY_CONTINUE_IF(schema.is_object() && !schema.empty()); + + std::vector locations; + for (const auto &entry : schema.as_object()) { + const auto 
&metadata{walker(entry.first, vocabularies)}; + if (metadata.type == SchemaKeywordType::Annotation || + metadata.type == SchemaKeywordType::Comment || + metadata.type == SchemaKeywordType::Unknown) { + locations.push_back(Pointer{entry.first}); + } + } + + ONLY_CONTINUE_IF(!locations.empty()); + return APPLIES_TO_POINTERS(std::move(locations)); + } + + auto transform(JSON &schema, const Result &result) const -> void override { + for (const auto &location : result.locations) { + schema.erase(location.at(0).to_property()); + } + } +}; diff --git a/vendor/core/src/extension/alterschema/canonicalizer/type_array_to_any_of.h b/vendor/core/src/extension/alterschema/canonicalizer/type_array_to_any_of.h new file mode 100644 index 00000000..7c55c172 --- /dev/null +++ b/vendor/core/src/extension/alterschema/canonicalizer/type_array_to_any_of.h @@ -0,0 +1,92 @@ +class TypeArrayToAnyOf final : public SchemaTransformRule { +public: + TypeArrayToAnyOf() + : SchemaTransformRule{ + "type_array_to_any_of", + "Setting `type` to more than one choice is syntax sugar to " + "`anyOf` over the corresponding types"} {}; + + [[nodiscard]] auto + condition(const sourcemeta::core::JSON &schema, + const sourcemeta::core::JSON &, + const sourcemeta::core::Vocabularies &vocabularies, + const sourcemeta::core::SchemaFrame &, + const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaWalker &walker, + const sourcemeta::core::SchemaResolver &) const + -> sourcemeta::core::SchemaTransformRule::Result override { + + ONLY_CONTINUE_IF(vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Validation, + Vocabularies::Known::JSON_Schema_2020_12_Applicator, + Vocabularies::Known::JSON_Schema_2019_09_Validation, + Vocabularies::Known::JSON_Schema_2019_09_Applicator, + Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_4}) && + schema.is_object() && schema.defines("type") && + 
schema.at("type").is_array()); + + this->keyword_instances_.clear(); + + for (const auto &entry : schema.as_object()) { + if (entry.first == "type") { + continue; + } + + const auto &metadata{walker(entry.first, vocabularies)}; + if (metadata.instances.any()) { + this->keyword_instances_[entry.first] = metadata.instances; + } + } + + return APPLIES_TO_KEYWORDS("type"); + } + + auto transform(JSON &schema, const Result &) const -> void override { + auto disjunctors{sourcemeta::core::JSON::make_array()}; + for (const auto &type : schema.at("type").as_array()) { + auto branch{sourcemeta::core::JSON::make_object()}; + branch.assign("type", type); + const auto current_type_set{parse_schema_type(type)}; + for (const auto &[keyword, instances] : this->keyword_instances_) { + if ((instances & current_type_set).any()) { + branch.assign(keyword, schema.at(keyword)); + } + } + + disjunctors.push_back(std::move(branch)); + } + + for (const auto &[keyword, instances] : this->keyword_instances_) { + schema.erase(keyword); + } + + if (schema.defines("anyOf")) { + auto first_branch{sourcemeta::core::JSON::make_object()}; + first_branch.assign("anyOf", schema.at("anyOf")); + auto second_branch{sourcemeta::core::JSON::make_object()}; + second_branch.assign("anyOf", std::move(disjunctors)); + schema.erase("anyOf"); + + if (schema.defines("allOf")) { + schema.at("allOf").push_back(std::move(first_branch)); + schema.at("allOf").push_back(std::move(second_branch)); + schema.erase("type"); + } else { + auto allof_wrapper{sourcemeta::core::JSON::make_array()}; + allof_wrapper.push_back(std::move(first_branch)); + allof_wrapper.push_back(std::move(second_branch)); + schema.at("type").into(std::move(allof_wrapper)); + schema.rename("type", "allOf"); + } + } else { + schema.at("type").into(std::move(disjunctors)); + schema.rename("type", "anyOf"); + } + } + +private: + mutable std::unordered_map + keyword_instances_; +}; diff --git 
a/vendor/core/src/extension/alterschema/canonicalizer/type_array_to_any_of_2020_12.h b/vendor/core/src/extension/alterschema/canonicalizer/type_array_to_any_of_2020_12.h deleted file mode 100644 index f449286e..00000000 --- a/vendor/core/src/extension/alterschema/canonicalizer/type_array_to_any_of_2020_12.h +++ /dev/null @@ -1,68 +0,0 @@ -class TypeArrayToAnyOf_2020_12 final : public SchemaTransformRule { -public: - TypeArrayToAnyOf_2020_12() - : SchemaTransformRule{ - "type_array_to_any_of_2020_12", - "Setting `type` to more than one choice is syntax sugar to " - "`anyOf` over the corresponding types"} {}; - - [[nodiscard]] auto - condition(const sourcemeta::core::JSON &schema, - const sourcemeta::core::JSON &, - const sourcemeta::core::Vocabularies &vocabularies, - - const sourcemeta::core::SchemaFrame &frame, - const sourcemeta::core::SchemaFrame::Location &, - const sourcemeta::core::SchemaWalker &, - const sourcemeta::core::SchemaResolver &) const - -> sourcemeta::core::SchemaTransformRule::Result override { - - // Note that a big limitation of this rule is that it cannot apply to - // schemas that have identifiers. For example, consider a schema that has - // a type union declaration alongside of an `anyOf` where one branch defines - // `$id` or `$anchor`. We will end up duplicating identifiers (leading to - // invalid schemas) and there is no silver bullet to avoid these cases. 
- const auto has_identifiers{ - std::ranges::any_of(frame.locations(), [](const auto &entry) { - return entry.second.type == - sourcemeta::core::SchemaFrame::LocationType::Resource || - entry.second.type == - sourcemeta::core::SchemaFrame::LocationType::Anchor; - })}; - - ONLY_CONTINUE_IF( - vocabularies.contains_any( - {Vocabularies::Known::JSON_Schema_2020_12_Validation, - Vocabularies::Known::JSON_Schema_2020_12_Applicator}) && - !has_identifiers && schema.is_object() && schema.defines("type") && - schema.at("type").is_array() && - // Non type-specific applicators can leads to invalid schemas - !schema.defines("$defs") && !schema.defines("$ref") && - !schema.defines("if") && !schema.defines("then") && - !schema.defines("else") && !schema.defines("allOf") && - !schema.defines("oneOf") && !schema.defines("anyOf")); - return APPLIES_TO_KEYWORDS("type"); - } - - auto transform(JSON &schema, const Result &) const -> void override { - const std::set keep{"$schema", "$id", "$anchor", - "$dynamicAnchor", "$vocabulary"}; - auto disjunctors{sourcemeta::core::JSON::make_array()}; - for (const auto &type : schema.at("type").as_array()) { - auto copy = schema; - copy.erase_keys(keep.cbegin(), keep.cend()); - copy.assign("type", type); - disjunctors.push_back(std::move(copy)); - } - - auto result{sourcemeta::core::JSON::make_object()}; - for (const auto &keyword : keep) { - if (schema.defines(keyword)) { - result.assign(keyword, schema.at(keyword)); - } - } - - result.assign("anyOf", std::move(disjunctors)); - schema.into(std::move(result)); - } -}; diff --git a/vendor/core/src/extension/alterschema/canonicalizer/type_union_implicit.h b/vendor/core/src/extension/alterschema/canonicalizer/type_union_implicit.h index 8cc046f4..11fb779d 100644 --- a/vendor/core/src/extension/alterschema/canonicalizer/type_union_implicit.h +++ b/vendor/core/src/extension/alterschema/canonicalizer/type_union_implicit.h @@ -11,9 +11,10 @@ class TypeUnionImplicit final : public SchemaTransformRule { 
const sourcemeta::core::Vocabularies &vocabularies, const sourcemeta::core::SchemaFrame &, const sourcemeta::core::SchemaFrame::Location &, - const sourcemeta::core::SchemaWalker &, + const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { + using namespace sourcemeta::core; ONLY_CONTINUE_IF(schema.is_object()); ONLY_CONTINUE_IF(vocabularies.contains_any( {Vocabularies::Known::JSON_Schema_2020_12_Validation, @@ -26,66 +27,49 @@ class TypeUnionImplicit final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_1, Vocabularies::Known::JSON_Schema_Draft_0})); ONLY_CONTINUE_IF(!schema.defines("type")); - ONLY_CONTINUE_IF( - !vocabularies.contains(Vocabularies::Known::JSON_Schema_2020_12_Core) || - !schema.defines_any({"$ref", "$dynamicRef"})); - ONLY_CONTINUE_IF(!vocabularies.contains( - Vocabularies::Known::JSON_Schema_2020_12_Applicator) || - !schema.defines_any({"anyOf", "oneOf", "allOf", "if", - "then", "else", "not"})); - ONLY_CONTINUE_IF(!vocabularies.contains( - Vocabularies::Known::JSON_Schema_2020_12_Validation) || - !schema.defines_any({"enum", "const"})); - ONLY_CONTINUE_IF( - !vocabularies.contains(Vocabularies::Known::JSON_Schema_2019_09_Core) || - !schema.defines_any({"$ref", "$recursiveRef"})); - ONLY_CONTINUE_IF(!vocabularies.contains( - Vocabularies::Known::JSON_Schema_2019_09_Applicator) || - !schema.defines_any({"anyOf", "oneOf", "allOf", "if", - "then", "else", "not"})); - ONLY_CONTINUE_IF(!vocabularies.contains( - Vocabularies::Known::JSON_Schema_2019_09_Validation) || - !schema.defines_any({"enum", "const"})); - ONLY_CONTINUE_IF( - !vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_7) || - !schema.defines_any({"$ref", "enum", "const", "anyOf", "oneOf", "allOf", - "if", "then", "else", "not"})); - ONLY_CONTINUE_IF( - !vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_6) || - !schema.defines_any( - {"$ref", "enum", 
"const", "anyOf", "oneOf", "allOf", "not"})); - ONLY_CONTINUE_IF( - !vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_4) || - !schema.defines_any( - {"$ref", "enum", "anyOf", "oneOf", "allOf", "not"})); - ONLY_CONTINUE_IF( - !vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_3) || - !schema.defines_any({"$ref", "enum", "disallow", "extends"})) - ONLY_CONTINUE_IF( - !vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_2) || - !schema.defines_any({"enum", "disallow", "extends"})); - ONLY_CONTINUE_IF( - !vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_1) || - !schema.defines_any({"enum", "disallow", "extends"})); - ONLY_CONTINUE_IF(!vocabularies.contains( - Vocabularies::Known::JSON_Schema_Draft_0_Hyper) || - !schema.defines_any({"enum", "disallow", "extends"})); + ONLY_CONTINUE_IF(!schema.defines("enum")); + ONLY_CONTINUE_IF(!vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Validation, + Vocabularies::Known::JSON_Schema_2019_09_Validation, + Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_6}) || + !schema.defines("const")); + + for (const auto &entry : schema.as_object()) { + const auto &keyword_type{walker(entry.first, vocabularies).type}; + + // References point to other schemas that may have type constraints + ONLY_CONTINUE_IF(keyword_type != SchemaKeywordType::Reference); + + // Logical in-place applicators apply without affecting the instance + // location, meaning they impose constraints on the same instance. 
Adding + // an implicit type union alongside these would create redundant branches + // that need complex simplification + ONLY_CONTINUE_IF( + keyword_type != SchemaKeywordType::ApplicatorValueOrElementsInPlace && + keyword_type != SchemaKeywordType::ApplicatorMembersInPlaceSome && + keyword_type != SchemaKeywordType::ApplicatorElementsInPlace && + keyword_type != SchemaKeywordType::ApplicatorElementsInPlaceSome && + keyword_type != + SchemaKeywordType::ApplicatorElementsInPlaceSomeNegate && + keyword_type != SchemaKeywordType::ApplicatorValueInPlaceMaybe && + keyword_type != SchemaKeywordType::ApplicatorValueInPlaceNegate); + } + return true; } auto transform(JSON &schema, const Result &) const -> void override { auto types{sourcemeta::core::JSON::make_array()}; - // All possible JSON Schema types - // See - // https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.6.1.1 types.push_back(sourcemeta::core::JSON{"null"}); types.push_back(sourcemeta::core::JSON{"boolean"}); types.push_back(sourcemeta::core::JSON{"object"}); types.push_back(sourcemeta::core::JSON{"array"}); types.push_back(sourcemeta::core::JSON{"string"}); + + // Note we don't add `integer`, as it is covered by `number` types.push_back(sourcemeta::core::JSON{"number"}); - types.push_back(sourcemeta::core::JSON{"integer"}); schema.assign("type", std::move(types)); } diff --git a/vendor/core/src/extension/alterschema/linter/const_with_type.h b/vendor/core/src/extension/alterschema/common/const_with_type.h similarity index 67% rename from vendor/core/src/extension/alterschema/linter/const_with_type.h rename to vendor/core/src/extension/alterschema/common/const_with_type.h index 9a51405f..261f528f 100644 --- a/vendor/core/src/extension/alterschema/linter/const_with_type.h +++ b/vendor/core/src/extension/alterschema/common/const_with_type.h @@ -23,23 +23,9 @@ class ConstWithType final : public SchemaTransformRule { schema.is_object() && schema.defines("type") && 
schema.defines("const")); - std::set current_types; - if (schema.at("type").is_string()) { - parse_schema_type( - schema.at("type").to_string(), - [&current_types](const auto type) { current_types.emplace(type); }); - } else if (schema.at("type").is_array()) { - for (const auto &entry : schema.at("type").as_array()) { - if (entry.is_string()) { - parse_schema_type(entry.to_string(), - [&current_types](const auto type) { - current_types.emplace(type); - }); - } - } - } - - ONLY_CONTINUE_IF(current_types.contains(schema.at("const").type())); + const auto current_types{parse_schema_type(schema.at("type"))}; + ONLY_CONTINUE_IF(current_types.test( + static_cast(schema.at("const").type()))); return APPLIES_TO_KEYWORDS("const", "type"); } diff --git a/vendor/core/src/extension/alterschema/linter/content_media_type_without_encoding.h b/vendor/core/src/extension/alterschema/common/content_media_type_without_encoding.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/content_media_type_without_encoding.h rename to vendor/core/src/extension/alterschema/common/content_media_type_without_encoding.h diff --git a/vendor/core/src/extension/alterschema/linter/content_schema_without_media_type.h b/vendor/core/src/extension/alterschema/common/content_schema_without_media_type.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/content_schema_without_media_type.h rename to vendor/core/src/extension/alterschema/common/content_schema_without_media_type.h diff --git a/vendor/core/src/extension/alterschema/linter/dependencies_property_tautology.h b/vendor/core/src/extension/alterschema/common/dependencies_property_tautology.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/dependencies_property_tautology.h rename to vendor/core/src/extension/alterschema/common/dependencies_property_tautology.h diff --git a/vendor/core/src/extension/alterschema/linter/dependent_required_tautology.h 
b/vendor/core/src/extension/alterschema/common/dependent_required_tautology.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/dependent_required_tautology.h rename to vendor/core/src/extension/alterschema/common/dependent_required_tautology.h diff --git a/vendor/core/src/extension/alterschema/linter/draft_official_dialect_without_empty_fragment.h b/vendor/core/src/extension/alterschema/common/draft_official_dialect_without_empty_fragment.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/draft_official_dialect_without_empty_fragment.h rename to vendor/core/src/extension/alterschema/common/draft_official_dialect_without_empty_fragment.h diff --git a/vendor/core/src/extension/alterschema/linter/draft_ref_siblings.h b/vendor/core/src/extension/alterschema/common/draft_ref_siblings.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/draft_ref_siblings.h rename to vendor/core/src/extension/alterschema/common/draft_ref_siblings.h diff --git a/vendor/core/src/extension/alterschema/common/drop_allof_empty_schemas.h b/vendor/core/src/extension/alterschema/common/drop_allof_empty_schemas.h new file mode 100644 index 00000000..31abc14e --- /dev/null +++ b/vendor/core/src/extension/alterschema/common/drop_allof_empty_schemas.h @@ -0,0 +1,43 @@ +class DropAllOfEmptySchemas final : public SchemaTransformRule { +public: + DropAllOfEmptySchemas() + : SchemaTransformRule{"drop_allof_empty_schemas", + "Empty schemas in `allOf` are redundant and can be " + "removed"} {}; + + [[nodiscard]] auto + condition(const JSON &schema, const JSON &, const Vocabularies &vocabularies, + const SchemaFrame &, const SchemaFrame::Location &, + const SchemaWalker &, const SchemaResolver &) const + -> SchemaTransformRule::Result override { + ONLY_CONTINUE_IF(vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Applicator, + Vocabularies::Known::JSON_Schema_2019_09_Applicator, + 
Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_4})); + ONLY_CONTINUE_IF(schema.is_object() && schema.defines("allOf") && + schema.at("allOf").is_array() && + !schema.at("allOf").empty()); + ONLY_CONTINUE_IF( + std::ranges::any_of(schema.at("allOf").as_array(), is_empty_schema)); + return APPLIES_TO_KEYWORDS("allOf"); + } + + auto transform(JSON &schema, const Result &) const -> void override { + auto new_allof{JSON::make_array()}; + for (const auto &entry : schema.at("allOf").as_array()) { + if (!is_empty_schema(entry)) { + new_allof.push_back(entry); + } + } + + if (new_allof.empty()) { + schema.erase("allOf"); + } else { + // Re-assign instead of deleting in place to avoid invalidating memory + // addresses and confusing the transformer + schema.assign("allOf", std::move(new_allof)); + } + } +}; diff --git a/vendor/core/src/extension/alterschema/linter/duplicate_allof_branches.h b/vendor/core/src/extension/alterschema/common/duplicate_allof_branches.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/duplicate_allof_branches.h rename to vendor/core/src/extension/alterschema/common/duplicate_allof_branches.h diff --git a/vendor/core/src/extension/alterschema/linter/duplicate_anyof_branches.h b/vendor/core/src/extension/alterschema/common/duplicate_anyof_branches.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/duplicate_anyof_branches.h rename to vendor/core/src/extension/alterschema/common/duplicate_anyof_branches.h diff --git a/vendor/core/src/extension/alterschema/linter/duplicate_enum_values.h b/vendor/core/src/extension/alterschema/common/duplicate_enum_values.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/duplicate_enum_values.h rename to vendor/core/src/extension/alterschema/common/duplicate_enum_values.h diff --git 
a/vendor/core/src/extension/alterschema/linter/duplicate_required_values.h b/vendor/core/src/extension/alterschema/common/duplicate_required_values.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/duplicate_required_values.h rename to vendor/core/src/extension/alterschema/common/duplicate_required_values.h diff --git a/vendor/core/src/extension/alterschema/linter/else_empty.h b/vendor/core/src/extension/alterschema/common/else_empty.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/else_empty.h rename to vendor/core/src/extension/alterschema/common/else_empty.h diff --git a/vendor/core/src/extension/alterschema/linter/else_without_if.h b/vendor/core/src/extension/alterschema/common/else_without_if.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/else_without_if.h rename to vendor/core/src/extension/alterschema/common/else_without_if.h diff --git a/vendor/core/src/extension/alterschema/linter/enum_with_type.h b/vendor/core/src/extension/alterschema/common/enum_with_type.h similarity index 72% rename from vendor/core/src/extension/alterschema/linter/enum_with_type.h rename to vendor/core/src/extension/alterschema/common/enum_with_type.h index 81d25595..350f73d8 100644 --- a/vendor/core/src/extension/alterschema/linter/enum_with_type.h +++ b/vendor/core/src/extension/alterschema/common/enum_with_type.h @@ -27,25 +27,10 @@ class EnumWithType final : public SchemaTransformRule { ONLY_CONTINUE_IF(schema.is_object() && schema.defines("type") && schema.defines("enum") && schema.at("enum").is_array()); - std::set current_types; - if (schema.at("type").is_string()) { - parse_schema_type( - schema.at("type").to_string(), - [&current_types](const auto type) { current_types.emplace(type); }); - } else if (schema.at("type").is_array()) { - for (const auto &entry : schema.at("type").as_array()) { - if (entry.is_string()) { - parse_schema_type(entry.to_string(), - [&current_types](const auto type) { 
- current_types.emplace(type); - }); - } - } - } - + const auto current_types{parse_schema_type(schema.at("type"))}; ONLY_CONTINUE_IF(std::ranges::all_of( schema.at("enum").as_array(), [&current_types](const auto &item) { - return current_types.contains(item.type()); + return current_types.test(static_cast(item.type())); })); return APPLIES_TO_KEYWORDS("enum", "type"); diff --git a/vendor/core/src/extension/alterschema/linter/equal_numeric_bounds_to_enum.h b/vendor/core/src/extension/alterschema/common/equal_numeric_bounds_to_enum.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/equal_numeric_bounds_to_enum.h rename to vendor/core/src/extension/alterschema/common/equal_numeric_bounds_to_enum.h diff --git a/vendor/core/src/extension/alterschema/linter/exclusive_maximum_number_and_maximum.h b/vendor/core/src/extension/alterschema/common/exclusive_maximum_number_and_maximum.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/exclusive_maximum_number_and_maximum.h rename to vendor/core/src/extension/alterschema/common/exclusive_maximum_number_and_maximum.h diff --git a/vendor/core/src/extension/alterschema/linter/exclusive_minimum_number_and_minimum.h b/vendor/core/src/extension/alterschema/common/exclusive_minimum_number_and_minimum.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/exclusive_minimum_number_and_minimum.h rename to vendor/core/src/extension/alterschema/common/exclusive_minimum_number_and_minimum.h diff --git a/vendor/core/src/extension/alterschema/linter/if_without_then_else.h b/vendor/core/src/extension/alterschema/common/if_without_then_else.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/if_without_then_else.h rename to vendor/core/src/extension/alterschema/common/if_without_then_else.h diff --git a/vendor/core/src/extension/alterschema/linter/ignored_metaschema.h b/vendor/core/src/extension/alterschema/common/ignored_metaschema.h 
similarity index 100% rename from vendor/core/src/extension/alterschema/linter/ignored_metaschema.h rename to vendor/core/src/extension/alterschema/common/ignored_metaschema.h diff --git a/vendor/core/src/extension/alterschema/linter/max_contains_without_contains.h b/vendor/core/src/extension/alterschema/common/max_contains_without_contains.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/max_contains_without_contains.h rename to vendor/core/src/extension/alterschema/common/max_contains_without_contains.h diff --git a/vendor/core/src/extension/alterschema/linter/maximum_real_for_integer.h b/vendor/core/src/extension/alterschema/common/maximum_real_for_integer.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/maximum_real_for_integer.h rename to vendor/core/src/extension/alterschema/common/maximum_real_for_integer.h diff --git a/vendor/core/src/extension/alterschema/linter/min_contains_without_contains.h b/vendor/core/src/extension/alterschema/common/min_contains_without_contains.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/min_contains_without_contains.h rename to vendor/core/src/extension/alterschema/common/min_contains_without_contains.h diff --git a/vendor/core/src/extension/alterschema/linter/minimum_real_for_integer.h b/vendor/core/src/extension/alterschema/common/minimum_real_for_integer.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/minimum_real_for_integer.h rename to vendor/core/src/extension/alterschema/common/minimum_real_for_integer.h diff --git a/vendor/core/src/extension/alterschema/linter/modern_official_dialect_with_empty_fragment.h b/vendor/core/src/extension/alterschema/common/modern_official_dialect_with_empty_fragment.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/modern_official_dialect_with_empty_fragment.h rename to 
vendor/core/src/extension/alterschema/common/modern_official_dialect_with_empty_fragment.h diff --git a/vendor/core/src/extension/alterschema/linter/non_applicable_additional_items.h b/vendor/core/src/extension/alterschema/common/non_applicable_additional_items.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/non_applicable_additional_items.h rename to vendor/core/src/extension/alterschema/common/non_applicable_additional_items.h diff --git a/vendor/core/src/extension/alterschema/linter/non_applicable_enum_validation_keywords.h b/vendor/core/src/extension/alterschema/common/non_applicable_enum_validation_keywords.h similarity index 97% rename from vendor/core/src/extension/alterschema/linter/non_applicable_enum_validation_keywords.h rename to vendor/core/src/extension/alterschema/common/non_applicable_enum_validation_keywords.h index 918ebf7e..6de098fe 100644 --- a/vendor/core/src/extension/alterschema/linter/non_applicable_enum_validation_keywords.h +++ b/vendor/core/src/extension/alterschema/common/non_applicable_enum_validation_keywords.h @@ -27,7 +27,7 @@ class NonApplicableEnumValidationKeywords final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_1, Vocabularies::Known::JSON_Schema_Draft_1_Hyper}) && schema.is_object() && schema.defines("enum") && - schema.at("enum").is_array()); + schema.at("enum").is_array() && !schema.defines("type")); sourcemeta::core::JSON::TypeSet enum_types; for (const auto &value : schema.at("enum").as_array()) { diff --git a/vendor/core/src/extension/alterschema/linter/non_applicable_type_specific_keywords.h b/vendor/core/src/extension/alterschema/common/non_applicable_type_specific_keywords.h similarity index 66% rename from vendor/core/src/extension/alterschema/linter/non_applicable_type_specific_keywords.h rename to vendor/core/src/extension/alterschema/common/non_applicable_type_specific_keywords.h index 465c6a5b..4d1445ac 100644 --- 
a/vendor/core/src/extension/alterschema/linter/non_applicable_type_specific_keywords.h +++ b/vendor/core/src/extension/alterschema/common/non_applicable_type_specific_keywords.h @@ -16,37 +16,22 @@ class NonApplicableTypeSpecificKeywords final : public SchemaTransformRule { -> sourcemeta::core::SchemaTransformRule::Result override { ONLY_CONTINUE_IF(schema.is_object()); - sourcemeta::core::JSON::TypeSet current_types; - if (vocabularies.contains_any( - {Vocabularies::Known::JSON_Schema_2020_12_Validation, - Vocabularies::Known::JSON_Schema_2019_09_Validation, - Vocabularies::Known::JSON_Schema_Draft_7, - Vocabularies::Known::JSON_Schema_Draft_6, - Vocabularies::Known::JSON_Schema_Draft_4, - Vocabularies::Known::JSON_Schema_Draft_3, - Vocabularies::Known::JSON_Schema_Draft_2, - Vocabularies::Known::JSON_Schema_Draft_2_Hyper, - Vocabularies::Known::JSON_Schema_Draft_1, - Vocabularies::Known::JSON_Schema_Draft_1_Hyper, - Vocabularies::Known::JSON_Schema_Draft_0, - Vocabularies::Known::JSON_Schema_Draft_0_Hyper}) && - schema.defines("type")) { - if (schema.at("type").is_string()) { - parse_schema_type(schema.at("type").to_string(), - [&current_types](const auto type) { - current_types.set(static_cast(type)); - }); - } else if (schema.at("type").is_array()) { - for (const auto &entry : schema.at("type").as_array()) { - if (entry.is_string()) { - parse_schema_type( - entry.to_string(), [&current_types](const auto type) { - current_types.set(static_cast(type)); - }); - } - } - } - } + auto current_types{vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Validation, + Vocabularies::Known::JSON_Schema_2019_09_Validation, + Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_4, + Vocabularies::Known::JSON_Schema_Draft_3, + Vocabularies::Known::JSON_Schema_Draft_2, + Vocabularies::Known::JSON_Schema_Draft_2_Hyper, + Vocabularies::Known::JSON_Schema_Draft_1, + 
Vocabularies::Known::JSON_Schema_Draft_1_Hyper, + Vocabularies::Known::JSON_Schema_Draft_0, + Vocabularies::Known::JSON_Schema_Draft_0_Hyper}) && + schema.defines("type") + ? parse_schema_type(schema.at("type")) + : sourcemeta::core::JSON::TypeSet{}}; if (vocabularies.contains_any( {Vocabularies::Known::JSON_Schema_2020_12_Validation, diff --git a/vendor/core/src/extension/alterschema/linter/not_false.h b/vendor/core/src/extension/alterschema/common/not_false.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/not_false.h rename to vendor/core/src/extension/alterschema/common/not_false.h diff --git a/vendor/core/src/extension/alterschema/common/orphan_definitions.h b/vendor/core/src/extension/alterschema/common/orphan_definitions.h new file mode 100644 index 00000000..812a7dc9 --- /dev/null +++ b/vendor/core/src/extension/alterschema/common/orphan_definitions.h @@ -0,0 +1,81 @@ +class OrphanDefinitions final : public SchemaTransformRule { +public: + OrphanDefinitions() + : SchemaTransformRule{ + "orphan_definitions", + "Schema definitions in `$defs` or `definitions` that " + "are never internally referenced can be removed"} {}; + + [[nodiscard]] auto + condition(const sourcemeta::core::JSON &schema, + const sourcemeta::core::JSON &, + const sourcemeta::core::Vocabularies &vocabularies, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, + const sourcemeta::core::SchemaWalker &, + const sourcemeta::core::SchemaResolver &) const + -> sourcemeta::core::SchemaTransformRule::Result override { + const bool has_modern_core{ + vocabularies.contains(Vocabularies::Known::JSON_Schema_2020_12_Core) || + vocabularies.contains(Vocabularies::Known::JSON_Schema_2019_09_Core)}; + const bool has_draft_definitions{ + vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_7) || + vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_6) || + 
vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_4)}; + + ONLY_CONTINUE_IF(has_modern_core || has_draft_definitions); + ONLY_CONTINUE_IF(schema.is_object()); + + std::vector orphans; + + if (has_modern_core) { + collect_orphans(frame, location, schema, "$defs", orphans); + } + + if (has_modern_core || has_draft_definitions) { + collect_orphans(frame, location, schema, "definitions", orphans); + } + + ONLY_CONTINUE_IF(!orphans.empty()); + return APPLIES_TO_POINTERS(std::move(orphans)); + } + + auto transform(JSON &schema, const Result &result) const -> void override { + for (const auto &pointer : result.locations) { + assert(pointer.size() == 2); + assert(pointer.at(0).is_property()); + assert(pointer.at(1).is_property()); + const auto &container{pointer.at(0).to_property()}; + schema.at(container).erase(pointer.at(1).to_property()); + } + + remove_empty_container(schema, "$defs"); + remove_empty_container(schema, "definitions"); + } + +private: + static auto + collect_orphans(const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &root, + const JSON &schema, const JSON::String &container, + std::vector &orphans) -> void { + if (!schema.defines(container) || !schema.at(container).is_object()) { + return; + } + + for (const auto &entry : schema.at(container).as_object()) { + auto entry_pointer{Pointer{container, entry.first}}; + const auto &entry_location{frame.traverse(root, entry_pointer)}; + if (frame.instance_locations(entry_location).empty()) { + orphans.push_back(std::move(entry_pointer)); + } + } + } + + static auto remove_empty_container(JSON &schema, const JSON::String &name) + -> void { + if (schema.defines(name) && schema.at(name).empty()) { + schema.erase(name); + } + } +}; diff --git a/vendor/core/src/extension/alterschema/linter/required_properties_in_properties.h b/vendor/core/src/extension/alterschema/common/required_properties_in_properties.h similarity index 100% rename from 
vendor/core/src/extension/alterschema/linter/required_properties_in_properties.h rename to vendor/core/src/extension/alterschema/common/required_properties_in_properties.h diff --git a/vendor/core/src/extension/alterschema/linter/single_type_array.h b/vendor/core/src/extension/alterschema/common/single_type_array.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/single_type_array.h rename to vendor/core/src/extension/alterschema/common/single_type_array.h diff --git a/vendor/core/src/extension/alterschema/linter/then_empty.h b/vendor/core/src/extension/alterschema/common/then_empty.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/then_empty.h rename to vendor/core/src/extension/alterschema/common/then_empty.h diff --git a/vendor/core/src/extension/alterschema/linter/then_without_if.h b/vendor/core/src/extension/alterschema/common/then_without_if.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/then_without_if.h rename to vendor/core/src/extension/alterschema/common/then_without_if.h diff --git a/vendor/core/src/extension/alterschema/linter/unknown_keywords_prefix.h b/vendor/core/src/extension/alterschema/common/unknown_keywords_prefix.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/unknown_keywords_prefix.h rename to vendor/core/src/extension/alterschema/common/unknown_keywords_prefix.h diff --git a/vendor/core/src/extension/alterschema/linter/unknown_local_ref.h b/vendor/core/src/extension/alterschema/common/unknown_local_ref.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/unknown_local_ref.h rename to vendor/core/src/extension/alterschema/common/unknown_local_ref.h diff --git a/vendor/core/src/extension/alterschema/linter/unnecessary_allof_ref_wrapper_draft.h b/vendor/core/src/extension/alterschema/common/unnecessary_allof_ref_wrapper_draft.h similarity index 100% rename from 
vendor/core/src/extension/alterschema/linter/unnecessary_allof_ref_wrapper_draft.h rename to vendor/core/src/extension/alterschema/common/unnecessary_allof_ref_wrapper_draft.h diff --git a/vendor/core/src/extension/alterschema/linter/unnecessary_allof_ref_wrapper_modern.h b/vendor/core/src/extension/alterschema/common/unnecessary_allof_ref_wrapper_modern.h similarity index 100% rename from vendor/core/src/extension/alterschema/linter/unnecessary_allof_ref_wrapper_modern.h rename to vendor/core/src/extension/alterschema/common/unnecessary_allof_ref_wrapper_modern.h diff --git a/vendor/core/src/extension/alterschema/common/unnecessary_allof_wrapper.h b/vendor/core/src/extension/alterschema/common/unnecessary_allof_wrapper.h new file mode 100644 index 00000000..6f9b668e --- /dev/null +++ b/vendor/core/src/extension/alterschema/common/unnecessary_allof_wrapper.h @@ -0,0 +1,154 @@ +class UnnecessaryAllOfWrapper final : public SchemaTransformRule { +public: + UnnecessaryAllOfWrapper() + : SchemaTransformRule{"unnecessary_allof_wrapper", + "Keywords inside `allOf` that do not conflict with " + "the parent schema can be elevated"} {}; + + [[nodiscard]] auto + condition(const JSON &schema, const JSON &, const Vocabularies &vocabularies, + const SchemaFrame &, const SchemaFrame::Location &, + const SchemaWalker &walker, const SchemaResolver &) const + -> SchemaTransformRule::Result override { + ONLY_CONTINUE_IF(vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Applicator, + Vocabularies::Known::JSON_Schema_2019_09_Applicator, + Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_4})); + ONLY_CONTINUE_IF(schema.is_object() && schema.defines("allOf") && + schema.at("allOf").is_array() && + !schema.at("allOf").empty()); + + std::unordered_set dependency_blocked; + for (const auto &entry : schema.as_object()) { + for (const auto &dependency : + walker(entry.first, 
vocabularies).dependencies) { + dependency_blocked.emplace(dependency); + } + } + + const JSON::TypeSet parent_types{ + schema.defines("type") && + vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Validation, + Vocabularies::Known::JSON_Schema_2019_09_Validation, + Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_4}) + ? parse_schema_type(schema.at("type")) + : JSON::TypeSet{}}; + + const auto &all_of{schema.at("allOf")}; + std::vector locations; + std::unordered_set elevated; + + for (auto index = all_of.size(); index > 0; index--) { + const auto &entry{all_of.at(index - 1)}; + if (!entry.is_object() || entry.empty() || + // We separately handle this case, as it has many other subtleties + entry.defines("$ref")) { + continue; + } + + // Skip entries that define their own identity, as elevating keywords + // from them could break references that target those anchors + if (!this->is_anonymous(entry, vocabularies)) { + continue; + } + + for (const auto &keyword_entry : entry.as_object()) { + const auto &keyword{keyword_entry.first}; + const auto &metadata{walker(keyword, vocabularies)}; + + if (elevated.contains(keyword) || + (schema.defines(keyword) && + schema.at(keyword) != keyword_entry.second)) { + continue; + } + + if (dependency_blocked.contains(keyword)) { + continue; + } + + if (metadata.instances.any() && parent_types.any() && + (metadata.instances & parent_types).none()) { + continue; + } + + if (std::ranges::any_of( + metadata.dependencies, [&](const auto &dependency) { + return !entry.defines(std::string{dependency}) && + (schema.defines(std::string{dependency}) || + elevated.contains(dependency)); + })) { + continue; + } + + locations.push_back(Pointer{"allOf", index - 1, keyword}); + elevated.emplace(keyword); + + for (const auto &dependency : metadata.dependencies) { + if (!entry.defines(std::string{dependency})) { + 
dependency_blocked.emplace(dependency); + } + } + } + } + + ONLY_CONTINUE_IF(!locations.empty()); + return APPLIES_TO_POINTERS(std::move(locations)); + } + + auto transform(JSON &schema, const Result &result) const -> void override { + for (const auto &location : result.locations) { + assert(location.size() == 3); + const auto allof_index{location.at(1).to_index()}; + const auto &keyword{location.at(2).to_property()}; + schema.try_assign_before( + keyword, schema.at("allOf").at(allof_index).at(keyword), "allOf"); + schema.at("allOf").at(allof_index).erase(keyword); + } + } + +private: + // TODO: Ideally we this information from the frame out of the box + [[nodiscard]] auto is_anonymous(const JSON &entry, + const Vocabularies &vocabularies) const + -> bool { + if (vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Core, + Vocabularies::Known::JSON_Schema_2019_09_Core})) { + if (entry.defines("$id") || entry.defines("$anchor")) { + return false; + } + + if (vocabularies.contains( + Vocabularies::Known::JSON_Schema_2020_12_Core) && + entry.defines("$dynamicAnchor")) { + return false; + } + + if (vocabularies.contains( + Vocabularies::Known::JSON_Schema_2019_09_Core) && + entry.defines("$recursiveAnchor") && + entry.at("$recursiveAnchor").is_boolean() && + entry.at("$recursiveAnchor").to_boolean()) { + return false; + } + + return true; + } + + if (vocabularies.contains_any({Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_6})) { + return !entry.defines("$id"); + } + + if (vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_4)) { + return !entry.defines("id"); + } + + return false; + } +}; diff --git a/vendor/core/src/extension/alterschema/include/sourcemeta/core/alterschema.h b/vendor/core/src/extension/alterschema/include/sourcemeta/core/alterschema.h index 1a659104..3a157884 100644 --- a/vendor/core/src/extension/alterschema/include/sourcemeta/core/alterschema.h +++ 
b/vendor/core/src/extension/alterschema/include/sourcemeta/core/alterschema.h @@ -25,12 +25,12 @@ namespace sourcemeta::core { enum class AlterSchemaMode : std::uint8_t { /// Rules that simplify the given schema for both human readability and /// performance - Readability, + Linter, /// Rules that surface implicit constraints and simplifies keywords that /// are syntax sugar to other keywords, potentially decreasing human /// readability in favor of explicitness - StaticAnalysis, + Canonicalizer, }; /// @ingroup alterschema @@ -44,7 +44,7 @@ enum class AlterSchemaMode : std::uint8_t { /// sourcemeta::core::SchemaTransformer bundle; /// /// sourcemeta::core::add(bundle, -/// sourcemeta::core::AlterSchemaMode::Readability); +/// sourcemeta::core::AlterSchemaMode::Linter); /// /// auto schema = sourcemeta::core::parse_json(R"JSON({ /// "$schema": "https://json-schema.org/draft/2020-12/schema", diff --git a/vendor/core/src/extension/alterschema/linter/simple_properties_identifiers.h b/vendor/core/src/extension/alterschema/linter/simple_properties_identifiers.h new file mode 100644 index 00000000..e4e36519 --- /dev/null +++ b/vendor/core/src/extension/alterschema/linter/simple_properties_identifiers.h @@ -0,0 +1,65 @@ +class SimplePropertiesIdentifiers final : public SchemaTransformRule { +public: + SimplePropertiesIdentifiers() + // Inspired by + // https://json-structure.github.io/core/draft-vasters-json-structure-core.html#section-3.6 + : SchemaTransformRule{ + "simple_properties_identifiers", + "Set `properties` to identifier names that can be easily mapped to " + "programming languages (matching [A-Za-z_][A-Za-z0-9_]*)"} {}; + + [[nodiscard]] auto + condition(const sourcemeta::core::JSON &schema, + const sourcemeta::core::JSON &root, + const sourcemeta::core::Vocabularies &vocabularies, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, + const sourcemeta::core::SchemaWalker &, + const 
sourcemeta::core::SchemaResolver &) const + -> sourcemeta::core::SchemaTransformRule::Result override { + ONLY_CONTINUE_IF(vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Applicator, + Vocabularies::Known::JSON_Schema_2019_09_Applicator, + Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_4, + Vocabularies::Known::JSON_Schema_Draft_3, + Vocabularies::Known::JSON_Schema_Draft_2, + Vocabularies::Known::JSON_Schema_Draft_2_Hyper, + Vocabularies::Known::JSON_Schema_Draft_1, + Vocabularies::Known::JSON_Schema_Draft_1_Hyper})); + ONLY_CONTINUE_IF(schema.is_object() && schema.defines("properties") && + schema.at("properties").is_object() && + !schema.at("properties").empty()); + + if (vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Core, + Vocabularies::Known::JSON_Schema_2019_09_Core})) { + // Skip meta-schemas with `$vocabulary` (2019-09+) + // We check the current schema resource (not root) to handle bundled + // schemas + const auto base_location{frame.traverse(location.base)}; + if (base_location.has_value()) { + const auto &resource{get(root, base_location->get().pointer)}; + ONLY_CONTINUE_IF(!resource.is_object() || + !resource.defines("$vocabulary")); + } + } else { + // Skip pre-vocabulary meta-schemas + ONLY_CONTINUE_IF(location.base != location.dialect && + (location.base + "#") != location.dialect); + } + + std::vector offenders; + for (const auto &entry : schema.at("properties").as_object()) { + static const Regex IDENTIFIER_PATTERN{ + to_regex("^[A-Za-z_][A-Za-z0-9_]*$").value()}; + if (!matches(IDENTIFIER_PATTERN, entry.first)) { + offenders.push_back(Pointer{"properties", entry.first}); + } + } + + ONLY_CONTINUE_IF(!offenders.empty()); + return APPLIES_TO_POINTERS(std::move(offenders)); + } +}; diff --git a/vendor/jsonbinpack/src/compiler/compiler.cc b/vendor/jsonbinpack/src/compiler/compiler.cc index fbc2bb4e..e8e60ca9 100644 --- 
a/vendor/jsonbinpack/src/compiler/compiler.cc +++ b/vendor/jsonbinpack/src/compiler/compiler.cc @@ -6,6 +6,8 @@ #include "encoding.h" +#include // assert + static auto transformer_callback_noop(const sourcemeta::core::Pointer &, const std::string_view, const std::string_view, @@ -24,9 +26,11 @@ auto canonicalize(sourcemeta::core::JSON &schema, const std::optional &default_dialect) -> void { sourcemeta::core::SchemaTransformer canonicalizer; sourcemeta::core::add(canonicalizer, - sourcemeta::core::AlterSchemaMode::StaticAnalysis); - canonicalizer.apply(schema, walker, make_resolver(resolver), - transformer_callback_noop, default_dialect); + sourcemeta::core::AlterSchemaMode::Canonicalizer); + [[maybe_unused]] const auto result = + canonicalizer.apply(schema, walker, make_resolver(resolver), + transformer_callback_noop, default_dialect); + assert(result.first); } auto make_encoding(sourcemeta::core::JSON &document, @@ -83,8 +87,10 @@ auto compile(sourcemeta::core::JSON &schema, // Numbers mapper.add(); - mapper.apply(schema, walker, make_resolver(resolver), - transformer_callback_noop, default_dialect); + [[maybe_unused]] const auto mapper_result = + mapper.apply(schema, walker, make_resolver(resolver), + transformer_callback_noop, default_dialect); + assert(mapper_result.first); // The "any" encoding is always the last resort const auto dialect{sourcemeta::core::dialect(schema)}; diff --git a/vendor/jsonschema/VERSION b/vendor/jsonschema/VERSION index 67aee239..d224e690 100644 --- a/vendor/jsonschema/VERSION +++ b/vendor/jsonschema/VERSION @@ -1 +1 @@ -13.2.0 +13.5.0 diff --git a/vendor/jsonschema/completion/jsonschema.bash b/vendor/jsonschema/completion/jsonschema.bash index 5eac91c4..0def2642 100644 --- a/vendor/jsonschema/completion/jsonschema.bash +++ b/vendor/jsonschema/completion/jsonschema.bash @@ -11,7 +11,7 @@ _jsonschema() { previous="" fi - commands="validate metaschema compile test fmt lint bundle inspect encode decode version help" + commands="validate 
metaschema compile test fmt lint bundle inspect canonicalize encode decode version help" global_options="--verbose -v --resolve -r --default-dialect -d --json -j --http -h" @@ -59,7 +59,7 @@ _jsonschema() { case "${command}" in validate) - local options="--http -h --benchmark -b --loop -l --extension -e --ignore -i --trace -t --fast -f --template -m" + local options="--benchmark -b --loop -l --extension -e --ignore -i --trace -t --fast -f --template -m" if [[ ${current} == -* ]] then COMPREPLY=( $(compgen -W "${options} ${global_options}" -- "${current}") ) @@ -68,7 +68,7 @@ _jsonschema() { fi ;; metaschema) - local options="--http -h --extension -e --ignore -i --trace -t" + local options="--extension -e --ignore -i --trace -t" if [[ ${current} == -* ]] then COMPREPLY=( $(compgen -W "${options} ${global_options}" -- "${current}") ) @@ -77,7 +77,7 @@ _jsonschema() { fi ;; compile) - local options="--http -h --extension -e --ignore -i --fast -f --minify -m" + local options="--extension -e --ignore -i --fast -f --minify -m" if [[ ${current} == -* ]] then COMPREPLY=( $(compgen -W "${options} ${global_options}" -- "${current}") ) @@ -86,7 +86,7 @@ _jsonschema() { fi ;; test) - local options="--http -h --extension -e --ignore -i" + local options="--extension -e --ignore -i" if [[ ${current} == -* ]] then COMPREPLY=( $(compgen -W "${options} ${global_options}" -- "${current}") ) @@ -113,7 +113,7 @@ _jsonschema() { fi ;; bundle) - local options="--http -h --extension -e --ignore -i --without-id -w" + local options="--extension -e --ignore -i --without-id -w" if [[ ${current} == -* ]] then COMPREPLY=( $(compgen -W "${options} ${global_options}" -- "${current}") ) @@ -129,6 +129,14 @@ _jsonschema() { COMPREPLY=( $(compgen -f -X '!*.json' -X '!*.yaml' -X '!*.yml' -- "${current}") ) fi ;; + canonicalize) + if [[ ${current} == -* ]] + then + COMPREPLY=( $(compgen -W "${global_options}" -- "${current}") ) + else + COMPREPLY=( $(compgen -f -X '!*.json' -X '!*.yaml' -X '!*.yml' -- 
"${current}") ) + fi + ;; encode) if [[ ${current} == -* ]] then diff --git a/vendor/jsonschema/completion/jsonschema.zsh b/vendor/jsonschema/completion/jsonschema.zsh index e93005cd..d74f75ed 100644 --- a/vendor/jsonschema/completion/jsonschema.zsh +++ b/vendor/jsonschema/completion/jsonschema.zsh @@ -15,6 +15,7 @@ _jsonschema() { 'lint:Lint schemas and optionally fix issues' 'bundle:Inline remote references in a schema' 'inspect:Display schema locations and references' + 'canonicalize:Transform a schema into a canonical form' 'encode:Encode JSON using JSON BinPack' 'decode:Decode JSON using JSON BinPack' 'version:Print version information' @@ -114,6 +115,11 @@ _jsonschema() { ${global_options[@]} \ '1:schema file:_files -g "*.json *.yaml *.yml"' ;; + canonicalize) + _arguments \ + ${global_options[@]} \ + '1:schema file:_files -g "*.json *.yaml *.yml"' + ;; encode) _arguments \ ${global_options[@]} \ diff --git a/vendor/jsonschema/src/CMakeLists.txt b/vendor/jsonschema/src/CMakeLists.txt index c320d020..25000026 100644 --- a/vendor/jsonschema/src/CMakeLists.txt +++ b/vendor/jsonschema/src/CMakeLists.txt @@ -10,7 +10,8 @@ add_executable(jsonschema_cli command_validate.cc command_encode.cc command_decode.cc - command_compile.cc) + command_compile.cc + command_canonicalize.cc) sourcemeta_add_default_options(PRIVATE jsonschema_cli) set_target_properties(jsonschema_cli PROPERTIES OUTPUT_NAME jsonschema) diff --git a/vendor/jsonschema/src/command.h b/vendor/jsonschema/src/command.h index 94883ac8..212d2d4b 100644 --- a/vendor/jsonschema/src/command.h +++ b/vendor/jsonschema/src/command.h @@ -12,6 +12,7 @@ auto lint(const sourcemeta::core::Options &options) -> void; auto validate(const sourcemeta::core::Options &options) -> void; auto metaschema(const sourcemeta::core::Options &options) -> void; auto compile(const sourcemeta::core::Options &options) -> void; +auto canonicalize(const sourcemeta::core::Options &options) -> void; auto encode(const sourcemeta::core::Options 
&options) -> void; auto decode(const sourcemeta::core::Options &options) -> void; } // namespace sourcemeta::jsonschema diff --git a/vendor/jsonschema/src/command_canonicalize.cc b/vendor/jsonschema/src/command_canonicalize.cc new file mode 100644 index 00000000..a2ed1d74 --- /dev/null +++ b/vendor/jsonschema/src/command_canonicalize.cc @@ -0,0 +1,74 @@ +#include +#include +#include + +#include // std::cout + +#include "command.h" +#include "configuration.h" +#include "error.h" +#include "resolver.h" +#include "utils.h" + +namespace { +auto transformer_callback_noop( + const sourcemeta::core::Pointer &, const std::string_view, + const std::string_view, + const sourcemeta::core::SchemaTransformRule::Result &) -> void {} +} // namespace + +auto sourcemeta::jsonschema::canonicalize( + const sourcemeta::core::Options &options) -> void { + + if (options.positional().size() < 1) { + throw PositionalArgumentError{ + "This command expects a path to a schema", + "jsonschema canonicalize path/to/schema.json"}; + } + + const std::filesystem::path schema_path{options.positional().front()}; + const auto configuration_path{find_configuration(schema_path)}; + const auto &configuration{ + read_configuration(options, configuration_path, schema_path)}; + const auto dialect{default_dialect(options, configuration)}; + auto schema{sourcemeta::core::read_yaml_or_json(schema_path)}; + + if (!sourcemeta::core::is_schema(schema)) { + throw NotSchemaError{schema_path}; + } + + try { + const auto &custom_resolver{ + resolver(options, options.contains("http"), dialect, configuration)}; + + sourcemeta::core::SchemaTransformer canonicalizer; + sourcemeta::core::add(canonicalizer, + sourcemeta::core::AlterSchemaMode::Canonicalizer); + [[maybe_unused]] const auto result = canonicalizer.apply( + schema, sourcemeta::core::schema_walker, custom_resolver, + transformer_callback_noop, dialect); + assert(result.first); + + sourcemeta::core::format(schema, sourcemeta::core::schema_walker, + 
custom_resolver, dialect); + } catch (const sourcemeta::core::SchemaReferenceError &error) { + throw FileError( + schema_path, std::string{error.identifier()}, error.location(), + error.what()); + } catch ( + const sourcemeta::core::SchemaRelativeMetaschemaResolutionError &error) { + throw FileError( + schema_path, error); + } catch (const sourcemeta::core::SchemaResolutionError &error) { + throw FileError(schema_path, + error); + } catch (const sourcemeta::core::SchemaUnknownBaseDialectError &) { + throw FileError( + schema_path); + } catch (const sourcemeta::core::SchemaError &error) { + throw FileError(schema_path, error.what()); + } + + sourcemeta::core::prettify(schema, std::cout); + std::cout << "\n"; +} diff --git a/vendor/jsonschema/src/command_fmt.cc b/vendor/jsonschema/src/command_fmt.cc index 68e1c6c5..4a819ba8 100644 --- a/vendor/jsonschema/src/command_fmt.cc +++ b/vendor/jsonschema/src/command_fmt.cc @@ -21,8 +21,7 @@ auto sourcemeta::jsonschema::fmt(const sourcemeta::core::Options &options) std::vector failed_files; const auto indentation{parse_indentation(options)}; for (const auto &entry : for_each_json(options)) { - if (entry.first.extension() == ".yaml" || - entry.first.extension() == ".yml") { + if (entry.yaml) { throw YAMLInputError{"This command does not support YAML input files yet", entry.first}; } diff --git a/vendor/jsonschema/src/command_lint.cc b/vendor/jsonschema/src/command_lint.cc index 731c8d76..0fbb4bfa 100644 --- a/vendor/jsonschema/src/command_lint.cc +++ b/vendor/jsonschema/src/command_lint.cc @@ -120,7 +120,7 @@ auto sourcemeta::jsonschema::lint(const sourcemeta::core::Options &options) const bool output_json = options.contains("json"); sourcemeta::core::SchemaTransformer bundle; - sourcemeta::core::add(bundle, sourcemeta::core::AlterSchemaMode::Readability); + sourcemeta::core::add(bundle, sourcemeta::core::AlterSchemaMode::Linter); bundle.add( sourcemeta::blaze::default_schema_compiler); @@ -135,7 +135,7 @@ auto 
sourcemeta::jsonschema::lint(const sourcemeta::core::Options &options) std::unordered_set blacklist; for (const auto &entry : bundle) { - blacklist.emplace(entry.first); + blacklist.emplace(entry->name()); } for (const auto &only : options.at("only")) { @@ -159,7 +159,7 @@ auto sourcemeta::jsonschema::lint(const sourcemeta::core::Options &options) std::reference_wrapper>> rules; for (const auto &entry : bundle) { - rules.emplace_back(entry.first, entry.second->message()); + rules.emplace_back(entry->name(), entry->message()); } std::sort(rules.begin(), rules.end(), @@ -195,8 +195,7 @@ auto sourcemeta::jsonschema::lint(const sourcemeta::core::Options &options) const auto &custom_resolver{ resolver(options, options.contains("http"), dialect, configuration)}; LOG_VERBOSE(options) << "Linting: " << entry.first.string() << "\n"; - if (entry.first.extension() == ".yaml" || - entry.first.extension() == ".yml") { + if (entry.yaml) { throw YAMLInputError{ "The --fix option is not supported for YAML input files", entry.first}; diff --git a/vendor/jsonschema/src/input.h b/vendor/jsonschema/src/input.h index c929e7bf..4f65d68e 100644 --- a/vendor/jsonschema/src/input.h +++ b/vendor/jsonschema/src/input.h @@ -14,6 +14,7 @@ #include // std::any_of, std::none_of, std::sort #include // std::size_t +#include // std::uintptr_t #include // std::filesystem #include // std::ref #include // std::set @@ -30,6 +31,7 @@ struct InputJSON { sourcemeta::core::PointerPositionTracker positions; std::size_t index{0}; bool multidocument{false}; + bool yaml{false}; auto operator<(const InputJSON &other) const noexcept -> bool { return this->first < other.first; } @@ -38,12 +40,26 @@ struct InputJSON { inline auto parse_extensions( const sourcemeta::core::Options &options, const std::optional &configuration) - -> std::set { + -> const std::set & { + using CacheKey = + std::pair>; + static std::map> cache; + + CacheKey cache_key{reinterpret_cast(&options), + configuration.has_value() + ? 
std::optional{configuration.value().absolute_path} + : std::nullopt}; + + const auto iterator{cache.find(cache_key)}; + if (iterator != cache.end()) { + return iterator->second; + } + std::set result; if (options.contains("extension")) { for (const auto &extension : options.at("extension")) { - if (extension.starts_with('.')) { + if (extension.empty() || extension.starts_with('.')) { result.emplace(extension); } else { std::ostringstream normalised_extension; @@ -55,7 +71,7 @@ inline auto parse_extensions( if (configuration.has_value()) { for (const auto &extension : configuration.value().extension) { - if (extension.starts_with('.')) { + if (extension.empty() || extension.starts_with('.')) { result.emplace(extension); } else { std::ostringstream normalised_extension; @@ -66,7 +82,11 @@ inline auto parse_extensions( } for (const auto &extension : result) { - LOG_VERBOSE(options) << "Using extension: " << extension << "\n"; + if (extension.empty()) { + LOG_WARNING() << "Matching files with no extension\n"; + } else { + LOG_VERBOSE(options) << "Using extension: " << extension << "\n"; + } } if (result.empty()) { @@ -75,7 +95,7 @@ inline auto parse_extensions( result.insert({".yml"}); } - return result; + return cache.emplace(std::move(cache_key), std::move(result)).first->second; } inline auto parse_ignore(const sourcemeta::core::Options &options) @@ -95,6 +115,34 @@ inline auto parse_ignore(const sourcemeta::core::Options &options) namespace { +struct ParsedJSON { + sourcemeta::core::JSON document; + sourcemeta::core::PointerPositionTracker positions; + bool yaml{false}; +}; + +inline auto read_file(const std::filesystem::path &path) -> ParsedJSON { + const auto extension{path.extension()}; + sourcemeta::core::PointerPositionTracker positions; + + if (extension == ".yaml" || extension == ".yml") { + return {sourcemeta::core::read_yaml(path, std::ref(positions)), + std::move(positions), true}; + } else if (extension == ".json") { + return 
{sourcemeta::core::read_json(path, std::ref(positions)), + std::move(positions), false}; + } + + try { + return {sourcemeta::core::read_json(path, std::ref(positions)), + std::move(positions), false}; + } catch (const sourcemeta::core::JSONParseError &) { + sourcemeta::core::PointerPositionTracker yaml_positions; + return {sourcemeta::core::read_yaml(path, std::ref(yaml_positions)), + std::move(yaml_positions), true}; + } +} + inline auto handle_json_entry(const std::filesystem::path &entry_path, const std::set &blacklist, @@ -108,7 +156,9 @@ handle_json_entry(const std::filesystem::path &entry_path, if (!std::filesystem::is_directory(entry) && std::any_of(extensions.cbegin(), extensions.cend(), [&canonical](const auto &extension) { - return canonical.string().ends_with(extension); + return extension.empty() + ? !canonical.has_extension() + : canonical.string().ends_with(extension); }) && std::none_of(blacklist.cbegin(), blacklist.cend(), [&canonical](const auto &prefix) { @@ -119,12 +169,10 @@ handle_json_entry(const std::filesystem::path &entry_path, continue; } - sourcemeta::core::PointerPositionTracker positions; // TODO: Print a verbose message for what is getting parsed - auto contents{sourcemeta::core::read_yaml_or_json(canonical, - std::ref(positions))}; - result.push_back( - {std::move(canonical), std::move(contents), std::move(positions)}); + auto parsed{read_file(canonical)}; + result.push_back({std::move(canonical), std::move(parsed.document), + std::move(parsed.positions), 0, false, parsed.yaml}); } } } else { @@ -190,24 +238,22 @@ handle_json_entry(const std::filesystem::path &entry_path, std::size_t index{0}; for (auto &entry : documents) { result.push_back({canonical, std::move(entry.first), - std::move(entry.second), index, true}); + std::move(entry.second), index, true, true}); index += 1; } } else if (documents.size() == 1) { - result.push_back({std::move(canonical), - std::move(documents.front().first), - std::move(documents.front().second)}); + 
result.push_back( + {std::move(canonical), std::move(documents.front().first), + std::move(documents.front().second), 0, false, true}); } } else { if (std::filesystem::is_empty(canonical)) { return; } - sourcemeta::core::PointerPositionTracker positions; // TODO: Print a verbose message for what is getting parsed - auto contents{ - sourcemeta::core::read_json(canonical, std::ref(positions))}; - result.push_back( - {std::move(canonical), std::move(contents), std::move(positions)}); + auto parsed{read_file(canonical)}; + result.push_back({std::move(canonical), std::move(parsed.document), + std::move(parsed.positions), 0, false, parsed.yaml}); } } } diff --git a/vendor/jsonschema/src/main.cc b/vendor/jsonschema/src/main.cc index 04f8a190..b4b6197f 100644 --- a/vendor/jsonschema/src/main.cc +++ b/vendor/jsonschema/src/main.cc @@ -21,6 +21,7 @@ Global Options: --default-dialect, -d Specify the URI for the default dialect to be used if the `$schema` keyword is not set --json, -j Prefer JSON output if supported + --http, -h Allow network access to resolve remote schemas Commands: @@ -32,9 +33,8 @@ Global Options: Print this command reference help. - validate [--http/-h] - [--benchmark/-b] [--loop ] - [--extension/-e ] + validate + [--benchmark/-b] [--loop ] [--extension/-e ] [--ignore/-i ] [--trace/-t] [--fast/-f] [--template/-m ] @@ -51,19 +51,18 @@ Global Options: for error reporting purposes. Make sure they match or you will get non-sense results. - metaschema [schemas-or-directories...] [--http/-h] - [--extension/-e ] + metaschema [schemas-or-directories...] [--extension/-e ] [--ignore/-i ] [--trace/-t] Validate that a schema or a set of schemas are valid with respect to their metaschemas. - compile [--http/-h] [--extension/-e ] + compile [--extension/-e ] [--ignore/-i ] [--fast/-f] [--minify/-m] Compile the given schema into an internal optimised representation. - test [schemas-or-directories...] [--http/-h] [--extension/-e ] + test [schemas-or-directories...] 
[--extension/-e ] [--ignore/-i ] Run a set of unit tests against a schema. @@ -84,7 +83,7 @@ Global Options: Use --list/-l to print a summary of all enabled rules. Use --indentation/-n to keep indentation when auto-fixing - bundle [--http/-h] [--extension/-e ] + bundle [--extension/-e ] [--ignore/-i ] [--without-id/-w] Perform JSON Schema Bundling on a schema to inline remote references, @@ -95,6 +94,13 @@ Global Options: Statically inspect a schema to display schema locations and references in a human-readable manner. + canonicalize + + Transform a JSON Schema into a canonical normalized form. + Canonicalization is a process that simplifies a schema into a + more verbose but semantically equivalent representation, making + it easier for static analysis. + encode Encode a JSON document or JSONL dataset using JSON BinPack. @@ -164,6 +170,10 @@ auto jsonschema_main(const std::string &program, const std::string &command, app.parse(argc, argv, {.skip = 1}); sourcemeta::jsonschema::compile(app); return EXIT_SUCCESS; + } else if (command == "canonicalize") { + app.parse(argc, argv, {.skip = 1}); + sourcemeta::jsonschema::canonicalize(app); + return EXIT_SUCCESS; } else if (command == "test") { app.option("extension", {"e"}); app.option("ignore", {"i"});