Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dom api #25

Merged
merged 9 commits into from
Aug 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ resolver = "2"
members = ["simdjson-sys"]

[workspace.package]
version = "0.3.0-alpha.1"
version = "0.3.0-alpha.2"

[workspace.dependencies]
simdjson-sys = { path = "simdjson-sys", version = "0.1.0-alpha.1" }
simdjson-sys = { path = "simdjson-sys", version = "0.1.0-alpha.2" }


[dependencies]
Expand Down
62 changes: 55 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,70 @@ Add this to your `Cargo.toml`

```toml
# In the `[dependencies]` section
simdjson-rust = {git = "https://github.com/SunDoge/simdjson-rust"}
simdjson-rust = "0.3.0"
```

Then, get started.

```rust
use simdjson_rust::{ondemand::Parser, prelude::*};
use simdjson_rust::prelude::*;
use simdjson_rust::{dom, ondemand};

fn main() -> simdjson_rust::Result<()> {
let mut parser = Parser::default();
let ps = make_padded_string("[0,1,2,3]");
let mut doc = parser.iterate(&ps)?;
let mut array = doc.get_array()?;
for (index, value) in array.iter()?.enumerate() {
assert_eq!(index as u64, value?.get_uint64()?);

// ondemand api.
{
let mut parser = ondemand::Parser::default();
let mut doc = parser.iterate(&ps)?;
let mut array = doc.get_array()?;
for (index, value) in array.iter()?.enumerate() {
assert_eq!(index as u64, value?.get_uint64()?);
}
}

// dom api.
{
let mut parser = dom::Parser::default();
let elem = parser.parse(&ps)?;
let arr = elem.get_array()?;
for (index, value) in arr.iter().enumerate() {
assert_eq!(index as u64, value.get_uint64()?);
}
}

Ok(())
}
```

### `dom` and `ondemand`

`simdjson` now offers two kinds of API, `dom` and `ondemand`.
`dom` parses the whole string, while `ondemand` only parses what you request.
Due to `ffi`, the overhead of the `ondemand` API is relatively high. I have tested `lto`, but it only improves things a little :(

Thus it is suggested that you

- use `ondemand` if you only want to access a specific part of a large json,
- use `dom` if you want to parse the whole json.


### `padded_string`

`simdjson` requires the input string to be padded. We must provide a string with `capacity = len + SIMDJSON_PADDING`.
We provide utils to do so.

```rust
use simdjson_rust::prelude::*;

fn main() -> simdjson_rust::Result<()> {
let ps = make_padded_string("[0,1,2,3]");
let ps = "[0,1,2,3]".to_padded_string();
// or reuse a buffer.
let unpadded = String::from("[1,2,3,4]");
let ps = unpadded.into_padded_string();
// or load from file.
let ps = load_padded_string("test.json")?;
Ok(())
}
```
27 changes: 21 additions & 6 deletions examples/simple.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
use simdjson_rust::prelude::*;
use simdjson_rust::{dom, ondemand, prelude::*};

fn main() -> simdjson_rust::Result<()> {
let mut parser = ondemand::Parser::default();
let ps = make_padded_string("[0,1,2,3]");
let mut doc = parser.iterate(&ps)?;
let mut array = doc.get_array()?;
for (index, value) in array.iter()?.enumerate() {
assert_eq!(index as u64, value?.get_uint64()?);

// ondemand api.
{
let mut parser = ondemand::Parser::default();
let mut doc = parser.iterate(&ps)?;
let mut array = doc.get_array()?;
for (index, value) in array.iter()?.enumerate() {
assert_eq!(index as u64, value?.get_uint64()?);
}
}

// dom api.
{
let mut parser = dom::Parser::default();
let elem = parser.parse(&ps)?;
let arr = elem.get_array()?;
for (index, value) in arr.iter().enumerate() {
assert_eq!(index as u64, value.get_uint64()?);
}
}

Ok(())
}
2 changes: 1 addition & 1 deletion simdjson-sys/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "simdjson-sys"
version = "0.1.0-alpha.1"
version = "0.1.0-alpha.2"
edition = "2021"
authors = ["SunDoge <384813529@qq.com>"]
license = "Apache-2.0"
Expand Down
3 changes: 2 additions & 1 deletion simdjson-sys/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ fn main() {
cc::Build::new()
.cpp(true)
.flag_if_supported("-std=c++17")
.flag_if_supported("/std:c++17")
.flag_if_supported("/std:c++20") // error C7555: use of designated initializers requires at least '/std:c++20'
.flag_if_supported("-pthread")
.flag_if_supported("-O3")
.flag("-DNDEBUG")
.include("simdjson/singleheader")
.file("src/simdjson_c_api.cpp")
.file("simdjson/singleheader/simdjson.cpp")
Expand Down
1 change: 1 addition & 0 deletions simdjson-sys/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

/// Number of spare bytes that must follow the JSON text in an input buffer
/// (a padded buffer has `capacity = len + SIMDJSON_PADDING`).
pub const SIMDJSON_PADDING: usize = 64;

/// Upper size limit constant, `0xFFFF_FFFF` bytes (4 GiB - 1).
// NOTE(review): presumably the largest document simdjson accepts — confirm
// against the bundled simdjson headers.
pub const SIMDJSON_MAXSIZE_BYTES: usize = 0xFFFF_FFFF;

/// Default batch size constant, one million.
// NOTE(review): presumably the default `batch_size` for `parse_many` — confirm
// against callers.
pub const DEFAULT_BATCH_SIZE: usize = 1_000_000;
226 changes: 225 additions & 1 deletion simdjson-sys/src/simdjson_c_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ template <typename U, typename T> inline U object_to_pointer(T &&t) {
return reinterpret_cast<U>(new T(std::move(t)));
}

// template <typename U, typename T>
// auto simdjson_result_to_struct(simdjson_result<T> &&sr) {
// T value;
// const error_code error = std::move(sr).get(value);
// return {static_cast<int>(error),
// reinterpret_cast<U>(new T(std::move(value)))};
// }

// template <typename T>
// inline int enum_result_to_number_result(simdjson_result<T>&& enum_result) {
// T inner;
Expand Down Expand Up @@ -287,4 +295,220 @@ IMPL_GET_PRIMITIVE(SJ_OD_number, ondemand::number, double, get_double)
int SJ_OD_number_get_number_type(SJ_OD_number *self) {
return static_cast<int>(
reinterpret_cast<ondemand::number *>(self)->get_number_type());
}
}

// New macros for dom
//
// IMPL_HANDLE(name, type) generates the plumbing for one opaque C handle
// `name` that stands for the C++ object `type`:
//   - `name##_free(name *)`   : deletes the pointer after casting it back to
//                               `type *`.
//   - `cast_to_type(name *)`  : overload that reinterprets the handle as
//                               `type *`.
//   - `move_to_handle(type&&)`: overload that moves the object onto the heap
//                               (via object_to_pointer) and returns it as a
//                               `name *`; the result is owned by the caller and
//                               is released with `name##_free`.
// Because `cast_to_type` / `move_to_handle` are overloaded per handle type, the
// functions below can call them without spelling out the C++ type.
#define IMPL_HANDLE(name, type) \
void name##_free(name *r) { delete reinterpret_cast<type *>(r); } \
inline type *cast_to_type(name *r) { return reinterpret_cast<type *>(r); } \
inline name *move_to_handle(type &&r) { \
return object_to_pointer<name *>(std::move(r)); \
}

// Instantiate the free/cast/move helpers for every dom handle type exposed
// through the C API (one `SJ_DOM_*_free` function is emitted per line).
IMPL_HANDLE(SJ_DOM_parser, dom::parser)
IMPL_HANDLE(SJ_DOM_array, dom::array)
IMPL_HANDLE(SJ_DOM_element, dom::element)
IMPL_HANDLE(SJ_DOM_object, dom::object)
IMPL_HANDLE(SJ_DOM_array_iterator, dom::array::iterator)
IMPL_HANDLE(SJ_DOM_object_iterator, dom::object::iterator)
IMPL_HANDLE(SJ_DOM_document, dom::document)
IMPL_HANDLE(SJ_DOM_document_stream, dom::document_stream)
IMPL_HANDLE(SJ_DOM_document_stream_iterator, dom::document_stream::iterator)

// dom::parser
// Creates a dom::parser constructed with `max_capacity` and hands it out as a
// heap-allocated opaque handle. Release it with SJ_DOM_parser_free.
SJ_DOM_parser *SJ_DOM_parser_new(size_t max_capacity) {
  dom::parser fresh_parser(max_capacity);
  return move_to_handle(std::move(fresh_parser));
}

// Parses `len` bytes at `json` with `parser` and returns the root element.
//
// The buffer is passed with realloc_if_needed = false, i.e. the caller must
// already have padded it.
//
// Returns the simdjson error code (as int) together with an owning
// SJ_DOM_element handle (release with SJ_DOM_element_free). On failure no
// element is allocated and the handle is null, so nothing leaks when the
// caller only inspects the error code.
SJ_DOM_element_result SJ_DOM_parser_parse(SJ_DOM_parser *parser,
                                          const char *json, size_t len) {
  dom::element value;
  const auto error = cast_to_type(parser)
                         ->parse(json, len, false)
                         .get(value); // The string is padded, so false.
  if (error) {
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(value))};
}
// Parses `len` bytes at `json` with `parser`, storing the result in `doc`, and
// returns the root element.
//
// Returns the simdjson error code (as int) together with an owning
// SJ_DOM_element handle (release with SJ_DOM_element_free). On failure no
// element is allocated and the handle is null.
SJ_DOM_element_result SJ_DOM_parser_parse_into_document(SJ_DOM_parser *parser,
                                                        SJ_DOM_document *doc,
                                                        const char *json,
                                                        size_t len) {
  dom::element value;
  const auto error = cast_to_type(parser)
                         ->parse_into_document(*cast_to_type(doc), json, len)
                         .get(value);
  if (error) {
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(value))};
}
// Starts a multi-document parse over `len` bytes at `json`, using
// `batch_size` as the batch size passed to dom::parser::parse_many.
//
// Returns the simdjson error code (as int) together with an owning
// SJ_DOM_document_stream handle (release with SJ_DOM_document_stream_free).
// On failure no stream is allocated and the handle is null.
SJ_DOM_document_stream_result SJ_DOM_parser_parse_many(SJ_DOM_parser *parser,
                                                       const char *json,
                                                       size_t len,
                                                       size_t batch_size) {
  dom::document_stream value;
  const auto error =
      cast_to_type(parser)->parse_many(json, len, batch_size).get(value);
  if (error) {
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(value))};
}

// dom::element
// Returns the element's dom::element_type converted to its integer value.
int SJ_DOM_element_type(SJ_DOM_element *self) {
  const auto kind = cast_to_type(self)->type();
  return static_cast<int>(kind);
}

// Interprets the element as an array.
//
// Returns the simdjson error code (as int) together with an owning
// SJ_DOM_array handle (release with SJ_DOM_array_free). On failure no array
// object is allocated and the handle is null.
SJ_DOM_array_result SJ_DOM_element_get_array(SJ_DOM_element *self) {
  dom::array res;
  const error_code error = cast_to_type(self)->get_array().get(res);
  if (error) {
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}
// Interprets the element as an object.
//
// Returns the simdjson error code (as int) together with an owning
// SJ_DOM_object handle (release with SJ_DOM_object_free). On failure no
// object is allocated and the handle is null.
SJ_DOM_object_result SJ_DOM_element_get_object(SJ_DOM_element *self) {
  dom::object res;
  const error_code error = cast_to_type(self)->get_object().get(res);
  if (error) {
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}

// Interprets the element as a string.
//
// Returns the simdjson error code (as int) plus a pointer/length pair taken
// from the resulting std::string_view. The characters are not copied.
// NOTE(review): the view presumably points into storage owned by the
// parser/document — confirm the lifetime callers may rely on.
SJ_string_view_result SJ_DOM_element_get_string(SJ_DOM_element *self) {
  std::string_view text;
  const auto status = cast_to_type(self)->get_string().get(text);
  return {static_cast<int>(status),
          {.data = text.data(), .len = text.size()}};
}

// Reads the element as an unsigned 64-bit integer.
// Returns the simdjson error code (as int) and the value; the value stays 0
// unless simdjson writes to it.
SJ_uint64_t_result SJ_DOM_element_get_uint64(SJ_DOM_element *self) {
  uint64_t number = 0;
  const auto status = cast_to_type(self)->get_uint64().get(number);
  return {static_cast<int>(status), number};
}
// Reads the element as a signed 64-bit integer.
// Returns the simdjson error code (as int) and the value; the value stays 0
// unless simdjson writes to it.
SJ_int64_t_result SJ_DOM_element_get_int64(SJ_DOM_element *self) {
  int64_t number = 0;
  const auto status = cast_to_type(self)->get_int64().get(number);
  return {static_cast<int>(status), number};
}
// Reads the element as a double.
// Returns the simdjson error code (as int) and the value; the value stays 0.0
// unless simdjson writes to it.
SJ_double_result SJ_DOM_element_get_double(SJ_DOM_element *self) {
  double number = 0.0;
  const auto status = cast_to_type(self)->get_double().get(number);
  return {static_cast<int>(status), number};
}
// Resolves a JSON pointer relative to this element. `json`/`len` describe the
// pointer text; an explicit length is used, so it need not be NUL-terminated.
//
// Returns the simdjson error code (as int) together with an owning
// SJ_DOM_element handle (release with SJ_DOM_element_free). On failure no
// element is allocated and the handle is null.
SJ_DOM_element_result SJ_DOM_element_at_pointer(SJ_DOM_element *self,
                                                const char *json, size_t len) {
  dom::element res;
  const error_code error =
      cast_to_type(self)->at_pointer(std::string_view(json, len)).get(res);
  if (error) {
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}

// dom::array
// Returns a heap-allocated iterator positioned at the array's begin().
// Release it with SJ_DOM_array_iterator_free.
SJ_DOM_array_iterator *SJ_DOM_array_begin(SJ_DOM_array *self) {
  dom::array *array = cast_to_type(self);
  return move_to_handle(array->begin());
}
// Returns a heap-allocated iterator equal to the array's end().
// Release it with SJ_DOM_array_iterator_free.
SJ_DOM_array_iterator *SJ_DOM_array_end(SJ_DOM_array *self) {
  dom::array *array = cast_to_type(self);
  return move_to_handle(array->end());
}
// Returns dom::array::size() for the wrapped array.
size_t SJ_DOM_array_size(SJ_DOM_array *self) {
  dom::array *array = cast_to_type(self);
  return array->size();
}
// Returns dom::array::number_of_slots() for the wrapped array.
size_t SJ_DOM_array_number_of_slots(SJ_DOM_array *self) {
  dom::array *array = cast_to_type(self);
  return array->number_of_slots();
}
// Fetches the array entry at position `index`.
//
// Returns the simdjson error code (as int) together with an owning
// SJ_DOM_element handle (release with SJ_DOM_element_free). On failure no
// element is allocated and the handle is null.
SJ_DOM_element_result SJ_DOM_array_at(SJ_DOM_array *self, size_t index) {
  dom::element res;
  const error_code error = cast_to_type(self)->at(index).get(res);
  if (error) {
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}
// Resolves a JSON pointer relative to this array. `json`/`len` describe the
// pointer text; an explicit length is used, so it need not be NUL-terminated.
//
// Returns the simdjson error code (as int) together with an owning
// SJ_DOM_element handle (release with SJ_DOM_element_free). On failure no
// element is allocated and the handle is null.
SJ_DOM_element_result SJ_DOM_array_at_pointer(SJ_DOM_array *self,
                                              const char *json, size_t len) {
  dom::element res;
  const error_code error =
      cast_to_type(self)->at_pointer(std::string_view(json, len)).get(res);
  if (error) {
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}

// dom::array::iterator
// Dereferences the iterator and returns the current element as a new
// heap-allocated handle. Release it with SJ_DOM_element_free.
SJ_DOM_element *SJ_DOM_array_iterator_get(SJ_DOM_array_iterator *self) {
  dom::array::iterator &cursor = *cast_to_type(self);
  return move_to_handle(*cursor);
}
// Compares two array iterators with operator!=; true when they differ.
bool SJ_DOM_array_iterator_not_equal(SJ_DOM_array_iterator *lhs,
                                     SJ_DOM_array_iterator *rhs) {
  dom::array::iterator &left = *cast_to_type(lhs);
  dom::array::iterator &right = *cast_to_type(rhs);
  return left != right;
}
// Advances the array iterator in place by one position (pre-increment).
void SJ_DOM_array_iterator_step(SJ_DOM_array_iterator *self) {
  dom::array::iterator &cursor = *cast_to_type(self);
  ++cursor;
}

// dom::object
// Returns a heap-allocated iterator positioned at the object's begin().
// Release it with SJ_DOM_object_iterator_free.
SJ_DOM_object_iterator *SJ_DOM_object_begin(SJ_DOM_object *self) {
  dom::object *object = cast_to_type(self);
  return move_to_handle(object->begin());
}
// Returns a heap-allocated iterator equal to the object's end().
// Release it with SJ_DOM_object_iterator_free.
SJ_DOM_object_iterator *SJ_DOM_object_end(SJ_DOM_object *self) {
  dom::object *object = cast_to_type(self);
  return move_to_handle(object->end());
}
// Returns dom::object::size() for the wrapped object.
size_t SJ_DOM_object_size(SJ_DOM_object *self) {
  dom::object *object = cast_to_type(self);
  return object->size();
}
// Resolves a JSON pointer relative to this object. `json`/`len` describe the
// pointer text; an explicit length is used, so it need not be NUL-terminated.
//
// Returns the simdjson error code (as int) together with an owning
// SJ_DOM_element handle (release with SJ_DOM_element_free). On failure no
// element is allocated and the handle is null.
SJ_DOM_element_result SJ_DOM_object_at_pointer(SJ_DOM_object *self,
                                               const char *json, size_t len) {
  dom::element res;
  const error_code error =
      cast_to_type(self)->at_pointer(std::string_view(json, len)).get(res);
  if (error) {
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}
// Looks up a field of the object by key. `json`/`len` describe the key text;
// an explicit length is used, so it need not be NUL-terminated.
//
// Returns the simdjson error code (as int) together with an owning
// SJ_DOM_element handle (release with SJ_DOM_element_free). On failure no
// element is allocated and the handle is null.
SJ_DOM_element_result SJ_DOM_object_at_key(SJ_DOM_object *self,
                                           const char *json, size_t len) {
  dom::element res;
  const error_code error =
      cast_to_type(self)->at_key(std::string_view(json, len)).get(res);
  if (error) {
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}
// Looks up a field of the object by key through
// dom::object::at_key_case_insensitive. `json`/`len` describe the key text; an
// explicit length is used, so it need not be NUL-terminated.
//
// Returns the simdjson error code (as int) together with an owning
// SJ_DOM_element handle (release with SJ_DOM_element_free). On failure no
// element is allocated and the handle is null.
SJ_DOM_element_result SJ_DOM_object_at_key_case_insensitive(SJ_DOM_object *self,
                                                            const char *json,
                                                            size_t len) {
  dom::element res;
  const error_code error =
      cast_to_type(self)
          ->at_key_case_insensitive(std::string_view(json, len))
          .get(res);
  if (error) {
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}

// dom::object::iterator
// Dereferences the object iterator and returns the current field.
// The key is exposed as a pointer/length pair (not copied); the value is
// moved into a new heap-allocated element handle that must be released with
// SJ_DOM_element_free.
SJ_DOM_key_value_pair SJ_DOM_object_iterator_get(SJ_DOM_object_iterator *self) {
  dom::object::iterator &cursor = *cast_to_type(self);
  dom::key_value_pair field = *cursor;
  const auto &key = field.key;
  return {.key = {.data = key.data(), .len = key.size()},
          .value = move_to_handle(std::move(field.value))};
}
// Compares two object iterators with operator!=; true when they differ.
bool SJ_DOM_object_iterator_not_equal(SJ_DOM_object_iterator *lhs,
                                      SJ_DOM_object_iterator *rhs) {
  dom::object::iterator &left = *cast_to_type(lhs);
  dom::object::iterator &right = *cast_to_type(rhs);
  return left != right;
}
// Advances the object iterator in place by one position (pre-increment).
void SJ_DOM_object_iterator_step(SJ_DOM_object_iterator *self) {
  dom::object::iterator &cursor = *cast_to_type(self);
  ++cursor;
}

// dom::document
// Creates an empty dom::document and hands it out as a heap-allocated opaque
// handle. Release it with SJ_DOM_document_free.
SJ_DOM_document *SJ_DOM_document_new() {
  dom::document empty_doc{};
  return move_to_handle(std::move(empty_doc));
}

// Returns the document's root() element as a new heap-allocated handle.
// Release it with SJ_DOM_element_free.
SJ_DOM_element *SJ_DOM_document_root(SJ_DOM_document *self) {
  dom::document *document = cast_to_type(self);
  return move_to_handle(document->root());
}
// Returns a heap-allocated iterator obtained from the stream's begin().
// Release it with SJ_DOM_document_stream_iterator_free.
SJ_DOM_document_stream_iterator *
SJ_DOM_document_stream_begin(SJ_DOM_document_stream *self) {
  dom::document_stream *stream = cast_to_type(self);
  return move_to_handle(stream->begin());
}
// Returns a heap-allocated iterator obtained from the stream's end().
// Release it with SJ_DOM_document_stream_iterator_free.
SJ_DOM_document_stream_iterator *
SJ_DOM_document_stream_end(SJ_DOM_document_stream *self) {
  dom::document_stream *stream = cast_to_type(self);
  return move_to_handle(stream->end());
}
// Dereferences the stream iterator and returns the current document's
// element.
//
// Returns the simdjson error code (as int) together with an owning
// SJ_DOM_element handle (release with SJ_DOM_element_free). On failure no
// element is allocated and the handle is null.
SJ_DOM_element_result
SJ_DOM_document_stream_iterator_get(SJ_DOM_document_stream_iterator *self) {
  dom::element res;
  const error_code error = cast_to_type(self)->operator*().get(res);
  if (error) {
    return {static_cast<int>(error), nullptr};
  }
  return {static_cast<int>(error), move_to_handle(std::move(res))};
}
// Advances the document-stream iterator in place (pre-increment).
void SJ_DOM_document_stream_iterator_step(
    SJ_DOM_document_stream_iterator *self) {
  dom::document_stream::iterator &cursor = *cast_to_type(self);
  ++cursor;
}
// Compares two document-stream iterators with operator!=; true when they
// differ.
bool SJ_DOM_document_stream_iterator_not_equal(
    SJ_DOM_document_stream_iterator *lhs,
    SJ_DOM_document_stream_iterator *rhs) {
  dom::document_stream::iterator &left = *cast_to_type(lhs);
  dom::document_stream::iterator &right = *cast_to_type(rhs);
  return left != right;
}
Loading
Loading