From e1dd1342463806a78904c845cfe76090fc6dc762 Mon Sep 17 00:00:00 2001 From: b41sh Date: Tue, 23 Apr 2024 19:28:08 +0800 Subject: [PATCH] feat: support convert jsonb value to serde_json value --- Cargo.toml | 14 +++-- src/error.rs | 1 + src/functions.rs | 127 +++++++++++++++++++++++++++++++++++++++++- src/number.rs | 18 +++++- tests/it/functions.rs | 39 ++++++++++++- 5 files changed, 186 insertions(+), 13 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7e6ea25..fcda0b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,23 +23,25 @@ license = "Apache-2.0" name = "jsonb" repository = "https://github.com/datafuselabs/jsonb" version = "0.3.0" -rust-version = "1.68" +rust-version = "1.77" [dependencies] byteorder = "1.5.0" fast-float = "0.2.0" +itoa = "1.0" nom = "7.1.3" -ordered-float = { version = "4.1.1", default-features = false } +ordered-float = { version = "4.2", default-features = false } rand = { version = "0.8.5", features = ["small_rng"] } -serde_json = { version = "1.0.107", default-features = false, features = [ +ryu = "1.0" +serde_json = { version = "1.0", default-features = false, features = [ "preserve_order", ] } [dev-dependencies] -goldenfile = "1.5.2" -serde_json = "1.0.107" +goldenfile = "1.7" +serde_json = "1.0" json-deserializer = "0.4.4" -simd-json = {version = "0.11.1", features = ["allow-non-simd"]} +simd-json = "0.13.10" mockalloc = "0.1.2" criterion = "0.5.1" diff --git a/src/error.rs b/src/error.rs index 0cb6c7c..db8a490 100644 --- a/src/error.rs +++ b/src/error.rs @@ -75,6 +75,7 @@ pub enum Error { InvalidToken, InvalidCast, + InvalidJson, InvalidJsonb, InvalidJsonbHeader, InvalidJsonbJEntry, diff --git a/src/functions.rs b/src/functions.rs index 25f3bd4..986f0cb 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -19,6 +19,7 @@ use std::collections::BTreeMap; use std::collections::VecDeque; use std::str::from_utf8; use std::str::from_utf8_unchecked; +use std::str::FromStr; use crate::builder::ArrayBuilder; use crate::builder::ObjectBuilder; @@ -1420,6 +1421,130 @@ pub fn is_object(value: &[u8]) -> bool { matches!(header & CONTAINER_HEADER_TYPE_MASK, OBJECT_CONTAINER_TAG) } +/// Convert `JSONB` value to `serde_json` Value +pub fn to_serde_json(value: &[u8]) -> Result { + if !is_jsonb(value) { + let json_str = std::str::from_utf8(value)?; + return match serde_json::Value::from_str(json_str) { + Ok(v) => Ok(v), + Err(_) => Err(Error::InvalidJson), + }; + } + + containter_to_serde_json(value) +} + +/// Convert `JSONB` value to `serde_json` Object Value +pub fn to_serde_json_object( + value: &[u8], +) -> Result>, Error> { + if !is_jsonb(value) { + let json_str = std::str::from_utf8(value)?; + return match serde_json::Value::from_str(json_str) { + Ok(v) => match v { + serde_json::Value::Object(obj) => Ok(Some(obj.clone())), + _ => Ok(None), + }, + Err(_) => Err(Error::InvalidJson), + }; + } + + containter_to_serde_json_object(value) +} + +fn containter_to_serde_json_object( + value: &[u8], +) -> Result>, Error> { + let header = read_u32(value, 0).unwrap_or_default(); + let length = (header & CONTAINER_HEADER_LEN_MASK) as usize; + + let obj_value = match header & CONTAINER_HEADER_TYPE_MASK { + OBJECT_CONTAINER_TAG => { + let mut obj = serde_json::Map::with_capacity(length); + for (key, jentry, val) in iterate_object_entries(value, header) { + let item = scalar_to_serde_json(jentry, val)?; + obj.insert(key.to_string(), item); + } + Some(obj) + } + ARRAY_CONTAINER_TAG | SCALAR_CONTAINER_TAG => None, + _ => { + return Err(Error::InvalidJsonb); + } + }; + Ok(obj_value) +} + +fn containter_to_serde_json(value: &[u8]) -> Result { + let header = read_u32(value, 0).unwrap_or_default(); + let length = (header & CONTAINER_HEADER_LEN_MASK) as usize; + + let json_value = match header & CONTAINER_HEADER_TYPE_MASK { + OBJECT_CONTAINER_TAG => { + let mut obj = serde_json::Map::with_capacity(length); + for (key, jentry, val) in iterate_object_entries(value, header) { + let item = scalar_to_serde_json(jentry, val)?; + obj.insert(key.to_string(), item); + } + serde_json::Value::Object(obj) + } + ARRAY_CONTAINER_TAG => { + let mut arr = Vec::with_capacity(length); + for (jentry, val) in iterate_array(value, header) { + let item = scalar_to_serde_json(jentry, val)?; + arr.push(item); + } + serde_json::Value::Array(arr) + } + SCALAR_CONTAINER_TAG => { + let encoded = match read_u32(value, 4) { + Ok(encoded) => encoded, + Err(_) => { + return Err(Error::InvalidJsonb); + } + }; + let jentry = JEntry::decode_jentry(encoded); + scalar_to_serde_json(jentry, &value[8..])? + } + _ => { + return Err(Error::InvalidJsonb); + } + }; + Ok(json_value) +} + +fn scalar_to_serde_json(jentry: JEntry, value: &[u8]) -> Result { + let json_value = match jentry.type_code { + NULL_TAG => serde_json::Value::Null, + TRUE_TAG => serde_json::Value::Bool(true), + FALSE_TAG => serde_json::Value::Bool(false), + NUMBER_TAG => { + let len = jentry.length as usize; + let n = Number::decode(&value[..len]); + match n { + Number::Int64(v) => serde_json::Value::Number(serde_json::Number::from(v)), + Number::UInt64(v) => serde_json::Value::Number(serde_json::Number::from(v)), + Number::Float64(v) => match serde_json::Number::from_f64(v) { + Some(v) => serde_json::Value::Number(v), + None => { + return Err(Error::InvalidJson); + } + }, + } + } + STRING_TAG => { + let len = jentry.length as usize; + let s = unsafe { String::from_utf8_unchecked(value[..len].to_vec()) }; + serde_json::Value::String(s) + } + CONTAINER_TAG => containter_to_serde_json(value)?, + _ => { + return Err(Error::InvalidJsonb); + } + }; + Ok(json_value) +} + /// Convert `JSONB` value to String pub fn to_string(value: &[u8]) -> String { if !is_jsonb(value) { @@ -1606,7 +1731,7 @@ fn scalar_to_string( FALSE_TAG => json.push_str("false"), NUMBER_TAG => { let num = Number::decode(&value[*value_offset..*value_offset + length]); - json.push_str(&format!("{num}")); + json.push_str(&num.to_string()); } STRING_TAG => { escape_scalar_string(value, *value_offset, *value_offset + length, json); diff --git a/src/number.rs b/src/number.rs index 3b1edee..8744b27 100644 --- a/src/number.rs +++ b/src/number.rs @@ -244,9 +244,21 @@ impl Ord for Number { impl Display for Number { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { match self { - Number::Int64(v) => write!(f, "{}", v), - Number::UInt64(v) => write!(f, "{}", v), - Number::Float64(v) => write!(f, "{}", v), + Number::Int64(v) => { + let mut buffer = itoa::Buffer::new(); + let s = buffer.format(*v); + write!(f, "{}", s) + } + Number::UInt64(v) => { + let mut buffer = itoa::Buffer::new(); + let s = buffer.format(*v); + write!(f, "{}", s) + } + Number::Float64(v) => { + let mut buffer = ryu::Buffer::new(); + let s = buffer.format(*v); + write!(f, "{}", s) + } } } } diff --git a/tests/it/functions.rs b/tests/it/functions.rs index acee1c1..74ad912 100644 --- a/tests/it/functions.rs +++ b/tests/it/functions.rs @@ -22,8 +22,8 @@ use jsonb::{ delete_by_name, exists_all_keys, exists_any_keys, from_slice, get_by_index, get_by_keypath, get_by_name, get_by_path, is_array, is_object, keypath::parse_key_paths, object_each, object_keys, parse_value, path_exists, path_match, strip_nulls, to_bool, to_f64, to_i64, - to_pretty_string, to_str, to_string, to_u64, traverse_check_string, type_of, Error, Number, - Object, Value, + to_pretty_string, to_serde_json, to_serde_json_object, to_str, to_string, to_u64, + traverse_check_string, type_of, Error, Number, Object, Value, }; use jsonb::jsonpath::parse_json_path; @@ -77,7 +77,7 @@ fn test_build_object() { r#"[1,2,3]"#, r#"{"k":"v"}"#, ]; - let keys = vec![ + let keys = [ "k1".to_string(), "k2".to_string(), "k3".to_string(), @@ -1466,6 +1466,39 @@ fn test_delete_by_keypath() { } } +#[test] +fn test_to_serde_json() { + let sources = vec![ + r#"true"#, + r#"1e20"#, + r#"[100,"abc",{"xx":"✅❌💻"}]"#, + r#"{"a":1,"b":[1,2,3]}"#, + r#"{"ab":{"k1":"v1","k2":"v2"},"cd":[true,100.23,"测试"]}"#, + ]; + let mut buf: Vec = Vec::new(); + for s in sources { + let value = parse_value(s.as_bytes()).unwrap(); + buf.clear(); + value.write_to_vec(&mut buf); + let jsonb_val_str = to_string(&buf); + + let json_val = to_serde_json(&buf).unwrap(); + let json_val_str = json_val.to_string(); + assert_eq!(jsonb_val_str, json_val_str); + + let obj_val = to_serde_json_object(&buf).unwrap(); + if is_object(&buf) { + assert!(obj_val.is_some()); + let obj_val = obj_val.unwrap(); + let json_val = serde_json::Value::Object(obj_val); + let obj_val_str = json_val.to_string(); + assert_eq!(jsonb_val_str, obj_val_str); + } else { + assert!(obj_val.is_none()); + } + } +} + fn init_object<'a>(entries: Vec<(&str, Value<'a>)>) -> Value<'a> { let mut map = BTreeMap::new(); for (key, val) in entries {