From 7472515c1f0c98f4c3ed82f7fbf41e3c3e52afed Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 23 Jan 2026 11:02:02 -0500 Subject: [PATCH 1/6] jsonfxns Signed-off-by: DanRoscigno --- .../json-functions/_category_.yml | 6 + .../json-constructor-functions/_category_.yml | 6 + .../json-constructor-functions/json_array.md | 45 ++++++ .../json-constructor-functions/json_object.md | 57 ++++++++ .../json-constructor-functions/parse_json.md | 108 ++++++++++++++ .../json-functions/json-operators.md | 56 ++++++++ .../_category_.yml | 6 + .../arrow-function.md | 61 ++++++++ .../cast.md | 128 +++++++++++++++++ .../get_json_bool.md | 70 ++++++++++ .../get_json_double.md | 66 +++++++++ .../get_json_int.md | 68 +++++++++ .../get_json_string.md | 107 ++++++++++++++ .../json_contains.md | 88 ++++++++++++ .../json_each.md | 55 ++++++++ .../json_exists.md | 63 +++++++++ .../json_keys.md | 103 ++++++++++++++ .../json_length.md | 132 ++++++++++++++++++ .../json_pretty.md | 85 +++++++++++ .../json_query.md | 65 +++++++++ .../json_remove.md | 88 ++++++++++++ .../json_set.md | 95 +++++++++++++ .../json_string.md | 51 +++++++ .../to_json.md | 70 ++++++++++ ...verview-of-json-functions-and-operators.md | 69 +++++++++ 25 files changed, 1748 insertions(+) create mode 100644 docs/en/sql-reference/sql-functions/json-functions/_category_.yml create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/_category_.yml create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/json_array.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/json_object.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/parse_json.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-operators.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/_category_.yml create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/arrow-function.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/cast.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_bool.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_double.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_int.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_string.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_contains.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_each.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_exists.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_keys.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_length.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_pretty.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_query.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_remove.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_set.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_string.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/to_json.md create mode 100644 docs/en/sql-reference/sql-functions/json-functions/overview-of-json-functions-and-operators.md diff --git a/docs/en/sql-reference/sql-functions/json-functions/_category_.yml b/docs/en/sql-reference/sql-functions/json-functions/_category_.yml new file mode 100644 index 0000000..1b6bc94 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/_category_.yml @@ -0,0 +1,6 @@ +position: 10 # float position is supported +label: 'JSON' +collapsible: true +collapsed: true +link: + type: generated-index \ No newline at end of file diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/_category_.yml b/docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/_category_.yml new file mode 100644 index 0000000..c86aacd --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/_category_.yml @@ -0,0 +1,6 @@ +position: 10 # float position is supported +label: 'JSON Constructor' +collapsible: true +collapsed: true +link: + type: generated-index \ No newline at end of file diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/json_array.md b/docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/json_array.md new file mode 100644 index 0000000..6ddb120 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/json_array.md @@ -0,0 +1,45 @@ +--- +displayed_sidebar: docs +--- + +# json_array + +Converts each element of an SQL array to a JSON value and returns a JSON array that consists of the JSON values. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +## Syntax + +```Haskell +json_array(value, ...) +``` + +## Parameters + +`value`: an element in the SQL array. Only `NULL` values and the following data types are supported: STRING, VARCHAR, CHAR, JSON, TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DOUBLE, FLOAT, and BOOLEAN. + +## Return value + +Returns a JSON array. + +## Examples + +Example 1: Construct a JSON array that consists of values of different data types. + +```plaintext +mysql> SELECT json_array(1, true, 'starrocks', 1.1); + + -> [1, true, "starrocks", 1.1] +``` + +Example 2: Construct an empty JSON array. + +```plaintext +mysql> SELECT json_array(); + + -> [] +``` \ No newline at end of file diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/json_object.md b/docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/json_object.md new file mode 100644 index 0000000..4420d40 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/json_object.md @@ -0,0 +1,57 @@ +--- +displayed_sidebar: docs +--- + +# json_object + +Converts one or more key-value pairs to a JSON object that consists of the key-value pairs. The key-value pairs are sorted by key in dictionary order. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +## Syntax + +```Haskell +json_object(key, value, ...) +``` + +## Parameters + +- `key`: a key in the JSON object. Only the VARCHAR data type is supported. + +- `value`: a value in the JSON object. Only `NULL` values and the following data types are supported: STRING, VARCHAR, CHAR, JSON, TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DOUBLE, FLOAT, and BOOLEAN. + +## Return value + +Returns a JSON object. + +> If the total number of keys and values is an odd number, the JSON_OBJECT function fills `NULL` in the last field. + +## Examples + +Example 1: Construct a JSON object that consists of values of different data types. + +```plaintext +mysql> SELECT json_object('name', 'starrocks', 'active', true, 'published', 2020); + + -> {"active": true, "name": "starrocks", "published": 2020} +``` + +Example 2: Construct a JSON object by using nested JSON_OBJECT functions. + +```plaintext +mysql> SELECT json_object('k1', 1, 'k2', json_object('k2', 2), 'k3', json_array(4, 5)); + + -> {"k1": 1, "k2": {"k2": 2}, "k3": [4, 5]} +``` + +Example 3: Construct an empty JSON object. + +```plaintext +mysql> SELECT json_object(); + + -> {} +``` diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/parse_json.md b/docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/parse_json.md new file mode 100644 index 0000000..e500548 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-constructor-functions/parse_json.md @@ -0,0 +1,108 @@ +--- +displayed_sidebar: docs +--- + +# parse_json + +Converts a string to a JSON value. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +## Syntax + +```Haskell +parse_json(string_expr) +``` + +## Parameters + +`string_expr`: the expression that represents the string. Only the STRING, VARCHAR, and CHAR data types are supported. + +## Return value + +Returns a JSON value. + +> Note: If the string cannot be parsed into a standard JSON value, the PARSE_JSON function returns `NULL` (see Example 5). For information about the JSON specifications, see [RFC 7159](https://tools.ietf.org/html/rfc7159?spm=a2c63.p38356.0.0.14d26b9fcp7fcf#page-4). + +## Examples + +Example 1: Convert a STRING value of `1` to a JSON value of `1`. + +```plaintext +mysql> SELECT parse_json('1'); ++-----------------+ +| parse_json('1') | ++-----------------+ +| "1" | ++-----------------+ +``` + +Example 2: Convert an array of the STRING data type to a JSON array. + +```plaintext +mysql> SELECT parse_json('[1,2,3]'); ++-----------------------+ +| parse_json('[1,2,3]') | ++-----------------------+ +| [1, 2, 3] | ++-----------------------+ +``` + +Example 3: Convert an object of the STRING data type to a JSON object. + +```plaintext +mysql> SELECT parse_json('{"star": "rocks"}'); ++---------------------------------+ +| parse_json('{"star": "rocks"}') | ++---------------------------------+ +| {"star": "rocks"} | ++---------------------------------+ +``` + +Example 4: Construct a JSON value of `NULL`. + +```plaintext +mysql> SELECT parse_json('null'); ++--------------------+ +| parse_json('null') | ++--------------------+ +| "null" | ++--------------------+ +``` + +Example 5: If the string cannot be parsed into a standard JSON value, the PARSE_JSON function returns `NULL`. In this example, `star` is not enclosed in double quotation marks ("). Therefore, the PARSE_JSON function returns `NULL`. + +```plaintext +mysql> SELECT parse_json('{star: "rocks"}'); ++-------------------------------+ +| parse_json('{star: "rocks"}') | ++-------------------------------+ +| NULL | ++-------------------------------+ +``` + +Example 6: If a JSON key contains a '.', for example, 'a.1', it must be escaped with '\\' or you need to enclose the entire key value along with double quotes in single quotes. + + +```plaintext +mysql> select parse_json('{"b":4, "a.1": "1"}')->"a\\.1"; ++--------------------------------------------+ +| parse_json('{"b":4, "a.1": "1"}')->'a\\.1' | ++--------------------------------------------+ +| "1" | ++--------------------------------------------+ +mysql> select parse_json('{"b":4, "a.1": "1"}')->'"a.1"'; ++--------------------------------------------+ +| parse_json('{"b":4, "a.1": "1"}')->'"a.1"' | ++--------------------------------------------+ +| "1" | ++--------------------------------------------+ +``` + +## Keywords + +parse_json, parse json diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-operators.md b/docs/en/sql-reference/sql-functions/json-functions/json-operators.md new file mode 100644 index 0000000..2c5911a --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-operators.md @@ -0,0 +1,56 @@ +--- +displayed_sidebar: docs +--- + +# JSON operators + +StarRocks supports the following JSON comparison operators: `<`, `<=`, `>`, `>=`, `=`, and `!=`. You can use these operators to query JSON data. However, StarRocks does not allow you to use `IN` to query JSON data. + +- > The operands of an operator must both be JSON values. + +- > If one operand of an operator is a JSON value while the other is not, the operand that is not a JSON value is converted to a JSON value during the arithmetic operation. For more information about the conversion rules, see [CAST](./json-query-and-processing-functions/cast.md). + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](./overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../sql-statements/generated_columns.md) +::: + +## Arithmetic rules + +JSON operators comply with the following arithmetic rules: + +- When the operands of an operator are JSON values of the same data type: + - If both operands are JSONs values of a basic data type, such as NUMBER, STRING, or BOOLEAN, the operator performs the arithmetic operation in compliance with the arithmetic rules for the basic data type. + +> Note: If both operands are numbers but one is a DOUBLE value and the other is an INT value, the operator converts the INT value to a DOUBLE value. + +- If both operands are JSON values of a composite data type, such as OBJECT or ARRAY, the operator sorts the keys in the operands in dictionary order based on the sequence of the keys in the first operand and then compares the values of the keys between the operands. + +Example 1: + +The first operand is `{"a": 1, "c": 2}`, and the second operand is `{"b": 1, "a": 2}`. In this example, the operator compares the values of the keys `a` between the operands. The value of the key `a` in the first operand is `1`, whereas the value of the key `a` in the second operand is `2`. The value `1` is greater than the value `2`. Therefore, the operator concludes that the first operand `{"a": 1, "c": 2}` is less than the second operand `{"b": 1, "``a``": 2}`. + +```plaintext +mysql> SELECT PARSE_JSON('{"a": 1, "c": 2}') < PARSE_JSON('{"b": 1, "a": 2} '); + + -> 1 +``` + +Example 2: + +The first operand is `{"a": 1, "c": 2}`, and the second operand is `{"b": 1, "a": 1}`. In this example, the operator first compares the values of the keys `a` between the operands. The values of the keys `a` in the operands are both `1`. Then, the operator compares the values of the keys `c` in the operands. The second operand does not contain the key `c`. Therefore, the operator concludes that the first operand `{"a": 1, "c": 2}` is greater than the second operand `{"b": 1, "a": 1}`. + +```plaintext +mysql> SELECT PARSE_JSON('{"a": 1, "c": 2}') < PARSE_JSON('{"b": 1, "a": 1}'); + + -> 0 +``` + +- When the operands of an operator are JSON values of two distinct data types, the operator compares the operands in compliance with the following arithmetic rules: NULL < BOOLEAN < ARRAY < OBJECT < DOUBLE < INT < STRING. + +```plaintext +mysql> SELECT PARSE_JSON('"a"') < PARSE_JSON('{"a": 1, "c": 2}'); + + -> 0 +``` diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/_category_.yml b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/_category_.yml new file mode 100644 index 0000000..b89ec25 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/_category_.yml @@ -0,0 +1,6 @@ +position: 10 # float position is supported +label: 'JSON query and processing' +collapsible: true +collapsed: true +link: + type: generated-index \ No newline at end of file diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/arrow-function.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/arrow-function.md new file mode 100644 index 0000000..15ff759 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/arrow-function.md @@ -0,0 +1,61 @@ +--- +displayed_sidebar: docs +--- + +# Arrow function + +Queries an element that can be located by the `json_path` expression in a JSON object and returns a JSON value. The arrow function `->` is more compact and easier to use than the [json_query](json_query.md) function. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +## Syntax + +```Haskell +json_object_expr -> json_path +``` + +## Parameters + +- `json_object_expr`: the expression that represents the JSON object. The object can be a JSON column, or a JSON object that is produced by a JSON constructor function such as PARSE_JSON. + +- `json_path`: the expression that represents the path to an element in the JSON object. The value of this parameter is a string. For information about the JSON path syntax that is supported by StarRocks, see [Overview of JSON functions and operators](../overview-of-json-functions-and-operators.md). + +## Return value + +Returns a JSON value. + +> If the element does not exist, the arrow function returns an SQL value of `NULL`. + +## Examples + +Example 1: Query an element that can be located by the `'$.a.b'` expression in the specified JSON object. + +```plaintext +mysql> SELECT parse_json('{"a": {"b": 1}}') -> '$.a.b'; + + -> 1 +``` + +Example 2: Use nested arrow functions to query an element. The arrow function in which another arrow function is nested queries an element based on the result that is returned by the nested arrow function. + +> In this example, the root element $ is omitted from the `json_path` expression. + +```plaintext +mysql> SELECT parse_json('{"a": {"b": 1}}')->'a'->'b'; + + -> 1 +``` + +Example 3: Query an element that can be located by the `'a'` expression in the specified JSON object. + +> In this example, the root element $ is omitted from the `json_path` expression. + +```plaintext +mysql> SELECT parse_json('{"a": "b"}') -> 'a'; + + -> "b" +``` diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/cast.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/cast.md new file mode 100644 index 0000000..84e8ab5 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/cast.md @@ -0,0 +1,128 @@ +--- +displayed_sidebar: docs +--- + +# cast + +Converts a value between the JSON type and SQL type. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +## Syntax + +- Conversion from JSON to SQL + +```Haskell +cast(json_expr AS sql_data_type) +``` + +- Conversion from SQL to JSON + +```Haskell +cast(sql_expr AS JSON) +``` + +## Parameters + +- `json_expr`: the expression that represents the JSON value you want to convert to an SQL value. + +- `sql_data_type`: the SQL data type to which you want to convert the JSON value. Only the STRING, VARCHAR, CHAR, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DOUBLE, and FLOAT data types are supported. + +- `sql_expr`: the expression that represents the SQL value you want to convert to a JSON value. This parameter supports all SQL data types that are supported by the `sql_data_type` parameter. + +## Return value + +- If you use the `cast(json_expr AS sql_data_type)` syntax, the cast function returns a value of the SQL data type that is specified by the `sql_data_type` parameter. + +- If you use the `cast(sql_expr AS JSON)` syntax, the cast function returns a JSON value. + +## Usage notes + +- Conversion from SQL to JSON + + - If the SQL value exceeds the precision that is supported by JSON, the cast function returns `NULL` to prevent an arithmetic overflow. + + - If the SQL value is `NULL`, the cast function does not convert the SQL value `NULL` to a JSON value of `NULL`. The return value is still an SQL value of `NULL`. + +- Conversion from JSON to SQL + + - The cast function supports only conversions between compatible JSON and SQL data types. For example, you can convert a JSON string to an SQL string. + + - The cast function does not support conversions between incompatible JSON and SQL data types. For example, if you convert a JSON number to an SQL string, the function returns `NULL`. + + - If an arithmetic overflow occurs, the cast function returns an SQL value of `NULL`. + + - If you convert a JSON value of `NULL` to an SQL value, the function returns an SQL value of `NULL`. + + - If you convert a JSON string to a VARCHAR value, the function returns a VARCHAR value that is not enclosed in double quotation marks ("). + +## Examples + +Example 1: Convert a JSON value to an SQL value. + +```plaintext +-- Convert a JSON value to an INT value. +mysql> select cast(parse_json('{"a": 1}') -> 'a' as int); ++--------------------------------------------+ +| CAST((parse_json('{"a": 1}')->'a') AS INT) | ++--------------------------------------------+ +| 1 | ++--------------------------------------------+ + +-- Convert a JSON string to a VARCHAR value. +mysql> select cast(parse_json('"star"') as varchar); ++---------------------------------------+ +| cast(parse_json('"star"') AS VARCHAR) | ++---------------------------------------+ +| star | ++---------------------------------------+ + +-- Convert a JSON object to a VARCHAR value. +mysql> select cast(parse_json('{"star": 1}') as varchar); ++--------------------------------------------+ +| cast(parse_json('{"star": 1}') AS VARCHAR) | ++--------------------------------------------+ +| {"star": 1} | ++--------------------------------------------+ + +-- Convert a JSON array to a VARCHAR value. + +mysql> select cast(parse_json('[1,2,3]') as varchar); ++----------------------------------------+ +| cast(parse_json('[1,2,3]') AS VARCHAR) | ++----------------------------------------+ +| [1, 2, 3] | ++----------------------------------------+ +``` + +Example 2: Convert an SQL value to a JSON value. + +```plaintext +-- Convert an INT value to a JSON value. +mysql> select cast(1 as json); ++-----------------+ +| cast(1 AS JSON) | ++-----------------+ +| 1 | ++-----------------+ + +-- Convert a VARCHAR value to a JSON value. +mysql> select cast("star" as json); ++----------------------+ +| cast('star' AS JSON) | ++----------------------+ +| "star" | ++----------------------+ + +-- Convert a BOOLEAN value to a JSON value. +mysql> select cast(true as json); ++--------------------+ +| cast(TRUE AS JSON) | ++--------------------+ +| true | ++--------------------+ +``` diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_bool.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_bool.md new file mode 100644 index 0000000..69248e6 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_bool.md @@ -0,0 +1,70 @@ +--- +displayed_sidebar: docs +--- + +# get_json_bool + +Parses and gets the boolean value from a specified JSON path in a JSON string. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +If the format of `json_str` or `json_path` is invalid, or if no matching content can be found, this function will return NULL. + +This function is supported from v3.3. + +## Syntax + +```Haskell +BOOLEAN get_json_bool(VARCHAR json_str, VARCHAR json_path) +``` + +## Parameters + +- `json_str`: the JSON string. The supported data type is VARCHAR. +- `json_path`: the JSON path. The supported data type is VARCHAR. + + - `json_path` must start with `$` and use `.` as the path separator. If the path includes `.`, it can be enclosed in a pair of `"`. + - `[ ]` is used as the array subscripts which starts from 0. + +## Examples + +1. Get the value whose key is "k1". The value is `true` and `1` is returned. + + ```Plain Text + MySQL > SELECT get_json_bool('{"k1":true, "k2":"false"}', "$.k1"); + +----------------------------------------------------+ + | get_json_bool('{"k1":true, "k2":"false"}', '$.k1') | + +----------------------------------------------------+ + | 1 | + +----------------------------------------------------+ + ``` + +2. Get the second element in the array whose key is "my.key". The second element is `false` and `0` is returned. + + ```Plain Text + SELECT get_json_bool('{"k1":"v1", "my.key":[true, false, 3]}', '$."my.key"[1]'); + +--------------------------------------------------------------------------+ + | get_json_bool('{"k1":"v1", "my.key":[true, false, 3]}', '$."my.key"[1]') | + +--------------------------------------------------------------------------+ + | 0 | + +--------------------------------------------------------------------------+ + ``` + +3. Get the first element in the array whose path is `k1.key -> k2`. The first element is `false` and `0` is returned. + + ```Plain Text + MYSQL > SELECT get_json_bool('{"k1.key":{"k2":[false, true]}}', '$."k1.key".k2[0]'); + +----------------------------------------------------------------------+ + | get_json_bool('{"k1.key":{"k2":[false, true]}}', '$."k1.key".k2[0]') | + +----------------------------------------------------------------------+ + | 0 | + +----------------------------------------------------------------------+ + ``` + +## keyword + +GET_JSON_BOOL,GET,JSON,BOOL diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_double.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_double.md new file mode 100644 index 0000000..dc36274 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_double.md @@ -0,0 +1,66 @@ +--- +displayed_sidebar: docs +--- + +# get_json_double + +This function analyzes and gets the floating point value from a specified path in json string. json_path must start with `$` and use `.` as the path separator. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +If the path includes `.`, it could be circled by `"` and `"`. + +`[ ]` is used as the array subscripts which start from 0. + +Content in the path should not contain `"` , `[` and `]`. + +If the format of json_string or json_path is wrong, this function will return NULL. + +## Syntax + +```Haskell +DOUBLE get_json_double(VARCHAR json_str, VARCHAR json_path) +``` + +## Examples + +1. Get the value whose key is "k1" + + ```Plain Text + MySQL > SELECT get_json_double('{"k1":1.3, "k2":"2"}', "$.k1"); + +-------------------------------------------------+ + | get_json_double('{"k1":1.3, "k2":"2"}', '$.k1') | + +-------------------------------------------------+ + | 1.3 | + +-------------------------------------------------+ + ``` + +2. Get the second element in the array whose key is "my.key" + + ```Plain Text + MySQL > SELECT get_json_double('{"k1":"v1", "my.key":[1.1, 2.2, 3.3]}', '$."my.key"[1]'); + +---------------------------------------------------------------------------+ + | get_json_double('{"k1":"v1", "my.key":[1.1, 2.2, 3.3]}', '$."my.key"[1]') | + +---------------------------------------------------------------------------+ + | 2.2 | + +---------------------------------------------------------------------------+ + ``` + +3. Get the first element in the array whose path is k1.key -> k2. + + ```Plain Text + MySQL > SELECT get_json_double('{"k1.key":{"k2":[1.1, 2.2]}}', '$."k1.key".k2[0]'); + +---------------------------------------------------------------------+ + | get_json_double('{"k1.key":{"k2":[1.1, 2.2]}}', '$."k1.key".k2[0]') | + +---------------------------------------------------------------------+ + | 1.1 | + +---------------------------------------------------------------------+ + ``` + +## keyword + +GET_JSON_DOUBLE,GET,JSON,DOUBLE diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_int.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_int.md new file mode 100644 index 0000000..54d219f --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_int.md @@ -0,0 +1,68 @@ +--- +displayed_sidebar: docs +--- + +# get_json_int + +This function analyzes and gets the integer value from a specified path in json string. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +json_path must start with `$` and use `.` as the path separator. + +If the path includes `.`, it could be circled by `"` and `"`. + +`[ ]` is used as the array subscripts which start from 0. + +Content in the path should not contain `"` , `[` and `]`. + +If the format of json_string or json_path is wrong, this function will return NULL. + +## Syntax + +```Haskell +BIGINT get_json_int(VARCHAR json_str, VARCHAR json_path) +``` + +## Examples + +1. Get the value whose key is "k1" + + ```Plain Text + MySQL > SELECT get_json_int('{"k1":1, "k2":"2"}', "$.k1"); + +--------------------------------------------+ + | get_json_int('{"k1":1, "k2":"2"}', '$.k1') | + +--------------------------------------------+ + | 1 | + +--------------------------------------------+ + ``` + +2. Get the second element in the array whose key is "my.key" + + ```Plain Text + MySQL > SELECT get_json_int('{"k1":"v1", "my.key":[1, 2, 3]}', '$."my.key"[1]'); + +------------------------------------------------------------------+ + | get_json_int('{"k1":"v1", "my.key":[1, 2, 3]}', '$."my.key"[1]') | + +------------------------------------------------------------------+ + | 2 | + +------------------------------------------------------------------+ + ``` + +3. Get the first element in the array whose path is k1.key -> k2. + + ```Plain Text + MySQL > SELECT get_json_int('{"k1.key":{"k2":[1, 2]}}', '$."k1.key".k2[0]'); + +--------------------------------------------------------------+ + | get_json_int('{"k1.key":{"k2":[1, 2]}}', '$."k1.key".k2[0]') | + +--------------------------------------------------------------+ + | 1 | + +--------------------------------------------------------------+ + ``` + +## keyword + +GET_JSON_INT,GET,JSON,INT diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_string.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_string.md new file mode 100644 index 0000000..23aee39 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_string.md @@ -0,0 +1,107 @@ +--- +displayed_sidebar: docs +--- + +# get_json_string,get_json_object + +Analyzes and gets strings from the specified path (`json_path`) in a JSON string. If the format of `json_string` or `json_path` is wrong or if no matching value is found, this function will return NULL. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +The alias is get_json_object. + +## Syntax + +```Haskell +VARCHAR get_json_string(VARCHAR json_str, VARCHAR json_path) +``` + +## Parameters + +- `json_str`: the JSON string. Supported data type is VARCHAR. +- `json_path`: the JSON path. Supported data type is VARCHAR. `json_path` starts with `$` and uses `.` as the path separator. `[ ]` is used as the array subscripts which starts from 0. For example, `$."my.key"[1]` indicates to obtain the second value form element `my.key`. + +## Return value + +Returns a value of the VARCHAR type. If no matching object is found, NULL is returned. + +## Examples + +### Example 1: Get the value whose key is `k1`. + +```sql +SELECT get_json_string('{"k1":"v1", "k2":"v2"}', "$.k1"); +``` + +```sql ++---------------------------------------------------+ +| get_json_string('{"k1":"v1", "k2":"v2"}', '$.k1') | ++---------------------------------------------------+ +| v1 | ++---------------------------------------------------+ +``` + +### Example 2: Get the value whose key is `a` from the first element. + +```sql +SELECT get_json_object('[{"a":"123", "b": "456"},{"a":"23", "b": "56"}]', '$[0].a'); +``` + +```sql ++------------------------------------------------------------------------------+ +| get_json_object('[{"a":"123", "b": "456"},{"a":"23", "b": "56"}]', '$[0].a') | ++------------------------------------------------------------------------------+ +| 123 | ++------------------------------------------------------------------------------+ +``` + +### Example 3: Get the second element in the array whose key is `my.key` + +```sql +SELECT get_json_string('{"k1":"v1", "my.key":["e1", "e2", "e3"]}', '$."my.key"[1]'); +``` + +```sql ++------------------------------------------------------------------------------+ +| get_json_string('{"k1":"v1", "my.key":["e1", "e2", "e3"]}', '$."my.key"[1]') | ++------------------------------------------------------------------------------+ +| e2 | ++------------------------------------------------------------------------------+ +``` + +### Example 4: Get the first element in the array whose path is `k1.key -> k2`. + +```sql +SELECT get_json_string('{"k1.key":{"k2":["v1", "v2"]}}', '$."k1.key".k2[0]'); +``` + +```sql ++-----------------------------------------------------------------------+ +| get_json_string('{"k1.key":{"k2":["v1", "v2"]}}', '$."k1.key".k2[0]') | ++-----------------------------------------------------------------------+ +| v1 | ++-----------------------------------------------------------------------+ +``` + +### Example 5: Get all values whose key is `k1` from the array. + +```sql +SELECT get_json_string('[{"k1":"v1"}, {"k2":"v2"}, {"k1":"v3"}, {"k1":"v4"}]', '$.[*].k1'); +``` + +```sql ++-------------------------------------------------------------------------------------+ +| get_json_string('[{"k1":"v1"}, {"k2":"v2"}, {"k1":"v3"}, {"k1":"v4"}]', '$.[*].k1') | ++-------------------------------------------------------------------------------------+ +| ["v1", "v3", "v4"] | ++-------------------------------------------------------------------------------------+ +1 row in set (0.01 sec) +``` + +## keyword + +GET_JSON_STRING,GET,JSON,STRING diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_contains.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_contains.md new file mode 100644 index 0000000..f995d71 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_contains.md @@ -0,0 +1,88 @@ +--- +displayed_sidebar: docs +--- + +# json_contains + +Checks whether a JSON document contains a specific value or subdocument. If the target JSON document contains the candidate JSON value, the JSON_CONTAINS function returns `1`. Otherwise, the JSON_CONTAINS function returns `0`. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +::: + +## Syntax + +```Haskell +json_contains(json_target, json_candidate) +``` + +## Parameters + +- `json_target`: the expression that represents the target JSON document. The document can be a JSON column, or a JSON object that is produced by a JSON constructor function such as PARSE_JSON. + +- `json_candidate`: the expression that represents the candidate JSON value or subdocument to search for within the target. The value can be a JSON column, or a JSON object that is produced by a JSON constructor function such as PARSE_JSON. + +## Return value + +Returns a BOOLEAN value. + +## Usage notes + +- For scalar values (strings, numbers, booleans, null), the function returns true if the values are equal. +- For JSON objects, the function returns true if the target object contains all key-value pairs from the candidate object. +- For JSON arrays, the function returns true if the target array contains all elements from the candidate array, or if the candidate is a single value contained in the target array. +- The function performs deep containment checking for nested structures. + +## Examples + +Example 1: Check if a JSON object contains a specific key-value pair. + +```plaintext +mysql> SELECT json_contains(PARSE_JSON('{"a": 1, "b": 2}'), PARSE_JSON('{"a": 1}')); + + -> 1 +``` + +Example 2: Check if a JSON object contains a key-value pair that doesn't exist. + +```plaintext +mysql> SELECT json_contains(PARSE_JSON('{"a": 1, "b": 2}'), PARSE_JSON('{"c": 3}')); + + -> 0 +``` + +Example 3: Check if a JSON array contains specific elements. + +```plaintext +mysql> SELECT json_contains(PARSE_JSON('[1, 2, 3, 4]'), PARSE_JSON('[2, 3]')); + + -> 1 +``` + +Example 4: Check if a JSON array contains a single scalar value. + +```plaintext +mysql> SELECT json_contains(PARSE_JSON('[1, 2, 3, 4]'), PARSE_JSON('2')); + + -> 1 +``` + +Example 5: Check if a JSON array contains elements that don't exist. + +```plaintext +mysql> SELECT json_contains(PARSE_JSON('[1, 2, 3, 4]'), PARSE_JSON('[5, 6]')); + + -> 0 +``` + +Example 6: Check containment with nested JSON structures. + +```plaintext +mysql> SELECT json_contains(PARSE_JSON('{"users": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}]}'), + PARSE_JSON('{"users": [{"id": 1}]}')); + + -> 0 +``` + +Note: In the last example, the result is 0 because array containment requires exact element matching, not partial object matching within arrays. \ No newline at end of file diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_each.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_each.md new file mode 100644 index 0000000..3115bc7 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_each.md @@ -0,0 +1,55 @@ +--- +displayed_sidebar: docs +--- + +# json_each + +Expands the outermost elements of a JSON object into a set of key-value pairs held in two columns and returns a table that consists of one row for each element. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +## Syntax + +```Haskell +json_each(json_object_expr) +``` + +## Parameters + +`json_object_expr`: the expression that represents the JSON object. The object can be a JSON column, or a JSON object that is produced by a JSON constructor function such as PARSE_JSON. + +## Return value + +Returns two columns: one named key and one named value. The key column stores VARCHAR values, and the value column stores JSON values. + +## Usage notes + +The json_each function is a table function that returns a table. The returned table is a result set that consists of multiple rows. Therefore, a lateral join must be used in the FROM clause to join the returned table to the original table. The lateral join is mandatory, but the LATERAL keyword is optional. The json_each function cannot be used in the SELECT clause. + +## Examples + +```plaintext +-- A table named tj is used as an example. In the tj table, the j column is a JSON object. +mysql> SELECT * FROM tj; ++------+------------------+ +| id | j | ++------+------------------+ +| 1 | {"a": 1, "b": 2} | +| 3 | {"a": 3} | ++------+------------------+ + +-- Expand the j column of the tj table into two columns by key and value to obtain a result set that consists of multiple rows. In this example, the LATERAL keyword is used to join the result set to the tj table. + +mysql> SELECT * FROM tj, LATERAL json_each(j); ++------+------------------+------+-------+ +| id | j | key | value | ++------+------------------+------+-------+ +| 1 | {"a": 1, "b": 2} | a | 1 | +| 1 | {"a": 1, "b": 2} | b | 2 | +| 3 | {"a": 3} | a | 3 | ++------+------------------+------+-------+ +``` diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_exists.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_exists.md new file mode 100644 index 0000000..a35b483 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_exists.md @@ -0,0 +1,63 @@ +--- +displayed_sidebar: docs +--- + +# json_exists + +Checks whether a JSON object contains an element that can be located by the `json_path` expression. If the element exists, the JSON_EXISTS function returns `1`. Otherwise, the JSON_EXISTS function returns `0`. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +## Syntax + +```Haskell +json_exists(json_object_expr, json_path) +``` + +## Parameters + +- `json_object_expr`: the expression that represents the JSON object. The object can be a JSON column, or a JSON object that is produced by a JSON constructor function such as PARSE_JSON. + +- `json_path`: the expression that represents the path to an element in the JSON object. The value of this parameter is a string. For more information about the JSON path syntax that is supported by StarRocks, see [Overview of JSON functions and operators](../overview-of-json-functions-and-operators.md). + +## Return value + +Returns a BOOLEAN value. + +## Examples + +Example 1: Check whether the specified JSON object contains an element that can be located by the `'$.a.b'` expression. In this example, the element exists in the JSON object. Therefore, the json_exists function returns `1`. + +```plaintext +mysql> SELECT json_exists(PARSE_JSON('{"a": {"b": 1}}'), '$.a.b') ; + + -> 1 +``` + +Example 2: Check whether the specified JSON object contains an element that can be located by the `'$.a.c'` expression. In this example, the element does not exist in the JSON object. Therefore, the json_exists function returns `0`. + +```plaintext +mysql> SELECT json_exists(PARSE_JSON('{"a": {"b": 1}}'), '$.a.c') ; + + -> 0 +``` + +Example 3: Check whether the specified JSON object contains an element that can be located by the `'$.a[2]'` expression. In this example, the JSON object, which is an array named a, contains an element at index 2. Therefore, the json_exists function returns `1`. + +```plaintext +mysql> SELECT json_exists(PARSE_JSON('{"a": [1,2,3]}'), '$.a[2]') ; + + -> 1 +``` + +Example 4: Check whether the specified JSON object contains an element that can be located by the `'$.a[3]'` expression. In this example, the JSON object, which is an array named a, does not contain an element at index 3. Therefore, the json_exists function returns `0`. + +```plaintext +mysql> SELECT json_exists(PARSE_JSON('{"a": [1,2,3]}'), '$.a[3]') ; + + -> 0 +``` diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_keys.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_keys.md new file mode 100644 index 0000000..d9f1e9c --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_keys.md @@ -0,0 +1,103 @@ +--- +displayed_sidebar: docs +--- + +# json_keys + +Returns the top-level keys from a JSON object as a JSON array, or, if a `path` is specified, the top-level keys from the path. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +## Syntax + +```Haskell +json_keys(json_doc[, path]) +``` + +## Parameters + +`json_doc`: required. The JSON document for which to return the keys. It must be a JSON object. + +`path`: optional. The path generally starts with `$` and uses `.` as the path separator. `[]` is used as the array subscript, which starts from 0. + +## Return value + +Returns a JSON array. + +An empty array is returned if the JSON object is empty. + +`NULL` is returned if the JSON document is not a JSON object or the path does not identify a value in the document. + +If the JSON document is an array nested with a JSON object, you can use the `path` parameter to obtain keys from that object. + +## Examples + +Example 1: Return an empty array because the input JSON object is empty. + +```Plain +select json_keys('{}'); ++-----------------+ +| json_keys('{}') | ++-----------------+ +| [] | ++-----------------+ +``` + +Example 2: Return the keys of a JSON object. + +```Plain +select json_keys('{"a": 1, "b": 2, "c": 3}'); ++----------------+ +| json_keys('1') | ++----------------+ +|["a", "b", "c"] | ++----------------+ +``` + +Example 3: Return the keys of a JSON object that matches the specified path. + +```Plain +select json_keys('{"a": 1, "b": 2, "c": {"d": 3, "e": 4, "f": 5}}', '$.c'); ++---------------------------------------------------------------------+ +| json_keys('{"a": 1, "b": 2, "c": {"d": 3, "e": 4, "f": 5}}', '$.c') | ++---------------------------------------------------------------------+ +| ["d", "e", "f"] | ++---------------------------------------------------------------------+ +``` + +Example 4: The path does not exist. + +```Plain +select json_keys('{"a": 1, "b": 2, "c": {"d": 3, "e": 4, "f": 5}}', '$.e'); ++---------------------------------------------------------------------+ +| json_keys('{"a": 1, "b": 2, "c": {"d": 3, "e": 4, "f": 5}}', '$.e') | ++---------------------------------------------------------------------+ +| NULL | ++---------------------------------------------------------------------+ +``` + +Example 5: The JSON document is not a JSON object. + +```Plain +select json_keys('[1, 2, {"a": 1, "b": 2}]'); ++---------------------------------------+ +| json_keys('[1, 2, {"a": 1, "b": 2}]') | ++---------------------------------------+ +| NULL | ++---------------------------------------+ +``` + +Example 6: The JSON document is an array nested with a JSON object. A path is specified to obtain keys from that object. + +```Plain +select json_keys('[0, 1, {"a": 1, "b": 2}]', '$[2]'); ++-----------------------------------------------+ +| json_keys('[0, 1, {"a": 1, "b": 2}]', '$[2]') | ++-----------------------------------------------+ +| ["a", "b"] | ++-----------------------------------------------+ +``` diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_length.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_length.md new file mode 100644 index 0000000..419ba4e --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_length.md @@ -0,0 +1,132 @@ +--- +displayed_sidebar: docs +--- + +# json_length + + Returns the length of a JSON document. If a path is specified, this function returns the length of the value identified by the path. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +The length of a document is determined according to the following rules: + +- The length of a scalar value is 1. For example, the length of `1`, `"a"`, `true`, `false`, and `null` is 1. + +- The length of an array is the number of array elements. For example, the length of `[1, 2]` is 2. + +- The length of an object is the number of object members. For example, the length of `{"a": 1}` is 1. + +- The length of nested arrays or objects does not count. For example, the length of `{"a": [1, 2]}` is 1, because the nested array `[1, 2]` is not calculated into length. + +## Syntax + +```Haskell +json_length(json_doc[, path]) +``` + +## Parameters + +`json_doc`: required, the JSON document for which to return the length. + +`path`: optional. It is used to return the length of a value within the document. The path generally starts with `$` and uses `.` as the path separator. `[]` is used as the array subscript, which starts from 0. + +## Return value + +Returns a value of the INT type. + +An error is returned if the JSON document is not a valid document. + +0 is returned in any of the following scenarios: + +- The **`path`** does not identify a value in the document. + +- The path is not a valid path expression. + +- The path contains the `*` or `**` wildcard. + +## Examples + +Example 1: Return the length of a scalar value. + +```Plain +select json_length('1'); ++------------------+ +| json_length('1') | ++------------------+ +| 1 | ++------------------+ +``` + +Example 2: Return the length of an empty object. + +```Plain +select json_length('{}'); ++-------------------+ +| json_length('{}') | ++-------------------+ +| 0 | ++-------------------+ +``` + +Example 3: Return the length of an object that has data. + +```Plain +select json_length('{"Name": "Homer"}'); ++----------------------------------+ +| json_length('{"Name": "Homer"}') | ++----------------------------------+ +| 1 | ++----------------------------------+ +``` + +Example 4: Return the length of a JSON array. + +```plain text +select json_length('[1, 2, 3]'); ++--------------------------+ +| json_length('[1, 2, 3]') | ++--------------------------+ +| 3 | ++--------------------------+ +``` + +Example 5: Return the length of a JSON array in which one element has a nested array. + +The nested array `[3, 4]` is not calculated into length. + +```plain text +select json_length('[1, 2, [3, 4]]'); ++-------------------------------+ +| json_length('[1, 2, [3, 4]]') | ++-------------------------------+ +| 3 | ++-------------------------------+ +``` + +Example 6: Return the length of an object specified by path `$.Person`. + +```SQL +SET @file = '{ + "Person": { + "Name": "Homer", + "Age": 39, + "Hobbies": ["Eating", "Sleeping"] + } + }'; +select json_length(@file, '$.Person') 'Result'; +``` + +Example 7: Return the length of the value specified by path `$.y`. + +```plain text +select json_length('{"x": 1, "y": [1, 2]}', '$.y'); ++---------------------------------------------+ +| json_length('{"x": 1, "y": [1, 2]}', '$.y') | ++---------------------------------------------+ +| 2 | ++---------------------------------------------+ +``` diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_pretty.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_pretty.md new file mode 100644 index 0000000..db697d9 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_pretty.md @@ -0,0 +1,85 @@ +--- +displayed_sidebar: docs +--- + +# json_pretty + +Formats a JSON document into an easy-to-read, indented string format. This function is useful for debugging or displaying JSON data in a human-readable structure. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the overview page. +::: + +## Syntax + +```SQL +json_pretty(json_object_expr) +``` + +## Parameters +- `json_object_expr`: The expression that represents the JSON object. The object can be a JSON column, a string containing valid JSON, or a JSON object produced by a JSON constructor function such as PARSE_JSON. + +## Return value +Returns the formatted JSON document as a string. + +> - Returns NULL if the argument is NULL. +> - The returned string includes newlines and spaces for indentation. +> - Object keys are sorted alphabetically in the output. + +## Examples + +Example 1: Format a simple JSON object. + +```Plaintext +mysql> SELECT json_pretty('{"b": 2, "a": 1}'); + -> { + "a": 1, + "b": 2 + } +``` + +Example 2: Format a JSON array. + +```Plaintext +mysql> SELECT json_pretty('[1, 2, 3]'); + -> [ + 1, + 2, + 3 + ] +``` + +Example 3: Format a nested JSON structure. + +```Plaintext +mysql> SELECT json_pretty('{"level1": {"level2": {"level3": "value"}}}'); + -> { + "level1": { + "level2": { + "level3": "value" + } + } + } +``` + +Example 4: Use with a table column containing JSON data. + +```Plaintext + +mysql> CREATE TABLE json_test (id INT, data JSON); +mysql> INSERT INTO json_test VALUES (1, parse_json('{"name": "Alice", "details": {"age": 25, "city": "NYC"}}')); +mysql> SELECT json_pretty(data) FROM json_test; + -> { + "details": { + "age": 25, + "city": "NYC" + }, + "name": "Alice" + } +``` + +## Usage notes +- **Indentation:** The function adds standard indentation (spaces) and newlines to make the JSON structure visual. +- **Key Sorting:** JSON object keys are sorted alphabetically in the output string. This is standard behavior for the underlying JSON processing library (VelocyPack). +- **Null Handling:** If the input is SQL NULL, the function returns NULL. +- **Data Types:** It supports formatting of standard JSON types including Objects, Arrays, Strings, Numbers, Booleans, and Nulls. \ No newline at end of file diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_query.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_query.md new file mode 100644 index 0000000..b2f88c5 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_query.md @@ -0,0 +1,65 @@ +--- +displayed_sidebar: docs +--- + +# json_query + +Queries the value of an element that can be located by the `json_path` expression in a JSON object and returns a JSON value. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +## Syntax + +```Haskell +json_query(json_object_expr, json_path) +``` + +## Parameters + +- `json_object_expr`: the expression that represents the JSON object. The object can be a JSON column, or a JSON object that is produced by a JSON constructor function such as PARSE_JSON. + +- `json_path`: the expression that represents the path to an element in the JSON object. The value of this parameter is a string. For information about the JSON path syntax that is supported by StarRocks, see [Overview of JSON functions and operators](../overview-of-json-functions-and-operators.md). + +## Return value + +Returns a JSON value. + +> If the element does not exist, the json_query function returns an SQL value of `NULL`. + +## Examples + +Example 1: Query the value of an element that can be located by the `'$.a.b'` expression in the specified JSON object. In this example, the json_query function returns a JSON value of `1`. + +```plaintext +mysql> SELECT json_query(PARSE_JSON('{"a": {"b": 1}}'), '$.a.b') ; + + -> 1 +``` + +Example 2: Query the value of an element that can be located by the `'$.a.c'` expression in the specified JSON object. In this example, the element does not exist. Therefore, the json_query function returns an SQL value of `NULL`. + +```plaintext +mysql> SELECT json_query(PARSE_JSON('{"a": {"b": 1}}'), '$.a.c') ; + + -> NULL +``` + +Example 3: Query the value of an element that can be located by the `'$.a[2]'` expression in the specified JSON object. In this example, the JSON object, which is an array named a, contains an element at index 2, and the value of the element is 3. Therefore, the JSON_QUERY function returns a JSON value of `3`. + +```plaintext +mysql> SELECT json_query(PARSE_JSON('{"a": [1,2,3]}'), '$.a[2]') ; + + -> 3 +``` + +Example 4: Query an element that can be located by the `'$.a[3]'` expression in the specified JSON object. In this example, the JSON object, which is an array named a, does not contain an element at index 3. Therefore, the json_query function returns an SQL value of `NULL`. + +```plaintext +mysql> SELECT json_query(PARSE_JSON('{"a": [1,2,3]}'), '$.a[3]') ; + + -> NULL +``` diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_remove.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_remove.md new file mode 100644 index 0000000..fb5d29c --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_remove.md @@ -0,0 +1,88 @@ +--- +displayed_sidebar: docs +--- + +# json_remove + +Removes data from a JSON document at one or more specified JSON paths and returns the modified JSON document. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) +::: + +## Syntax + +```Haskell +json_remove(json_object_expr, json_path[, json_path] ...) +``` + +## Parameters + +- `json_object_expr`: the expression that represents the JSON object. The object can be a JSON column, or a JSON object that is produced by a JSON constructor function such as PARSE_JSON. + +- `json_path`: one or more expressions that represent the paths to elements in the JSON object that should be removed. The value of each parameter is a string. For information about the JSON path syntax that is supported by StarRocks, see [Overview of JSON functions and operators](../overview-of-json-functions-and-operators.md). + +## Return value + +Returns a JSON document with the specified paths removed. + +> - If a path does not exist in the JSON document, it is ignored. +> - If an invalid path is provided, it is ignored. +> - If all paths are invalid or non-existent, the original JSON document is returned unchanged. + +## Examples + +Example 1: Remove a single key from a JSON object. + +```plaintext +mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30]}', '$.a'); + + -> {"b": [10, 20, 30]} +``` + +Example 2: Remove multiple keys from a JSON object. + +```plaintext +mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30], "c": "test"}', '$.a', '$.c'); + + -> {"b": [10, 20, 30]} +``` + +Example 3: Remove array elements from a JSON object. + +```plaintext +mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30]}', '$.b[1]'); + + -> {"a": 1, "b": [10, 30]} +``` + +Example 4: Remove nested object properties. + +```plaintext +mysql> SELECT json_remove('{"a": {"x": 1, "y": 2}, "b": 3}', '$.a.x'); + + -> {"a": {"y": 2}, "b": 3} +``` + +Example 5: Attempt to remove non-existent paths (ignored). + +```plaintext +mysql> SELECT json_remove('{"a": 1, "b": 2}', '$.c', '$.d'); + + -> {"a": 1, "b": 2} +``` + +Example 6: Remove multiple paths including non-existent ones. + +```plaintext +mysql> SELECT json_remove('{"a": 1, "b": 2, "c": 3}', '$.a', '$.nonexistent', '$.c'); + + -> {"b": 2} +``` + +## Usage notes + +- The `json_remove` function follows MySQL-compatible behavior. +- Invalid JSON paths are silently ignored rather than causing errors. +- The function supports removing multiple paths in a single operation, which is more efficient than multiple separate operations. +- Currently, the function supports simple object key removal (e.g., `$.key`). Support for complex nested paths and array element removal may be limited in the current implementation. \ No newline at end of file diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_set.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_set.md new file mode 100644 index 0000000..99e8de3 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_set.md @@ -0,0 +1,95 @@ +--- +displayed_sidebar: docs +--- + +# json_set + +Inserts or updates data in a JSON document at one or more specified JSON paths and returns the modified JSON document. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the overview page. +::: + +## Syntax + +```SQL +json_set(json_object_expr, json_path, value[, json_path, value] ...) +``` + +## Parameters +- `json_object_expr`: the expression that represents the JSON object. The object can be a JSON column, or a JSON object that is produced by a JSON constructor function such as **PARSE_JSON**. + +- `json_path`: the path to the element in the JSON object that you want to insert or update. The value must be a string. For information about the JSON path syntax that is supported by StarRocks, see Overview of JSON functions and operators. + +- `value`: the value to be inserted or updated at the specified path. It can be a string, number, boolean, null, or a JSON object. + +## Return value +Returns the modified JSON document. + +> - Returns *NULL* if any argument is *NULL*. +> - If the path exists in the JSON document, the existing value is updated (replaced). +> - If the path does not exist, the new value is inserted (Upsert behavior). +> - Arguments are evaluated from left to right. The result of the first path-value pair becomes the input for the second pair. + +## Examples + +Example 1: Insert a new key into a JSON object. + +```Plaintext +mysql> SELECT json_set('{"a": 1}', '$.b', 2); + -> {"a": 1, "b": 2} +``` + +Example 2: Update an existing key in a JSON object. + +```Plaintext +mysql> SELECT json_set('{"a": 1}', '$.a', 10); + -> {"a": 10} +``` + +Example 3: Perform multiple operations (Update one existing key, Insert one new key). + +```Plaintext +mysql> SELECT json_set('{"a": 1, "b": 2}', '$.a', 10, '$.c', 3); + -> {"a": 10, "b": 2, "c": 3} +``` + +Example 4: Update a value inside a nested JSON object. +```Plaintext + +mysql> SELECT json_set('{"a": {"x": 1, "y": 2}}', '$.a.x', 100); + -> {"a": {"x": 100, "y": 2}} +``` + +Example 5: Update an element in an array by index. + +```Plaintext +mysql> SELECT json_set('{"arr": [10, 20, 30]}', '$.arr[1]', 99); + -> {"arr": [10, 99, 30]} +``` + +Example 6: Append to an array (using an index larger than the array length). + +```Plaintext +mysql> SELECT json_set('{"arr": [10, 20]}', '$.arr[5]', 30); + -> {"arr": [10, 20, 30]} +``` + +Example 7: Insert different data types (Boolean and JSON Null). +To insert a JSON `null` value, use `parse_json('null')`. Passing a raw SQL `NULL` will return `NULL` for the entire result. + +```plaintext +mysql> SELECT json_set('{"a": 1}', '$.b', true, '$.c', parse_json('null')); + -> {"a": 1, "b": true, "c": null} +``` + +## Usage notes + +- The `json_set` function follows MySQL-compatible behavior. +- It operates as an **Upsert** (Update or Insert): + - **INSERT:** If the path does not exist, the value is added to the document. + - **UPDATE:** If the path already exists, the old value is replaced with the new value. +- If you specifically want to insert *only* (without updating existing values), use `json_insert`. +- If you specifically want to update *only* (without inserting new values), use `json_replace`. +- **Null Handling:** To insert a JSON null value, use parse_json('null'). Passing a raw SQL NULL as an argument will cause the function to return NULL. +- **Note:** Wildcard characters (e.g., `*` or `**`) and array slices (e.g., `[1:3]`) are not currently supported in `json_path` for modification. If a path contains these, the update for that path will be ignored to ensure safety. \ No newline at end of file diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_string.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_string.md new file mode 100644 index 0000000..0fc7a3b --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_string.md @@ -0,0 +1,51 @@ +--- +displayed_sidebar: docs +--- + +# json_string + +Converting JSON object to JSON string + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +## Syntax + +```SQL +json_string(json_object_expr) +``` + +## Parameters + +- `json_object_expr`: the expression that represents the JSON object. The object can be a JSON column, or a JSON object that is produced by a JSON constructor function such as PARSE_JSON. + +## Return value + +Returns a VARCHAR value. + +## Examples + +Example 1: Converting the JSON object to a JSON string + +```Plain +select json_string('{"Name": "Alice"}'); ++----------------------------------+ +| json_string('{"Name": "Alice"}') | ++----------------------------------+ +| {"Name": "Alice"} | ++----------------------------------+ +``` + +Example 1: Convert the result of PARSE_JSON to a JSON string + +```Plain +select json_string(parse_json('{"Name": "Alice"}')); ++----------------------------------+ +| json_string('{"Name": "Alice"}') | ++----------------------------------+ +| {"Name": "Alice"} | ++----------------------------------+ +``` diff --git a/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/to_json.md b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/to_json.md new file mode 100644 index 0000000..67a6143 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/to_json.md @@ -0,0 +1,70 @@ +--- +displayed_sidebar: docs +--- + +# to_json + +Converts a Map or Struct value into a JSON string. If the input value is NULL, NULL is returned. + +:::tip +All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md) + +Accelerate your queries with [generated columns](../../../sql-statements/generated_columns.md) +::: + +If you want to cast values of other data types, see [cast](./cast.md). + +This function is supported from v3.1 onwards. + +## Syntax + +```Haskell +to_json(any_value) +``` + +## Parameters + +`any_value`: the Map or Struct expression you want to convert. If the input value is invalid, an error is returned. The value in each key-value pair of the Map or Struct value is nullable. See the last example. + +## Return value + +Returns a JSON value. + +## Examples + +```Haskell +select to_json(map{1:'a',2:'b'}); ++---------------------------+ +| to_json(map{1:'a',2:'b'}) | ++---------------------------+ +| {"1": "a", "2": "b"} | ++---------------------------+ + +select to_json(row('asia','eu')); ++--------------------------------+ +| to_json(row('asia', 'eu')) | ++--------------------------------+ +| {"col1": "asia", "col2": "eu"} | ++--------------------------------+ + +select to_json(map('a', named_struct('b', 1))); ++----------------------------------------+ +| to_json(map{'a':named_struct('b', 1)}) | ++----------------------------------------+ +| {"a": {"b": 1}} | ++----------------------------------------+ + +select to_json(named_struct("k1", cast(null as string), "k2", "v2")); ++-----------------------------------------------------------------------+ +| to_json(named_struct('k1', CAST(NULL AS VARCHAR(65533)), 'k2', 'v2')) | ++-----------------------------------------------------------------------+ +| {"k1": null, "k2": "v2"} | ++-----------------------------------------------------------------------+ +``` + +## See also + +- [Map data type](../../../data-types/semi_structured/Map.md) +- [Struct data type](../../../data-types/semi_structured/STRUCT.md) +- [Map functions](../../README.md#map-functions) +- [Struct functions](../../README.md#struct-functions) diff --git a/docs/en/sql-reference/sql-functions/json-functions/overview-of-json-functions-and-operators.md b/docs/en/sql-reference/sql-functions/json-functions/overview-of-json-functions-and-operators.md new file mode 100644 index 0000000..a462e9e --- /dev/null +++ b/docs/en/sql-reference/sql-functions/json-functions/overview-of-json-functions-and-operators.md @@ -0,0 +1,69 @@ +--- +displayed_sidebar: docs +--- + +# Overview of JSON functions and operators + +This topic provides an overview of the JSON constructor functions, query functions, and processing functions, operators, and path expressions that are supported by StarRocks. + +:::tip +Accelerate your queries with [generated columns](../../sql-statements/generated_columns.md) +::: + +## JSON constructor functions + +JSON constructor functions are used to construct JSON data, such as JSON objects and JSON arrays. + +| Function | Description | Example | Return value | +| ------------------------------------------------------------ | ------------------------------------------------------------ | --------------------------------------------------------- | -------------------------------------- | +| [json_object](./json-constructor-functions/json_object.md) | Converts one or more key-value pairs to a JSON object that consists of the key-value pairs, which are sorted by key in dictionary order. | `SELECT JSON_OBJECT('Daniel Smith', 26, 'Lily Smith', 25);` | `{"Daniel Smith": 26, "Lily Smith": 25}` | +| [json_array](./json-constructor-functions/json_array.md) | Converts each element of an SQL array to a JSON value and returns a JSON array that consists of those JSON values. | `SELECT JSON_ARRAY(1, 2, 3);` | `[1,2,3]` | +| [parse_json](./json-constructor-functions/parse_json.md) | Converts a string to a JSON value. | `SELECT PARSE_JSON('{"a": 1}');` | `{"a": 1}` | + +## JSON query functions and processing functions + +JSON query functions and processing functions are used to query and process JSON data. For example, you can use a path expression to locate an element in a JSON object. + +| Function | Description | Example | Return value | +| ------------------------------------------------------------ | ------------------------------------------------------------ | ---------------------------------------------------------- | ---------------------------------------------------------- | +| [arrow function](./json-query-and-processing-functions/arrow-function.md) | Queries the element that can be located by a path expression in a JSON object. | `SELECT parse_json('{"a": {"b": 1}}') -> '$.a.b';` | `1` | +| [cast](./json-query-and-processing-functions/cast.md) | Converts data between a JSON data type and an SQL data type. | `SELECT CAST(1 AS JSON);` | `1` | +| [get_json_double](./json-query-and-processing-functions/get_json_double.md) | Analyzes and gets the floating point value from a specified path in a JSON string. | `SELECT get_json_double('{"k1":1.3, "k2":"2"}', "$.k1");` | `1.3` | +| [get_json_int](./json-query-and-processing-functions/get_json_int.md) | Analyzes and gets the integer value from a specified path in a JSON string. | `SELECT get_json_int('{"k1":1, "k2":"2"}', "$.k1");` | `1` | +| [get_json_string](./json-query-and-processing-functions/get_json_string.md) | Analyzes and gets the strings from a specified path in a JSON string. | `SELECT get_json_string('{"k1":"v1", "k2":"v2"}', "$.k1");` | `v1` | +| [json_query](./json-query-and-processing-functions/json_query.md) | Queries the value of an element that can be located by a path expression in a JSON object. | `SELECT JSON_QUERY('{"a": 1}', '$.a');` | `1` | +| [json_remove](./json-query-and-processing-functions/json_remove.md) | Removes data from a JSON document at one or more specified JSON paths. | `SELECT JSON_REMOVE('{"a": 1, "b": [10, 20, 30]}', '$.a', '$.b[1]');` | `{"b": [10, 30]}` | +| [json_each](./json-query-and-processing-functions/json_each.md) | Expands the top-level elements of a JSON object into key-value pairs. | `SELECT * FROM tj_test, LATERAL JSON_EACH(j);` | `!`[json_each](../../../_assets/json_each.png) | +| [json_exists](./json-query-and-processing-functions/json_exists.md) | Checks whether a JSON object contains an element that can be located by a path expression. If the element exists, this function returns 1. If the element does not exist, the function returns 0. | `SELECT JSON_EXISTS('{"a": 1}', '$.a'); ` | `1` | +| [json_keys](./json-query-and-processing-functions/json_keys.md) | Returns the top-level keys from a JSON object as a JSON array, or, if a path is specified, the top-level keys from the path. | `SELECT JSON_KEYS('{"a": 1, "b": 2, "c": 3}');` | `["a", "b", "c"]`| +| [json_length](./json-query-and-processing-functions/json_length.md) | Returns the length of a JSON document. | `SELECT json_length('{"Name": "Alice"}');` | `1` | +| [json_string](./json-query-and-processing-functions/json_string.md) | Converts the JSON object to a JSON string | `SELECT json_string(parse_json('{"Name": "Alice"}'));` | `{"Name": "Alice"}` | + +## JSON operators + +StarRocks supports the following JSON comparison operators: `<`, `<=`, `>`, `>=`, `=`, and `!=`. You can use these operators to query JSON data. However, it does not allow you to use `IN` to query JSON data. For more information about JSON operators, see [JSON operators](./json-operators.md). + +## JSON path expressions + +You can use a JSON path expression to query an element in a JSON object. JSON path expressions are of the STRING data type. In most cases, they are used with various JSON functions, such as JSON_QUERY. In StarRocks, JSON path expressions do not completely comply with the [SQL/JSON path specifications](https://modern-sql.com/blog/2017-06/whats-new-in-sql-2016#json-path). For information about the JSON path syntax that is supported in StarRocks, see the following table, in which the following JSON object is used as an example. + +```JSON +{ + "people": [{ + "name": "Daniel", + "surname": "Smith" + }, { + "name": "Lily", + "surname": "Smith", + "active": true + }] +} +``` + +| JSON path symbol | Description | JSON path example | Return value | +| ---------------- | ------------------------------------------------------------ | --------------------- | ------------------------------------------------------------ | +| `$` | Denotes a root JSON object. | `'$'` | `{ "people": [ { "name": "Daniel", "surname": "Smith" }, { "name": "Lily", "surname": Smith, "active": true } ] }`| +|`.` | Denotes a child JSON object. |`' $.people'` |`[ { "name": "Daniel", "surname": "Smith" }, { "name": "Lily", "surname": Smith, "active": true } ]`| +|`[]` | Denotes one or more array indexes. `[n]` denotes the nth element in an array. Indexes start from 0.
StarRocks 2.5 supports querying multi-dimensional arrays, for example, `["Lucy", "Daniel"], ["James", "Smith"]`. To query the "Lucy" element, you can use `$.people[0][0]`.| `'$.people [0]'` | `{ "name": "Daniel", "surname": "Smith" }` | +| `[*]` | Denotes all elements in an array. | `'$.people[*].name'` | `["Daniel", "Lily"]` | +| `[start: end]` | Denotes a subset of elements from an array. The subset is specified by the `[start, end]` interval, which excludes the element that is denoted by the end index. | `'$.people[0: 1].name'` | `["Daniel"]` | From 675d679a442d7ae58c4acab7d2d8cf5639a18f63 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 23 Jan 2026 11:03:23 -0500 Subject: [PATCH 2/6] dictionaryfxns Signed-off-by: DanRoscigno --- .../dict-functions/_category_.yml | 6 + .../dict-functions/dict_mapping.md | 217 ++++++++++++++++++ .../dict-functions/dictionary_get.md | 111 +++++++++ 3 files changed, 334 insertions(+) create mode 100644 docs/en/sql-reference/sql-functions/dict-functions/_category_.yml create mode 100644 docs/en/sql-reference/sql-functions/dict-functions/dict_mapping.md create mode 100644 docs/en/sql-reference/sql-functions/dict-functions/dictionary_get.md diff --git a/docs/en/sql-reference/sql-functions/dict-functions/_category_.yml b/docs/en/sql-reference/sql-functions/dict-functions/_category_.yml new file mode 100644 index 0000000..35d3585 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/dict-functions/_category_.yml @@ -0,0 +1,6 @@ +position: 10 # float position is supported +label: 'Dictionary' +collapsible: true +collapsed: true +link: + type: generated-index \ No newline at end of file diff --git a/docs/en/sql-reference/sql-functions/dict-functions/dict_mapping.md b/docs/en/sql-reference/sql-functions/dict-functions/dict_mapping.md new file mode 100644 index 0000000..b6e7017 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/dict-functions/dict_mapping.md @@ -0,0 +1,217 @@ +--- +displayed_sidebar: docs +--- + +# dict_mapping + + + +Returns the value mapped to the specified key in a dictionary table. + +This function is mainly used to simplify the application of a global dictionary table. During data loading into a target table, StarRocks automatically obtains the value mapped to the specified key from the dictionary table by using the input parameters in this function, and then loads the value into the target table. + +Since v3.2.5, StarRocks supports this function. Also, note that currently StarRocks's shared-data mode does not support this function. + +## Syntax + +```SQL +dict_mapping("[.]", key_column_expr_list [, ] [, ] ) + +key_column_expr_list ::= key_column_expr [, key_column_expr ... ] + +key_column_expr ::= | +``` + +## Parameters + +- Required parameters: + - `[.]`: The name of the dictionary table, which needs to be a Primary Key table. The supported data type is VARCHAR. + - `key_column_expr_list`: The expression list for key columns in the dictionary table, including one or multiple `key_column_exprs`. The `key_column_expr` can be the name of a key column in the dictionary table, or a specific key or key expression. + + This expression list needs to include all Primary Key columns of the dictionary table, which means the total number of expressions needs to match the total number of Primary Key columns in the dictionary table. So when the dictionary table uses Composite Primary Key, the expressions in this list needs to correspond to the Primary Key columns defined in the table schema by sequence. Multiple expressions in this list are separated by commas (`,`). And if a `key_column_expr` is a specific key or key expression, its type must match the type of the corresponding Primary Key column in the dictionary table. + +- Optional parameters: + - ``: The name of the value column, which is also the mapping column. If the value column is not specified, the default value column is the AUTO_INCREMENT column of the dictionary table. The value column can also be defined as any column in the dictionary table excluding auto-incremented columns and primary keys. The column's data type has no restrictions. + - `` (Optional): Whether to return if the key does not exist in the dictionary table. Valid values: + - `true`: Null is returned if the key does not exist. + - `false` (Default): An exception is thrown if the key does not exist. + +## Return Value + +The data type of the returned values remains consistent with the data type of the value column. If the value column is the auto-incremented column of the dictionary table, the data type of the returned values is BIGINT. + +However, when the value mapped to the specified key is not found, if the `` parameter is set to `true`, `NULL` is returned. If the parameter is set to `false`(default), an error `query failed if record not exist in dict table` is returned. + +## Example + +**Example 1: Directly query the value mapped to a key from a dictionary table.** + +1. Create a dictionary table and load simulated data. + + ```SQL + MySQL [test]> CREATE TABLE dict ( + order_uuid STRING, + order_id_int BIGINT AUTO_INCREMENT + ) + PRIMARY KEY (order_uuid) + DISTRIBUTED BY HASH (order_uuid); + Query OK, 0 rows affected (0.02 sec) + + MySQL [test]> INSERT INTO dict (order_uuid) VALUES ('a1'), ('a2'), ('a3'); + Query OK, 3 rows affected (0.12 sec) + {'label':'insert_9e60b0e4-89fa-11ee-a41f-b22a2c00f66b', 'status':'VISIBLE', 'txnId':'15029'} + + MySQL [test]> SELECT * FROM dict; + +------------+--------------+ + | order_uuid | order_id_int | + +------------+--------------+ + | a1 | 1 | + | a3 | 3 | + | a2 | 2 | + +------------+--------------+ + 3 rows in set (0.01 sec) + ``` + + > **NOTICE** + > + > Currently the `INSERT INTO` statement does not support partial updates. So please make sure that the values inserted into the `dict`'s key column are not duplicated. Otherwise, inserting the same key column value in the dictionary table multiple times causes its mapped value in the value column to change. + +2. Query the value mapped to key `a1` in the dictionary table. + + ```SQL + MySQL [test]> SELECT dict_mapping('dict', 'a1'); + +----------------------------+ + | dict_mapping('dict', 'a1') | + +----------------------------+ + | 1 | + +----------------------------+ + 1 row in set (0.01 sec) + ``` + +**Example 2: The mapping column in the table is configured as a generated column using the `dict_mapping` function. So StarRocks can automatically obtain the values mapped to the keys when loading data into this table.** + +1. Create a data table and configure the mapping column as a generated column by using `dict_mapping('dict', order_uuid)`. + + ```SQL + CREATE TABLE dest_table1 ( + id BIGINT, + -- This column records the STRING type order number, corresponding to the order_uuid column in the dict table in Example 1. + order_uuid STRING, + batch int comment 'used to distinguish different batch loading', + -- This column records the BIGINT type order number which mapped with the order_uuid column. + -- Because this column is a generated column configured with dict_mapping, the values in this column are automatically obtained from the dict table in Example 1 during data loading. + -- Subsequently, this column can be directly used for deduplication and JOIN queries. + order_id_int BIGINT AS dict_mapping('dict', order_uuid) + ) + DUPLICATE KEY (id, order_uuid) + DISTRIBUTED BY HASH(id); + ``` + +2. When loading simulated data into this table where the `order_id_int` column is configured as `dict_mapping('dict', 'order_uuid')`, StarRocks automatically loads values into the `order_id_int` column based on the mapping relationship between keys and values in the `dict` table. + + ```SQL + MySQL [test]> INSERT INTO dest_table1(id, order_uuid, batch) VALUES (1, 'a1', 1), (2, 'a1', 1), (3, 'a3', 1), (4, 'a3', 1); + Query OK, 4 rows affected (0.05 sec) + {'label':'insert_e191b9e4-8a98-11ee-b29c-00163e03897d', 'status':'VISIBLE', 'txnId':'72'} + + MySQL [test]> SELECT * FROM dest_table1; + +------+------------+-------+--------------+ + | id | order_uuid | batch | order_id_int | + +------+------------+-------+--------------+ + | 1 | a1 | 1 | 1 | + | 4 | a3 | 1 | 3 | + | 2 | a1 | 1 | 1 | + | 3 | a3 | 1 | 3 | + +------+------------+-------+--------------+ + 4 rows in set (0.02 sec) + ``` + + The usage of `dict_mapping` in this example can accelerate [deduplication calculations and JOIN queries](../../../using_starrocks/query_acceleration_with_auto_increment.md). Compared to the previous solutions for building a global dictionary to accelerate precise deduplication, the solution by using `dict_mapping` is more flexible and user-friendly. Because the mapping values are directly obtained from the dictionary table at the stage "loading mapping relationships between keys and values into the table". You do not need to write statements to join the dictionary table to obtain mapping values. Additionally, this solution supports various data loading methods. + +**Example 3: If the mapping column in the table is not configured as a generated column, you need to explicitly configure the `dict_mapping` function for the mapping column when loading data into the table, obtain the values mapped to the keys.** + +> **NOTICE** +> +> The difference between Example 3 and Example 2 is that when importing into the data table, you need to modify the import command to explicitly configure the `dict_mapping` expression for the mapping column. + +1. Create a table. + + ```SQL + CREATE TABLE dest_table2 ( + id BIGINT, + order_uuid STRING, + order_id_int BIGINT NULL, + batch int comment 'used to distinguish different batch loading' + ) + DUPLICATE KEY (id, order_uuid, order_id_int) + DISTRIBUTED BY HASH(id); + ``` + +2. When simulated data loads into this table, you obtain the mapped values from the dictionary table by configuring `dict_mapping`. + + ```SQL + MySQL [test]> INSERT INTO dest_table2 VALUES (1, 'a1', dict_mapping('dict', 'a1'), 1); + Query OK, 1 row affected (0.35 sec) + {'label':'insert_19872ab6-8a96-11ee-b29c-00163e03897d', 'status':'VISIBLE', 'txnId':'42'} + + MySQL [test]> SELECT * FROM dest_table2; + +------+------------+--------------+-------+ + | id | order_uuid | order_id_int | batch | + +------+------------+--------------+-------+ + | 1 | a1 | 1 | 1 | + +------+------------+--------------+-------+ + 1 row in set (0.02 sec) + ``` + +**Example 4: Enable null_if_not_exist mode** + +When `` mode is disabled and the value mapped to the key that doesn't exist in the dictionary table is queried , an error, instead of `NULL`, is returned. It makes sure that a data row's key is first loaded into the dictionary table and its mapped value (dictionary ID) is generated before that data row is loaded into the target table. + +```SQL +MySQL [test]> SELECT dict_mapping('dict', 'b1', true); +ERROR 1064 (HY000): Query failed if record not exist in dict table. +``` + +**Example 5: If the dictionary table uses composite primary keys, all primary keys must be specified when querying.** + +1. Create a dictionary table with Composite Primary Keys and load simulated data into it. + + ```SQL + MySQL [test]> CREATE TABLE dict2 ( + order_uuid STRING, + order_date DATE, + order_id_int BIGINT AUTO_INCREMENT + ) + PRIMARY KEY (order_uuid,order_date) -- Composite Primary Key + DISTRIBUTED BY HASH (order_uuid,order_date) + ; + Query OK, 0 rows affected (0.02 sec) + + MySQL [test]> INSERT INTO dict2 VALUES ('a1','2023-11-22',default), ('a2','2023-11-22',default), ('a3','2023-11-22',default); + Query OK, 3 rows affected (0.12 sec) + {'label':'insert_9e60b0e4-89fa-11ee-a41f-b22a2c00f66b', 'status':'VISIBLE', 'txnId':'15029'} + + + MySQL [test]> select * from dict2; + +------------+------------+--------------+ + | order_uuid | order_date | order_id_int | + +------------+------------+--------------+ + | a1 | 2023-11-22 | 1 | + | a3 | 2023-11-22 | 3 | + | a2 | 2023-11-22 | 2 | + +------------+------------+--------------+ + 3 rows in set (0.01 sec) + ``` + +2. Query the value mapped to the key in the dictionary table. Because the dictionary table has Composite Primary Keys, all primary keys need to be specified in `dict_mapping`. + + ```SQL + SELECT dict_mapping('dict2', 'a1', cast('2023-11-22' as DATE)); + ``` + + Note that an error occurs when only one Primary Key is specified. + + ```SQL + MySQL [test]> SELECT dict_mapping('dict2', 'a1'); + ERROR 1064 (HY000): Getting analyzing error. Detail message: dict_mapping function param size should be 3 - 5. + ``` diff --git a/docs/en/sql-reference/sql-functions/dict-functions/dictionary_get.md b/docs/en/sql-reference/sql-functions/dict-functions/dictionary_get.md new file mode 100644 index 0000000..51fc00a --- /dev/null +++ b/docs/en/sql-reference/sql-functions/dict-functions/dictionary_get.md @@ -0,0 +1,111 @@ +--- +displayed_sidebar: docs +--- + +# dictionary_get + + + +Query the value mapped to the key in a dictionary object. + +## Syntax + +```SQL +dictionary_get('dictionary_object_name', key_expression_list, [NULL_IF_NOT_EXIST]) + +key_expression_list ::= + key_expression [, ...] + +key_expression ::= + column_name | const_value +``` + +## Parameters + +- `dictionary_name`: The name of the dictionary object. +- `key_expression_list`: A list of expressions for all key columns. It can be a list of column names or a list of values. +- `NULL_IF_NOT_EXIST` (Optional): Whether to return if the key does not exist in the dictionary cache. Valid values: + - `true`: Null is returned if the key does not exist. + - `false` (Default): An exception is thrown if the key does not exist. + +## Returns + +Returns the values of value columns as a STRUCT type. Therefore, you can use `[N]` or `.` to specify a particular column's value. `N` represents the column's position, starting from 1. + +## Examples + +The following examples uses the dataset from the examples of [dict_mapping](dict_mapping.md). + +- Example 1: Query the values of the value column mapped to the key column `order_uuid` in the dictionary object `dict_obj`. + + ```Plain + MySQL > SELECT dictionary_get('dict_obj', order_uuid) FROM dict; + +--------------------+ + | DICTIONARY_GET | + +--------------------+ + | {"order_id_int":1} | + | {"order_id_int":3} | + | {"order_id_int":2} | + +--------------------+ + 3 rows in set (0.02 sec) + ``` + +- Example 2: Query the value of the value column mapped to key `a1` in the dictionary object `dict_obj`. + + ```Plain + MySQL > SELECT dictionary_get("dict_obj", "a1"); + +--------------------+ + | DICTIONARY_GET | + +--------------------+ + | {"order_id_int":1} | + +--------------------+ + 1 row in set (0.01 sec) + ``` + +- Example 3: Query the values of the value columns mapped to key `1` in the dictionary object `dimension_obj`. + + ```Plain + MySQL > SELECT dictionary_get("dimension_obj", 1); + +-----------------------------------------------------------------------------------------------------------------+ + | DICTIONARY_GET | + +-----------------------------------------------------------------------------------------------------------------+ + | {"ProductName":"T-Shirt","Category":"Apparel","SubCategory":"Shirts","Brand":"BrandA","Color":"Red","Size":"M"} | + +-----------------------------------------------------------------------------------------------------------------+ + 1 row in set (0.01 sec) + ``` + +- Example 4: Query the value of the first value column mapped to key `1` in the dictionary object `dimension_obj`. + + ```Plain + MySQL > SELECT dictionary_get("dimension_obj", 1)[1]; + +-------------------+ + | DICTIONARY_GET[1] | + +-------------------+ + | T-Shirt | + +-------------------+ + 1 row in set (0.01 sec) + ``` + +- Example 5: Query the value of the second value column mapped to key `1` in the dictionary object `dimension_obj`. + + ```Plain + MySQL > SELECT dictionary_get("dimension_obj", 1)[2]; + +-------------------+ + | DICTIONARY_GET[2] | + +-------------------+ + | Apparel | + +-------------------+ + 1 row in set (0.01 sec) + ``` + +- Example 6: Query the value of `ProductName` value column mapped to key `1` in the dictionary object `dimension_obj`. + + ```Plain + MySQL > SELECT dictionary_get("dimension_obj", 1).ProductName; + +----------------------------+ + | DICTIONARY_GET.ProductName | + +----------------------------+ + | T-Shirt | + +----------------------------+ + 1 row in set (0.01 sec) + ``` \ No newline at end of file From 5ad72421541065b369673ce9db4b0695e3978199 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 23 Jan 2026 11:04:42 -0500 Subject: [PATCH 3/6] hivebitmap Signed-off-by: DanRoscigno --- .../sql-functions/hive_bitmap_udf.md | 214 ++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 docs/en/sql-reference/sql-functions/hive_bitmap_udf.md diff --git a/docs/en/sql-reference/sql-functions/hive_bitmap_udf.md b/docs/en/sql-reference/sql-functions/hive_bitmap_udf.md new file mode 100644 index 0000000..6fc7bbf --- /dev/null +++ b/docs/en/sql-reference/sql-functions/hive_bitmap_udf.md @@ -0,0 +1,214 @@ +--- +displayed_sidebar: docs +sidebar_position: 0.9 +--- + +# Hive Bitmap UDF + +Hive Bitmap UDF provides UDFs that can be directly used in Hive. They can be used to generate Bitmap data and perform Bitmap-related calculations. + +The Bitmap format defined by Hive Bitmap UDF is consistent with the format in StarRocks and can be directly used for loading Bitmap data into StarRocks and unloading Bitmap data from StarRocks to Hive. + +Applicable scenarios: + +- The amount of raw data is large and directly loading these data into StarRocks for computing will cause tremendous pressure on StarRocks clusters. The desired solution is generating Bitmap data in Hive and then loading Bitmap into StarRocks. +- Export the Bitmap data generated in StarRocks to Hive for other systems to use. + +Supported source and target data types: + +- v3.1 and later support loading and unloading data of these types: String, Base64, and Binary. +- v2.5 and v3.0 only support loading and unloading of String and Base64 data. + +## Hive Bitmap UDFs that can be generated + +- com.starrocks.hive.udf.UDAFBitmapAgg + + Combines multiple rows of non-null values in a column into one row of Bitmap values, which is equivalent to StarRocks' built-in aggregate function [bitmap_agg](bitmap-functions/bitmap_agg.md). + +- com.starrocks.hive.udf.UDAFBitmapUnion + + Calculates the union of a set of bitmaps, which is equivalent to StarRocks' built-in aggregate function [bitmap_union](bitmap-functions/bitmap_union.md). + +- com.starrocks.hive.udf.UDFBase64ToBitmap + + Converts a base64-encoded string into a bitmap, which is equivalent to StarRocks' built-in function [base64_to_bitmap](bitmap-functions/base64_to_bitmap.md). + +- com.starrocks.hive.udf.UDFBitmapAnd + + Calculates the intersection of two bitmaps, which is equivalent to StarRocks' built-in function [bitmap_and](bitmap-functions/bitmap_and.md). + +- com.starrocks.hive.udf.UDFBitmapCount + + Counts the number of values in the bitmap, which is equivalent to StarRocks' built-in function [bitmap_count](bitmap-functions/bitmap_count.md). + +- com.starrocks.hive.udf.UDFBitmapFromString + + Converts a comma-separated string to a bitmap, equivalent to StarRocks' built-in function [bitmap_from_string](bitmap-functions/bitmap_from_string.md). + +- com.starrocks.hive.udf.UDFBitmapOr + + Calculates the union of two bitmaps, equivalent to StarRocks' built-in function [bitmap_or](bitmap-functions/bitmap_or.md). + +- com.starrocks.hive.udf.UDFBitmapToBase64 + + Converts Bitmap to Base64 string, equivalent to StarRocks' built-in function [bitmap_to_base64](bitmap-functions/bitmap_to_base64.md). + +- com.starrocks.hive.udf.UDFBitmapToString + + Converts a bitmap to a comma-separated string, equivalent to StarRocks' built-in function [bitmap_to_string](bitmap-functions/bitmap_to_string.md). + +- com.starrocks.hive.udf.UDFBitmapXor + + Calculates the set of unique elements in two Bitmaps, which is equivalent to StarRocks' built-in function [bitmap_xor](bitmap-functions/bitmap_xor.md). + +## How to use + +1. Compile and generate Hive UDF on the FE. + + ```bash + ./build.sh --hive-udf + ``` + + A JAR package `hive-udf-*.jar` will be generated in the `fe/hive-udf/` directory. + +2. Upload the JAR package to HDFS. + + ```bash + hadoop fs -put -f ./hive-udf-*.jar hdfs://:/hive-udf-*.jar + ``` + +3. Load the JAR package to Hive. + + ```bash + hive> add jar hdfs://:/hive-udf-*.jar; + ``` + +4. Load UDF functions. + + ```sql + hive> create temporary function bitmap_agg as 'com.starrocks.hive.udf.UDAFBitmapAgg'; + hive> create temporary function bitmap_union as 'com.starrocks.hive.udf.UDAFBitmapUnion'; + hive> create temporary function base64_to_bitmap as 'com.starrocks.hive.udf.UDFBase64ToBitmap'; + hive> create temporary function bitmap_and as 'com.starrocks.hive.udf.UDFBitmapAnd'; + hive> create temporary function bitmap_count as 'com.starrocks.hive.udf.UDFBitmapCount'; + hive> create temporary function bitmap_from_string as 'com.starrocks.hive.udf.UDFBitmapFromString'; + hive> create temporary function bitmap_or as 'com.starrocks.hive.udf.UDFBitmapOr'; + hive> create temporary function bitmap_to_base64 as 'com.starrocks.hive.udf.UDFBitmapToBase64'; + hive> create temporary function bitmap_to_string as 'com.starrocks.hive.udf.UDFBitmapToString'; + hive> create temporary function bitmap_xor as 'com.starrocks.hive.udf.UDFBitmapXor'; + ``` + +## Usage examples + +### Generate Bitmap in Hive and load it into StarRocks in Binary format + +1. Create a Hive source table. + + ```sql + hive> create table t_src(c1 bigint, c2 bigint) stored as parquet; + + hive> insert into t_src values (1,1), (1,2), (1,3), (2,4), (2,5); + + hive> select * from t_src; + 1 1 + 1 2 + 1 3 + 2 4 + 2 5 + ``` + +2. Create a Hive bitmap table. + + ```sql + hive> create table t_bitmap(c1 bigint, c2 binary) stored as parquet; + ``` + + Hive generates bitmap through UDFBitmapAgg and writes it into the bitmap table. + + ```sql + hive> insert into t_bitmap select c1, bitmap_agg(c2) from t_src group by c1; + ``` + +3. Create a StarRocks Bitmap table. + + ```sql + mysql> create table t1(c1 int, c2 bitmap bitmap_union) aggregate key(c1) distributed by hash(c1); + ``` + +4. Load Bitmap data into StarRocks in different ways. + + - Load data via the [files](table-functions/files.md) function. + + ```sql + mysql> insert into t1 select c1, bitmap_from_binary(c2) from files ( + "path" = "hdfs://://t_bitmap/*", + "format"="parquet", + "compression" = "uncompressed" + ); + ``` + + - Load data via [Hive Catalog](../../data_source/catalog/hive_catalog.md). + + ```sql + mysql> insert into t1 select c1, bitmap_from_binary(c2) from hive_catalog_hms.xxx_db.t_bitmap; + ``` + +5. View the results. + + ```sql + mysql> select c1, bitmap_to_string(c2) from t1; + +------+----------------------+ + | c1 | bitmap_to_string(c2) | + +------+----------------------+ + | 1 | 1,2,3 | + | 2 | 4,5 | + +------+----------------------+ + ``` + +### Export Bitmap from StarRocks to Hive + +1. Create a Bitmap table in StarRocks and write data into this table. + + ```sql + mysql> create table t1(c1 int, c2 bitmap bitmap_union) aggregate key(c1) buckets 3 distributed by hash(c1); + + mysql> select c1, bitmap_to_string(c2) from t1; + +------+----------------------+ + | c1 | bitmap_to_string(c2) | + +------+----------------------+ + | 1 | 1,2,3 | + | 2 | 4,5 | + +------+----------------------+ + ``` + +2. Create a Bitmap table in Hive. + + ```sql + hive> create table t_bitmap(c1 bigint, c2 binary) stored as parquet; + ``` + +3. Export data in different ways. + + - Export data via INSERT INTO FILES (Binary format). + + ```sql + mysql> insert into files ( + "path" = "hdfs://://t_bitmap/", + "format"="parquet", + "compression" = "uncompressed" + ) select c1, bitmap_to_binary(c2) as c2 from t1; + ``` + + - Export data via [Hive Catalog](../../data_source/catalog/hive_catalog.md) (Binary format). + + ```sql + mysql> insert into hive_catalog_hms..t_bitmap select c1, bitmap_to_binary(c2) from t1; + ``` + +4. View results in Hive. + + ```plain + hive> select c1, bitmap_to_string(c2) from t_bitmap; + 1 1,2,3 + 2 4,5 + ``` From 6079753618f855784c7ae20f474f9c779f1cfd48 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 23 Jan 2026 16:08:28 +0000 Subject: [PATCH 4/6] docs: automated translation via Gemini [skip ci] --- .../dict-functions/dict_mapping.md | 215 ++++++++++++++++++ .../dict-functions/dictionary_get.md | 109 +++++++++ .../sql-functions/hive_bitmap_udf.md | 213 +++++++++++++++++ .../json-constructor-functions/json_array.md | 45 ++++ .../json-constructor-functions/json_object.md | 56 +++++ .../json-constructor-functions/parse_json.md | 107 +++++++++ .../json-functions/json-operators.md | 56 +++++ .../arrow-function.md | 61 +++++ .../cast.md | 128 +++++++++++ .../get_json_bool.md | 70 ++++++ .../get_json_double.md | 66 ++++++ .../get_json_int.md | 68 ++++++ .../get_json_string.md | 107 +++++++++ .../json_contains.md | 88 +++++++ .../json_each.md | 55 +++++ .../json_exists.md | 63 +++++ .../json_keys.md | 103 +++++++++ .../json_length.md | 131 +++++++++++ .../json_pretty.md | 85 +++++++ .../json_query.md | 65 ++++++ .../json_remove.md | 88 +++++++ .../json_set.md | 95 ++++++++ .../json_string.md | 51 +++++ .../to_json.md | 70 ++++++ ...verview-of-json-functions-and-operators.md | 69 ++++++ 25 files changed, 2264 insertions(+) create mode 100644 docs/zh/sql-reference/sql-functions/dict-functions/dict_mapping.md create mode 100644 docs/zh/sql-reference/sql-functions/dict-functions/dictionary_get.md create mode 100644 docs/zh/sql-reference/sql-functions/hive_bitmap_udf.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-constructor-functions/json_array.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-constructor-functions/json_object.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-constructor-functions/parse_json.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-operators.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/arrow-function.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/cast.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_bool.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_double.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_int.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_string.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_contains.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_each.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_exists.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_keys.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_length.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_pretty.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_query.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_remove.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_set.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_string.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/to_json.md create mode 100644 docs/zh/sql-reference/sql-functions/json-functions/overview-of-json-functions-and-operators.md diff --git a/docs/zh/sql-reference/sql-functions/dict-functions/dict_mapping.md b/docs/zh/sql-reference/sql-functions/dict-functions/dict_mapping.md new file mode 100644 index 0000000..2b1c002 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/dict-functions/dict_mapping.md @@ -0,0 +1,215 @@ +--- +displayed_sidebar: docs +--- + +# dict_mapping + +返回字典表中与指定键映射的值。 + +此函数主要用于简化全局字典表的应用。在将数据加载到目标表期间,StarRocks 会通过使用此函数中的输入参数自动从字典表中获取与指定键映射的值,然后将该值加载到目标表中。 + +自 v3.2.5 起,StarRocks 支持此功能。另请注意,目前 StarRocks 的 shared-data mode 不支持此功能。 + +## 语法 + +```SQL +dict_mapping("[.]", key_column_expr_list [, ] [, ] ) + +key_column_expr_list ::= key_column_expr [, key_column_expr ... ] + +key_column_expr ::= | +``` + +## 参数 + +- 必需参数: + - `[.]`:字典表的名称,需要是 Primary Key table。支持的数据类型为 VARCHAR。 + - `key_column_expr_list`:字典表中键列的表达式列表,包括一个或多个 `key_column_exprs`。`key_column_expr` 可以是字典表中的键列的名称,也可以是特定的键或键表达式。 + + 此表达式列表需要包含字典表的所有主键列,这意味着表达式的总数需要与字典表中的主键列的总数匹配。因此,当字典表使用 Composite Primary Key 时,此列表中的表达式需要按顺序与表结构中定义的主键列相对应。此列表中的多个表达式用逗号 (`,`) 分隔。并且如果 `key_column_expr` 是特定的键或键表达式,则其类型必须与字典表中相应主键列的类型匹配。 + +- 可选参数: + - ``:值列的名称,也就是映射列。如果未指定值列,则默认值列是字典表的 AUTO_INCREMENT 列。值列也可以定义为字典表中的任何列,不包括自增列和主键。该列的数据类型没有限制。 + - ``(可选):如果字典表中不存在键,是否返回 Null。有效值: + - `true`:如果键不存在,则返回 Null。 + - `false`(默认):如果键不存在,则抛出异常。 + +## 返回值 + +返回的数据类型与值列的数据类型保持一致。如果值列是字典表的自增列,则返回的数据类型为 BIGINT。 + +但是,当未找到与指定键映射的值时,如果 `` 参数设置为 `true`,则返回 `NULL`。如果参数设置为 `false`(默认),则返回错误 `query failed if record not exist in dict table`。 + +## 示例 + +**示例 1:直接从字典表中查询与键映射的值。** + +1. 创建一个字典表并加载模拟数据。 + + ```SQL + MySQL [test]> CREATE TABLE dict ( + order_uuid STRING, + order_id_int BIGINT AUTO_INCREMENT + ) + PRIMARY KEY (order_uuid) + DISTRIBUTED BY HASH (order_uuid); + Query OK, 0 rows affected (0.02 sec) + + MySQL [test]> INSERT INTO dict (order_uuid) VALUES ('a1'), ('a2'), ('a3'); + Query OK, 3 rows affected (0.12 sec) + {'label':'insert_9e60b0e4-89fa-11ee-a41f-b22a2c00f66b', 'status':'VISIBLE', 'txnId':'15029'} + + MySQL [test]> SELECT * FROM dict; + +------------+--------------+ + | order_uuid | order_id_int | + +------------+--------------+ + | a1 | 1 | + | a3 | 3 | + | a2 | 2 | + +------------+--------------+ + 3 rows in set (0.01 sec) + ``` + + > **注意** + > + > 目前,`INSERT INTO` 语句不支持部分更新。因此,请确保插入到 `dict` 的键列中的值不重复。否则,在字典表中多次插入相同的键列值会导致其在值列中映射的值发生更改。 + +2. 查询字典表中与键 `a1` 映射的值。 + + ```SQL + MySQL [test]> SELECT dict_mapping('dict', 'a1'); + +----------------------------+ + | dict_mapping('dict', 'a1') | + +----------------------------+ + | 1 | + +----------------------------+ + 1 row in set (0.01 sec) + ``` + +**示例 2:表中的映射列配置为使用 `dict_mapping` 函数生成的列。因此,在将数据加载到此表时,StarRocks 可以自动获取与键映射的值。** + +1. 创建一个数据表,并通过使用 `dict_mapping('dict', order_uuid)` 将映射列配置为生成的列。 + + ```SQL + CREATE TABLE dest_table1 ( + id BIGINT, + -- 此列记录 STRING 类型的订单号,对应于示例 1 中 dict 表中的 order_uuid 列。 + order_uuid STRING, + batch int comment 'used to distinguish different batch loading', + -- 此列记录与 order_uuid 列映射的 BIGINT 类型的订单号。 + -- 因为此列是使用 dict_mapping 配置的生成列,所以此列中的值在数据加载期间会自动从示例 1 中的 dict 表中获取。 + -- 随后,此列可以直接用于去重和 JOIN 查询。 + order_id_int BIGINT AS dict_mapping('dict', order_uuid) + ) + DUPLICATE KEY (id, order_uuid) + DISTRIBUTED BY HASH(id); + ``` + +2. 当将模拟数据加载到此表中时,其中 `order_id_int` 列配置为 `dict_mapping('dict', 'order_uuid')`,StarRocks 会根据 `dict` 表中键和值之间的映射关系自动将值加载到 `order_id_int` 列中。 + + ```SQL + MySQL [test]> INSERT INTO dest_table1(id, order_uuid, batch) VALUES (1, 'a1', 1), (2, 'a1', 1), (3, 'a3', 1), (4, 'a3', 1); + Query OK, 4 rows affected (0.05 sec) + {'label':'insert_e191b9e4-8a98-11ee-b29c-00163e03897d', 'status':'VISIBLE', 'txnId':'72'} + + MySQL [test]> SELECT * FROM dest_table1; + +------+------------+-------+--------------+ + | id | order_uuid | batch | order_id_int | + +------+------------+-------+--------------+ + | 1 | a1 | 1 | 1 | + | 4 | a3 | 1 | 3 | + | 2 | a1 | 1 | 1 | + | 3 | a3 | 1 | 3 | + +------+------------+-------+--------------+ + 4 rows in set (0.02 sec) + ``` + + 在此示例中使用 `dict_mapping` 可以加速 [去重计算和 JOIN 查询](../../../using_starrocks/query_acceleration_with_auto_increment.md)。与之前构建全局字典以加速精确去重的解决方案相比,使用 `dict_mapping` 的解决方案更加灵活和用户友好。因为映射值是在“将键和值之间的映射关系加载到表”的阶段直接从字典表中获取的。您无需编写语句来连接字典表以获取映射值。此外,此解决方案支持各种数据加载方法。 + +**示例 3:如果表中的映射列未配置为生成的列,则在将数据加载到表时,您需要为映射列显式配置 `dict_mapping` 函数,以获取与键映射的值。** + +> **注意** +> +> 示例 3 和示例 2 之间的区别在于,在导入到数据表时,您需要修改导入命令,以便为映射列显式配置 `dict_mapping` 表达式。 + +1. 创建一个表。 + + ```SQL + CREATE TABLE dest_table2 ( + id BIGINT, + order_uuid STRING, + order_id_int BIGINT NULL, + batch int comment 'used to distinguish different batch loading' + ) + DUPLICATE KEY (id, order_uuid, order_id_int) + DISTRIBUTED BY HASH(id); + ``` + +2. 当模拟数据加载到此表中时,您可以通过配置 `dict_mapping` 从字典表中获取映射的值。 + + ```SQL + MySQL [test]> INSERT INTO dest_table2 VALUES (1, 'a1', dict_mapping('dict', 'a1'), 1); + Query OK, 1 row affected (0.35 sec) + {'label':'insert_19872ab6-8a96-11ee-b29c-00163e03897d', 'status':'VISIBLE', 'txnId':'42'} + + MySQL [test]> SELECT * FROM dest_table2; + +------+------------+--------------+-------+ + | id | order_uuid | order_id_int | batch | + +------+------------+--------------+-------+ + | 1 | a1 | 1 | 1 | + +------+------------+--------------+-------+ + 1 row in set (0.02 sec) + ``` + +**示例 4:启用 null_if_not_exist 模式** + +当禁用 `` 模式并且查询在字典表中不存在的键映射的值时,将返回错误,而不是 `NULL`。它确保数据行的键首先加载到字典表中,并且在将该数据行加载到目标表之前生成其映射的值(字典 ID)。 + +```SQL +MySQL [test]> SELECT dict_mapping('dict', 'b1', true); +ERROR 1064 (HY000): Query failed if record not exist in dict table. +``` + +**示例 5:如果字典表使用复合主键,则在查询时必须指定所有主键。** + +1. 创建一个具有 Composite Primary Key 的字典表,并将模拟数据加载到其中。 + + ```SQL + MySQL [test]> CREATE TABLE dict2 ( + order_uuid STRING, + order_date DATE, + order_id_int BIGINT AUTO_INCREMENT + ) + PRIMARY KEY (order_uuid,order_date) -- Composite Primary Key + DISTRIBUTED BY HASH (order_uuid,order_date) + ; + Query OK, 0 rows affected (0.02 sec) + + MySQL [test]> INSERT INTO dict2 VALUES ('a1','2023-11-22',default), ('a2','2023-11-22',default), ('a3','2023-11-22',default); + Query OK, 3 rows affected (0.12 sec) + {'label':'insert_9e60b0e4-89fa-11ee-a41f-b22a2c00f66b', 'status':'VISIBLE', 'txnId':'15029'} + + + MySQL [test]> select * from dict2; + +------------+------------+--------------+ + | order_uuid | order_date | order_id_int | + +------------+------------+--------------+ + | a1 | 2023-11-22 | 1 | + | a3 | 2023-11-22 | 3 | + | a2 | 2023-11-22 | 2 | + +------------+------------+--------------+ + 3 rows in set (0.01 sec) + ``` + +2. 查询字典表中与键映射的值。由于字典表具有 Composite Primary Key,因此需要在 `dict_mapping` 中指定所有主键。 + + ```SQL + SELECT dict_mapping('dict2', 'a1', cast('2023-11-22' as DATE)); + ``` + + 请注意,仅指定一个主键时会发生错误。 + + ```SQL + MySQL [test]> SELECT dict_mapping('dict2', 'a1'); + ERROR 1064 (HY000): Getting analyzing error. Detail message: dict_mapping function param size should be 3 - 5. + ``` \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/dict-functions/dictionary_get.md b/docs/zh/sql-reference/sql-functions/dict-functions/dictionary_get.md new file mode 100644 index 0000000..65a9202 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/dict-functions/dictionary_get.md @@ -0,0 +1,109 @@ +--- +displayed_sidebar: docs +--- + +# dictionary_get + +查询字典对象中 key 映射的值。 + +## 语法 + +```SQL +dictionary_get('dictionary_object_name', key_expression_list, [NULL_IF_NOT_EXIST]) + +key_expression_list ::= + key_expression [, ...] + +key_expression ::= + column_name | const_value +``` + +## 参数 + +- `dictionary_name`: 字典对象的名称。 +- `key_expression_list`: 所有 key 列的表达式列表。 它可以是列名列表或值列表。 +- `NULL_IF_NOT_EXIST` (可选): 如果字典缓存中不存在 key,是否返回 Null。有效值: + - `true`: 如果 key 不存在,则返回 Null。 + - `false` (默认): 如果 key 不存在,则抛出异常。 + +## 返回值 + +返回 STRUCT 类型的 value 列的值。 因此,您可以使用 `[N]` 或 `.` 来指定特定列的值。 `N` 表示列的位置,从 1 开始。 + +## 示例 + +以下示例使用 [dict_mapping](dict_mapping.md) 中的数据集。 + +- 示例 1:查询字典对象 `dict_obj` 中 key 列 `order_uuid` 映射的 value 列的值。 + + ```Plain + MySQL > SELECT dictionary_get('dict_obj', order_uuid) FROM dict; + +--------------------+ + | DICTIONARY_GET | + +--------------------+ + | {"order_id_int":1} | + | {"order_id_int":3} | + | {"order_id_int":2} | + +--------------------+ + 3 rows in set (0.02 sec) + ``` + +- 示例 2:查询字典对象 `dict_obj` 中 key `a1` 映射的 value 列的值。 + + ```Plain + MySQL > SELECT dictionary_get("dict_obj", "a1"); + +--------------------+ + | DICTIONARY_GET | + +--------------------+ + | {"order_id_int":1} | + +--------------------+ + 1 row in set (0.01 sec) + ``` + +- 示例 3:查询字典对象 `dimension_obj` 中 key `1` 映射的 value 列的值。 + + ```Plain + MySQL > SELECT dictionary_get("dimension_obj", 1); + +-----------------------------------------------------------------------------------------------------------------+ + | DICTIONARY_GET | + +-----------------------------------------------------------------------------------------------------------------+ + | {"ProductName":"T-Shirt","Category":"Apparel","SubCategory":"Shirts","Brand":"BrandA","Color":"Red","Size":"M"} | + +-----------------------------------------------------------------------------------------------------------------+ + 1 row in set (0.01 sec) + ``` + +- 示例 4:查询字典对象 `dimension_obj` 中 key `1` 映射的第一个 value 列的值。 + + ```Plain + MySQL > SELECT dictionary_get("dimension_obj", 1)[1]; + +-------------------+ + | DICTIONARY_GET[1] | + +-------------------+ + | T-Shirt | + +-------------------+ + 1 row in set (0.01 sec) + ``` + +- 示例 5:查询字典对象 `dimension_obj` 中 key `1` 映射的第二个 value 列的值。 + + ```Plain + MySQL > SELECT dictionary_get("dimension_obj", 1)[2]; + +-------------------+ + | DICTIONARY_GET[2] | + +-------------------+ + | Apparel | + +-------------------+ + 1 row in set (0.01 sec) + ``` + +- 示例 6:查询字典对象 `dimension_obj` 中 key `1` 映射的 `ProductName` value 列的值。 + + ```Plain + MySQL > SELECT dictionary_get("dimension_obj", 1).ProductName; + +----------------------------+ + | DICTIONARY_GET.ProductName | + +----------------------------+ + | T-Shirt | + +----------------------------+ + 1 row in set (0.01 sec) + ``` \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/hive_bitmap_udf.md b/docs/zh/sql-reference/sql-functions/hive_bitmap_udf.md new file mode 100644 index 0000000..f5c61ea --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/hive_bitmap_udf.md @@ -0,0 +1,213 @@ +displayed_sidebar: docs +sidebar_position: 0.9 +``` + +# Hive Bitmap UDF + +Hive Bitmap UDF 提供了一系列可以直接在 Hive 中使用的 UDF。 这些 UDF 可以用于生成 Bitmap 数据以及执行 Bitmap 相关的计算。 + +Hive Bitmap UDF 定义的 Bitmap 格式与 StarRocks 中的格式一致,可以直接用于将 Bitmap 数据导入到 StarRocks 中,以及将 Bitmap 数据从 StarRocks 导出到 Hive。 + +适用场景: + +- 原始数据量大,直接将这些数据导入到 StarRocks 进行计算,会对 StarRocks 集群造成巨大的压力。 期望的解决方案是在 Hive 中生成 Bitmap 数据,然后将 Bitmap 导入到 StarRocks 中。 +- 将在 StarRocks 中生成的 Bitmap 数据导出到 Hive,以供其他系统使用。 + +支持的源和目标数据类型: + +- v3.1 及更高版本支持加载和卸载以下类型的数据:String、Base64 和 Binary。 +- v2.5 和 v3.0 仅支持加载和卸载 String 和 Base64 数据。 + +## 可以生成的 Hive Bitmap UDF + +- com.starrocks.hive.udf.UDAFBitmapAgg + + 将一列中的多行非空值合并为一行 Bitmap 值,等效于 StarRocks 的内置聚合函数 [bitmap_agg](bitmap-functions/bitmap_agg.md)。 + +- com.starrocks.hive.udf.UDAFBitmapUnion + + 计算一组 Bitmap 的并集,等效于 StarRocks 的内置聚合函数 [bitmap_union](bitmap-functions/bitmap_union.md)。 + +- com.starrocks.hive.udf.UDFBase64ToBitmap + + 将 base64 编码的字符串转换为 Bitmap,等效于 StarRocks 的内置函数 [base64_to_bitmap](bitmap-functions/base64_to_bitmap.md)。 + +- com.starrocks.hive.udf.UDFBitmapAnd + + 计算两个 Bitmap 的交集,等效于 StarRocks 的内置函数 [bitmap_and](bitmap-functions/bitmap_and.md)。 + +- com.starrocks.hive.udf.UDFBitmapCount + + 计算 Bitmap 中的值的数量,等效于 StarRocks 的内置函数 [bitmap_count](bitmap-functions/bitmap_count.md)。 + +- com.starrocks.hive.udf.UDFBitmapFromString + + 将逗号分隔的字符串转换为 Bitmap,等效于 StarRocks 的内置函数 [bitmap_from_string](bitmap-functions/bitmap_from_string.md)。 + +- com.starrocks.hive.udf.UDFBitmapOr + + 计算两个 Bitmap 的并集,等效于 StarRocks 的内置函数 [bitmap_or](bitmap-functions/bitmap_or.md)。 + +- com.starrocks.hive.udf.UDFBitmapToBase64 + + 将 Bitmap 转换为 Base64 字符串,等效于 StarRocks 的内置函数 [bitmap_to_base64](bitmap-functions/bitmap_to_base64.md)。 + +- com.starrocks.hive.udf.UDFBitmapToString + + 将 Bitmap 转换为逗号分隔的字符串,等效于 StarRocks 的内置函数 [bitmap_to_string](bitmap-functions/bitmap_to_string.md)。 + +- com.starrocks.hive.udf.UDFBitmapXor + + 计算两个 Bitmap 中唯一元素的集合,等效于 StarRocks 的内置函数 [bitmap_xor](bitmap-functions/bitmap_xor.md)。 + +## 如何使用 + +1. 在 FE 上编译并生成 Hive UDF。 + + ```bash + ./build.sh --hive-udf + ``` + + JAR 包 `hive-udf-*.jar` 将在 `fe/hive-udf/` 目录中生成。 + +2. 将 JAR 包上传到 HDFS。 + + ```bash + hadoop fs -put -f ./hive-udf-*.jar hdfs://:/hive-udf-*.jar + ``` + +3. 将 JAR 包加载到 Hive。 + + ```sql + hive> add jar hdfs://:/hive-udf-*.jar; + ``` + +4. 加载 UDF 函数。 + + ```sql + hive> create temporary function bitmap_agg as 'com.starrocks.hive.udf.UDAFBitmapAgg'; + hive> create temporary function bitmap_union as 'com.starrocks.hive.udf.UDAFBitmapUnion'; + hive> create temporary function base64_to_bitmap as 'com.starrocks.hive.udf.UDFBase64ToBitmap'; + hive> create temporary function bitmap_and as 'com.starrocks.hive.udf.UDFBitmapAnd'; + hive> create temporary function bitmap_count as 'com.starrocks.hive.udf.UDFBitmapCount'; + hive> create temporary function bitmap_from_string as 'com.starrocks.hive.udf.UDFBitmapFromString'; + hive> create temporary function bitmap_or as 'com.starrocks.hive.udf.UDFBitmapOr'; + hive> create temporary function bitmap_to_base64 as 'com.starrocks.hive.udf.UDFBitmapToBase64'; + hive> create temporary function bitmap_to_string as 'com.starrocks.hive.udf.UDFBitmapToString'; + hive> create temporary function bitmap_xor as 'com.starrocks.hive.udf.UDFBitmapXor'; + ``` + +## 使用示例 + +### 在 Hive 中生成 Bitmap 并以 Binary 格式加载到 StarRocks 中 + +1. 创建一个 Hive 源表。 + + ```sql + hive> create table t_src(c1 bigint, c2 bigint) stored as parquet; + + hive> insert into t_src values (1,1), (1,2), (1,3), (2,4), (2,5); + + hive> select * from t_src; + 1 1 + 1 2 + 1 3 + 2 4 + 2 5 + ``` + +2. 创建一个 Hive Bitmap 表。 + + ```sql + hive> create table t_bitmap(c1 bigint, c2 binary) stored as parquet; + ``` + + Hive 通过 UDFBitmapAgg 生成 Bitmap 并将其写入 Bitmap 表。 + + ```sql + hive> insert into t_bitmap select c1, bitmap_agg(c2) from t_src group by c1; + ``` + +3. 创建一个 StarRocks Bitmap 表。 + + ```sql + mysql> create table t1(c1 int, c2 bitmap bitmap_union) aggregate key(c1) distributed by hash(c1); + ``` + +4. 以不同的方式将 Bitmap 数据加载到 StarRocks 中。 + + - 通过 [files](table-functions/files.md) 函数加载数据。 + + ```sql + mysql> insert into t1 select c1, bitmap_from_binary(c2) from files ( + "path" = "hdfs://://t_bitmap/*", + "format"="parquet", + "compression" = "uncompressed" + ); + ``` + + - 通过 [Hive Catalog] (../../data_source/catalog/hive_catalog.md) 加载数据。 + + ```sql + mysql> insert into t1 select c1, bitmap_from_binary(c2) from hive_catalog_hms.xxx_db.t_bitmap; + ``` + +5. 查看结果。 + + ```sql + mysql> select c1, bitmap_to_string(c2) from t1; + +------+----------------------+ + | c1 | bitmap_to_string(c2) | + +------+----------------------+ + | 1 | 1,2,3 | + | 2 | 4,5 | + +------+----------------------+ + ``` + +### 将 Bitmap 从 StarRocks 导出到 Hive + +1. 在 StarRocks 中创建一个 Bitmap 表,并将数据写入该表。 + + ```sql + mysql> create table t1(c1 int, c2 bitmap bitmap_union) aggregate key(c1) buckets 3 distributed by hash(c1); + + mysql> select c1, bitmap_to_string(c2) from t1; + +------+----------------------+ + | c1 | bitmap_to_string(c2) | + +------+----------------------+ + | 1 | 1,2,3 | + | 2 | 4,5 | + +------+----------------------+ + ``` + +2. 在 Hive 中创建一个 Bitmap 表。 + + ```sql + hive> create table t_bitmap(c1 bigint, c2 binary) stored as parquet; + ``` + +3. 以不同的方式导出数据。 + + - 通过 INSERT INTO FILES 导出数据(Binary 格式)。 + + ```sql + mysql> insert into files ( + "path" = "hdfs://://t_bitmap/", + "format"="parquet", + "compression" = "uncompressed" + ) select c1, bitmap_to_binary(c2) as c2 from t1; + ``` + + - 通过 [Hive Catalog] (../../data_source/catalog/hive_catalog.md) 导出数据(Binary 格式)。 + + ```sql + mysql> insert into hive_catalog_hms..t_bitmap select c1, bitmap_to_binary(c2) from t1; + ``` + +4. 在 Hive 中查看结果。 + + ```plain + hive> select c1, bitmap_to_string(c2) from t_bitmap; + 1 1,2,3 + 2 4,5 + ``` \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-constructor-functions/json_array.md b/docs/zh/sql-reference/sql-functions/json-functions/json-constructor-functions/json_array.md new file mode 100644 index 0000000..a6c688f --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-constructor-functions/json_array.md @@ -0,0 +1,45 @@ +--- +displayed_sidebar: docs +--- + +# json_array + +将 SQL 数组的每个元素转换为 JSON 值,并返回一个由 JSON 值组成的 JSON 数组。 + +:::tip +所有 JSON 函数和运算符均在导航栏和 [概述页面](../overview-of-json-functions-and-operators.md) 上列出。 + +使用 [生成列](../../../sql-statements/generated_columns.md) 加速查询 +::: + +## 语法 + +```Haskell +json_array(value, ...) +``` + +## 参数 + +`value`: SQL 数组中的一个元素。仅支持 `NULL` 值和以下数据类型:STRING、VARCHAR、CHAR、JSON、TINYINT、SMALLINT、INT、BIGINT、LARGEINT、DOUBLE、FLOAT 和 BOOLEAN。 + +## 返回值 + +返回 JSON 数组。 + +## 示例 + +示例 1:构造一个由不同数据类型的值组成的 JSON 数组。 + +```plaintext +mysql> SELECT json_array(1, true, 'starrocks', 1.1); + + -> [1, true, "starrocks", 1.1] +``` + +示例 2:构造一个空的 JSON 数组。 + +```plaintext +mysql> SELECT json_array(); + + -> [] +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-constructor-functions/json_object.md b/docs/zh/sql-reference/sql-functions/json-functions/json-constructor-functions/json_object.md new file mode 100644 index 0000000..9003d48 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-constructor-functions/json_object.md @@ -0,0 +1,56 @@ +--- +displayed_sidebar: docs +--- + +# json_object + +将一个或多个键值对转换为包含这些键值对的 JSON 对象。键值对会按照键的字典顺序进行排序。 + +:::tip +所有 JSON 函数和运算符均在导航栏和 [概述页面](../overview-of-json-functions-and-operators.md) 上列出。 + +使用 [生成列](../../../sql-statements/generated_columns.md) 加速查询 +::: + +## 语法 + +```Haskell +json_object(key, value, ...) +``` + +## 参数 + +- `key`: JSON 对象中的键。仅支持 VARCHAR 数据类型。 + +- `value`: JSON 对象中的值。仅支持 `NULL` 值和以下数据类型:STRING、VARCHAR、CHAR、JSON、TINYINT、SMALLINT、INT、BIGINT、LARGEINT、DOUBLE、FLOAT 和 BOOLEAN。 + +## 返回值 + +返回一个 JSON 对象。 + +> 如果键和值的总数为奇数,则 JSON_OBJECT 函数会在最后一个字段中填充 `NULL`。 + +## 示例 + +示例 1:构造一个包含不同数据类型值的 JSON 对象。 + +```plaintext +mysql> SELECT json_object('name', 'starrocks', 'active', true, 'published', 2020); + + -> {"active": true, "name": "starrocks", "published": 2020} +``` + +示例 2:通过使用嵌套的 JSON_OBJECT 函数构造 JSON 对象。 + +```plaintext +mysql> SELECT json_object('k1', 1, 'k2', json_object('k2', 2), 'k3', json_array(4, 5)); + + -> {"k1": 1, "k2": {"k2": 2}, "k3": [4, 5]} +``` + +示例 3:构造一个空的 JSON 对象。 + +```plaintext +mysql> SELECT json_object(); + + -> {} \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-constructor-functions/parse_json.md b/docs/zh/sql-reference/sql-functions/json-functions/json-constructor-functions/parse_json.md new file mode 100644 index 0000000..a74e015 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-constructor-functions/parse_json.md @@ -0,0 +1,107 @@ +--- +displayed_sidebar: docs +--- + +# parse_json + +将字符串转换为 JSON 值。 + +:::tip +所有 JSON 函数和运算符均在导航栏和 [概述页面](../overview-of-json-functions-and-operators.md) 中列出。 + +通过 [生成列](../../../sql-statements/generated_columns.md) 加速查询 +::: + +## 语法 + +```Haskell +parse_json(string_expr) +``` + +## 参数 + +`string_expr`: 表示字符串的表达式。仅支持 STRING、VARCHAR 和 CHAR 数据类型。 + +## 返回值 + +返回 JSON 值。 + +> 注意:如果字符串无法解析为标准 JSON 值,则 PARSE_JSON 函数返回 `NULL`(参见示例 5)。有关 JSON 规范的信息,请参见 [RFC 7159](https://tools.ietf.org/html/rfc7159?spm=a2c63.p38356.0.0.14d26b9fcp7fcf#page-4)。 + +## 示例 + +示例 1:将 `1` 的 STRING 值转换为 `1` 的 JSON 值。 + +```plaintext +mysql> SELECT parse_json('1'); ++-----------------+ +| parse_json('1') | ++-----------------+ +| "1" | ++-----------------+ +``` + +示例 2:将 STRING 数据类型的数组转换为 JSON 数组。 + +```plaintext +mysql> SELECT parse_json('[1,2,3]'); ++-----------------------+ +| parse_json('[1,2,3]') | ++-----------------------+ +| [1, 2, 3] | ++-----------------------+ +``` + +示例 3:将 STRING 数据类型的对象转换为 JSON 对象。 + +```plaintext +mysql> SELECT parse_json('{"star": "rocks"}'); ++---------------------------------+ +| parse_json('{"star": "rocks"}') | ++---------------------------------+ +| {"star": "rocks"} | ++---------------------------------+ +``` + +示例 4:构造一个 `NULL` 的 JSON 值。 + +```plaintext +mysql> SELECT parse_json('null'); ++--------------------+ +| parse_json('null') | ++--------------------+ +| "null" | ++--------------------+ +``` + +示例 5:如果字符串无法解析为标准 JSON 值,则 PARSE_JSON 函数返回 `NULL`。在此示例中,`star` 未用双引号 (") 括起来。因此,PARSE_JSON 函数返回 `NULL`。 + +```plaintext +mysql> SELECT parse_json('{star: "rocks"}'); ++-------------------------------+ +| parse_json('{star: "rocks"}') | ++-------------------------------+ +| NULL | ++-------------------------------+ +``` + +示例 6:如果 JSON 键包含“.”,例如“a.1”,则必须使用“\\”对其进行转义,或者需要将整个键值以及双引号括在单引号中。 + +```plaintext +mysql> select parse_json('{"b":4, "a.1": "1"}')->"a\\.1"; ++--------------------------------------------+ +| parse_json('{"b":4, "a.1": "1"}')->'a\\.1' | ++--------------------------------------------+ +| "1" | ++--------------------------------------------+ +mysql> select parse_json('{"b":4, "a.1": "1"}')->'"a.1"'; ++--------------------------------------------+ +| parse_json('{"b":4, "a.1": "1"}')->'"a.1"' | ++--------------------------------------------+ +| "1" | ++--------------------------------------------+ +``` + +## Keywords + +parse_json, parse json \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-operators.md b/docs/zh/sql-reference/sql-functions/json-functions/json-operators.md new file mode 100644 index 0000000..3e8c43a --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-operators.md @@ -0,0 +1,56 @@ +--- +displayed_sidebar: docs +--- + +# JSON 操作符 + +StarRocks 支持以下 JSON 比较操作符:`<`、`<=`、`>`、`>=`、`=` 和 `!=`。您可以使用这些操作符来查询 JSON 数据。但是,StarRocks 不允许您使用 `IN` 来查询 JSON 数据。 + +- > 操作符的操作数必须都是 JSON 值。 + +- > 如果一个操作符的一个操作数是 JSON 值,而另一个操作数不是,则在算术运算期间,非 JSON 值的操作数将转换为 JSON 值。有关转换规则的更多信息,请参见 [CAST](./json-query-and-processing-functions/cast.md)。 + +:::tip +所有 JSON 函数和操作符都列在导航栏和 [概述页面](./overview-of-json-functions-and-operators.md) 上。 + +使用 [generated columns](../../sql-statements/generated_columns.md) 加速查询 +::: + +## 算术规则 + +JSON 操作符遵循以下算术规则: + +- 当操作符的操作数是相同数据类型的 JSON 值时: + - 如果两个操作数都是基本数据类型的 JSON 值,例如 NUMBER、STRING 或 BOOLEAN,则操作符按照基本数据类型的算术规则执行算术运算。 + +> 注意:如果两个操作数都是数字,但一个是 DOUBLE 值,另一个是 INT 值,则操作符会将 INT 值转换为 DOUBLE 值。 + +- 如果两个操作数都是复合数据类型的 JSON 值,例如 OBJECT 或 ARRAY,则操作符会根据第一个操作数中键的顺序,按字典顺序对操作数中的键进行排序,然后比较操作数之间键的值。 + +示例 1: + +第一个操作数是 `{"a": 1, "c": 2}`,第二个操作数是 `{"b": 1, "a": 2}`。在此示例中,操作符比较操作数之间键 `a` 的值。第一个操作数中键 `a` 的值是 `1`,而第二个操作数中键 `a` 的值是 `2`。值 `1` 大于值 `2`。因此,操作符得出结论,第一个操作数 `{"a": 1, "c": 2}` 小于第二个操作数 `{"b": 1, "a": 2}`。 + +```plaintext +mysql> SELECT PARSE_JSON('{"a": 1, "c": 2}') < PARSE_JSON('{"b": 1, "a": 2} '); + + -> 1 +``` + +示例 2: + +第一个操作数是 `{"a": 1, "c": 2}`,第二个操作数是 `{"b": 1, "a": 1}`。在此示例中,操作符首先比较操作数之间键 `a` 的值。操作数中键 `a` 的值均为 `1`。然后,操作符比较操作数中键 `c` 的值。第二个操作数不包含键 `c`。因此,操作符得出结论,第一个操作数 `{"a": 1, "c": 2}` 大于第二个操作数 `{"b": 1, "a": 1}`。 + +```plaintext +mysql> SELECT PARSE_JSON('{"a": 1, "c": 2}') < PARSE_JSON('{"b": 1, "a": 1}'); + + -> 0 +``` + +- 当操作符的操作数是两种不同数据类型的 JSON 值时,操作符按照以下算术规则比较操作数:NULL < BOOLEAN < ARRAY < OBJECT < DOUBLE < INT < STRING。 + +```plaintext +mysql> SELECT PARSE_JSON('"a"') < PARSE_JSON('{"a": 1, "c": 2}'); + + -> 0 +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/arrow-function.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/arrow-function.md new file mode 100644 index 0000000..762f210 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/arrow-function.md @@ -0,0 +1,61 @@ +--- +displayed_sidebar: docs +--- + +# Arrow function + +查询 JSON 对象中可以通过 `json_path` 表达式定位的元素,并返回 JSON 值。 箭头函数 `->` 比 [json_query](json_query.md) 函数更简洁且易于使用。 + +:::tip +所有 JSON 函数和 `Operator` 都列在导航栏和 [概述页面](../overview-of-json-functions-and-operators.md) 上。 + +通过 [generated columns](../../../sql-statements/generated_columns.md) 加速查询 +::: + +## Syntax + +```Haskell +json_object_expr -> json_path +``` + +## Parameters + +- `json_object_expr`: 表示 JSON 对象的表达式。 该对象可以是 JSON 列,也可以是由 JSON 构造函数(如 PARSE_JSON)生成的 JSON 对象。 + +- `json_path`: 表示 JSON 对象中元素路径的表达式。 此参数的值是一个字符串。 有关 StarRocks 支持的 JSON 路径语法的信息,请参见 [JSON 函数和 `Operator` 概述](../overview-of-json-functions-and-operators.md)。 + +## Return value + +返回一个 JSON 值。 + +> 如果该元素不存在,则箭头函数返回一个 `NULL` 的 SQL 值。 + +## Examples + +示例 1:查询指定 JSON 对象中可以通过 `'$.a.b'` 表达式定位的元素。 + +```plaintext +mysql> SELECT parse_json('{"a": {"b": 1}}') -> '$.a.b'; + + -> 1 +``` + +示例 2:使用嵌套的箭头函数来查询元素。 嵌套另一个箭头函数的箭头函数基于嵌套箭头函数返回的结果查询元素。 + +> 在此示例中,根元素 $ 从 `json_path` 表达式中省略。 + +```plaintext +mysql> SELECT parse_json('{"a": {"b": 1}}')->'a'->'b'; + + -> 1 +``` + +示例 3:查询指定 JSON 对象中可以通过 `'a'` 表达式定位的元素。 + +> 在此示例中,根元素 $ 从 `json_path` 表达式中省略。 + +```plaintext +mysql> SELECT parse_json('{"a": "b"}') -> 'a'; + + -> "b" +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/cast.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/cast.md new file mode 100644 index 0000000..c994289 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/cast.md @@ -0,0 +1,128 @@ +--- +displayed_sidebar: docs +--- + +# cast + +在 JSON 类型和 SQL 类型之间转换值。 + +:::tip +所有 JSON 函数和运算符均在导航栏和 [概述页面](../overview-of-json-functions-and-operators.md) 中列出。 + +使用 [生成列](../../../sql-statements/generated_columns.md) 加速查询 +::: + +## 语法 + +- 从 JSON 转换为 SQL + +```Haskell +cast(json_expr AS sql_data_type) +``` + +- 从 SQL 转换为 JSON + +```Haskell +cast(sql_expr AS JSON) +``` + +## 参数 + +- `json_expr`: 表示要转换为 SQL 值的 JSON 值的表达式。 + +- `sql_data_type`: 要将 JSON 值转换为的 SQL 数据类型。仅支持 STRING、VARCHAR、CHAR、BOOLEAN、TINYINT、SMALLINT、INT、BIGINT、LARGEINT、DOUBLE 和 FLOAT 数据类型。 + +- `sql_expr`: 表示要转换为 JSON 值的 SQL 值的表达式。此参数支持 `sql_data_type` 参数支持的所有 SQL 数据类型。 + +## 返回值 + +- 如果使用 `cast(json_expr AS sql_data_type)` 语法,则 cast 函数返回一个 `sql_data_type` 参数指定的 SQL 数据类型的值。 + +- 如果使用 `cast(sql_expr AS JSON)` 语法,则 cast 函数返回一个 JSON 值。 + +## 使用说明 + +- 从 SQL 转换为 JSON + + - 如果 SQL 值超过 JSON 支持的精度,则 cast 函数返回 `NULL` 以防止算术溢出。 + + - 如果 SQL 值为 `NULL`,则 cast 函数不会将 SQL 值 `NULL` 转换为 JSON 值 `NULL`。返回值仍然是 SQL 值 `NULL`。 + +- 从 JSON 转换为 SQL + + - cast 函数仅支持兼容的 JSON 和 SQL 数据类型之间的转换。例如,可以将 JSON 字符串转换为 SQL 字符串。 + + - cast 函数不支持不兼容的 JSON 和 SQL 数据类型之间的转换。例如,如果将 JSON 数字转换为 SQL 字符串,则该函数返回 `NULL`。 + + - 如果发生算术溢出,则 cast 函数返回一个 SQL 值 `NULL`。 + + - 如果将 JSON 值 `NULL` 转换为 SQL 值,则该函数返回一个 SQL 值 `NULL`。 + + - 如果将 JSON 字符串转换为 VARCHAR 值,则该函数返回一个未包含在双引号 (") 中的 VARCHAR 值。 + +## 示例 + +示例 1:将 JSON 值转换为 SQL 值。 + +```plaintext +-- 将 JSON 值转换为 INT 值。 +mysql> select cast(parse_json('{"a": 1}') -> 'a' as int); ++--------------------------------------------+ +| CAST((parse_json('{"a": 1}')->'a') AS INT) | ++--------------------------------------------+ +| 1 | ++--------------------------------------------+ + +-- 将 JSON 字符串转换为 VARCHAR 值。 +mysql> select cast(parse_json('"star"') as varchar); ++---------------------------------------+ +| cast(parse_json('"star"') AS VARCHAR) | ++---------------------------------------+ +| star | ++---------------------------------------+ + +-- 将 JSON 对象转换为 VARCHAR 值。 +mysql> select cast(parse_json('{"star": 1}') as varchar); ++--------------------------------------------+ +| cast(parse_json('{"star": 1}') AS VARCHAR) | ++--------------------------------------------+ +| {"star": 1} | ++--------------------------------------------+ + +-- 将 JSON 数组转换为 VARCHAR 值。 + +mysql> select cast(parse_json('[1,2,3]') as varchar); ++----------------------------------------+ +| cast(parse_json('[1,2,3]') AS VARCHAR) | ++----------------------------------------+ +| [1, 2, 3] | ++----------------------------------------+ +``` + +示例 2:将 SQL 值转换为 JSON 值。 + +```plaintext +-- 将 INT 值转换为 JSON 值。 +mysql> select cast(1 as json); ++-----------------+ +| cast(1 AS JSON) | ++-----------------+ +| 1 | ++-----------------+ + +-- 将 VARCHAR 值转换为 JSON 值。 +mysql> select cast("star" as json); ++----------------------+ +| cast('star' AS JSON) | ++----------------------+ +| "star" | ++----------------------+ + +-- 将 BOOLEAN 值转换为 JSON 值。 +mysql> select cast(true as json); ++--------------------+ +| cast(TRUE AS JSON) | ++--------------------+ +| true | ++--------------------+ +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_bool.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_bool.md new file mode 100644 index 0000000..895cd82 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_bool.md @@ -0,0 +1,70 @@ +--- +displayed_sidebar: docs +--- + +# get_json_bool + +解析 JSON 字符串,并从指定的 JSON 路径中获取布尔值。 + +:::tip +所有 JSON 函数和 `Operator` 都列在导航栏和 [概览页面](../overview-of-json-functions-and-operators.md) 上。 + +使用 [生成列](../../../sql-statements/generated_columns.md) 加速查询 +::: + +如果 `json_str` 或 `json_path` 的格式无效,或者找不到匹配的内容,此函数将返回 NULL。 + +该函数从 v3.3 版本开始支持。 + +## 语法 + +```Haskell +BOOLEAN get_json_bool(VARCHAR json_str, VARCHAR json_path) +``` + +## 参数 + +- `json_str`: JSON 字符串。支持的数据类型为 VARCHAR。 +- `json_path`: JSON 路径。支持的数据类型为 VARCHAR。 + + - `json_path` 必须以 `$` 开头,并使用 `.` 作为路径分隔符。如果路径包含 `.`,则可以将其括在一对 `"` 中。 + - `[ ]` 用作数组下标,从 0 开始。 + +## 示例 + +1. 获取键为 "k1" 的值。该值为 `true`,返回 `1`。 + + ```Plain Text + MySQL > SELECT get_json_bool('{"k1":true, "k2":"false"}', "$.k1"); + +----------------------------------------------------+ + | get_json_bool('{"k1":true, "k2":"false"}', '$.k1') | + +----------------------------------------------------+ + | 1 | + +----------------------------------------------------+ + ``` + +2. 获取键为 "my.key" 的数组中的第二个元素。第二个元素为 `false`,返回 `0`。 + + ```Plain Text + SELECT get_json_bool('{"k1":"v1", "my.key":[true, false, 3]}', '$."my.key"[1]'); + +--------------------------------------------------------------------------+ + | get_json_bool('{"k1":"v1", "my.key":[true, false, 3]}', '$."my.key"[1]') | + +--------------------------------------------------------------------------+ + | 0 | + +--------------------------------------------------------------------------+ + ``` + +3. 获取路径为 `k1.key -> k2` 的数组中的第一个元素。第一个元素为 `false`,返回 `0`。 + + ```Plain Text + MYSQL > SELECT get_json_bool('{"k1.key":{"k2":[false, true]}}', '$."k1.key".k2[0]'); + +----------------------------------------------------------------------+ + | get_json_bool('{"k1.key":{"k2":[false, true]}}', '$."k1.key".k2[0]') | + +----------------------------------------------------------------------+ + | 0 | + +----------------------------------------------------------------------+ + ``` + +## keyword + +GET_JSON_BOOL,GET,JSON,BOOL \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_double.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_double.md new file mode 100644 index 0000000..0b38efc --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_double.md @@ -0,0 +1,66 @@ +--- +displayed_sidebar: docs +--- + +# get_json_double + +该函数用于解析 JSON 字符串,并从指定的路径中获取浮点数值。`json_path` 必须以 `$` 开头,并使用 `.` 作为路径分隔符。 + +:::tip +所有 JSON 函数和操作符均在导航栏和 [概览页面](../overview-of-json-functions-and-operators.md) 中列出。 + +使用 [generated columns](../../../sql-statements/generated_columns.md) 加速查询。 +::: + +如果路径中包含 `.`,则可以使用 `"` 和 `"` 将其括起来。 + +`[ ]` 用作数组下标,从 0 开始。 + +路径中的内容不应包含 `"`、`[` 和 `]`。 + +如果 `json_string` 或 `json_path` 的格式错误,此函数将返回 NULL。 + +## 语法 + +```Haskell +DOUBLE get_json_double(VARCHAR json_str, VARCHAR json_path) +``` + +## 示例 + +1. 获取键为 "k1" 的值 + + ```Plain Text + MySQL > SELECT get_json_double('{"k1":1.3, "k2":"2"}', "$.k1"); + +-------------------------------------------------+ + | get_json_double('{"k1":1.3, "k2":"2"}', '$.k1') | + +-------------------------------------------------+ + | 1.3 | + +-------------------------------------------------+ + ``` + +2. 获取键为 "my.key" 的数组中的第二个元素 + + ```Plain Text + MySQL > SELECT get_json_double('{"k1":"v1", "my.key":[1.1, 2.2, 3.3]}', '$."my.key"[1]'); + +---------------------------------------------------------------------------+ + | get_json_double('{"k1":"v1", "my.key":[1.1, 2.2, 3.3]}', '$."my.key"[1]') | + +---------------------------------------------------------------------------+ + | 2.2 | + +---------------------------------------------------------------------------+ + ``` + +3. 获取路径为 k1.key -> k2 的数组中的第一个元素 + + ```Plain Text + MySQL > SELECT get_json_double('{"k1.key":{"k2":[1.1, 2.2]}}', '$."k1.key".k2[0]'); + +---------------------------------------------------------------------+ + | get_json_double('{"k1.key":{"k2":[1.1, 2.2]}}', '$."k1.key".k2[0]') | + +---------------------------------------------------------------------+ + | 1.1 | + +---------------------------------------------------------------------+ + ``` + +## keyword + +GET_JSON_DOUBLE,GET,JSON,DOUBLE \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_int.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_int.md new file mode 100644 index 0000000..589f6f4 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_int.md @@ -0,0 +1,68 @@ +--- +displayed_sidebar: docs +--- + +# get_json_int + +该函数用于解析 JSON 字符串,并从指定的路径中获取整数值。 + +:::tip +所有 JSON 函数和运算符均在导航栏和 [概述页面](../overview-of-json-functions-and-operators.md) 中列出。 + +使用 [生成列](../../../sql-statements/generated_columns.md) 加速查询 +::: + +`json_path` 必须以 `$` 开头,并使用 `.` 作为路径分隔符。 + +如果路径中包含 `.`,则可以用 `"` 和 `"` 将其括起来。 + +`[ ]` 用作数组下标,从 0 开始。 + +路径中的内容不应包含 `"`、`[` 和 `]`。 + +如果 `json_string` 或 `json_path` 的格式错误,此函数将返回 NULL。 + +## 语法 + +```Haskell +BIGINT get_json_int(VARCHAR json_str, VARCHAR json_path) +``` + +## 示例 + +1. 获取键为 "k1" 的值。 + + ```Plain Text + MySQL > SELECT get_json_int('{"k1":1, "k2":"2"}', "$.k1"); + +--------------------------------------------+ + | get_json_int('{"k1":1, "k2":"2"}', '$.k1') | + +--------------------------------------------+ + | 1 | + +--------------------------------------------+ + ``` + +2. 获取键为 "my.key" 的数组中的第二个元素。 + + ```Plain Text + MySQL > SELECT get_json_int('{"k1":"v1", "my.key":[1, 2, 3]}', '$."my.key"[1]'); + +------------------------------------------------------------------+ + | get_json_int('{"k1":"v1", "my.key":[1, 2, 3]}', '$."my.key"[1]') | + +------------------------------------------------------------------+ + | 2 | + +------------------------------------------------------------------+ + ``` + +3. 获取路径为 k1.key -> k2 的数组中的第一个元素。 + + ```Plain Text + MySQL > SELECT get_json_int('{"k1.key":{"k2":[1, 2]}}', '$."k1.key".k2[0]'); + +--------------------------------------------------------------+ + | get_json_int('{"k1.key":{"k2":[1, 2]}}', '$."k1.key".k2[0]') | + +--------------------------------------------------------------+ + | 1 | + +--------------------------------------------------------------+ + ``` + +## keyword + +GET_JSON_INT,GET,JSON,INT \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_string.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_string.md new file mode 100644 index 0000000..efb515d --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/get_json_string.md @@ -0,0 +1,107 @@ +--- +displayed_sidebar: docs +--- + +# get_json_string,get_json_object + +分析 JSON 字符串,并从指定的路径 (`json_path`) 中获取字符串。如果 `json_string` 或 `json_path` 的格式错误,或者未找到匹配的值,此函数将返回 NULL。 + +:::tip +所有 JSON 函数和运算符均在导航栏和 [概述页面](../overview-of-json-functions-and-operators.md) 上列出。 + +使用 [generated columns](../../../sql-statements/generated_columns.md) 加速查询 +::: + +别名为 get_json_object。 + +## Syntax + +```Haskell +VARCHAR get_json_string(VARCHAR json_str, VARCHAR json_path) +``` + +## Parameters + +- `json_str`: JSON 字符串。支持的数据类型为 VARCHAR。 +- `json_path`: JSON 路径。支持的数据类型为 VARCHAR。 `json_path` 以 `$` 开头,并使用 `.` 作为路径分隔符。 `[ ]` 用作数组下标,从 0 开始。例如,`$."my.key"[1]` 表示从元素 `my.key` 中获取第二个值。 + +## Return value + +返回 VARCHAR 类型的值。如果未找到匹配的对象,则返回 NULL。 + +## Examples + +### Example 1: 获取键为 `k1` 的值。 + +```sql +SELECT get_json_string('{"k1":"v1", "k2":"v2"}', "$.k1"); +``` + +```sql ++---------------------------------------------------+ +| get_json_string('{"k1":"v1", "k2":"v2"}', '$.k1') | ++---------------------------------------------------+ +| v1 | ++---------------------------------------------------+ +``` + +### Example 2: 从第一个元素中获取键为 `a` 的值。 + +```sql +SELECT get_json_object('[{"a":"123", "b": "456"},{"a":"23", "b": "56"}]', '$[0].a'); +``` + +```sql ++------------------------------------------------------------------------------+ +| get_json_object('[{"a":"123", "b": "456"},{"a":"23", "b": "56"}]', '$[0].a') | ++------------------------------------------------------------------------------+ +| 123 | ++------------------------------------------------------------------------------+ +``` + +### Example 3: 获取键为 `my.key` 的数组中的第二个元素。 + +```sql +SELECT get_json_string('{"k1":"v1", "my.key":["e1", "e2", "e3"]}', '$."my.key"[1]'); +``` + +```sql ++------------------------------------------------------------------------------+ +| get_json_string('{"k1":"v1", "my.key":["e1", "e2", "e3"]}', '$."my.key"[1]') | ++------------------------------------------------------------------------------+ +| e2 | ++------------------------------------------------------------------------------+ +``` + +### Example 4: 获取路径为 `k1.key -> k2` 的数组中的第一个元素。 + +```sql +SELECT get_json_string('{"k1.key":{"k2":["v1", "v2"]}}', '$."k1.key".k2[0]'); +``` + +```sql ++-----------------------------------------------------------------------+ +| get_json_string('{"k1.key":{"k2":["v1", "v2"]}}', '$."k1.key".k2[0]') | ++-----------------------------------------------------------------------+ +| v1 | ++-----------------------------------------------------------------------+ +``` + +### Example 5: 从数组中获取所有键为 `k1` 的值。 + +```sql +SELECT get_json_string('[{"k1":"v1"}, {"k2":"v2"}, {"k1":"v3"}, {"k1":"v4"}]', '$.[*].k1'); +``` + +```sql ++-------------------------------------------------------------------------------------+ +| get_json_string('[{"k1":"v1"}, {"k2":"v2"}, {"k1":"v3"}, {"k1":"v4"}]', '$.[*].k1') | ++-------------------------------------------------------------------------------------+ +| ["v1", "v3", "v4"] | ++-------------------------------------------------------------------------------------+ +1 row in set (0.01 sec) +``` + +## keyword + +GET_JSON_STRING,GET,JSON,STRING \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_contains.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_contains.md new file mode 100644 index 0000000..12d53c4 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_contains.md @@ -0,0 +1,88 @@ +--- +displayed_sidebar: docs +--- + +# json_contains + +用于检查 JSON 文档是否包含特定的值或子文档。如果目标 JSON 文档包含候选 JSON 值,则 JSON_CONTAINS 函数返回 `1`。否则,JSON_CONTAINS 函数返回 `0`。 + +:::tip +所有 JSON 函数和 `Operator` 都列在导航栏和 [概览页面](../overview-of-json-functions-and-operators.md) 上。 + +::: + +## 语法 + +```Haskell +json_contains(json_target, json_candidate) +``` + +## 参数 + +- `json_target`: 表达式,表示目标 JSON 文档。该文档可以是 JSON 列,也可以是由 JSON 构造函数(如 PARSE_JSON)生成的 JSON 对象。 + +- `json_candidate`: 表达式,表示要在目标中搜索的候选 JSON 值或子文档。该值可以是 JSON 列,也可以是由 JSON 构造函数(如 PARSE_JSON)生成的 JSON 对象。 + +## 返回值 + +返回一个 BOOLEAN 值。 + +## 使用说明 + +- 对于标量值(字符串、数字、布尔值、null),如果值相等,则该函数返回 true。 +- 对于 JSON 对象,如果目标对象包含候选对象中的所有键值对,则该函数返回 true。 +- 对于 JSON 数组,如果目标数组包含候选数组中的所有元素,或者候选数组是目标数组中包含的单个值,则该函数返回 true。 +- 该函数对嵌套结构执行深度包含检查。 + +## 示例 + +示例 1:检查 JSON 对象是否包含特定的键值对。 + +```plaintext +mysql> SELECT json_contains(PARSE_JSON('{"a": 1, "b": 2}'), PARSE_JSON('{"a": 1}')); + + -> 1 +``` + +示例 2:检查 JSON 对象是否包含不存在的键值对。 + +```plaintext +mysql> SELECT json_contains(PARSE_JSON('{"a": 1, "b": 2}'), PARSE_JSON('{"c": 3}')); + + -> 0 +``` + +示例 3:检查 JSON 数组是否包含特定元素。 + +```plaintext +mysql> SELECT json_contains(PARSE_JSON('[1, 2, 3, 4]'), PARSE_JSON('[2, 3]')); + + -> 1 +``` + +示例 4:检查 JSON 数组是否包含单个标量值。 + +```plaintext +mysql> SELECT json_contains(PARSE_JSON('[1, 2, 3, 4]'), PARSE_JSON('2')); + + -> 1 +``` + +示例 5:检查 JSON 数组是否包含不存在的元素。 + +```plaintext +mysql> SELECT json_contains(PARSE_JSON('[1, 2, 3, 4]'), PARSE_JSON('[5, 6]')); + + -> 0 +``` + +示例 6:检查包含嵌套 JSON 结构的包含关系。 + +```plaintext +mysql> SELECT json_contains(PARSE_JSON('{"users": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}]}'), + PARSE_JSON('{"users": [{"id": 1}]}')); + + -> 0 +``` + +注意:在最后一个示例中,结果为 0,因为数组包含要求完全元素匹配,而不是数组中部分对象匹配。 \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_each.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_each.md new file mode 100644 index 0000000..bb68871 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_each.md @@ -0,0 +1,55 @@ +--- +displayed_sidebar: docs +--- + +# json_each + +将 JSON 对象的外部元素展开为两列中保存的键值对集合,并返回一个表,该表由每个元素的一行组成。 + +:::tip +所有 JSON 函数和运算符均在导航栏和 [概述页面](../overview-of-json-functions-and-operators.md) 上列出。 + +使用 [generated columns](../../../sql-statements/generated_columns.md) 加速查询 +::: + +## 语法 + +```Haskell +json_each(json_object_expr) +``` + +## 参数 + +`json_object_expr`: 表示 JSON 对象的表达式。 该对象可以是 JSON 列,也可以是由 JSON 构造函数(如 PARSE_JSON)生成的 JSON 对象。 + +## 返回值 + +返回两列:一列名为 key,另一列名为 value。 key 列存储 VARCHAR 值,value 列存储 JSON 值。 + +## 使用说明 + +json_each 函数是一个表函数,返回一个表。 返回的表是由多行组成的结果集。 因此,必须在 FROM 子句中使用 lateral join 将返回的表连接到原始表。 lateral join 是强制性的,但 LATERAL 关键字是可选的。 json_each 函数不能在 SELECT 子句中使用。 + +## 示例 + +```plaintext +-- 一个名为 tj 的表用作示例。 在 tj 表中,j 列是一个 JSON 对象。 +mysql> SELECT * FROM tj; ++------+------------------+ +| id | j | ++------+------------------+ +| 1 | {"a": 1, "b": 2} | +| 3 | {"a": 3} | ++------+------------------+ + +-- 通过键和值将 tj 表的 j 列展开为两列,以获得由多行组成的结果集。 在此示例中,LATERAL 关键字用于将结果集连接到 tj 表。 + +mysql> SELECT * FROM tj, LATERAL json_each(j); ++------+------------------+------+-------+ +| id | j | key | value | ++------+------------------+------+-------+ +| 1 | {"a": 1, "b": 2} | a | 1 | +| 1 | {"a": 1, "b": 2} | b | 2 | +| 3 | {"a": 3} | a | 3 | ++------+------------------+------+-------+ +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_exists.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_exists.md new file mode 100644 index 0000000..b80d219 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_exists.md @@ -0,0 +1,63 @@ +--- +displayed_sidebar: docs +--- + +# json_exists + +检查 JSON 对象是否包含可由 `json_path` 表达式定位的元素。如果元素存在,则 JSON_EXISTS 函数返回 `1`。否则,JSON_EXISTS 函数返回 `0`。 + +:::tip +所有 JSON 函数和 `Operator` 都列在导航栏和 [概览页面](../overview-of-json-functions-and-operators.md) 上。 + +使用 [生成列](../../../sql-statements/generated_columns.md) 加速查询 +::: + +## 语法 + +```Haskell +json_exists(json_object_expr, json_path) +``` + +## 参数 + +- `json_object_expr`: 表示 JSON 对象的表达式。该对象可以是 JSON 列,也可以是由 JSON 构造函数(如 PARSE_JSON)生成的 JSON 对象。 + +- `json_path`: 表示 JSON 对象中元素路径的表达式。此参数的值是一个字符串。有关 StarRocks 支持的 JSON 路径语法的更多信息,请参见 [JSON 函数和 `Operator` 概览](../overview-of-json-functions-and-operators.md)。 + +## 返回值 + +返回一个 BOOLEAN 值。 + +## 示例 + +示例 1:检查指定的 JSON 对象是否包含可由 `'$.a.b'` 表达式定位的元素。在此示例中,该元素存在于 JSON 对象中。因此,json_exists 函数返回 `1`。 + +```plaintext +mysql> SELECT json_exists(PARSE_JSON('{"a": {"b": 1}}'), '$.a.b') ; + + -> 1 +``` + +示例 2:检查指定的 JSON 对象是否包含可由 `'$.a.c'` 表达式定位的元素。在此示例中,该元素不存在于 JSON 对象中。因此,json_exists 函数返回 `0`。 + +```plaintext +mysql> SELECT json_exists(PARSE_JSON('{"a": {"b": 1}}'), '$.a.c') ; + + -> 0 +``` + +示例 3:检查指定的 JSON 对象是否包含可由 `'$.a[2]'` 表达式定位的元素。在此示例中,JSON 对象(一个名为 a 的数组)在索引 2 处包含一个元素。因此,json_exists 函数返回 `1`。 + +```plaintext +mysql> SELECT json_exists(PARSE_JSON('{"a": [1,2,3]}'), '$.a[2]') ; + + -> 1 +``` + +示例 4:检查指定的 JSON 对象是否包含可由 `'$.a[3]'` 表达式定位的元素。在此示例中,JSON 对象(一个名为 a 的数组)在索引 3 处不包含元素。因此,json_exists 函数返回 `0`。 + +```plaintext +mysql> SELECT json_exists(PARSE_JSON('{"a": [1,2,3]}'), '$.a[3]') ; + + -> 0 +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_keys.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_keys.md new file mode 100644 index 0000000..3a01e45 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_keys.md @@ -0,0 +1,103 @@ +--- +displayed_sidebar: docs +--- + +# json_keys + +以 JSON 数组的形式返回 JSON 对象中的顶层键。如果指定了 `path`,则返回该路径中的顶层键。 + +:::tip +所有 JSON 函数和 `Operator` 都列在导航栏和 [概览页面](../overview-of-json-functions-and-operators.md) 上。 + +使用 [generated columns](../../../sql-statements/generated_columns.md) 加速查询 +::: + +## 语法 + +```Haskell +json_keys(json_doc[, path]) +``` + +## 参数 + +`json_doc`: 必需。要返回键的 JSON 文档。它必须是 JSON 对象。 + +`path`: 可选。路径通常以 `$` 开头,并使用 `.` 作为路径分隔符。`[]` 用作数组下标,从 0 开始。 + +## 返回值 + +返回 JSON 数组。 + +如果 JSON 对象为空,则返回一个空数组。 + +如果 JSON 文档不是 JSON 对象,或者路径未标识文档中的值,则返回 `NULL`。 + +如果 JSON 文档是一个嵌套了 JSON 对象的数组,则可以使用 `path` 参数从该对象中获取键。 + +## 示例 + +示例 1:返回一个空数组,因为输入的 JSON 对象为空。 + +```Plain +select json_keys('{}'); ++-----------------+ +| json_keys('{}') | ++-----------------+ +| [] | ++-----------------+ +``` + +示例 2:返回 JSON 对象的键。 + +```Plain +select json_keys('{"a": 1, "b": 2, "c": 3}'); ++----------------+ +| json_keys('1') | ++----------------+ +|["a", "b", "c"] | ++----------------+ +``` + +示例 3:返回与指定路径匹配的 JSON 对象的键。 + +```Plain +select json_keys('{"a": 1, "b": 2, "c": {"d": 3, "e": 4, "f": 5}}', '$.c'); ++---------------------------------------------------------------------+ +| json_keys('{"a": 1, "b": 2, "c": {"d": 3, "e": 4, "f": 5}}', '$.c') | ++---------------------------------------------------------------------+ +| ["d", "e", "f"] | ++---------------------------------------------------------------------+ +``` + +示例 4:路径不存在。 + +```Plain +select json_keys('{"a": 1, "b": 2, "c": {"d": 3, "e": 4, "f": 5}}', '$.e'); ++---------------------------------------------------------------------+ +| json_keys('{"a": 1, "b": 2, "c": {"d": 3, "e": 4, "f": 5}}', '$.e') | ++---------------------------------------------------------------------+ +| NULL | ++---------------------------------------------------------------------+ +``` + +示例 5:JSON 文档不是 JSON 对象。 + +```Plain +select json_keys('[1, 2, {"a": 1, "b": 2}]'); ++---------------------------------------+ +| json_keys('[1, 2, {"a": 1, "b": 2}]') | ++---------------------------------------+ +| NULL | ++---------------------------------------+ +``` + +示例 6:JSON 文档是一个嵌套了 JSON 对象的数组。指定了一个路径以从该对象中获取键。 + +```Plain +select json_keys('[0, 1, {"a": 1, "b": 2}]', '$[2]'); ++-----------------------------------------------+ +| json_keys('[0, 1, {"a": 1, "b": 2}]', '$[2]') | ++-----------------------------------------------+ +| ["a", "b"] | ++-----------------------------------------------+ +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_length.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_length.md new file mode 100644 index 0000000..078d263 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_length.md @@ -0,0 +1,131 @@ +--- +displayed_sidebar: docs +--- + +# json_length + +返回 JSON 文档的长度。如果指定了路径,此函数将返回由该路径标识的值的长度。 + +:::tip +所有 JSON 函数和运算符都列在导航栏和 [概述页面](../overview-of-json-functions-and-operators.md) 上。 + +通过 [generated columns](../../../sql-statements/generated_columns.md) 加速您的查询 +::: + +文档的长度根据以下规则确定: + +- 标量值的长度为 1。例如,`1`、`"a"`、`true`、`false` 和 `null` 的长度均为 1。 + +- 数组的长度是数组元素的数量。例如,`[1, 2]` 的长度为 2。 + +- 对象的长度是对象成员的数量。例如,`{"a": 1}` 的长度为 1。 + +- 不计算嵌套数组或对象的长度。例如,`{"a": [1, 2]}` 的长度为 1,因为嵌套数组 `[1, 2]` 不计入长度。 + +## 语法 + +```Haskell +json_length(json_doc[, path]) +``` + +## 参数 + +`json_doc`: 必需,要返回长度的 JSON 文档。 + +`path`: 可选。用于返回文档中值的长度。路径通常以 `$` 开头,并使用 `.` 作为路径分隔符。`[]` 用作数组下标,从 0 开始。 + +## 返回值 + +返回 INT 类型的值。 + +如果 JSON 文档不是有效文档,则返回错误。 + +在以下任何情况下,都将返回 0: + +- **`path`** 未标识文档中的值。 + +- 该路径不是有效的路径表达式。 + +- 路径包含 `*` 或 `**` 通配符。 + +## 示例 + +示例 1:返回标量值的长度。 + +```Plain +select json_length('1'); ++------------------+ +| json_length('1') | ++------------------+ +| 1 | ++------------------+ +``` + +示例 2:返回空对象的长度。 + +```Plain +select json_length('{}'); ++-------------------+ +| json_length('{}') | ++-------------------+ +| 0 | ++-------------------+ +``` + +示例 3:返回包含数据的对象的长度。 + +```Plain +select json_length('{"Name": "Homer"}'); ++----------------------------------+ +| json_length('{"Name": "Homer"}') | ++----------------------------------+ +| 1 | ++----------------------------------+ +``` + +示例 4:返回 JSON 数组的长度。 + +```plain text +select json_length('[1, 2, 3]'); ++--------------------------+ +| json_length('[1, 2, 3]') | ++--------------------------+ +| 3 | ++--------------------------+ +``` + +示例 5:返回 JSON 数组的长度,其中一个元素具有嵌套数组。 + +嵌套数组 `[3, 4]` 不计入长度。 + +```plain text +select json_length('[1, 2, [3, 4]]'); ++-------------------------------+ +| json_length('[1, 2, [3, 4]]') | ++-------------------------------+ +| 3 | ++-------------------------------+ +``` + +示例 6:返回由路径 `$.Person` 指定的对象的长度。 + +```SQL +SET @file = '{ + "Person": { + "Name": "Homer", + "Age": 39, + "Hobbies": ["Eating", "Sleeping"] + } + }'; +select json_length(@file, '$.Person') 'Result'; +``` + +示例 7:返回由路径 `$.y` 指定的值的长度。 + +```plain text +select json_length('{"x": 1, "y": [1, 2]}', '$.y'); ++---------------------------------------------+ +| json_length('{"x": 1, "y": [1, 2]}', '$.y') | ++---------------------------------------------+ +| 2 | ++---------------------------------------------+ \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_pretty.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_pretty.md new file mode 100644 index 0000000..05d9c02 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_pretty.md @@ -0,0 +1,85 @@ +--- +displayed_sidebar: docs +--- + +# json_pretty + +将 JSON 文档格式化为易于阅读的缩进字符串格式。此函数对于调试或以人类可读的结构显示 JSON 数据非常有用。 + +:::tip +所有 JSON 函数和运算符均在导航栏和概述页面中列出。 +::: + +## 语法 + +```SQL +json_pretty(json_object_expr) +``` + +## 参数 +- `json_object_expr`: 表示 JSON 对象的表达式。该对象可以是 JSON 列、包含有效 JSON 的字符串或由 JSON 构造函数(如 PARSE_JSON)生成的 JSON 对象。 + +## 返回值 +以字符串形式返回格式化的 JSON 文档。 + +> - 如果参数为 NULL,则返回 NULL。 +> - 返回的字符串包括用于缩进的换行符和空格。 +> - 对象键在输出中按字母顺序排序。 + +## 示例 + +示例 1:格式化一个简单的 JSON 对象。 + +```Plaintext +mysql> SELECT json_pretty('{"b": 2, "a": 1}'); + -> { + "a": 1, + "b": 2 + } +``` + +示例 2:格式化一个 JSON 数组。 + +```Plaintext +mysql> SELECT json_pretty('[1, 2, 3]'); + -> [ + 1, + 2, + 3 + ] +``` + +示例 3:格式化一个嵌套的 JSON 结构。 + +```Plaintext +mysql> SELECT json_pretty('{"level1": {"level2": {"level3": "value"}}}'); + -> { + "level1": { + "level2": { + "level3": "value" + } + } + } +``` + +示例 4:与包含 JSON 数据的表列一起使用。 + +```Plaintext + +mysql> CREATE TABLE json_test (id INT, data JSON); +mysql> INSERT INTO json_test VALUES (1, parse_json('{"name": "Alice", "details": {"age": 25, "city": "NYC"}}')); +mysql> SELECT json_pretty(data) FROM json_test; + -> { + "details": { + "age": 25, + "city": "NYC" + }, + "name": "Alice" + } +``` + +## 使用说明 +- **缩进:** 该函数添加标准缩进(空格)和换行符,以使 JSON 结构可视化。 +- **键排序:** JSON 对象键在输出字符串中按字母顺序排序。这是底层 JSON 处理库 (VelocyPack) 的标准行为。 +- **NULL 处理:** 如果输入为 SQL NULL,则该函数返回 NULL。 +- **数据类型:** 它支持格式化标准 JSON 类型,包括对象、数组、字符串、数字、布尔值和 Null。 \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_query.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_query.md new file mode 100644 index 0000000..5d1920c --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_query.md @@ -0,0 +1,65 @@ +--- +displayed_sidebar: docs +--- + +# json_query + +查询 JSON 对象中可以通过 `json_path` 表达式定位的元素的值,并返回 JSON 值。 + +:::tip +所有 JSON 函数和运算符均在导航栏和 [概览页面](../overview-of-json-functions-and-operators.md) 上列出。 + +使用 [生成列](../../../sql-statements/generated_columns.md) 加速查询 +::: + +## 语法 + +```Haskell +json_query(json_object_expr, json_path) +``` + +## 参数 + +- `json_object_expr`: 表示 JSON 对象的表达式。该对象可以是 JSON 列,也可以是由 JSON 构造函数(如 PARSE_JSON)生成的 JSON 对象。 + +- `json_path`: 表示 JSON 对象中元素路径的表达式。此参数的值是一个字符串。有关 StarRocks 支持的 JSON 路径语法的信息,请参见 [JSON 函数和运算符概述](../overview-of-json-functions-and-operators.md)。 + +## 返回值 + +返回一个 JSON 值。 + +> 如果该元素不存在,则 json_query 函数返回一个 SQL 值 `NULL`。 + +## 示例 + +示例 1:查询指定 JSON 对象中可以通过 `'$.a.b'` 表达式定位的元素的值。在此示例中,json_query 函数返回 JSON 值 `1`。 + +```plaintext +mysql> SELECT json_query(PARSE_JSON('{"a": {"b": 1}}'), '$.a.b') ; + + -> 1 +``` + +示例 2:查询指定 JSON 对象中可以通过 `'$.a.c'` 表达式定位的元素的值。在此示例中,该元素不存在。因此,json_query 函数返回 SQL 值 `NULL`。 + +```plaintext +mysql> SELECT json_query(PARSE_JSON('{"a": {"b": 1}}'), '$.a.c') ; + + -> NULL +``` + +示例 3:查询指定 JSON 对象中可以通过 `'$.a[2]'` 表达式定位的元素的值。在此示例中,JSON 对象(一个名为 a 的数组)包含索引 2 处的元素,并且该元素的值为 3。因此,JSON_QUERY 函数返回 JSON 值 `3`。 + +```plaintext +mysql> SELECT json_query(PARSE_JSON('{"a": [1,2,3]}'), '$.a[2]') ; + + -> 3 +``` + +示例 4:查询指定 JSON 对象中可以通过 `'$.a[3]'` 表达式定位的元素。在此示例中,JSON 对象(一个名为 a 的数组)不包含索引 3 处的元素。因此,json_query 函数返回 SQL 值 `NULL`。 + +```plaintext +mysql> SELECT json_query(PARSE_JSON('{"a": [1,2,3]}'), '$.a[3]') ; + + -> NULL +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_remove.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_remove.md new file mode 100644 index 0000000..9b3b9a1 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_remove.md @@ -0,0 +1,88 @@ +--- +displayed_sidebar: docs +--- + +# json_remove + +从 JSON 文档中删除一个或多个指定 JSON 路径的数据,并返回修改后的 JSON 文档。 + +:::tip +所有 JSON 函数和运算符均在导航栏和 [概述页面](../overview-of-json-functions-and-operators.md) 上列出。 +::: + +## 语法 + +```Haskell +json_remove(json_object_expr, json_path[, json_path] ...) +``` + +## 参数 + +- `json_object_expr`: 表示 JSON 对象的表达式。该对象可以是 JSON 列,也可以是由 JSON 构造函数(如 PARSE_JSON)生成的 JSON 对象。 + +- `json_path`: 一个或多个表达式,表示要删除的 JSON 对象中元素的路径。每个参数的值都是一个字符串。有关 StarRocks 支持的 JSON 路径语法的信息,请参见 [JSON 函数和运算符概述](../overview-of-json-functions-and-operators.md)。 + +## 返回值 + +返回删除了指定路径的 JSON 文档。 + +> - 如果路径在 JSON 文档中不存在,则忽略该路径。 +> - 如果提供了无效路径,则忽略该路径。 +> - 如果所有路径都无效或不存在,则返回原始 JSON 文档,不作任何更改。 + +## 示例 + +示例 1:从 JSON 对象中删除单个键。 + +```plaintext +mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30]}', '$.a'); + + -> {"b": [10, 20, 30]} +``` + +示例 2:从 JSON 对象中删除多个键。 + +```plaintext +mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30], "c": "test"}', '$.a', '$.c'); + + -> {"b": [10, 20, 30]} +``` + +示例 3:从 JSON 对象中删除数组元素。 + +```plaintext +mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30]}', '$.b[1]'); + + -> {"a": 1, "b": [10, 30]} +``` + +示例 4:删除嵌套对象属性。 + +```plaintext +mysql> SELECT json_remove('{"a": {"x": 1, "y": 2}, "b": 3}', '$.a.x'); + + -> {"a": {"y": 2}, "b": 3} +``` + +示例 5:尝试删除不存在的路径(已忽略)。 + +```plaintext +mysql> SELECT json_remove('{"a": 1, "b": 2}', '$.c', '$.d'); + + -> {"a": 1, "b": 2} +``` + +示例 6:删除包括不存在路径的多个路径。 + +```plaintext +mysql> SELECT json_remove('{"a": 1, "b": 2, "c": 3}', '$.a', '$.nonexistent', '$.c'); + + -> {"b": 2} +``` + +## 使用须知 + +- `json_remove` 函数遵循 MySQL 兼容的行为。 +- 无效的 JSON 路径将被静默忽略,而不会导致错误。 +- 该函数支持在单个操作中删除多个路径,这比多个单独的操作更有效。 +- 目前,该函数支持简单的对象键删除(例如,`$.key`)。当前实现中对复杂嵌套路径和数组元素删除的支持可能有限。 \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_set.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_set.md new file mode 100644 index 0000000..6692afe --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_set.md @@ -0,0 +1,95 @@ +--- +displayed_sidebar: docs +--- + +# json_set + +在指定的 JSON 路径中插入或更新 JSON 文档中的数据,并返回修改后的 JSON 文档。 + +:::tip +所有 JSON 函数和运算符均在导航和概述页面中列出。 +::: + +## 语法 + +```SQL +json_set(json_object_expr, json_path, value[, json_path, value] ...) +``` + +## 参数 +- `json_object_expr`: 表示 JSON 对象的表达式。该对象可以是 JSON 列,也可以是由 JSON 构造函数(如 **PARSE_JSON**)生成的 JSON 对象。 + +- `json_path`: 要插入或更新的 JSON 对象中元素的路径。该值必须是字符串。有关 StarRocks 支持的 JSON 路径语法的信息,请参阅 JSON 函数和运算符概述。 + +- `value`: 要在指定路径中插入或更新的值。它可以是字符串、数字、布尔值、null 或 JSON 对象。 + +## 返回值 +返回修改后的 JSON 文档。 + +> - 如果任何参数为 *NULL*,则返回 *NULL*。 +> - 如果路径存在于 JSON 文档中,则现有值将被更新(替换)。 +> - 如果路径不存在,则会插入新值 (Upsert 行为)。 +> - 参数从左到右进行评估。第一个路径-值对的结果将成为第二个对的输入。 + +## 示例 + +示例 1:将新键插入 JSON 对象。 + +```Plaintext +mysql> SELECT json_set('{"a": 1}', '$.b', 2); + -> {"a": 1, "b": 2} +``` + +示例 2:更新 JSON 对象中的现有键。 + +```Plaintext +mysql> SELECT json_set('{"a": 1}', '$.a', 10); + -> {"a": 10} +``` + +示例 3:执行多个操作(更新一个现有键,插入一个新键)。 + +```Plaintext +mysql> SELECT json_set('{"a": 1, "b": 2}', '$.a', 10, '$.c', 3); + -> {"a": 10, "b": 2, "c": 3} +``` + +示例 4:更新嵌套 JSON 对象中的值。 +```Plaintext + +mysql> SELECT json_set('{"a": {"x": 1, "y": 2}}', '$.a.x', 100); + -> {"a": {"x": 100, "y": 2}} +``` + +示例 5:按索引更新数组中的元素。 + +```Plaintext +mysql> SELECT json_set('{"arr": [10, 20, 30]}', '$.arr[1]', 99); + -> {"arr": [10, 99, 30]} +``` + +示例 6:追加到数组(使用大于数组长度的索引)。 + +```Plaintext +mysql> SELECT json_set('{"arr": [10, 20]}', '$.arr[5]', 30); + -> {"arr": [10, 20, 30]} +``` + +示例 7:插入不同的数据类型(布尔值和 JSON Null)。 +要插入 JSON `null` 值,请使用 `parse_json('null')`。传递原始 SQL `NULL` 将为整个结果返回 `NULL`。 + +```plaintext +mysql> SELECT json_set('{"a": 1}', '$.b', true, '$.c', parse_json('null')); + -> {"a": 1, "b": true, "c": null} +``` + +## 使用说明 + +- `json_set` 函数遵循 MySQL 兼容的行为。 +- 它作为 **Upsert**(更新或插入)运行: + - **INSERT:** 如果路径不存在,则将值添加到文档中。 + - **UPDATE:** 如果路径已存在,则旧值将替换为新值。 +- 如果您专门想要*仅*插入(不更新现有值),请使用 `json_insert`。 +- 如果您专门想要*仅*更新(不插入新值),请使用 `json_replace`。 +- **Null 处理:** 要插入 JSON null 值,请使用 parse_json('null')。将原始 SQL NULL 作为参数传递将导致该函数返回 NULL。 +- **注意:** 目前 `json_path` 中不支持通配符(例如 `*` 或 `**`)和数组切片(例如 `[1:3]`)进行修改。如果路径包含这些,则将忽略该路径的更新以确保安全。 \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_string.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_string.md new file mode 100644 index 0000000..d0860a7 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/json_string.md @@ -0,0 +1,51 @@ +--- +displayed_sidebar: docs +--- + +# json_string + +将 JSON 对象转换为 JSON 字符串。 + +:::tip +所有 JSON 函数和算子均在导航栏和 [概览页面](../overview-of-json-functions-and-operators.md) 中列出。 + +使用 [生成列](../../../sql-statements/generated_columns.md) 加速查询。 +::: + +## 语法 + +```SQL +json_string(json_object_expr) +``` + +## 参数 + +- `json_object_expr`: 代表 JSON 对象的表达式。对象可以是 JSON 列,也可以是由 JSON 构造函数(如 PARSE_JSON)生成的 JSON 对象。 + +## 返回值 + +返回 VARCHAR 类型的值。 + +## 示例 + +示例 1:将 JSON 对象转换为 JSON 字符串。 + +```Plain +select json_string('{"Name": "Alice"}'); ++----------------------------------+ +| json_string('{"Name": "Alice"}') | ++----------------------------------+ +| {"Name": "Alice"} | ++----------------------------------+ +``` + +示例 2:将 PARSE_JSON 的结果转换为 JSON 字符串。 + +```Plain +select json_string(parse_json('{"Name": "Alice"}')); ++----------------------------------+ +| json_string('{"Name": "Alice"}') | ++----------------------------------+ +| {"Name": "Alice"} | ++----------------------------------+ +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/to_json.md b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/to_json.md new file mode 100644 index 0000000..dd09ecc --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/json-query-and-processing-functions/to_json.md @@ -0,0 +1,70 @@ +--- +displayed_sidebar: docs +--- + +# to_json + +将 Map 或 Struct 类型的值转换为 JSON 字符串。如果输入值为 NULL,则返回 NULL。 + +:::tip +所有 JSON 函数和运算符均在导航栏和 [概览页面](../overview-of-json-functions-and-operators.md) 上列出。 + +使用 [生成列](../../../sql-statements/generated_columns.md) 加速查询 +::: + +如果要转换其他数据类型的值,请参阅 [cast](./cast.md)。 + +该函数从 v3.1 版本开始支持。 + +## 语法 + +```Haskell +to_json(any_value) +``` + +## 参数 + +`any_value`: 要转换的 Map 或 Struct 表达式。如果输入值无效,则返回错误。Map 或 Struct 类型值的每个键值对中的值都可以为 NULL。请参见最后一个示例。 + +## 返回值 + +返回 JSON 值。 + +## 示例 + +```Haskell +select to_json(map{1:'a',2:'b'}); ++---------------------------+ +| to_json(map{1:'a',2:'b'}) | ++---------------------------+ +| {"1": "a", "2": "b"} | ++---------------------------+ + +select to_json(row('asia','eu')); ++--------------------------------+ +| to_json(row('asia', 'eu')) | ++--------------------------------+ +| {"col1": "asia", "col2": "eu"} | ++--------------------------------+ + +select to_json(map('a', named_struct('b', 1))); ++----------------------------------------+ +| to_json(map{'a':named_struct('b', 1)}) | ++----------------------------------------+ +| {"a": {"b": 1}} | ++----------------------------------------+ + +select to_json(named_struct("k1", cast(null as string), "k2", "v2")); ++-----------------------------------------------------------------------+ +| to_json(named_struct('k1', CAST(NULL AS VARCHAR(65533)), 'k2', 'v2')) | ++-----------------------------------------------------------------------+ +| {"k1": null, "k2": "v2"} | ++-----------------------------------------------------------------------+ +``` + +## 参见 + +- [Map 数据类型](../../../data-types/semi_structured/Map.md) +- [Struct 数据类型](../../../data-types/semi_structured/STRUCT.md) +- [Map 函数](../../README.md#map-functions) +- [Struct 函数](../../README.md#struct-functions) \ No newline at end of file diff --git a/docs/zh/sql-reference/sql-functions/json-functions/overview-of-json-functions-and-operators.md b/docs/zh/sql-reference/sql-functions/json-functions/overview-of-json-functions-and-operators.md new file mode 100644 index 0000000..25872b9 --- /dev/null +++ b/docs/zh/sql-reference/sql-functions/json-functions/overview-of-json-functions-and-operators.md @@ -0,0 +1,69 @@ +--- +displayed_sidebar: docs +--- + +# JSON 函数和运算符概述 + +本文档概述了 StarRocks 支持的 JSON 构造函数、查询函数、处理函数、运算符和路径表达式。 + +:::tip +使用 [生成列](../../sql-statements/generated_columns.md) 加速查询 +::: + +## JSON 构造函数 + +JSON 构造函数用于构造 JSON 数据,例如 JSON 对象和 JSON 数组。 + +| 函数 | 描述 | 示例 | 返回值 | +| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ---------------------------------------- | +| [json_object](./json-constructor-functions/json_object.md) | 将一个或多个键值对转换为 JSON 对象,该对象由键值对组成,这些键值对按字典顺序按键排序。 | `SELECT JSON_OBJECT('Daniel Smith', 26, 'Lily Smith', 25);` | `{"Daniel Smith": 26, "Lily Smith": 25}` | +| [json_array](./json-constructor-functions/json_array.md) | 将 SQL 数组的每个元素转换为 JSON 值,并返回一个由这些 JSON 值组成的 JSON 数组。 | `SELECT JSON_ARRAY(1, 2, 3);` | `[1,2,3]` | +| [parse_json](./json-constructor-functions/parse_json.md) | 将字符串转换为 JSON 值。 | `SELECT PARSE_JSON('{"a": 1}');` | `{"a": 1}` | + +## JSON 查询函数和处理函数 + +JSON 查询函数和处理函数用于查询和处理 JSON 数据。 例如,您可以使用路径表达式来定位 JSON 对象中的元素。 + +| 函数 | 描述 | 示例 | 返回值 | +| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | +| [arrow function](./json-query-and-processing-functions/arrow-function.md) | 查询可以通过 JSON 对象中的路径表达式定位的元素。 | `SELECT parse_json('{"a": {"b": 1}}') -> '$.a.b';` | `1` | +| [cast](./json-query-and-processing-functions/cast.md) | 在 JSON 数据类型和 SQL 数据类型之间转换数据。 | `SELECT CAST(1 AS JSON);` | `1` | +| [get_json_double](./json-query-and-processing-functions/get_json_double.md) | 从 JSON 字符串中的指定路径分析并获取浮点数值。 | `SELECT get_json_double('{"k1":1.3, "k2":"2"}', "$.k1");` | `1.3` | +| [get_json_int](./json-query-and-processing-functions/get_json_int.md) | 从 JSON 字符串中的指定路径分析并获取整数值。 | `SELECT get_json_int('{"k1":1, "k2":"2"}', "$.k1");` | `1` | +| [get_json_string](./json-query-and-processing-functions/get_json_string.md) | 从 JSON 字符串中的指定路径分析并获取字符串。 | `SELECT get_json_string('{"k1":"v1", "k2":"v2"}', "$.k1");` | `v1` | +| [json_query](./json-query-and-processing-functions/json_query.md) | 查询可以通过 JSON 对象中的路径表达式定位的元素的值。 | `SELECT JSON_QUERY('{"a": 1}', '$.a');` | `1` | +| [json_remove](./json-query-and-processing-functions/json_remove.md) | 从一个或多个指定的 JSON 路径从 JSON 文档中删除数据。 | `SELECT JSON_REMOVE('{"a": 1, "b": [10, 20, 30]}', '$.a', '$.b[1]');` | `{"b": [10, 30]}` | +| [json_each](./json-query-and-processing-functions/json_each.md) | 将 JSON 对象的顶层元素展开为键值对。 | `SELECT * FROM tj_test, LATERAL JSON_EACH(j);` | `!`[json_each](../../../_assets/json_each.png) | +| [json_exists](./json-query-and-processing-functions/json_exists.md) | 检查 JSON 对象是否包含可以通过路径表达式定位的元素。 如果元素存在,则此函数返回 1。 如果元素不存在,则该函数返回 0。 | `SELECT JSON_EXISTS('{"a": 1}', '$.a'); ` | `1` | +| [json_keys](./json-query-and-processing-functions/json_keys.md) | 从 JSON 对象返回顶层键作为 JSON 数组,或者,如果指定了路径,则从该路径返回顶层键。 | `SELECT JSON_KEYS('{"a": 1, "b": 2, "c": 3}');` | `["a", "b", "c"]`| +| [json_length](./json-query-and-processing-functions/json_length.md) | 返回 JSON 文档的长度。 | `SELECT json_length('{"Name": "Alice"}');` | `1` | +| [json_string](./json-query-and-processing-functions/json_string.md) | 将 JSON 对象转换为 JSON 字符串 | `SELECT json_string(parse_json('{"Name": "Alice"}'));` | `{"Name": "Alice"}` | + +## JSON 运算符 + +StarRocks 支持以下 JSON 比较运算符:`<`、`<=`、`>`、`>=`、`=` 和 `!=`。 您可以使用这些运算符来查询 JSON 数据。 但是,它不允许您使用 `IN` 来查询 JSON 数据。 有关 JSON 运算符的更多信息,请参见 [JSON 运算符](./json-operators.md)。 + +## JSON 路径表达式 + +您可以使用 JSON 路径表达式来查询 JSON 对象中的元素。 JSON 路径表达式是 STRING 数据类型。 在大多数情况下,它们与各种 JSON 函数(例如 JSON_QUERY)一起使用。 在 StarRocks 中,JSON 路径表达式并不完全符合 [SQL/JSON 路径规范](https://modern-sql.com/blog/2017-06/whats-new-in-sql-2016#json-path)。 有关 StarRocks 中支持的 JSON 路径语法的信息,请参见下表,其中以下 JSON 对象用作示例。 + +```JSON +{ + "people": [{ + "name": "Daniel", + "surname": "Smith" + }, { + "name": "Lily", + "surname": "Smith", + "active": true + }] +} +``` + +| JSON 路径符号 | 描述 | JSON 路径示例 | 返回值 | +| ---------------- | ------------------------------------------------------------ | --------------------- | ------------------------------------------------------------ | +| `$` | 表示根 JSON 对象。 | `'$'` | `{ "people": [ { "name": "Daniel", "surname": "Smith" }, { "name": "Lily", "surname": "Smith", "active": true } ] }`| +|`.` | 表示子 JSON 对象。 |`' $.people'` |`[ { "name": "Daniel", "surname": "Smith" }, { "name": "Lily", "surname": "Smith", "active": true } ]`| +|`[]` | 表示一个或多个数组索引。 `[n]` 表示数组中的第 n 个元素。 索引从 0 开始。
StarRocks 2.5 支持查询多维数组,例如 `["Lucy", "Daniel"], ["James", "Smith"]`。 要查询“Lucy”元素,可以使用 `$.people[0][0]`。| `'$.people [0]'` | `{ "name": "Daniel", "surname": "Smith" }` | +| `[*]` | 表示数组中的所有元素。 | `'$.people[*].name'` | `["Daniel", "Lily"]` | +| `[start: end]` | 表示数组中元素的子集。 子集由 `[start, end]` 间隔指定,该间隔不包括由结束索引表示的元素。 | `'$.people[0: 1].name'` | `["Daniel"]` | \ No newline at end of file From ad51f3f5d6dee7190c991df5f1a963dca14298f4 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 23 Jan 2026 11:17:14 -0500 Subject: [PATCH 5/6] arch Signed-off-by: DanRoscigno --- docs/en/introduction/Architecture.md | 83 ++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 docs/en/introduction/Architecture.md diff --git a/docs/en/introduction/Architecture.md b/docs/en/introduction/Architecture.md new file mode 100644 index 0000000..bc97c9f --- /dev/null +++ b/docs/en/introduction/Architecture.md @@ -0,0 +1,83 @@ +--- +displayed_sidebar: docs +--- +import QSOverview from '../_assets/commonMarkdown/quickstart-overview-tip.mdx' + +# Architecture + +StarRocks has a simple architecture. The entire system consists of only two types of components; frontends and backends. The frontend nodes are called **FE**s. There are two types of backend nodes, **BE**s, and **CN**s (Compute Nodes). BEs are deployed when local storage for data is used, and CNs are deployed when data is stored on object storage or HDFS. StarRocks does not rely on any external components, simplifying deployment and maintenance. Nodes can be horizontally scaled without service downtime. In addition, StarRocks has a replica mechanism for metadata and service data, which increases data reliability and efficiently prevents single points of failure (SPOFs). + +StarRocks is compatible with MySQL protocols and supports standard SQL. Users can easily connect to StarRocks from MySQL clients to gain instant and valuable insights. + +## Architecture choices + +StarRocks supports shared-nothing (Each BE has a portion of the data on its local storage) and shared-data (all data on object storage or HDFS and each CN has only cache on local storage). You can decide where the data is stored based on your needs. + +![Architecture choices](../_assets/architecture_choices.png) + +### Shared-nothing + +Local storage provides improved query latency for real-time queries. + +As a typical massively parallel processing (MPP) database StarRocks supports the shared-nothing architecture. In this architecture, BEs are responsible for both data storage and computation. Direct access to local data on the BE mode allows for local computation, avoiding data transfer and data copying, and providing ultra-fast query and analytics performance. This architecture supports multi-replica data storage, enhancing the cluster's ability to handle high-concurrency queries and ensuring data reliability. It is well-suited for scenarios that pursue optimal query performance. + +![shared-data-arch](../_assets/shared-nothing.png) + +#### Nodes + +In the shared-nothing architecture, StarRocks consists of two types of nodes: FEs and BEs. + +- FEs are responsible for metadata management and constructing execution plans. +- BEs execute query plans and store data. BEs utilize local storage to accelerate queries and the multi-replica mechanism to ensure high data availability. + +##### FE + +FEs are responsible for metadata management, client connection management, query planning, and query scheduling. Each FE uses BDB JE (Berkeley DB Java Edition) to store and maintain a complete copy of the metadata in its memory, ensuring consistent services across all FEs. FEs can work as the leader, followers, and observers. If the leader node crashes, with followers electing a leader based on the Raft protocol. + +| **FE Role** | **Metadata management** | **Leader election** | +| ----------- |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ---------------------------------- | +| Leader | The leader FE reads and writes metadata. Follower and observer FEs can only read metadata. They route metadata write requests to the leader FE. The leader FE updates the metadata and then uses the Raft protocol to synchronize the metadata changes to the follower and observer FEs. Data writes are considered successful only after the metadata changes are synchronized to more than half of the follower FEs. | The leader FE, technically speaking, is also a follower node and is elected from follower FEs. To perform leader election, more than half of the follower FEs in the cluster must be active. When the leader FE fails, follower FEs will start another round of leader election. | +| Follower | Followers can only read metadata. They synchronize and replay logs from the leader FE to update metadata. | Followers participate in leader election, which requires more than half of the followers in the cluster be active. | +| Observer | Observers synchronize and replay logs from the leader FE to update metadata. | Observers are mainly used to increase the query concurrency of the cluster. Observers do not participate in leader election and therefore, will not add leader selection pressure to the cluster.| + +##### BE + +BEs are responsible for data storage and SQL execution. + +- Data storage: BEs have equivalent data storage capabilities. FEs distribute data to BEs based on predefined rules. BEs transform the ingested data, write the data into the required format, and generate indexes for the data. + +- SQL execution: FEs parse each SQL query into a logical execution plan according to the semantics of the query, and then transform the logical plan into physical execution plans that can be executed on BEs. BEs that store the destination data execute the query. This eliminates the need for data transmission and copy, achieving high query performance. + +### Shared-data + +Object storage and HDFS provide cost, reliability, and scalability benefits. In addition to the scalability of storage, CN nodes can be added and removed without the need to rebalance data since storage and compute are separate. + +In the shared-data architecture, BEs are replaced with "compute nodes (CNs)" which are responsible only for data compute tasks and caching hot data. Data is stored in low-cost and reliable remote storage systems such as Amazon S3, Google Cloud Storage, Azure Blob Storage, MinIO, etc. When the cache is hit, query performance is comparable to that of the shared-nothing architecture. CN nodes can be added or removed on demand within seconds. This architecture reduces storage cost, ensures better resource isolation, and high elasticity and scalability. + +The shared-data architecture maintains as simple an architecture as its shared-nothing counterpart. It consists of only two types of nodes: FE and CN. The only difference is users have to provision backend object storage. + +![shared-data-arch](../_assets/shared-data.png) + +#### Nodes + +FEs in the shared-data architecture provide the same functions as in the shared-nothing architecture. + +BEs are replaced with CNs (Compute Nodes), and the storage function is offloaded to object storage or HDFS. CNs are stateless compute nodes that perform all the functions of BEs, except for the storage of data. + +#### Storage + +StarRocks shared-data clusters support two storage solutions: object storage (for example, AWS S3, Google GCS, Azure Blob Storage, or MinIO) and HDFS. + +In a shared-data cluster, the data file format remains consistent with that of a shared-nothing cluster (featuring coupled storage and compute). Data is organized into segment files, and various indexing technologies are reused in cloud-native tables, which are tables used specifically in shared-data clusters. + +#### Cache + +StarRocks shared-data clusters decouple data storage and computation, allowing each to scale independently, thereby reducing costs and enhancing elasticity. However, this architecture can affect query performance. + +To mitigate the impact, StarRocks establishes a multi-tiered data access system encompassing memory, local disk, and remote storage to better meet various business needs. + +Queries against hot data scan the cache directly and then the local disk, while cold data needs to be loaded from the object storage into the local cache to accelerate subsequent queries. By keeping hot data close to compute units, StarRocks achieves truly high-performance computation and cost-effective storage. Moreover, access to cold data has been optimized with data prefetch strategies, effectively eliminating performance limits for queries. + +Caching can be enabled when creating tables. If caching is enabled, data will be written to both the local disk and backend object storage. During queries, the CN nodes first read data from the local disk. If the data is not found, it will be retrieved from the backend object storage and simultaneously cached on the local disk. + + From 4a0b11e939a3726cc6f8c8956cb4b3f0919ab6d3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 23 Jan 2026 16:19:12 +0000 Subject: [PATCH 6/6] docs: automated translation via Gemini [skip ci] --- docs/ja/introduction/Architecture.md | 83 ++++++++++++++++++++++++++++ docs/zh/introduction/Architecture.md | 83 ++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 docs/ja/introduction/Architecture.md create mode 100644 docs/zh/introduction/Architecture.md diff --git a/docs/ja/introduction/Architecture.md b/docs/ja/introduction/Architecture.md new file mode 100644 index 0000000..22f00ca --- /dev/null +++ b/docs/ja/introduction/Architecture.md @@ -0,0 +1,83 @@ +--- +displayed_sidebar: docs +--- +import QSOverview from '../_assets/commonMarkdown/quickstart-overview-tip.mdx' + +# アーキテクチャ + +StarRocks は、シンプルなアーキテクチャを採用しています。システム全体は、フロントエンドとバックエンドの 2 種類のコンポーネントのみで構成されています。フロントエンドノードは **FE** と呼ばれます。バックエンドノードには、**BE** と **CN** (コンピュートノード) の 2 種類があります。BE はデータのローカルストレージを使用する場合にデプロイされ、CN はデータがオブジェクトストレージまたは HDFS に保存される場合にデプロイされます。StarRocks は外部コンポーネントに依存しないため、デプロイとメンテナンスが簡素化されます。ノードは、サービスを停止することなく水平方向に拡張できます。また、StarRocks にはメタデータとサービスデータのレプリカメカニズムがあり、データの信頼性を高め、単一障害点 (SPOF) を効率的に防止します。 + +StarRocks は MySQL プロトコルと互換性があり、標準 SQL をサポートしています。ユーザーは MySQL クライアントから StarRocks に簡単に接続して、即座に価値のある洞察を得ることができます。 + +## アーキテクチャの選択 + +StarRocks は、共有なし (各 BE はローカルストレージにデータの一部を保持) と 共有データ (オブジェクトストレージまたは HDFS 上のすべてのデータ、および各 CN はローカルストレージにキャッシュのみを保持) をサポートします。ニーズに応じて、データの保存場所を決定できます。 + +![Architecture choices](../_assets/architecture_choices.png) + +### 共有なし + +ローカルストレージは、リアルタイムクエリのクエリレイテンシを改善します。 + +一般的な超並列処理 (MPP) データベースとして、StarRocks は 共有なしアーキテクチャをサポートしています。このアーキテクチャでは、BE はデータストレージと計算の両方を担当します。BE モードでローカルデータに直接アクセスすることで、ローカル計算が可能になり、データ転送とデータコピーを回避し、超高速なクエリと分析パフォーマンスを提供します。このアーキテクチャは、マルチレプリカデータストレージをサポートし、高 い同時実行性クエリを処理するクラスタの能力を高め、データの信頼性を確保します。最適なクエリパフォーマンスを追求するシナリオに適しています。 + +![shared-data-arch](../_assets/shared-nothing.png) + +#### ノード + +共有なしアーキテクチャでは、StarRocks は FE と BE の 2 種類のノードで構成されています。 + +- FE は、メタデータ管理と実行プランの構築を担当します。 +- BE は、クエリプランを実行し、データを保存します。BE はローカルストレージを利用してクエリを高速化し、マルチレプリカメカニズムを利用して高いデータ可用性を確保します。 + +##### FE + +FE は、メタデータ管理、クライアント接続管理、クエリプランニング、およびクエリスケジューリングを担当します。各 FE は BDB JE (Berkeley DB Java Edition) を使用して、メモリ内のメタデータの完全なコピーを保存および維持し、すべての FE で一貫したサービスを保証します。FE は、リーダー、フォロワー、およびオブザーバーとして機能できます。リーダーノードがクラッシュした場合、フォロワーは Raft プロトコルに基づいてリーダーを選出します。 + +| **FE Role** | **Metadata management** | **Leader election** | +| ----------- |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ---------------------------------- | +| Leader | リーダー FE はメタデータを読み書きします。フォロワーおよびオブザーバー FE は、メタデータの読み取りのみ可能です。メタデータの書き込みリクエストをリーダー FE にルーティングします。リーダー FE はメタデータを更新し、Raft プロトコルを使用して、メタデータの変更をフォロワーおよびオブザーバー FE に同期します。データの書き込みは、メタデータの変更がフォロワー FE の半数以上に同期された後にのみ成功したと見なされます。 | 正確に言うと、リーダー FE はフォロワーノードでもあり、フォロワー FE から選出されます。リーダー選出を実行するには、クラスタ内のフォロワー FE の半数以上がアクティブである必要があります。リーダー FE が失敗すると、フォロワー FE は別のリーダー選出ラウンドを開始します。 | +| Follower | フォロワーはメタデータの読み取りのみ可能です。リーダー FE からログを同期および再生して、メタデータを更新します。 | フォロワーはリーダー選出に参加します。これには、クラスタ内のフォロワーの半数以上がアクティブである必要があります。 | +| Observer | オブザーバーはリーダー FE からログを同期および再生して、メタデータを更新します。 | オブザーバーは、主にクラスタのクエリの同時実行性を高めるために使用されます。オブザーバーはリーダー選出に参加しないため、クラスタにリーダー選択のプレッシャーを加えることはありません。| + +##### BE + +BE は、データストレージと SQL 実行を担当します。 + +- データストレージ: BE は同等のデータストレージ機能を備えています。FE は、事前定義されたルールに基づいてデータを BE に分散します。BE は取り込まれたデータを変換し、必要な形式でデータを書き込み、データのインデックスを生成します。 + +- SQL 実行: FE は、クエリの意味に従って各 SQL クエリを論理実行プランに解析し、次にその論理プランを BE で実行できる物理実行プランに変換します。宛先データを保存する BE がクエリを実行します。これにより、データ伝送とコピーの必要がなくなり、高いクエリパフォーマンスが実現します。 + +### 共有データ + +オブジェクトストレージと HDFS は、コスト、信頼性、およびスケーラビリティの利点を提供します。ストレージのスケーラビリティに加えて、ストレージとコンピュートが分離されているため、データをリバランスする必要なく CN ノードを追加および削除できます。 + +共有データアーキテクチャでは、BE は「コンピュートノード (CN)」に置き換えられます。これらはデータコンピュートタスクのみを担当し、ホットデータをキャッシュします。データは、Amazon S3、Google Cloud Storage、Azure Blob Storage、MinIO などの低コストで信頼性の高いリモートストレージシステムに保存されます。キャッシュがヒットすると、クエリパフォーマンスは 共有なしアーキテクチャのパフォーマンスに匹敵します。CN ノードは、必要に応じて数秒以内に追加または削除できます。このアーキテクチャにより、ストレージコストが削減され、より優れたリソース分離、高い弾力性とスケーラビリティが保証されます。 + +共有データアーキテクチャは、共有なしアーキテクチャと同様にシンプルなアーキテクチャを維持します。FE と CN の 2 種類のノードのみで構成されています。唯一の違いは、ユーザーがバックエンドオブジェクトストレージをプロビジョニングする必要があることです。 + +![shared-data-arch](../_assets/shared-data.png) + +#### ノード + +共有データアーキテクチャの FE は、共有なしアーキテクチャと同じ機能を提供します。 + +BE は CN (コンピュートノード) に置き換えられ、ストレージ機能はオブジェクトストレージまたは HDFS にオフロードされます。CN は、データの保存を除く、BE のすべての機能を実行するステートレスコンピュートノードです。 + +#### ストレージ + +StarRocks 共有データクラスタは、オブジェクトストレージ (たとえば、AWS S3、Google GCS、Azure Blob Storage、または MinIO) と HDFS の 2 つのストレージソリューションをサポートしています。 + +共有データクラスタでは、データファイル形式は 共有なしクラスタ (ストレージとコンピュートが結合されている) の形式と一貫しています。データはセグメントファイルに編成され、さまざまなインデックス作成テクノロジーがクラウドネイティブテーブルで再利用されます。これらは、共有データクラスタで特に使用されるテーブルです。 + +#### キャッシュ + +StarRocks 共有データクラスタは、データストレージと計算を分離し、それぞれが独立してスケーリングできるようにすることで、コストを削減し、弾力性を高めます。ただし、このアーキテクチャはクエリパフォーマンスに影響を与える可能性があります。 + +影響を軽減するために、StarRocks は、さまざまなビジネスニーズをより適切に満たすために、メモリ、ローカルディスク、およびリモートストレージを含む多層データアクセスシステムを確立します。 + +ホットデータに対するクエリは、キャッシュを直接スキャンしてからローカルディスクをスキャンしますが、コールドデータは、後続のクエリを高速化するために、オブジェクトストレージからローカルキャッシュにロードする必要があります。ホットデータをコンピュートユニットの近くに保持することで、StarRocks は真に高性能な計算と費用対効果の高いストレージを実現します。さらに、コールドデータへのアクセスはデータプリフェッチ戦略で最適化されており、クエリのパフォーマンス制限を効果的に排除しています。 + +テーブルの作成時にキャッシュを有効にできます。キャッシュを有効にすると、データはローカルディスクとバックエンドオブジェクトストレージの両方に書き込まれます。クエリ中、CN ノードは最初にローカルディスクからデータを読み取ります。データが見つからない場合は、バックエンドオブジェクトストレージから取得され、同時にローカルディスクにキャッシュされます。 + + \ No newline at end of file diff --git a/docs/zh/introduction/Architecture.md b/docs/zh/introduction/Architecture.md new file mode 100644 index 0000000..ffaef32 --- /dev/null +++ b/docs/zh/introduction/Architecture.md @@ -0,0 +1,83 @@ +--- +displayed_sidebar: docs +--- +import QSOverview from '../_assets/commonMarkdown/quickstart-overview-tip.mdx' + +# 架构 + +StarRocks 具有简单的架构。整个系统仅由两种类型的组件组成:前端和后端。前端节点称为 **FE**。后端节点有两种类型,**BE** 和 **CN** (计算节点)。当使用数据的本地存储时,将部署 BE;当数据存储在对象存储或 HDFS 上时,将部署 CN。StarRocks 不依赖任何外部组件,从而简化了部署和维护。节点可以水平扩展,而不会导致服务中断。此外,StarRocks 具有元数据和服务数据的副本机制,从而提高了数据可靠性,并有效地防止了单点故障 (SPOF)。 + +StarRocks 兼容 MySQL 协议,并支持标准 SQL。用户可以轻松地从 MySQL 客户端连接到 StarRocks,以获得即时且有价值的见解。 + +## 架构选择 + +StarRocks 支持 shared-nothing (每个 BE 在其本地存储上都有一部分数据) 和 shared-data (所有数据都在对象存储或 HDFS 上,每个 CN 仅在本地存储上有缓存)。您可以根据自己的需求决定将数据存储在哪里。 + +![Architecture choices](../_assets/architecture_choices.png) + +### Shared-nothing + +本地存储为实时查询提供了更高的查询延迟。 + +作为一种典型的大规模并行处理 (MPP) 数据库,StarRocks 支持 shared-nothing 架构。在此架构中,BE 负责数据存储和计算。直接访问 BE 模式下的本地数据可以进行本地计算,避免了数据传输和数据复制,并提供了超快的查询和分析性能。此架构支持多副本数据存储,从而增强了集群处理高并发查询的能力并确保了数据可靠性。它非常适合追求最佳查询性能的场景。 + +![shared-data-arch](../_assets/shared-nothing.png) + +#### 节点 + +在 shared-nothing 架构中,StarRocks 由两种类型的节点组成:FE 和 BE。 + +- FE 负责元数据管理和构建执行计划。 +- BE 执行查询计划并存储数据。BE 利用本地存储来加速查询,并利用多副本机制来确保高数据可用性。 + +##### FE + +FE 负责元数据管理、客户端连接管理、查询规划和查询调度。每个 FE 使用 BDB JE (Berkeley DB Java Edition) 在其内存中存储和维护元数据的完整副本,从而确保所有 FE 上的一致服务。FE 可以充当 leader、follower 和 observer。如果 leader 节点崩溃,则 follower 基于 Raft 协议选举 leader。 + +| **FE 角色** | **元数据管理** | **Leader 选举** | +| ----------- |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ---------------------------------- | +| Leader | Leader FE 读取和写入元数据。Follower 和 observer FE 只能读取元数据。它们将元数据写入请求路由到 leader FE。Leader FE 更新元数据,然后使用 Raft 协议将元数据更改同步到 follower 和 observer FE。只有在元数据更改同步到超过一半的 follower FE 后,数据写入才被视为成功。 | 从技术上讲,leader FE 也是一个 follower 节点,并且是从 follower FE 中选举出来的。要执行 leader 选举,集群中必须有超过一半的 follower FE 处于活动状态。当 leader FE 发生故障时,follower FE 将启动另一轮 leader 选举。 | +| Follower | Follower 只能读取元数据。它们从 leader FE 同步和重放日志以更新元数据。 | Follower 参与 leader 选举,这要求集群中超过一半的 follower 处于活动状态。 | +| Observer | Observer 从 leader FE 同步和重放日志以更新元数据。 | Observer 主要用于增加集群的查询并发性。Observer 不参与 leader 选举,因此不会给集群增加 leader 选择压力。| + +##### BE + +BE 负责数据存储和 SQL 执行。 + +- 数据存储:BE 具有等效的数据存储能力。FE 根据预定义的规则将数据分发到 BE。BE 转换摄取的数据,将数据写入所需的格式,并为数据生成索引。 + +- SQL 执行:FE 根据查询的语义将每个 SQL 查询解析为逻辑执行计划,然后将逻辑计划转换为可以在 BE 上执行的物理执行计划。存储目标数据的 BE 执行查询。这无需数据传输和复制,从而实现了高查询性能。 + +### Shared-data + +对象存储和 HDFS 提供了成本、可靠性和可扩展性优势。除了存储的可扩展性之外,由于存储和计算是分开的,因此可以添加和删除 CN 节点,而无需重新平衡数据。 + +在 shared-data 架构中,BE 被“计算节点 (CN)”取代,这些节点仅负责数据计算任务和缓存热数据。数据存储在低成本且可靠的远程存储系统中,例如 Amazon S3、Google Cloud Storage、Azure Blob Storage、MinIO 等。当缓存命中时,查询性能与 shared-nothing 架构的查询性能相当。可以根据需要在几秒钟内添加或删除 CN 节点。此架构降低了存储成本,确保了更好的资源隔离以及高弹性和可扩展性。 + +shared-data 架构与其 shared-nothing 架构一样,保持了简单的架构。它仅由两种类型的节点组成:FE 和 CN。唯一的区别是用户必须配置后端对象存储。 + +![shared-data-arch](../_assets/shared-data.png) + +#### 节点 + +shared-data 架构中的 FE 提供与 shared-nothing 架构中相同的功能。 + +BE 被 CN (计算节点) 取代,并且存储功能被卸载到对象存储或 HDFS。CN 是无状态计算节点,可执行 BE 的所有功能,但数据存储除外。 + +#### 存储 + +StarRocks shared-data 集群支持两种存储解决方案:对象存储 (例如,AWS S3、Google GCS、Azure Blob Storage 或 MinIO) 和 HDFS。 + +在 shared-data 集群中,数据文件格式与 shared-nothing 集群 (具有耦合的存储和计算) 的数据文件格式保持一致。数据被组织成 Segment 文件,并且各种索引技术在云原生表中被重用,云原生表是专门在 shared-data 集群中使用的表。 + +#### 缓存 + +StarRocks shared-data 集群将数据存储和计算分离,从而允许每个组件独立扩展,从而降低了成本并提高了弹性。但是,此架构可能会影响查询性能。 + +为了减轻这种影响,StarRocks 建立了一个包含内存、本地磁盘和远程存储的多层数据访问系统,以更好地满足各种业务需求。 + +针对热数据的查询直接扫描缓存,然后扫描本地磁盘,而冷数据需要从对象存储加载到本地缓存中,以加速后续查询。通过使热数据靠近计算单元,StarRocks 实现了真正的高性能计算和经济高效的存储。此外,通过数据预取策略优化了对冷数据的访问,从而有效地消除了查询的性能限制。 + +创建表时可以启用缓存。如果启用了缓存,则数据将被写入本地磁盘和后端对象存储。在查询期间,CN 节点首先从本地磁盘读取数据。如果未找到数据,则将从后端对象存储中检索数据,并同时缓存在本地磁盘上。 + + \ No newline at end of file