From 3a1c40eb4501d378b994d335730a09fbf2f1d8f8 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Thu, 19 Dec 2024 21:33:34 +0200 Subject: [PATCH] Fixes after review --- .../cubesql/src/compile/engine/udf/common.rs | 33 ++++++++++++++----- ..._compile__tests__formatted_identifier.snap | 2 +- ...compile__tests__formatted_identifiers.snap | 10 +++--- ...besql__compile__tests__quoted_keyword.snap | 2 +- .../src/compile/test/test_introspection.rs | 3 ++ 5 files changed, 35 insertions(+), 15 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/engine/udf/common.rs b/rust/cubesql/cubesql/src/compile/engine/udf/common.rs index c6a05b6cd5a81..078b49d00638b 100644 --- a/rust/cubesql/cubesql/src/compile/engine/udf/common.rs +++ b/rust/cubesql/cubesql/src/compile/engine/udf/common.rs @@ -3088,8 +3088,9 @@ pub fn create_cube_regclass_cast_udf() -> ScalarUDF { match PgType::get_all().iter().find(|e| e.typname == as_str) { None => { // If the type name contains a dot, it's a schema-qualified name - // and we should return return the approprate RegClass to be converted to OID + // and we should return the approprate RegClass to be converted to OID // For now, we'll return 0 so metabase can sync without failing + // TODO actually read `pg_type` if as_str.contains('.') { builder.append_value(0)?; } else { @@ -3156,6 +3157,7 @@ pub fn create_pg_get_serial_sequence_udf() -> ScalarUDF { } // Return a NOOP for this so metabase can sync without failing +// See https://www.postgresql.org/docs/17/functions-info.html#FUNCTIONS-INFO-COMMENT here // TODO: Implement this pub fn create_col_description_udf() -> ScalarUDF { let fun = make_scalar_function(move |args: &[ArrayRef]| { @@ -3174,15 +3176,26 @@ pub fn create_col_description_udf() -> ScalarUDF { ScalarUDF::new( "col_description", + // Correct signature for col_description should be `(oid, integer) → text` + // We model oid as UInt32, so [DataType::UInt32, DataType::Int32] is a proper arguments + // However, it seems that coercion rules in DF differs from PostgreSQL at the moment + // And metabase uses col_description(CAST(CAST(... AS regclass) AS oid), cardinal_number) + // And we model regclass as Int64, and cardinal_number as UInt32 + // Which is why second signature is necessary &Signature::one_of( - vec![TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8])], - Volatility::Immutable, + vec![ + TypeSignature::Exact(vec![DataType::UInt32, DataType::Int32]), + // TODO remove this signature in favor of proper model/coercion + TypeSignature::Exact(vec![DataType::Int64, DataType::UInt32]), + ], + Volatility::Stable, ), &return_type, &fun, ) } +// See https://www.postgresql.org/docs/17/functions-string.html#FUNCTIONS-STRING-FORMAT pub fn create_format_udf() -> ScalarUDF { let fun = make_scalar_function(move |args: &[ArrayRef]| { // Ensure at least one argument is provided @@ -3255,11 +3268,11 @@ pub fn create_format_udf() -> ScalarUDF { } }; - // Quote identifier if necessary - let needs_quoting = !value - .chars() - .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') - || value.is_empty(); + // Quote any identifier for now + // That's a safety-first approach: it would quote too much, but every edge-case would be covered + // Like `1` or `1a` or `select` + // TODO Quote identifier only if necessary + let needs_quoting = true; if needs_quoting { result.push('"'); @@ -3298,6 +3311,10 @@ pub fn create_format_udf() -> ScalarUDF { ScalarUDF::new( "format", + // Actually, format should be variadic with types (Utf8, any*) + // But ATM DataFusion does not support those signatures + // And this would work through implicit casting to Utf8 + // TODO migrate to proper custom signature once it's supported by DF &Signature::variadic(vec![DataType::Utf8], Volatility::Immutable), &return_type, &fun, diff --git a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__formatted_identifier.snap b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__formatted_identifier.snap index 9db0a273294c0..18a0775468b72 100644 --- a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__formatted_identifier.snap +++ b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__formatted_identifier.snap @@ -5,5 +5,5 @@ expression: result +----------------------+ | formatted_identifier | +----------------------+ -| column_name | +| "column_name" | +----------------------+ diff --git a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__formatted_identifiers.snap b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__formatted_identifiers.snap index b9bafbf6d4723..de5454a9cd550 100644 --- a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__formatted_identifiers.snap +++ b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__formatted_identifiers.snap @@ -2,8 +2,8 @@ source: cubesql/src/compile/mod.rs expression: result --- -+-------------------------+ -| formatted_identifiers | -+-------------------------+ -| table_name, column_name | -+-------------------------+ ++-----------------------------+ +| formatted_identifiers | ++-----------------------------+ +| "table_name", "column_name" | ++-----------------------------+ diff --git a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__quoted_keyword.snap b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__quoted_keyword.snap index 9d6685868557f..3ea11401a8272 100644 --- a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__quoted_keyword.snap +++ b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__quoted_keyword.snap @@ -5,5 +5,5 @@ expression: result +----------------+ | quoted_keyword | +----------------+ -| select | +| "select" | +----------------+ diff --git a/rust/cubesql/cubesql/src/compile/test/test_introspection.rs b/rust/cubesql/cubesql/src/compile/test/test_introspection.rs index 7a1e83c7cd2bf..e0e40723d2f4d 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_introspection.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_introspection.rs @@ -3143,9 +3143,12 @@ async fn test_metabase_introspection_indoption() -> Result<(), CubeError> { #[tokio::test] async fn test_metabase_v0_51_2_introspection_field_indoption() -> Result<(), CubeError> { + init_testing_logger(); + insta::assert_snapshot!( "test_metabase_v0_51_2_introspection_field_indoption", execute_query( + // language=PostgreSQL r#" SELECT c.column_name AS name,