Skip to content

Commit

Permalink
chore: spill out short_sql to common/base/src/string.rs (#15633)
Browse files Browse the repository at this point in the history
  • Loading branch information
BohuTANG authored May 24, 2024
1 parent 0604f10 commit 1079b7e
Show file tree
Hide file tree
Showing 12 changed files with 83 additions and 58 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/common/base/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ state = "0.5"
tikv-jemalloc-ctl = { workspace = true }
tikv-jemalloc-sys = "0.5.2"
tokio = { workspace = true }
unicode-segmentation = "1.10.1"
uuid = { workspace = true }

[dev-dependencies]
Expand Down
1 change: 1 addition & 0 deletions src/common/base/src/base/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ pub use string::escape_for_key;
pub use string::format_byte_size;
pub use string::mask_connection_info;
pub use string::mask_string;
pub use string::short_sql;
pub use string::unescape_for_key;
pub use string::unescape_string;
pub use take_mut::take_mut;
Expand Down
31 changes: 31 additions & 0 deletions src/common/base/src/base/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use std::string::FromUtf8Error;
use databend_common_exception::ErrorCode;
use databend_common_exception::Result;
use regex::Regex;
use unicode_segmentation::UnicodeSegmentation;

/// Function that escapes special characters in a string.
///
Expand Down Expand Up @@ -188,3 +189,33 @@ pub fn mask_connection_info(sql: &str) -> String {

masked_sql
}

/// Maximum length of the SQL query to be displayed or log.
/// If the query exceeds this length and starts with keywords,
/// it will be truncated and appended with the remaining length.
pub fn short_sql(sql: String) -> String {
const MAX_LENGTH: usize = 128;
let keywords = ["INSERT"];

fn starts_with_any(query: &str, keywords: &[&str]) -> bool {
keywords
.iter()
.any(|&keyword| query.to_uppercase().starts_with(keyword))
}

let query = sql.trim_start();

// Graphemes represent user-perceived characters, which might be composed
// of multiple Unicode code points.
// This ensures that we handle complex characters like emojis or
// accented characters properly.
if query.graphemes(true).count() > MAX_LENGTH && starts_with_any(query, &keywords) {
let truncated: String = query.graphemes(true).take(MAX_LENGTH).collect();
let original_length = query.graphemes(true).count();
let remaining_length = original_length.saturating_sub(MAX_LENGTH);
// Append the remaining length indicator
truncated + &format!("...[{} more characters]", remaining_length)
} else {
query.to_string()
}
}
45 changes: 45 additions & 0 deletions src/common/base/tests/it/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use databend_common_base::base::*;
use databend_common_exception::Result;
use unicode_segmentation::UnicodeSegmentation;

#[test]
fn test_progress() -> Result<()> {
Expand Down Expand Up @@ -107,3 +108,47 @@ fn test_mask_connection_info() {

assert_eq!(expect, actual);
}

#[test]
fn test_short_sql() {
// Test case 1: SQL query shorter than 128 characters
let sql1 = "SELECT * FROM users WHERE id = 1;".to_string();
assert_eq!(short_sql(sql1.clone()), sql1);

// Test case 2: SQL query longer than 128 characters and starts with "INSERT"
let long_sql_insert = "INSERT INTO users (id, name, email) VALUES ".to_string()
+ &"(1, 'John Doe', 'john@example.com'), ".repeat(5); // Make sure this creates a string longer than 128 characters
let expected_length_insert = long_sql_insert.graphemes(true).count().saturating_sub(128);
let expected_result_insert = {
let truncated: String = long_sql_insert.graphemes(true).take(128).collect();
truncated + &format!("...[{} more characters]", expected_length_insert)
};
assert_eq!(short_sql(long_sql_insert.clone()), expected_result_insert);

// Test case 3: SQL query longer than 128 characters but does not start with "INSERT"
let long_sql_update =
"UPDATE users SET name = 'John' WHERE id = 1;".to_string() + &"id = 1 OR ".repeat(20); // Make sure this creates a string longer than 128 characters
assert_eq!(short_sql(long_sql_update.clone()), long_sql_update);

// Test case 4: Empty SQL query
let empty_sql = "".to_string();
assert_eq!(short_sql(empty_sql.clone()), empty_sql);

// Test case 5: SQL query with leading whitespace
let sql_with_whitespace =
" INSERT INTO users (id, name, email) VALUES (1, 'John Doe', 'john@example.com');"
.to_string();
let trimmed_sql = sql_with_whitespace.trim_start().to_string();
assert_eq!(short_sql(sql_with_whitespace.clone()), trimmed_sql);

// Test case 6: SQL query with multiple emojis to test truncation at an emoji point
let emoji_sql = "INSERT INTO users (id, name) VALUES (1, 'John Doe 😊😊😊😊😊😊😊😊😊😊');"
.to_string()
+ &" more text to exceed 128 characters ".repeat(3);
let expected_emoji_result = {
let truncated: String = emoji_sql.graphemes(true).take(128).collect();
let remaining_length = emoji_sql.graphemes(true).count().saturating_sub(128);
truncated + &format!("...[{} more characters]", remaining_length)
};
assert_eq!(short_sql(emoji_sql.clone()), expected_emoji_result);
}
1 change: 0 additions & 1 deletion src/query/service/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ tokio-stream = { workspace = true, features = ["net"] }
toml = { version = "0.8", default-features = false }
tonic = { workspace = true }
typetag = { workspace = true }
unicode-segmentation = "1.10.1"
uuid = { workspace = true }
walkdir = { workspace = true }
xorf = { version = "0.11.0", default-features = false, features = ["binary-fuse"] }
Expand Down
2 changes: 1 addition & 1 deletion src/query/service/src/interpreters/interpreter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use std::time::SystemTime;

use databend_common_ast::ast::Literal;
use databend_common_ast::ast::Statement;
use databend_common_base::base::short_sql;
use databend_common_base::runtime::profile::get_statistics_desc;
use databend_common_base::runtime::profile::ProfileDesc;
use databend_common_base::runtime::profile::ProfileStatisticsName;
Expand Down Expand Up @@ -46,7 +47,6 @@ use crate::pipelines::executor::ExecutorSettings;
use crate::pipelines::executor::PipelineCompleteExecutor;
use crate::pipelines::executor::PipelinePullingExecutor;
use crate::pipelines::PipelineBuildResult;
use crate::sessions::short_sql;
use crate::sessions::QueryContext;
use crate::sessions::SessionManager;
use crate::stream::DataBlockStream;
Expand Down
2 changes: 1 addition & 1 deletion src/query/service/src/servers/http/clickhouse_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use std::collections::HashMap;
use std::sync::Arc;

use async_stream::stream;
use databend_common_base::base::short_sql;
use databend_common_base::base::tokio;
use databend_common_base::base::tokio::sync::mpsc::Sender;
use databend_common_base::base::tokio::task::JoinHandle;
Expand Down Expand Up @@ -64,7 +65,6 @@ use crate::interpreters::InterpreterFactory;
use crate::interpreters::InterpreterPtr;
use crate::servers::http::middleware::sanitize_request_headers;
use crate::servers::http::v1::HttpQueryContext;
use crate::sessions::short_sql;
use crate::sessions::QueriesQueueManager;
use crate::sessions::QueryContext;
use crate::sessions::QueryEntry;
Expand Down
2 changes: 1 addition & 1 deletion src/query/service/src/servers/http/v1/query/http_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;

use databend_common_base::base::short_sql;
use databend_common_base::base::tokio;
use databend_common_base::base::tokio::sync::Mutex as TokioMutex;
use databend_common_base::base::tokio::sync::RwLock;
Expand Down Expand Up @@ -60,7 +61,6 @@ use crate::servers::http::v1::HttpQueryManager;
use crate::servers::http::v1::QueryError;
use crate::servers::http::v1::QueryResponse;
use crate::servers::http::v1::QueryStats;
use crate::sessions::short_sql;
use crate::sessions::QueryAffect;
use crate::sessions::Session;
use crate::sessions::SessionType;
Expand Down
1 change: 0 additions & 1 deletion src/query/service/src/sessions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ pub use databend_common_catalog::table_context::TableContext;
pub use query_affect::QueryAffect;
pub use query_ctx::convert_query_log_timestamp;
pub use query_ctx::QueryContext;
pub use query_ctx_shared::short_sql;
pub use query_ctx_shared::QueryContextShared;
pub use queue_mgr::AcquireQueueGuard;
pub use queue_mgr::QueriesQueueManager;
Expand Down
26 changes: 1 addition & 25 deletions src/query/service/src/sessions/query_ctx_shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use std::time::Duration;
use std::time::SystemTime;

use dashmap::DashMap;
use databend_common_base::base::short_sql;
use databend_common_base::base::Progress;
use databend_common_base::runtime::drop_guard;
use databend_common_base::runtime::Runtime;
Expand Down Expand Up @@ -579,28 +580,3 @@ impl Drop for QueryContextShared {
})
}
}

pub fn short_sql(sql: String) -> String {
use unicode_segmentation::UnicodeSegmentation;
const MAX_LENGTH: usize = 128;

let query = sql.trim_start();
if query.as_bytes().len() > MAX_LENGTH && query.as_bytes()[..6].eq_ignore_ascii_case(b"INSERT")
{
let mut result = Vec::new();
let mut bytes_taken = 0;
for grapheme in query.graphemes(true) {
let grapheme_bytes = grapheme.as_bytes();
if bytes_taken + grapheme_bytes.len() <= MAX_LENGTH {
result.extend_from_slice(grapheme_bytes);
bytes_taken += grapheme_bytes.len();
} else {
break;
}
}
result.extend_from_slice(b"...");
String::from_utf8(result).unwrap() // by construction, this cannot panic as we extracted unicode graphemes
} else {
sql
}
}
27 changes: 0 additions & 27 deletions src/query/service/tests/it/sessions/query_ctx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ use databend_common_exception::Result;
use databend_common_meta_app::storage::StorageFsConfig;
use databend_common_meta_app::storage::StorageParams;
use databend_common_meta_app::storage::StorageS3Config;
use databend_query::sessions::short_sql;
use databend_query::sessions::TableContext;
use databend_query::test_kits::ConfigBuilder;
use databend_query::test_kits::TestFixture;
Expand Down Expand Up @@ -66,29 +65,3 @@ async fn test_get_storage_accessor_fs() -> Result<()> {

Ok(())
}

#[test]
fn test_short_sql() {
// Test case 1: SQL query shorter than 128 bytes
let sql1 = "SELECT * FROM users WHERE id = 1;".to_string();
assert_eq!(short_sql(sql1.clone()), sql1);

// Test case 2: SQL query longer than 128 bytes and starts with "INSERT"
let long_sql = "INSERT INTO users (id, name, email) VALUES ".to_string()
+ &"(1, 'John Doe', 'john@example.com'), ".repeat(5); // Adjusted for 128 bytes
let expected_result = long_sql.as_bytes()[..128].to_vec();
let expected_result = String::from_utf8(expected_result).unwrap() + "...";
assert_eq!(short_sql(long_sql), expected_result);

// Test case 3: SQL query longer than 128 bytes but does not start with "INSERT"
let long_sql = "SELECT * FROM users WHERE ".to_string() + &"id = 1 OR ".repeat(20); // Adjusted for 128 bytes
assert_eq!(short_sql(long_sql.clone()), long_sql);

// Test case 4: Empty SQL query
let empty_sql = "".to_string();
assert_eq!(short_sql(empty_sql.clone()), empty_sql);

// Test case 5: SQL query with leading whitespace
let sql_with_whitespace = " SELECT * FROM users;".to_string();
assert_eq!(short_sql(sql_with_whitespace.clone()), sql_with_whitespace);
}

0 comments on commit 1079b7e

Please sign in to comment.