Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changelog.d/clickhouse_arrow_uuid_support.enhancement.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added support for the ClickHouse `UUID` type in the ArrowStream format for the `clickhouse` sink. UUID columns are now automatically mapped to Arrow `Utf8` and cast by ClickHouse on insert.

authors: benjamin-awd
25 changes: 24 additions & 1 deletion src/sinks/clickhouse/arrow/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ pub enum ClickHouseType {
String,
FixedString(u32),

// UUID type
Uuid,

// Date/time types
Date,
DateTime,
Expand Down Expand Up @@ -93,7 +96,13 @@ impl ClickHouseType {
}),

// String types
Self::String | Self::FixedString(_) => Ok(DataType::Utf8),
// Note: UUID is mapped to Utf8 for two reasons:
// 1. Vector has no native UUID type — UUIDs are represented as strings in the event model
// 2. ClickHouse does not support UUID in Arrow format:
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
// ClickHouse handles the String → UUID cast implicitly on insert.
Self::String | Self::FixedString(_) | Self::Uuid => Ok(DataType::Utf8),

// Date/time types
Self::Date => Ok(DataType::Date32),
Expand Down Expand Up @@ -230,6 +239,7 @@ fn ch_type(input: &str) -> IResult<&str, ClickHouseType> {

// String types
"String" => Ok((rest, ClickHouseType::String)),
"UUID" => Ok((rest, ClickHouseType::Uuid)),
"FixedString" => parens(parse_u32)
.map(ClickHouseType::FixedString)
.parse(rest),
Expand Down Expand Up @@ -465,6 +475,19 @@ mod tests {
assert!(matches!(data_type, DataType::Map(_, _)));
}

#[test]
fn test_uuid_type_mapping() {
    // Each case pairs a ClickHouse type string with the nullability flag
    // expected from `convert_type`; the Arrow data type is `Utf8` in all of them.
    let cases = [
        ("UUID", false),
        ("Nullable(UUID)", true),
        ("LowCardinality(Nullable(UUID))", true),
    ];

    for (ch_type, expect_nullable) in cases {
        assert_eq!(
            convert_type(ch_type).unwrap(),
            (DataType::Utf8, expect_nullable),
            "unexpected Arrow mapping for ClickHouse type `{ch_type}`"
        );
    }
}

#[test]
fn test_unknown_type_fails() {
let result = convert_type("UnknownType");
Expand Down
18 changes: 16 additions & 2 deletions src/sinks/clickhouse/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ async fn insert_events_arrow_with_schema_fetching() {
client
.create_table(
&table,
"host String, timestamp DateTime64(3), message String, id Int64, name String, score Float64, active Bool",
"host String, timestamp DateTime64(3), message String, id Int64, name String, score Float64, active Bool, request_id UUID",
)
.await;

Expand Down Expand Up @@ -595,6 +595,10 @@ async fn insert_events_arrow_with_schema_fetching() {
event.insert("name", format!("user_{}", i));
event.insert("score", 95.5 + i as f64);
event.insert("active", i % 2 == 0);
event.insert(
"request_id",
format!("550e8400-e29b-41d4-a716-44665544000{}", i),
);
events.push(event.into());
}

Expand All @@ -604,7 +608,7 @@ async fn insert_events_arrow_with_schema_fetching() {
assert_eq!(3, output.rows);

// Verify all fields exist and have the correct types
for row in output.data.iter() {
for (i, row) in output.data.iter().enumerate() {
// Check standard Vector fields exist
assert!(row.get("host").and_then(|v| v.as_str()).is_some());
assert!(row.get("message").and_then(|v| v.as_str()).is_some());
Expand All @@ -620,6 +624,16 @@ async fn insert_events_arrow_with_schema_fetching() {
assert!(row.get("name").and_then(|v| v.as_str()).is_some());
assert!(row.get("score").and_then(|v| v.as_f64()).is_some());
assert!(row.get("active").and_then(|v| v.as_bool()).is_some());

// Check UUID field
let request_id = row
.get("request_id")
.and_then(|v| v.as_str())
.expect("request_id should be present");
assert_eq!(
request_id,
format!("550e8400-e29b-41d4-a716-44665544000{}", i)
);
}
}

Expand Down
1 change: 0 additions & 1 deletion website/cue/reference/components/sinks/clickhouse.cue
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ components: sinks: clickhouse: {
types are unsupported:
- `FIXED_SIZE_BINARY`
- `JSON`
- `UUID`
- `ENUM`

#### Timezone Handling
Expand Down
Loading