From ba381c0056abf0b37199b5c07316fa041bb7be87 Mon Sep 17 00:00:00 2001 From: Pravin Bhat Date: Thu, 19 Sep 2024 18:58:41 -0400 Subject: [PATCH] Implement two new codecs `STRING_BLOB` and `ASCII_BLOB` (#311) * Implemented two new codecs `STRING_BLOB` and `ASCII_BLOB` to allow migration from `TEXT` and `ASCII` fields to `BLOB` fields * Reuse constants for assertions in tests --------- Co-authored-by: Madhavan Sridharan --- RELEASE.md | 3 + .../cdm/cql/codec/ASCII_BLOBCodec.java | 71 +++++++++++++++++++ .../cdm/cql/codec/BLOB_ASCIICodec.java | 70 ++++++++++++++++++ .../cdm/cql/codec/BLOB_TEXTCodec.java | 70 ++++++++++++++++++ .../datastax/cdm/cql/codec/CodecFactory.java | 4 ++ .../com/datastax/cdm/cql/codec/Codecset.java | 2 +- .../cdm/cql/codec/TEXT_BLOBCodec.java | 71 +++++++++++++++++++ src/resources/cdm-detailed.properties | 2 + .../cdm/cql/codec/ASCII_BLOBCodecTest.java | 45 ++++++++++++ .../cdm/cql/codec/BLOB_ASCIICodecTest.java | 44 ++++++++++++ .../cdm/cql/codec/BLOB_TextCodecTest.java | 44 ++++++++++++ .../cdm/cql/codec/TEXT_BLOBCodecTest.java | 45 ++++++++++++ 12 files changed, 470 insertions(+), 1 deletion(-) create mode 100644 src/main/java/com/datastax/cdm/cql/codec/ASCII_BLOBCodec.java create mode 100644 src/main/java/com/datastax/cdm/cql/codec/BLOB_ASCIICodec.java create mode 100644 src/main/java/com/datastax/cdm/cql/codec/BLOB_TEXTCodec.java create mode 100644 src/main/java/com/datastax/cdm/cql/codec/TEXT_BLOBCodec.java create mode 100644 src/test/java/com/datastax/cdm/cql/codec/ASCII_BLOBCodecTest.java create mode 100644 src/test/java/com/datastax/cdm/cql/codec/BLOB_ASCIICodecTest.java create mode 100644 src/test/java/com/datastax/cdm/cql/codec/BLOB_TextCodecTest.java create mode 100644 src/test/java/com/datastax/cdm/cql/codec/TEXT_BLOBCodecTest.java diff --git a/RELEASE.md b/RELEASE.md index dbc3895f..8229bf70 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,4 +1,7 @@ # Release Notes +## [4.4.1] - 2024-09-20 +- Added two new codecs `STRING_BLOB` and 
`ASCII_BLOB` to allow migration from `TEXT` and `ASCII` fields to `BLOB` fields. These codecs can also be used to convert `BLOB` to `TEXT` or `ASCII`, but in such cases the `BLOB` value must be TEXT based in nature & fit within the applicable limits. + ## [4.4.0] - 2024-09-19 - Added property `spark.cdm.connect.origin.tls.isAstra` and `spark.cdm.connect.target.tls.isAstra` to allow connecting to Astra DB without using [SCB](https://docs.datastax.com/en/astra-db-serverless/drivers/secure-connect-bundle.html). This may be needed for enterprises that may find credentials packaged within SCB as a security risk. TLS properties can now be passed as params OR wrapper scripts (not included) could be used to pull sensitive credentials from a vault service in real-time & pass them to CDM. - Switched to using Apache Cassandra® `5.0` docker image for testing diff --git a/src/main/java/com/datastax/cdm/cql/codec/ASCII_BLOBCodec.java b/src/main/java/com/datastax/cdm/cql/codec/ASCII_BLOBCodec.java new file mode 100644 index 00000000..041a6cfe --- /dev/null +++ b/src/main/java/com/datastax/cdm/cql/codec/ASCII_BLOBCodec.java @@ -0,0 +1,71 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.datastax.cdm.cql.codec;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+
+import org.jetbrains.annotations.NotNull;
+
+import com.datastax.cdm.properties.PropertyHelper;
+import com.datastax.oss.driver.api.core.ProtocolVersion;
+import com.datastax.oss.driver.api.core.type.DataType;
+import com.datastax.oss.driver.api.core.type.DataTypes;
+import com.datastax.oss.driver.api.core.type.codec.TypeCodecs;
+import com.datastax.oss.driver.api.core.type.reflect.GenericType;
+
+/**
+ * Codec that maps a CQL {@code ascii} column to a Java {@link ByteBuffer} holding the bytes of the text.
+ *
+ * Registered by {@code CodecFactory} under {@code ASCII_BLOB}, paired with {@link BLOB_ASCIICodec}.
+ */
+public class ASCII_BLOBCodec extends AbstractBaseCodec<ByteBuffer> {
+
+    /**
+     * Charset for every String/byte conversion, so results never depend on the JVM default charset. UTF-8 encodes
+     * each ASCII character as the same single byte, so valid ASCII data is unaffected by this choice.
+     */
+    private static final Charset CHARSET = StandardCharsets.UTF_8;
+
+    /**
+     * @param propertyHelper
+     *            configuration handle handed to {@code AbstractBaseCodec}
+     */
+    public ASCII_BLOBCodec(PropertyHelper propertyHelper) {
+        super(propertyHelper);
+    }
+
+    @Override
+    public @NotNull GenericType<ByteBuffer> getJavaType() {
+        return GenericType.BYTE_BUFFER;
+    }
+
+    @Override
+    public @NotNull DataType getCqlType() {
+        return DataTypes.ASCII;
+    }
+
+    /**
+     * Serializes the readable bytes of {@code value}, interpreted as text, through the driver's ASCII codec.
+     *
+     * @return the encoded text, or {@code null} when {@code value} is {@code null}
+     */
+    @Override
+    public ByteBuffer encode(ByteBuffer value, @NotNull ProtocolVersion protocolVersion) {
+        if (value == null) {
+            return null;
+        }
+        return TypeCodecs.ASCII.encode(toText(value), protocolVersion);
+    }
+
+    /**
+     * Decodes the stored text with the driver's ASCII codec and returns its bytes.
+     *
+     * @return a buffer wrapping the bytes, or {@code null} when the driver codec decodes to {@code null}
+     */
+    @Override
+    public ByteBuffer decode(ByteBuffer bytes, @NotNull ProtocolVersion protocolVersion) {
+        String stringValue = TypeCodecs.ASCII.decode(bytes, protocolVersion);
+        return stringValue == null ? null : ByteBuffer.wrap(stringValue.getBytes(CHARSET));
+    }
+
+    /**
+     * Formats the readable bytes of {@code value}, interpreted as text, through the driver's ASCII codec.
+     *
+     * A {@code null} value is forwarded to the driver codec rather than dereferenced here.
+     */
+    @Override
+    public @NotNull String format(ByteBuffer value) {
+        return TypeCodecs.ASCII.format(value == null ? null : toText(value));
+    }
+
+    /**
+     * Converts {@code value} as given (it is not unquoted) into its bytes.
+     *
+     * @return a buffer wrapping the bytes, or {@code null} when {@code value} is {@code null}
+     */
+    @Override
+    public ByteBuffer parse(String value) {
+        return value == null ? null : ByteBuffer.wrap(value.getBytes(CHARSET));
+    }
+
+    /**
+     * Decodes the bytes between the buffer's position and limit. A duplicate is read so the caller's buffer is not
+     * consumed; {@code ByteBuffer.array()} is avoided because it returns the whole backing array and throws for
+     * direct or read-only buffers.
+     */
+    private static String toText(ByteBuffer buffer) {
+        return CHARSET.decode(buffer.duplicate()).toString();
+    }
+}
diff --git a/src/main/java/com/datastax/cdm/cql/codec/BLOB_ASCIICodec.java b/src/main/java/com/datastax/cdm/cql/codec/BLOB_ASCIICodec.java
new file mode 100644
index 00000000..f0e3a5d9
--- /dev/null
+++ b/src/main/java/com/datastax/cdm/cql/codec/BLOB_ASCIICodec.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.cdm.cql.codec;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+
+import org.jetbrains.annotations.NotNull;
+
+import com.datastax.cdm.properties.PropertyHelper;
+import com.datastax.oss.driver.api.core.ProtocolVersion;
+import com.datastax.oss.driver.api.core.type.DataType;
+import com.datastax.oss.driver.api.core.type.DataTypes;
+import com.datastax.oss.driver.api.core.type.codec.TypeCodecs;
+import com.datastax.oss.driver.api.core.type.reflect.GenericType;
+
+/**
+ * Codec that maps a CQL {@code blob} column to a Java {@link String}, reading the bytes as ASCII text.
+ *
+ * Registered by {@code CodecFactory} under {@code ASCII_BLOB}, paired with {@link ASCII_BLOBCodec}.
+ */
+public class BLOB_ASCIICodec extends AbstractBaseCodec<String> {
+
+    /**
+     * Charset for every String/byte conversion, so results never depend on the JVM default charset. UTF-8 encodes
+     * each ASCII character as the same single byte, so valid ASCII data is unaffected by this choice.
+     */
+    private static final Charset CHARSET = StandardCharsets.UTF_8;
+
+    /**
+     * @param propertyHelper
+     *            configuration handle handed to {@code AbstractBaseCodec}
+     */
+    public BLOB_ASCIICodec(PropertyHelper propertyHelper) {
+        super(propertyHelper);
+    }
+
+    @Override
+    public @NotNull GenericType<String> getJavaType() {
+        return GenericType.STRING;
+    }
+
+    @Override
+    public @NotNull DataType getCqlType() {
+        return DataTypes.BLOB;
+    }
+
+    /**
+     * Serializes the bytes of {@code value} through the driver's BLOB codec.
+     *
+     * @return the encoded blob, or {@code null} when {@code value} is {@code null}
+     */
+    @Override
+    public ByteBuffer encode(String value, @NotNull ProtocolVersion protocolVersion) {
+        if (value == null) {
+            return null;
+        }
+        return TypeCodecs.BLOB.encode(toBuffer(value), protocolVersion);
+    }
+
+    /** Reads the blob bytes back as text by delegating to the driver's ASCII codec. */
+    @Override
+    public String decode(ByteBuffer bytes, @NotNull ProtocolVersion protocolVersion) {
+        return TypeCodecs.ASCII.decode(bytes, protocolVersion);
+    }
+
+    /**
+     * Formats the bytes of {@code value} through the driver's BLOB codec.
+     *
+     * A {@code null} value is forwarded to the driver codec rather than dereferenced here.
+     */
+    @Override
+    public @NotNull String format(String value) {
+        return TypeCodecs.BLOB.format(value == null ? null : toBuffer(value));
+    }
+
+    /**
+     * Parses a blob literal with the driver's BLOB codec and returns the text held by its bytes.
+     *
+     * The content is decoded with {@link #CHARSET}; {@code ByteBuffer.toString()} is not usable for this because it
+     * describes the buffer (position, limit, capacity) instead of returning its content.
+     *
+     * @return the decoded text, or {@code null} when the driver codec parses {@code value} to {@code null}
+     */
+    @Override
+    public String parse(String value) {
+        ByteBuffer bb = TypeCodecs.BLOB.parse(value);
+        return bb == null ? null : CHARSET.decode(bb.duplicate()).toString();
+    }
+
+    /** Wraps the bytes of {@code text} in a new heap buffer. */
+    private static ByteBuffer toBuffer(String text) {
+        return ByteBuffer.wrap(text.getBytes(CHARSET));
+    }
+}
diff --git a/src/main/java/com/datastax/cdm/cql/codec/BLOB_TEXTCodec.java b/src/main/java/com/datastax/cdm/cql/codec/BLOB_TEXTCodec.java
new file mode 100644
index 00000000..1efb37ad
--- /dev/null
+++ b/src/main/java/com/datastax/cdm/cql/codec/BLOB_TEXTCodec.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.cdm.cql.codec;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+
+import org.jetbrains.annotations.NotNull;
+
+import com.datastax.cdm.properties.PropertyHelper;
+import com.datastax.oss.driver.api.core.ProtocolVersion;
+import com.datastax.oss.driver.api.core.type.DataType;
+import com.datastax.oss.driver.api.core.type.DataTypes;
+import com.datastax.oss.driver.api.core.type.codec.TypeCodecs;
+import com.datastax.oss.driver.api.core.type.reflect.GenericType;
+
+/**
+ * Codec that maps a CQL {@code blob} column to a Java {@link String}, reading the bytes as text.
+ *
+ * Registered by {@code CodecFactory} under {@code STRING_BLOB}, paired with {@link TEXT_BLOBCodec}.
+ */
+public class BLOB_TEXTCodec extends AbstractBaseCodec<String> {
+
+    /** Charset for every String/byte conversion, so results never depend on the JVM default charset. */
+    private static final Charset CHARSET = StandardCharsets.UTF_8;
+
+    /**
+     * @param propertyHelper
+     *            configuration handle handed to {@code AbstractBaseCodec}
+     */
+    public BLOB_TEXTCodec(PropertyHelper propertyHelper) {
+        super(propertyHelper);
+    }
+
+    @Override
+    public @NotNull GenericType<String> getJavaType() {
+        return GenericType.STRING;
+    }
+
+    @Override
+    public @NotNull DataType getCqlType() {
+        return DataTypes.BLOB;
+    }
+
+    /**
+     * Serializes the UTF-8 bytes of {@code value} through the driver's BLOB codec.
+     *
+     * @return the encoded blob, or {@code null} when {@code value} is {@code null}
+     */
+    @Override
+    public ByteBuffer encode(String value, @NotNull ProtocolVersion protocolVersion) {
+        if (value == null) {
+            return null;
+        }
+        return TypeCodecs.BLOB.encode(toBuffer(value), protocolVersion);
+    }
+
+    /** Reads the blob bytes back as text by delegating to the driver's TEXT codec. */
+    @Override
+    public String decode(ByteBuffer bytes, @NotNull ProtocolVersion protocolVersion) {
+        return TypeCodecs.TEXT.decode(bytes, protocolVersion);
+    }
+
+    /**
+     * Formats the UTF-8 bytes of {@code value} through the driver's BLOB codec.
+     *
+     * A {@code null} value is forwarded to the driver codec rather than dereferenced here.
+     */
+    @Override
+    public @NotNull String format(String value) {
+        return TypeCodecs.BLOB.format(value == null ? null : toBuffer(value));
+    }
+
+    /**
+     * Parses a blob literal with the driver's BLOB codec and returns the text held by its bytes.
+     *
+     * The content is decoded with {@link #CHARSET}; {@code ByteBuffer.toString()} is not usable for this because it
+     * describes the buffer (position, limit, capacity) instead of returning its content.
+     *
+     * @return the decoded text, or {@code null} when the driver codec parses {@code value} to {@code null}
+     */
+    @Override
+    public String parse(String value) {
+        ByteBuffer bb = TypeCodecs.BLOB.parse(value);
+        return bb == null ? null : CHARSET.decode(bb.duplicate()).toString();
+    }
+
+    /** Wraps the UTF-8 bytes of {@code text} in a new heap buffer. */
+    private static ByteBuffer toBuffer(String text) {
+        return ByteBuffer.wrap(text.getBytes(CHARSET));
+    }
+}
diff --git a/src/main/java/com/datastax/cdm/cql/codec/CodecFactory.java b/src/main/java/com/datastax/cdm/cql/codec/CodecFactory.java
index dd2407fc..c4e7387b 100644
--- a/src/main/java/com/datastax/cdm/cql/codec/CodecFactory.java
+++ b/src/main/java/com/datastax/cdm/cql/codec/CodecFactory.java
@@ -34,6 +34,10 @@ public static List<TypeCodec<?>> getCodecPair(PropertyHelper propertyHelper, Cod
             return Arrays.asList(new DOUBLE_StringCodec(propertyHelper), new TEXT_DoubleCodec(propertyHelper));
         case BIGINT_STRING:
             return Arrays.asList(new BIGINT_StringCodec(propertyHelper), new TEXT_LongCodec(propertyHelper));
+        case STRING_BLOB:
+            return Arrays.asList(new TEXT_BLOBCodec(propertyHelper), new BLOB_TEXTCodec(propertyHelper));
+        case ASCII_BLOB:
+            return Arrays.asList(new ASCII_BLOBCodec(propertyHelper), new BLOB_ASCIICodec(propertyHelper));
         case DECIMAL_STRING:
             return Arrays.asList(new DECIMAL_StringCodec(propertyHelper), new TEXT_BigDecimalCodec(propertyHelper));
         case TIMESTAMP_STRING_MILLIS:
diff --git a/src/main/java/com/datastax/cdm/cql/codec/Codecset.java b/src/main/java/com/datastax/cdm/cql/codec/Codecset.java
index 5f8bafaa..07739075 100644
--- a/src/main/java/com/datastax/cdm/cql/codec/Codecset.java
+++ b/src/main/java/com/datastax/cdm/cql/codec/Codecset.java
@@ -17,5 +17,5 @@
 
 public enum Codecset {
     INT_STRING, DOUBLE_STRING, BIGINT_STRING, DECIMAL_STRING, TIMESTAMP_STRING_MILLIS, TIMESTAMP_STRING_FORMAT,
-    POINT_TYPE, POLYGON_TYPE, DATE_RANGE, LINE_STRING
+    POINT_TYPE, POLYGON_TYPE, DATE_RANGE, LINE_STRING, STRING_BLOB, ASCII_BLOB
 }
diff --git a/src/main/java/com/datastax/cdm/cql/codec/TEXT_BLOBCodec.java b/src/main/java/com/datastax/cdm/cql/codec/TEXT_BLOBCodec.java
new file mode 100644
index 00000000..1ada5b3f
--- /dev/null
+++ b/src/main/java/com/datastax/cdm/cql/codec/TEXT_BLOBCodec.java
@@ -0,0 +1,111 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.cdm.cql.codec;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+
+import org.jetbrains.annotations.NotNull;
+
+import com.datastax.cdm.properties.PropertyHelper;
+import com.datastax.oss.driver.api.core.ProtocolVersion;
+import com.datastax.oss.driver.api.core.type.DataType;
+import com.datastax.oss.driver.api.core.type.DataTypes;
+import com.datastax.oss.driver.api.core.type.codec.TypeCodecs;
+import com.datastax.oss.driver.api.core.type.reflect.GenericType;
+
+/**
+ * Codec that maps a CQL {@code text} column to a Java {@link ByteBuffer} holding the UTF-8 bytes of the text.
+ *
+ * Registered by {@code CodecFactory} under {@code STRING_BLOB}, paired with {@link BLOB_TEXTCodec}.
+ */
+public class TEXT_BLOBCodec extends AbstractBaseCodec<ByteBuffer> {
+
+    /** Charset for every String/byte conversion, so results never depend on the JVM default charset. */
+    private static final Charset CHARSET = StandardCharsets.UTF_8;
+
+    /**
+     * @param propertyHelper
+     *            configuration handle handed to {@code AbstractBaseCodec}
+     */
+    public TEXT_BLOBCodec(PropertyHelper propertyHelper) {
+        super(propertyHelper);
+    }
+
+    @Override
+    public @NotNull GenericType<ByteBuffer> getJavaType() {
+        return GenericType.BYTE_BUFFER;
+    }
+
+    @Override
+    public @NotNull DataType getCqlType() {
+        return DataTypes.TEXT;
+    }
+
+    /**
+     * Serializes the readable bytes of {@code value}, interpreted as UTF-8, through the driver's TEXT codec.
+     *
+     * @return the encoded text, or {@code null} when {@code value} is {@code null}
+     */
+    @Override
+    public ByteBuffer encode(ByteBuffer value, @NotNull ProtocolVersion protocolVersion) {
+        if (value == null) {
+            return null;
+        }
+        return TypeCodecs.TEXT.encode(toText(value), protocolVersion);
+    }
+
+    /**
+     * Decodes the stored text with the driver's TEXT codec and returns its UTF-8 bytes.
+     *
+     * @return a buffer wrapping the bytes, or {@code null} when the driver codec decodes to {@code null}
+     */
+    @Override
+    public ByteBuffer decode(ByteBuffer bytes, @NotNull ProtocolVersion protocolVersion) {
+        String stringValue = TypeCodecs.TEXT.decode(bytes, protocolVersion);
+        return stringValue == null ? null : ByteBuffer.wrap(stringValue.getBytes(CHARSET));
+    }
+
+    /**
+     * Formats the readable bytes of {@code value}, interpreted as UTF-8, through the driver's TEXT codec.
+     *
+     * A {@code null} value is forwarded to the driver codec rather than dereferenced here.
+     */
+    @Override
+    public @NotNull String format(ByteBuffer value) {
+        return TypeCodecs.TEXT.format(value == null ? null : toText(value));
+    }
+
+    /**
+     * Converts {@code value} as given (it is not unquoted) into its UTF-8 bytes.
+     *
+     * @return a buffer wrapping the bytes, or {@code null} when {@code value} is {@code null}
+     */
+    @Override
+    public ByteBuffer parse(String value) {
+        return value == null ? null : ByteBuffer.wrap(value.getBytes(CHARSET));
+    }
+
+    /**
+     * Decodes the bytes between the buffer's position and limit. A duplicate is read so the caller's buffer is not
+     * consumed; {@code ByteBuffer.array()} is avoided because it returns the whole backing array and throws for
+     * direct or read-only buffers.
+     */
+    private static String toText(ByteBuffer buffer) {
+        return CHARSET.decode(buffer.duplicate()).toString();
+    }
+}
diff --git a/src/resources/cdm-detailed.properties b/src/resources/cdm-detailed.properties
index 715a1a6a..96f1438d 100644
--- a/src/resources/cdm-detailed.properties
+++ b/src/resources/cdm-detailed.properties
@@ -268,6 +268,8 @@ spark.cdm.perfops.ratelimit.target 20000
 # DOUBLE_STRING : double stored in a String
 # BIGINT_STRING : bigint stored in a String
 # DECIMAL_STRING : decimal stored in a String
+# STRING_BLOB : TEXT stored in a Blob
+# ASCII_BLOB : ASCII stored in a Blob
 # TIMESTAMP_STRING_MILLIS : timestamp stored in a String,
 # as Epoch milliseconds
 # TIMESTAMP_STRING_FORMAT : timestamp stored in a String,
diff --git a/src/test/java/com/datastax/cdm/cql/codec/ASCII_BLOBCodecTest.java b/src/test/java/com/datastax/cdm/cql/codec/ASCII_BLOBCodecTest.java
new file mode 100644
index 00000000..9d79f5b7
--- /dev/null
+++ b/src/test/java/com/datastax/cdm/cql/codec/ASCII_BLOBCodecTest.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.cdm.cql.codec;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import com.datastax.oss.driver.api.core.ProtocolVersion;
+
+/** Round-trip check for {@link ASCII_BLOBCodec}. */
+public class ASCII_BLOBCodecTest {
+    /** ASCII-only sample, so its byte form is the same in every ASCII-compatible charset. */
+    private static final String INPUT = "Encode this Text string to Blob";
+
+    private ASCII_BLOBCodec codec;
+
+    @BeforeEach
+    public void setup() {
+        // The codec is built without a PropertyHelper; the methods exercised below are called as-is.
+        codec = new ASCII_BLOBCodec(null);
+    }
+
+    /** Encodes the sample, decodes it again, and checks both the formatted literal and the parsed bytes. */
+    @Test
+    public void encodeDecode() {
+        // An explicit charset keeps the fixture independent of the JVM default.
+        ByteBuffer source = ByteBuffer.wrap(INPUT.getBytes(StandardCharsets.UTF_8));
+
+        ByteBuffer encoded = codec.encode(source, ProtocolVersion.V4);
+        ByteBuffer decoded = codec.decode(encoded, ProtocolVersion.V4);
+
+        assertEquals("'" + INPUT + "'", codec.format(decoded));
+        assertEquals(decoded, codec.parse(INPUT));
+    }
+
+}
diff --git a/src/test/java/com/datastax/cdm/cql/codec/BLOB_ASCIICodecTest.java b/src/test/java/com/datastax/cdm/cql/codec/BLOB_ASCIICodecTest.java
new file mode 100644
index 00000000..85486534
--- /dev/null
+++ b/src/test/java/com/datastax/cdm/cql/codec/BLOB_ASCIICodecTest.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.cdm.cql.codec;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.nio.ByteBuffer;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import com.datastax.oss.driver.api.core.ProtocolVersion;
+
+/** Round-trip check for {@link BLOB_ASCIICodec}. */
+public class BLOB_ASCIICodecTest {
+    private final String INPUT = "Encode this Text string to Blob";
+
+    private BLOB_ASCIICodec codec;
+
+    @BeforeEach
+    public void setup() {
+        // The codec is built without a PropertyHelper; the methods exercised below are called as-is.
+        codec = new BLOB_ASCIICodec(null);
+    }
+
+    /** Encoding the sample to a blob and decoding it again must give back the same text. */
+    @Test
+    public void testEncode() {
+        ByteBuffer encoded = codec.encode(INPUT, ProtocolVersion.V4);
+
+        String roundTripped = codec.decode(encoded, ProtocolVersion.V4);
+
+        assertEquals(INPUT, roundTripped);
+    }
+
+}
diff --git a/src/test/java/com/datastax/cdm/cql/codec/BLOB_TextCodecTest.java b/src/test/java/com/datastax/cdm/cql/codec/BLOB_TextCodecTest.java
new file mode 100644
index 00000000..7a6707df
--- /dev/null
+++ b/src/test/java/com/datastax/cdm/cql/codec/BLOB_TextCodecTest.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.cdm.cql.codec;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.nio.ByteBuffer;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import com.datastax.oss.driver.api.core.ProtocolVersion;
+
+/** Round-trip check for {@link BLOB_TEXTCodec}. */
+public class BLOB_TextCodecTest {
+    private final String INPUT = "Encode this Text string to Blob";
+
+    private BLOB_TEXTCodec codec;
+
+    @BeforeEach
+    public void setup() {
+        // The codec is built without a PropertyHelper; the methods exercised below are called as-is.
+        codec = new BLOB_TEXTCodec(null);
+    }
+
+    /** Encoding the sample to a blob and decoding it again must give back the same text. */
+    @Test
+    public void testEncode() {
+        ByteBuffer encoded = codec.encode(INPUT, ProtocolVersion.V4);
+
+        String roundTripped = codec.decode(encoded, ProtocolVersion.V4);
+
+        assertEquals(INPUT, roundTripped);
+    }
+
+}
diff --git a/src/test/java/com/datastax/cdm/cql/codec/TEXT_BLOBCodecTest.java b/src/test/java/com/datastax/cdm/cql/codec/TEXT_BLOBCodecTest.java
new file mode 100644
index 00000000..0639c105
--- /dev/null
+++ b/src/test/java/com/datastax/cdm/cql/codec/TEXT_BLOBCodecTest.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastax.cdm.cql.codec;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import com.datastax.oss.driver.api.core.ProtocolVersion;
+
+/** Round-trip check for {@link TEXT_BLOBCodec}. */
+public class TEXT_BLOBCodecTest {
+    /** ASCII-only sample, so its byte form is the same in every ASCII-compatible charset. */
+    private static final String INPUT = "Encode this Text string to Blob";
+
+    private TEXT_BLOBCodec codec;
+
+    @BeforeEach
+    public void setup() {
+        // The codec is built without a PropertyHelper; the methods exercised below are called as-is.
+        codec = new TEXT_BLOBCodec(null);
+    }
+
+    /** Encodes the sample, decodes it again, and checks both the formatted literal and the parsed bytes. */
+    @Test
+    public void encodeDecode() {
+        // An explicit charset keeps the fixture independent of the JVM default.
+        ByteBuffer source = ByteBuffer.wrap(INPUT.getBytes(StandardCharsets.UTF_8));
+
+        ByteBuffer encoded = codec.encode(source, ProtocolVersion.V4);
+        ByteBuffer decoded = codec.decode(encoded, ProtocolVersion.V4);
+
+        assertEquals("'" + INPUT + "'", codec.format(decoded));
+        assertEquals(decoded, codec.parse(INPUT));
+    }
+
+}