From 8de6fdbee2477029aa90ef61a6fc6b850fb4985d Mon Sep 17 00:00:00 2001 From: Srilakshmi Manjunath Bharadwaj Date: Wed, 9 Apr 2025 15:46:32 -0700 Subject: [PATCH 1/4] Fixing test --- .../linkedin/data/avro/DataTranslator.java | 17 ++++++++++ .../data/avro/TestDataTranslator.java | 31 ++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/data-avro/src/main/java/com/linkedin/data/avro/DataTranslator.java b/data-avro/src/main/java/com/linkedin/data/avro/DataTranslator.java index 21b2697c78..a7d38fad8a 100644 --- a/data-avro/src/main/java/com/linkedin/data/avro/DataTranslator.java +++ b/data-avro/src/main/java/com/linkedin/data/avro/DataTranslator.java @@ -61,6 +61,18 @@ public class DataTranslator implements DataTranslatorContext { protected DataTranslationOptions _dataTranslationOptions; + public static void backFillMissingDeafultInDataMap(DataMap map, RecordDataSchema dataSchema) { + for (RecordDataSchema.Field field: dataSchema.getFields()) { + DataSchema fieldType = field.getType(); + if ( fieldType.getDereferencedType() != DataSchema.Type.ARRAY && fieldType.getDereferencedType() != DataSchema.Type.MAP) { + continue; + } + if (field.getDefault() != null && (!map.containsKey(field.getName()) || map.get(field.getName()) == null)) { + map.put(field.getName(), field.getDefault()); + } + } + } + /** * Convert the given {@link DataMap} conforming to the provided {@link RecordDataSchema} to a {@link GenericRecord}. * @@ -74,6 +86,7 @@ public class DataTranslator implements DataTranslatorContext */ public static GenericRecord dataMapToGenericRecord(DataMap map, RecordDataSchema dataSchema) throws DataTranslationException { + backFillMissingDeafultInDataMap(map, dataSchema); Schema avroSchema = SchemaTranslator.dataToAvroSchema(dataSchema); return dataMapToGenericRecord(map, dataSchema, avroSchema, null); } @@ -94,6 +107,7 @@ public static GenericRecord dataMapToGenericRecord(DataMap map, RecordDataSchema DataMapToAvroRecordTranslationOptions options) throws DataTranslationException { + backFillMissingDeafultInDataMap(map, dataSchema); Schema avroSchema = SchemaTranslator.dataToAvroSchema(dataSchema); return dataMapToGenericRecord(map, dataSchema, avroSchema, options); } @@ -102,6 +116,7 @@ public static GenericRecord dataMapToGenericRecord(DataMap map, RecordDataSchema DataMapToAvroRecordTranslationOptions options) throws DataTranslationException { + backFillMissingDeafultInDataMap(map, dataSchema); DataMapToGenericRecordTranslator translator = new DataMapToGenericRecordTranslator(options); try { @@ -116,6 +131,7 @@ public static GenericRecord dataMapToGenericRecord(DataMap map, RecordDataSchema public static T dataMapToSpecificRecord(DataMap map, RecordDataSchema dataSchema, Schema avroSchema) throws DataTranslationException { + backFillMissingDeafultInDataMap(map, dataSchema); DataMapToSpecificRecordTranslator translator = new DataMapToSpecificRecordTranslator(); try { T avroRecord = translator.translate(map, dataSchema, avroSchema); @@ -145,6 +161,7 @@ public static T dataMapToSpecificRecord(DataMap m */ public static GenericRecord dataMapToGenericRecord(DataMap map, RecordDataSchema dataSchema, Schema avroSchema) throws DataTranslationException { + backFillMissingDeafultInDataMap(map, dataSchema); return dataMapToGenericRecord(map, dataSchema, avroSchema, null); } diff --git a/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java b/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java index 3abd677853..aaf22eeb5d 100644 --- a/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java +++ b/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java @@ -17,6 +17,8 @@ package com.linkedin.data.avro; import com.google.common.collect.ImmutableMap; +import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; +import com.linkedin.data.Data; import com.linkedin.data.DataList; import com.linkedin.data.DataMap; import com.linkedin.data.TestUtil; @@ -28,6 +30,7 @@ import com.linkedin.data.avro.testevents.RecordArray; import com.linkedin.data.avro.testevents.RecordMap; import com.linkedin.data.avro.testevents.StringRecord; +import com.linkedin.data.avro.testevents.TestArray; import com.linkedin.data.avro.testevents.TestEventRecordOfRecord; import com.linkedin.data.avro.testevents.TestEventWithUnionAndEnum; import com.linkedin.data.avro.util.AvroUtil; @@ -1165,7 +1168,9 @@ private void testDataTranslation(String schemaText, String[][] row) throws IOExc String expectedBeforeNamespaceProcessor = row[col][i]; String expected = TestAvroUtil.namespaceProcessor(expectedBeforeNamespaceProcessor); if (debug && expected != expectedBeforeNamespaceProcessor) out.println(" Expected:" + expected); - + if (!result.contains(expected)) { + System.out.println("RESULT: "+result+"\n"+"EXPECTED: "+expected); + } assertTrue(result.contains(expected)); } @@ -2291,5 +2296,29 @@ public void testMapArrayUnion() throws IOException { Assert.assertTrue(mapOfMapOfArrayOfMapArrayUnion.get(0) instanceof Map); Assert.assertEquals(((Map) mapOfMapOfArrayOfMapArrayUnion.get(0)).get("recordMap"), mapOfArrayOfMapArrayUnion); } + + @Test + public void testDataMapBackfill() throws IOException { + final String SCHEMA = + "{" + + " \"type\":\"record\"," + + " \"name\":\"Foo\"," + + " \"fields\":[" + + " {" + + " \"name\":\"arrayField\"," + + " \"type\":{" + + " \"type\":\"array\"," + + " \"items\":\"string\"" + + " }," + + " \"default\":[ ]" + + " }" + + " ]" + + "}"; + RecordDataSchema recordDataSchema = + (RecordDataSchema) TestUtil.dataSchemaFromString(SCHEMA); + DataMap map = new DataMap(); + GenericRecord record = DataTranslator.dataMapToGenericRecord(map, recordDataSchema); + Assert.assertEquals(record.get("arrayField"), map.get("arrayField")); + } } From 5d8addc4b853e24df3baf8d09bc2cc5b3c839ea8 Mon Sep 17 00:00:00 2001 From: Srilakshmi Manjunath Bharadwaj Date: Fri, 18 Apr 2025 19:49:36 -0700 Subject: [PATCH 2/4] Test emoji in string field --- .../data/avro/TestDataTranslator.java | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java b/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java index aaf22eeb5d..aa3b80c5bc 100644 --- a/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java +++ b/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java @@ -40,6 +40,7 @@ import com.linkedin.data.schema.validation.ValidateDataAgainstSchema; import com.linkedin.data.schema.validation.ValidationOptions; import com.linkedin.data.schema.validation.ValidationResult; +import com.linkedin.data.template.RecordTemplate; import java.io.FileDescriptor; import java.io.FileOutputStream; import java.io.IOException; @@ -2320,5 +2321,49 @@ public void testDataMapBackfill() throws IOException { GenericRecord record = DataTranslator.dataMapToGenericRecord(map, recordDataSchema); Assert.assertEquals(record.get("arrayField"), map.get("arrayField")); } + + static RecordDataSchema getPegasusSchema(Class pegasusSchemaClass) { + try { + return pegasusSchemaClass.newInstance().schema(); + } catch (InstantiationException e) { + throw new IllegalStateException( + "Failed to instantiate pegasus schema object for class: " + pegasusSchemaClass.getName()); + } catch (IllegalAccessException e) { + throw new IllegalStateException("Failed to get pegasus schema for class: " + pegasusSchemaClass.getName()); + } + } + + + @Test + public void testEmoji() throws IOException { + String schemaText = "{\n" + + " \"type\" : \"record\",\n" + + " \"name\" : \"Foo\",\n" + + " \"fields\" : [\n" + + " { \"name\" : \"a\", \"type\" : { \"type\" : \"record\", \"name\" : \"FooFoo\", \"fields\" : [ { \"name\" : \"b\", \"type\" : \"string\" } ] }, \"optional\": true }\n" + + " ]\n" + + "}\n"; + + RecordDataSchema recordDataSchema = (RecordDataSchema) TestUtil.dataSchemaFromString(schemaText); + String avroSchemaText = "{\n" + + " \"type\" : \"record\",\n" + + " \"name\" : \"Foo\",\n" + + " \"fields\" : [\n" + + " { \"name\" : \"a\", \"type\" : [ \"null\", { \"type\" : \"record\", \"name\" : \"FooFoo\", \"fields\" : [ { \"name\" : \"b\", \"type\" : \"string\" } ] } ] }\n" + + " ]\n" + + "}\n"; + + Schema avroSchema = Schema.parse(avroSchemaText); + + GenericRecord avroRecord = AvroUtil.genericRecordFromJson(TestAvroUtil.namespaceProcessor("{ \"a\" : { \"FooFoo\": { \"b\" : \"❤\" } } }"), avroSchema); + System.out.println(avroRecord); + DataMap pegasusDataMap = DataTranslator.genericRecordToDataMap(avroRecord, recordDataSchema, avroSchema); + System.out.println(pegasusDataMap); + DataMap innerMap = new DataMap(); + innerMap.put("b", "❤"); + DataMap expectedMap = new DataMap(); + expectedMap.put("a", innerMap); + assertEquals(pegasusDataMap, expectedMap); + } } From ee5e4a32785433c68bd4d09989ba06e518682bd4 Mon Sep 17 00:00:00 2001 From: Srilakshmi Manjunath Bharadwaj Date: Fri, 18 Apr 2025 20:24:16 -0700 Subject: [PATCH 3/4] remove other changes --- .../linkedin/data/avro/DataTranslator.java | 18 ---------- .../data/avro/TestDataTranslator.java | 36 ------------------- 2 files changed, 54 deletions(-) diff --git a/data-avro/src/main/java/com/linkedin/data/avro/DataTranslator.java b/data-avro/src/main/java/com/linkedin/data/avro/DataTranslator.java index a7d38fad8a..a4814a54f1 100644 --- a/data-avro/src/main/java/com/linkedin/data/avro/DataTranslator.java +++ b/data-avro/src/main/java/com/linkedin/data/avro/DataTranslator.java @@ -60,19 +60,6 @@ public class DataTranslator implements DataTranslatorContext { protected DataTranslationOptions _dataTranslationOptions; - - public static void backFillMissingDeafultInDataMap(DataMap map, RecordDataSchema dataSchema) { - for (RecordDataSchema.Field field: dataSchema.getFields()) { - DataSchema fieldType = field.getType(); - if ( fieldType.getDereferencedType() != DataSchema.Type.ARRAY && fieldType.getDereferencedType() != DataSchema.Type.MAP) { - continue; - } - if (field.getDefault() != null && (!map.containsKey(field.getName()) || map.get(field.getName()) == null)) { - map.put(field.getName(), field.getDefault()); - } - } - } - /** * Convert the given {@link DataMap} conforming to the provided {@link RecordDataSchema} to a {@link GenericRecord}. * @@ -86,7 +73,6 @@ public static void backFillMissingDeafultInDataMap(DataMap map, RecordDataSchema */ public static GenericRecord dataMapToGenericRecord(DataMap map, RecordDataSchema dataSchema) throws DataTranslationException { - backFillMissingDeafultInDataMap(map, dataSchema); Schema avroSchema = SchemaTranslator.dataToAvroSchema(dataSchema); return dataMapToGenericRecord(map, dataSchema, avroSchema, null); } @@ -107,7 +93,6 @@ public static GenericRecord dataMapToGenericRecord(DataMap map, RecordDataSchema DataMapToAvroRecordTranslationOptions options) throws DataTranslationException { - backFillMissingDeafultInDataMap(map, dataSchema); Schema avroSchema = SchemaTranslator.dataToAvroSchema(dataSchema); return dataMapToGenericRecord(map, dataSchema, avroSchema, options); } @@ -116,7 +101,6 @@ public static GenericRecord dataMapToGenericRecord(DataMap map, RecordDataSchema DataMapToAvroRecordTranslationOptions options) throws DataTranslationException { - backFillMissingDeafultInDataMap(map, dataSchema); DataMapToGenericRecordTranslator translator = new DataMapToGenericRecordTranslator(options); try { @@ -131,7 +115,6 @@ public static GenericRecord dataMapToGenericRecord(DataMap map, RecordDataSchema public static T dataMapToSpecificRecord(DataMap map, RecordDataSchema dataSchema, Schema avroSchema) throws DataTranslationException { - backFillMissingDeafultInDataMap(map, dataSchema); DataMapToSpecificRecordTranslator translator = new DataMapToSpecificRecordTranslator(); try { T avroRecord = translator.translate(map, dataSchema, avroSchema); @@ -161,7 +144,6 @@ public static T dataMapToSpecificRecord(DataMap m */ public static GenericRecord dataMapToGenericRecord(DataMap map, RecordDataSchema dataSchema, Schema avroSchema) throws DataTranslationException { - backFillMissingDeafultInDataMap(map, dataSchema); return dataMapToGenericRecord(map, dataSchema, avroSchema, null); } diff --git a/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java b/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java index aa3b80c5bc..029d56af1f 100644 --- a/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java +++ b/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java @@ -2298,42 +2298,6 @@ public void testMapArrayUnion() throws IOException { Assert.assertEquals(((Map) mapOfMapOfArrayOfMapArrayUnion.get(0)).get("recordMap"), mapOfArrayOfMapArrayUnion); } - @Test - public void testDataMapBackfill() throws IOException { - final String SCHEMA = - "{" + - " \"type\":\"record\"," + - " \"name\":\"Foo\"," + - " \"fields\":[" + - " {" + - " \"name\":\"arrayField\"," + - " \"type\":{" + - " \"type\":\"array\"," + - " \"items\":\"string\"" + - " }," + - " \"default\":[ ]" + - " }" + - " ]" + - "}"; - RecordDataSchema recordDataSchema = - (RecordDataSchema) TestUtil.dataSchemaFromString(SCHEMA); - DataMap map = new DataMap(); - GenericRecord record = DataTranslator.dataMapToGenericRecord(map, recordDataSchema); - Assert.assertEquals(record.get("arrayField"), map.get("arrayField")); - } - - static RecordDataSchema getPegasusSchema(Class pegasusSchemaClass) { - try { - return pegasusSchemaClass.newInstance().schema(); - } catch (InstantiationException e) { - throw new IllegalStateException( - "Failed to instantiate pegasus schema object for class: " + pegasusSchemaClass.getName()); - } catch (IllegalAccessException e) { - throw new IllegalStateException("Failed to get pegasus schema for class: " + pegasusSchemaClass.getName()); - } - } - - @Test public void testEmoji() throws IOException { String schemaText = "{\n" + From 23439ba39ed8478c8f798792f3e75749a0b79201 Mon Sep 17 00:00:00 2001 From: Srilakshmi Manjunath Bharadwaj Date: Fri, 18 Apr 2025 20:25:47 -0700 Subject: [PATCH 4/4] add log --- .../test/java/com/linkedin/data/avro/TestDataTranslator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java b/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java index 029d56af1f..07bf41e917 100644 --- a/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java +++ b/data-avro/src/test/java/com/linkedin/data/avro/TestDataTranslator.java @@ -2320,9 +2320,9 @@ public void testEmoji() throws IOException { Schema avroSchema = Schema.parse(avroSchemaText); GenericRecord avroRecord = AvroUtil.genericRecordFromJson(TestAvroUtil.namespaceProcessor("{ \"a\" : { \"FooFoo\": { \"b\" : \"❤\" } } }"), avroSchema); - System.out.println(avroRecord); + System.out.println("Original avro record = "+avroRecord); DataMap pegasusDataMap = DataTranslator.genericRecordToDataMap(avroRecord, recordDataSchema, avroSchema); - System.out.println(pegasusDataMap); + System.out.println("Avro record to data map = "+pegasusDataMap); DataMap innerMap = new DataMap(); innerMap.put("b", "❤"); DataMap expectedMap = new DataMap();