forked from apache/paimon
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add debezium-json format apache#2227
- Loading branch information
1 parent
30525c1
commit edfb615
Showing
32 changed files
with
1,489 additions
and
224 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
218 changes: 218 additions & 0 deletions
218
...ain/java/org/apache/paimon/flink/action/cdc/format/debezium/DebeziumJsonRecordParser.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.paimon.flink.action.cdc.format.debezium; | ||
|
||
import org.apache.paimon.flink.action.cdc.ComputedColumn; | ||
import org.apache.paimon.flink.action.cdc.TypeMapping; | ||
import org.apache.paimon.flink.action.cdc.format.RecordParser; | ||
import org.apache.paimon.flink.sink.cdc.RichCdcMultiplexRecord; | ||
import org.apache.paimon.types.RowKind; | ||
import org.apache.paimon.utils.Pair; | ||
import org.apache.paimon.utils.StringUtils; | ||
|
||
import org.apache.paimon.shade.guava30.com.google.common.collect.Lists; | ||
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.JsonProcessingException; | ||
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.databind.JsonNode; | ||
|
||
import org.apache.commons.lang3.BooleanUtils; | ||
import org.apache.flink.util.Collector; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Objects; | ||
|
||
import static org.apache.paimon.utils.JsonSerdeUtil.isNull; | ||
import static org.apache.paimon.utils.Preconditions.checkArgument; | ||
|
||
/** | ||
* The {@code DebeziumRecordParser} class extends the abstract {@link RecordParser} and is designed | ||
* to parse records from Debezium's JSON change data capture (CDC) format. Debezium is a CDC | ||
* solution for MySQL databases that captures row-level changes to database tables and outputs them | ||
* in JSON format. This parser extracts relevant information from the debezium-json format and | ||
* converts it into a list of {@link RichCdcMultiplexRecord} objects. | ||
* | ||
* <p>The class supports various database operations such as INSERT, UPDATE, and DELETE, and creates | ||
* corresponding {@link RichCdcMultiplexRecord} objects to represent these changes. | ||
* | ||
* <p>Validation is performed to ensure that the JSON records contain all necessary fields, and the | ||
* class also supports schema extraction for the Kafka topic. | ||
*/ | ||
public class DebeziumJsonRecordParser extends RecordParser { | ||
|
||
private static final String RECORD_PAYLOAD = "payload"; | ||
private static final String RECORD_SCHEMA = "schema"; | ||
private static final String RECORD_SOURCE = "source"; | ||
private static final String RECORD_SOURCE_DB = "db"; | ||
private static final String FIELD_BEFORE = "before"; | ||
private static final String OP_FIELD = "op"; | ||
private static final String OP_READ = "r"; // snapshot read | ||
private static final String OP_CREATE = "c"; // insert | ||
private static final String OP_UPDATE = "u"; // update | ||
private static final String OP_DELETE = "d"; // delete | ||
|
||
protected JsonNode keyPayload; | ||
protected JsonNode source; | ||
|
||
public DebeziumJsonRecordParser( | ||
boolean caseSensitive, TypeMapping typeMapping, List<ComputedColumn> computedColumns) { | ||
super(caseSensitive, typeMapping, computedColumns); | ||
} | ||
|
||
@Override | ||
protected Boolean isDDL() { | ||
return root.has("tableChanges"); | ||
} | ||
|
||
@Override | ||
protected List<RichCdcMultiplexRecord> extractRecords() { | ||
List<RichCdcMultiplexRecord> records = new ArrayList<>(); | ||
// skip ddl | ||
if (BooleanUtils.isTrue(this.isDDL())) { | ||
return Collections.emptyList(); | ||
} | ||
String operation = extractStringFromRootJson(OP_FIELD); | ||
switch (operation) { | ||
case OP_CREATE: | ||
case OP_READ: | ||
processRecord(root.get(dataField()), RowKind.INSERT, records); | ||
break; | ||
case OP_UPDATE: | ||
processRecord(root.get(FIELD_BEFORE), RowKind.DELETE, records); | ||
processRecord(root.get(dataField()), RowKind.INSERT, records); | ||
break; | ||
case OP_DELETE: | ||
processRecord(root.get(FIELD_BEFORE), RowKind.DELETE, records); | ||
break; | ||
default: | ||
throw new UnsupportedOperationException("Unknown record operation: " + operation); | ||
} | ||
return records; | ||
} | ||
|
||
@Override | ||
protected void validateFormat() { | ||
String errorMessageTemplate = | ||
"Didn't find '%s' node in json. Please make sure your topic's format is correct."; | ||
checkArgument(!isNull(keyPayload), errorMessageTemplate, FIELD_RECORD_KEY); | ||
checkArgument(!isNull(source), errorMessageTemplate, RECORD_SOURCE); | ||
checkArgument(!isNull(source.get(FIELD_TABLE)), errorMessageTemplate, FIELD_TABLE); | ||
checkArgument( | ||
!isNull(source.get(RECORD_SOURCE_DB)), errorMessageTemplate, RECORD_SOURCE_DB); | ||
checkArgument(!isNull(root.get(OP_FIELD)), errorMessageTemplate, OP_FIELD); | ||
if (root.get(OP_FIELD).asText().equals(OP_DELETE)) { | ||
checkArgument(!isNull(root.get(FIELD_BEFORE)), errorMessageTemplate, FIELD_BEFORE); | ||
} else { | ||
checkArgument(!isNull(root.get(dataField())), errorMessageTemplate, dataField()); | ||
} | ||
} | ||
|
||
@Override | ||
protected String primaryField() { | ||
// No-op | ||
return null; | ||
} | ||
|
||
@Override | ||
protected String dataField() { | ||
return "after"; | ||
} | ||
|
||
@Override | ||
protected void extractPrimaryKeys() { | ||
primaryKeys = Lists.newArrayList(keyPayload.fieldNames()); | ||
} | ||
|
||
@Override | ||
public void flatMap(Pair record, Collector<RichCdcMultiplexRecord> out) throws Exception { | ||
// Extract debezium record key payload field | ||
if (Objects.nonNull(record.getKey())) { | ||
this.extractKeyPayload( | ||
OBJECT_MAPPER.readValue((String) record.getKey(), JsonNode.class)); | ||
} | ||
// Extract debezium record value payload field | ||
if (Objects.nonNull(record.getRight())) { | ||
this.extractValuePayload( | ||
OBJECT_MAPPER.readValue((String) record.getValue(), JsonNode.class)); | ||
} | ||
this.validateFormat(); | ||
databaseName = extractStringFromRootJson(FIELD_DATABASE); | ||
tableName = extractStringFromRootJson(FIELD_TABLE); | ||
extractRecords().forEach(out::collect); | ||
} | ||
|
||
@Override | ||
protected void parseKeyIfNeed(String key) { | ||
if (StringUtils.isBlank(key)) { | ||
return; | ||
} | ||
try { | ||
// For extract primary key | ||
JsonNode recordData = OBJECT_MAPPER.readValue(key, JsonNode.class); | ||
extractKeyPayload(recordData); | ||
} catch (JsonProcessingException e) { | ||
throw new RuntimeException("Error processing record key JSON: " + key, e); | ||
} | ||
} | ||
|
||
private void extractKeyPayload(JsonNode recordData) { | ||
if (includeSchema(recordData)) { | ||
keyPayload = recordData.get(RECORD_PAYLOAD); | ||
} else { | ||
keyPayload = recordData; | ||
} | ||
} | ||
|
||
@Override | ||
protected void parseRootJson(String record) { | ||
try { | ||
JsonNode recordData = OBJECT_MAPPER.readValue(record, JsonNode.class); | ||
extractValuePayload(recordData); | ||
} catch (JsonProcessingException e) { | ||
throw new RuntimeException("Error processing JSON: " + record, e); | ||
} | ||
} | ||
|
||
private void extractValuePayload(JsonNode recordData) { | ||
// for extract record value | ||
if (includeSchema(recordData)) { | ||
root = recordData.get(RECORD_PAYLOAD); | ||
} else { | ||
root = recordData; | ||
} | ||
// For extract record metadata | ||
source = root.get(RECORD_SOURCE); | ||
} | ||
|
||
@Override | ||
protected String extractStringFromRootJson(String key) { | ||
if (key.equals(FIELD_TABLE)) { | ||
tableName = source.get(FIELD_TABLE).asText(); | ||
return tableName; | ||
} else if (key.equals(FIELD_DATABASE)) { | ||
this.databaseName = source.get(RECORD_SOURCE_DB).asText(); | ||
return databaseName; | ||
} | ||
return root.get(key) != null ? root.get(key).asText() : null; | ||
} | ||
|
||
private boolean includeSchema(JsonNode record) { | ||
return record.size() == 2 && record.has(RECORD_SCHEMA) && record.has(RECORD_PAYLOAD); | ||
} | ||
} |
Oops, something went wrong.