From 6c40e602f046d90cf2fd6386a8ded4ccde8d85af Mon Sep 17 00:00:00 2001
From: wangxiaogang
Date: Thu, 21 Aug 2025 17:13:24 +0800
Subject: [PATCH 1/5] X2SeaTunnel v0.0.1

---
 README.md | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 pom.xml   | 38 ++++++++++++++++++++------------------
 2 files changed, 64 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index 8372d24..3ec28d3 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,47 @@
 # Apache SeaTunnel Tools
 
-The repository contains tools for Apache SeaTunnel.
+This repository hosts auxiliary tools for Apache SeaTunnel. It focuses on developer/operator productivity around configuration, conversion, packaging and diagnostics. Current modules:
 
-Get the main project from [Apache SeaTunnel](https://github.com/apache/seatunnel)
\ No newline at end of file
+- x2seatunnel: Convert configurations (e.g., DataX) into SeaTunnel configuration files.
+
+More tools may be added in the future. For the main data integration engine, see the
+[Apache SeaTunnel](https://github.com/apache/seatunnel) project.
+
+## Modules documentation
+
+- x2seatunnel
+  - English: [x2seatunnel/README.md](x2seatunnel/README.md)
+  - 中文: [x2seatunnel/README_zh.md](x2seatunnel/README_zh.md)
+
+## Build and Test
+
+Prerequisites:
+- Java 8+
+- Maven 3.6+
+
+Build the whole repository:
+
+```bash
+mvn -T 1C -e -DskipIT clean verify
+```
+
+Build only a submodule (x2seatunnel as example):
+
+```bash
+mvn -pl x2seatunnel -am -DskipTests clean package
+```
+
+Artifacts will be generated under `x2seatunnel/target/`:
+- Runnable JAR: `x2seatunnel-<version>.jar`
+- Distribution ZIP: `x2seatunnel-<version>-bin.zip` (or similar)
+
+Unzip the distribution and follow the submodule README to run.
+
+## Versioning and Dependencies
+
+This repository depends on released SeaTunnel artifacts (e.g., `seatunnel-common`, `seatunnel-jackson`).
+Versions are centrally managed via the `seatunnel.version` property in the root POM.
+
+## Contributing
+
+Issues and PRs are welcome.
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index a795130..1488ee7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -34,24 +34,29 @@
 Production ready big data processing product based on Apache Spark and Apache Flink.
- - seatunnel + x2seatunnel - + 0.0.1-SNAPSHOT UTF-8 1.8 - 2.12.15 - 2.12 ${java.version} ${java.version} + + + 2.3.11 + 1.7.36 + 2.17.1 + 4.13.2 + 5.9.0 + 4.11.0 + 1.33 + 1.4 + 2.16.1 + 2.22.2 2.22.2 1.6.8 @@ -67,21 +72,19 @@ 1.20 3.1.1 2.0.0 + 3.3.0 2.29.0 false true false - - - ${project.artifactId}-${project.version}-${scala.version} + ${project.artifactId}-${project.version} - org.apache.maven.plugins maven-compiler-plugin @@ -154,8 +157,7 @@ org.slf4j:* ch.qos.logback:* - log4j:* - org.apache.logging.log4j:* + log4j:log4j commons-logging:* @@ -463,9 +465,9 @@ - scm:git:https://github.com/apache/seatunnel.git - scm:git:https://github.com/apache/seatunnel.git - https://github.com/apache/seatunnel + scm:git:https://github.com/apache/seatunnel-tools.git + scm:git:https://github.com/apache/seatunnel-tools.git + https://github.com/apache/seatunnel-tools HEAD From 4335f55a3a6e8df8e623ee6466ad02321946f10a Mon Sep 17 00:00:00 2001 From: wangxiaogang Date: Thu, 21 Aug 2025 17:15:11 +0800 Subject: [PATCH 2/5] X2SeaTunnel v0.0.1 --- README_zh.md | 47 + x2seatunnel/.gitignore | 17 + x2seatunnel/README.md | 448 +++++ x2seatunnel/README_zh.md | 508 +++++ x2seatunnel/pom.xml | 238 +++ .../main/assembly/x2seatunnel-standalone.xml | 108 ++ .../x2seatunnel/cli/CommandLineOptions.java | 133 ++ .../tools/x2seatunnel/cli/X2SeaTunnelCli.java | 250 +++ .../x2seatunnel/core/ConversionEngine.java | 368 ++++ .../x2seatunnel/model/MappingResult.java | 322 ++++ .../x2seatunnel/model/MappingTracker.java | 327 ++++ .../x2seatunnel/model/SeaTunnelConfig.java | 203 ++ .../report/MarkdownReportGenerator.java | 467 +++++ .../template/ConfigDrivenTemplateEngine.java | 368 ++++ .../template/TemplateMappingManager.java | 252 +++ .../template/TemplateVariableResolver.java | 1653 +++++++++++++++++ .../util/BatchConversionReport.java | 244 +++ .../x2seatunnel/util/ConversionConfig.java | 76 + .../x2seatunnel/util/DataXFieldExtractor.java | 341 ++++ .../x2seatunnel/util/DirectoryProcessor.java | 79 + .../tools/x2seatunnel/util/FilePattern.java | 48 + .../tools/x2seatunnel/util/FileUtils.java | 216 +++ .../tools/x2seatunnel/util/PathResolver.java | 195 ++ .../util/TemplateFieldExtractor.java | 143 ++ .../x2seatunnel/util/YamlConfigParser.java | 66 + .../src/main/resources/bin/x2seatunnel.sh | 136 ++ .../src/main/resources/config/log4j2.xml | 49 + .../main/resources/examples/report/.gitkeep | 0 .../examples/source/datax-hdfs2mysql.json | 38 + .../source/datax-mysql2hdfs-full.json | 75 + .../examples/source/datax-mysql2hdfs.json | 43 + .../source/datax-mysql2hdfs2hive.json | 94 + .../source/datax-mysql2mysql-full.json | 63 + .../examples/source/datax-mysql2mysql.json | 45 + .../source/datax-oracle2hdfs-full.json | 75 + .../source/datax-postgresql2hdfs-full.json | 75 + .../source/datax-postgresql2hdfs.json | 40 + .../source/datax-sqlserver2hdfs-full.json | 75 + .../examples/yaml/datax-mysql2hdfs2hive.yaml | 23 + .../templates/datax/custom/mysql-to-hive.conf | 73 + .../templates/datax/env/batch-env.conf | 29 + .../templates/datax/sinks/hdfs-sink.conf | 63 + .../templates/datax/sinks/jdbc-sink.conf | 44 + .../templates/datax/sources/hdfs-source.conf | 105 ++ .../templates/datax/sources/jdbc-source.conf | 58 + .../datax/sources/localfile-source.conf | 103 + .../templates/report/report-template-zh.md | 49 + .../templates/report/report-template.md | 49 + .../resources/templates/template-mapping.yaml | 108 ++ .../cli/CommandLineOptionsTest.java | 37 + .../x2seatunnel/model/MappingTrackerTest.java | 208 +++ 
.../MarkdownReportGeneratorEnhancedTest.java | 149 ++ .../TemplateVariableResolverMappingTest.java | 259 +++ .../TemplateVariableResolverTest.java | 101 + .../tools/x2seatunnel/util/FileUtilsTest.java | 46 + .../util/YamlConfigParserTest.java | 57 + 56 files changed, 9486 insertions(+) create mode 100644 README_zh.md create mode 100644 x2seatunnel/.gitignore create mode 100644 x2seatunnel/README.md create mode 100644 x2seatunnel/README_zh.md create mode 100644 x2seatunnel/pom.xml create mode 100644 x2seatunnel/src/main/assembly/x2seatunnel-standalone.xml create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/cli/CommandLineOptions.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/cli/X2SeaTunnelCli.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/core/ConversionEngine.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/model/MappingResult.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/model/MappingTracker.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/model/SeaTunnelConfig.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/report/MarkdownReportGenerator.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/template/ConfigDrivenTemplateEngine.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateMappingManager.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateVariableResolver.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/BatchConversionReport.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/ConversionConfig.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/DataXFieldExtractor.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/DirectoryProcessor.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/FilePattern.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/FileUtils.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/PathResolver.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/TemplateFieldExtractor.java create mode 100644 x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/YamlConfigParser.java create mode 100644 x2seatunnel/src/main/resources/bin/x2seatunnel.sh create mode 100644 x2seatunnel/src/main/resources/config/log4j2.xml create mode 100644 x2seatunnel/src/main/resources/examples/report/.gitkeep create mode 100644 x2seatunnel/src/main/resources/examples/source/datax-hdfs2mysql.json create mode 100644 x2seatunnel/src/main/resources/examples/source/datax-mysql2hdfs-full.json create mode 100644 x2seatunnel/src/main/resources/examples/source/datax-mysql2hdfs.json create mode 100644 x2seatunnel/src/main/resources/examples/source/datax-mysql2hdfs2hive.json create mode 100644 x2seatunnel/src/main/resources/examples/source/datax-mysql2mysql-full.json create mode 100644 x2seatunnel/src/main/resources/examples/source/datax-mysql2mysql.json create mode 100644 
x2seatunnel/src/main/resources/examples/source/datax-oracle2hdfs-full.json
 create mode 100644 x2seatunnel/src/main/resources/examples/source/datax-postgresql2hdfs-full.json
 create mode 100644 x2seatunnel/src/main/resources/examples/source/datax-postgresql2hdfs.json
 create mode 100644 x2seatunnel/src/main/resources/examples/source/datax-sqlserver2hdfs-full.json
 create mode 100644 x2seatunnel/src/main/resources/examples/yaml/datax-mysql2hdfs2hive.yaml
 create mode 100644 x2seatunnel/src/main/resources/templates/datax/custom/mysql-to-hive.conf
 create mode 100644 x2seatunnel/src/main/resources/templates/datax/env/batch-env.conf
 create mode 100644 x2seatunnel/src/main/resources/templates/datax/sinks/hdfs-sink.conf
 create mode 100644 x2seatunnel/src/main/resources/templates/datax/sinks/jdbc-sink.conf
 create mode 100644 x2seatunnel/src/main/resources/templates/datax/sources/hdfs-source.conf
 create mode 100644 x2seatunnel/src/main/resources/templates/datax/sources/jdbc-source.conf
 create mode 100644 x2seatunnel/src/main/resources/templates/datax/sources/localfile-source.conf
 create mode 100644 x2seatunnel/src/main/resources/templates/report/report-template-zh.md
 create mode 100644 x2seatunnel/src/main/resources/templates/report/report-template.md
 create mode 100644 x2seatunnel/src/main/resources/templates/template-mapping.yaml
 create mode 100644 x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/cli/CommandLineOptionsTest.java
 create mode 100644 x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/model/MappingTrackerTest.java
 create mode 100644 x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/report/MarkdownReportGeneratorEnhancedTest.java
 create mode 100644 x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateVariableResolverMappingTest.java
 create mode 100644 x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateVariableResolverTest.java
 create mode 100644 x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/util/FileUtilsTest.java
 create mode 100644 x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/util/YamlConfigParserTest.java

diff --git a/README_zh.md b/README_zh.md
new file mode 100644
index 0000000..afb36e0
--- /dev/null
+++ b/README_zh.md
@@ -0,0 +1,47 @@
+# SeaTunnel Tools(工具集)
+
+本仓库用于沉淀与 Apache SeaTunnel 相关的周边工具,目标是提升配置生产力、迁移与运维体验。目前包含:
+
+- x2seatunnel:将 DataX 等配置转换为 SeaTunnel 配置文件的工具。
+
+未来可能会新增更多模块;SeaTunnel 引擎本体请参考
+[Apache SeaTunnel](https://github.com/apache/seatunnel)。
+
+## 模块文档导航
+
+- x2seatunnel
+  - 英文:[x2seatunnel/README.md](x2seatunnel/README.md)
+  - 中文:[x2seatunnel/README_zh.md](x2seatunnel/README_zh.md)
+
+## 构建与测试
+
+先决条件:
+- Java 8+
+- Maven 3.6+
+
+构建整个仓库:
+
+```bash
+mvn -T 1C -e -DskipIT clean verify
+```
+
+仅构建某个子模块(例如 x2seatunnel):
+
+```bash
+mvn -pl x2seatunnel -am -DskipTests clean package
+```
+
+产物在 `x2seatunnel/target/`:
+- 可运行 JAR:`x2seatunnel-<version>.jar`
+- 分发 ZIP:`x2seatunnel-<version>-bin.zip`(或类似命名)
+
+解压后参考子模块 README 进行运行。
+
+## 版本与依赖
+
+本仓库依赖已发布的 SeaTunnel 组件(如 `seatunnel-common`、`seatunnel-jackson`)。
+版本通过根 POM 的 `seatunnel.version` 统一管理(当前为 2.3.11)。
+
+## 贡献
+
+欢迎提交 Issue 与 PR。
diff --git a/x2seatunnel/.gitignore b/x2seatunnel/.gitignore
new file mode 100644
index 0000000..f5e1f30
--- /dev/null
+++ b/x2seatunnel/.gitignore
@@ -0,0 +1,17 @@
+# Files generated by X2SeaTunnel tests
+src/main/resources/examples/target*/*.conf
+src/main/resources/examples/report*/*.md
+
+# Keep the example files
+!src/main/resources/examples/report*/summary-example.md + +target/ + +# IDE +.idea/ +*.iml +.vscode/ + +# log +logs/ +*.log diff --git a/x2seatunnel/README.md b/x2seatunnel/README.md new file mode 100644 index 0000000..7776eee --- /dev/null +++ b/x2seatunnel/README.md @@ -0,0 +1,448 @@ +# X2SeaTunnel Configuration Conversion Tool + +X2SeaTunnel is a tool for converting DataX and other configuration files to SeaTunnel configuration files, designed to help users quickly migrate from other data integration platforms to SeaTunnel. + +## 🚀 Quick Start + +### Prerequisites + +- Java 8 or higher + +### Installation + +#### Build from Source +```bash +# Build x2seatunnel module in this repository +mvn clean package -pl x2seatunnel -DskipTests +``` +After compilation, the release package will be at `x2seatunnel/target/x2seatunnel-*.zip`. + +#### Using Release Package +```bash +# Download and extract release package +unzip x2seatunnel-*.zip +cd x2seatunnel-*/ +``` + +### Basic Usage + +```bash +# Standard conversion: Use default template system with built-in common Sources and Sinks +./bin/x2seatunnel.sh -s examples/source/datax-mysql2hdfs.json -t examples/target/mysql2hdfs-result.conf -r examples/report/mysql2hdfs-report.md + +# Custom task: Implement customized conversion requirements through custom templates +# Scenario: MySQL → Hive (DataX doesn't have HiveWriter) +# DataX configuration: MySQL → HDFS Custom task: Convert to MySQL → Hive +./bin/x2seatunnel.sh -s examples/source/datax-mysql2hdfs2hive.json -t examples/target/mysql2hive-result.conf -r examples/report/mysql2hive-report.md -T templates/datax/custom/mysql-to-hive.conf + +# YAML configuration method (equivalent to above command line parameters) +./bin/x2seatunnel.sh -c examples/yaml/datax-mysql2hdfs2hive.yaml + +# Batch conversion mode: Process by directory +./bin/x2seatunnel.sh -d examples/source -o examples/target2 -R examples/report2 + +# Batch mode supports wildcard filtering +./bin/x2seatunnel.sh -d examples/source -o examples/target3 -R examples/report3 --pattern "*-full.json" --verbose + +# View help +./bin/x2seatunnel.sh --help +``` + +### Conversion Report +After conversion is completed, view the generated Markdown report file, which includes: +- **Basic Information**: Conversion time, source/target file paths, connector types, conversion status, etc. +- **Conversion Statistics**: Counts and percentages of direct mappings, smart transformations, default values used, and unmapped fields +- **Detailed Field Mapping Relationships**: Source values, target values, filters used for each field +- **Default Value Usage**: List of all fields using default values +- **Unmapped Fields**: Fields present in DataX but not converted +- **Possible Error and Warning Information**: Issue prompts during conversion process + +For batch conversions, a batch summary report `summary.md` will be generated in the batch report directory, including: +- **Conversion Overview**: Overall statistics, success rate, duration, etc. 
+- **Successful Conversion List**: Complete list of successfully converted files +- **Failed Conversion List**: Failed files and error messages (if any) + +### Log Files +```bash +# View log files +tail -f logs/x2seatunnel.log +``` + +## 🎯 Features + +- ✅ **Standard Configuration Conversion**: DataX → SeaTunnel configuration file conversion +- ✅ **Custom Template Conversion**: Support for user-defined conversion templates +- ✅ **Detailed Conversion Reports**: Generate Markdown format conversion reports +- ✅ **Regular Expression Variable Extraction**: Extract variables from configuration using regex, supporting custom scenarios +- ✅ **Batch Conversion Mode**: Support directory and file wildcard batch conversion, automatic report and summary report generation + +## 📁 Directory Structure + +``` +x2seatunnel/ +├── bin/ # Executable files +│ ├── x2seatunnel.sh # Startup script +├── lib/ # JAR package files +│ └── x2seatunnel-*.jar # Core JAR package +├── config/ # Configuration files +│ └── log4j2.xml # Log configuration +├── templates/ # Template files +│ ├── template-mapping.yaml # Template mapping configuration +│ ├── report-template.md # Report template +│ └── datax/ # DataX related templates +│ ├── custom/ # Custom templates +│ ├── env/ # Environment configuration templates +│ ├── sources/ # Data source templates +│ └── sinks/ # Data target templates +├── examples/ # Examples and tests +│ ├── source/ # Example source files +│ ├── target/ # Generated target files +│ └── report/ # Generated reports +├── logs/ # Log files +├── LICENSE # License +└── README.md # Usage instructions +``` + +## 📖 Usage Instructions + +### Basic Syntax + +```bash +x2seatunnel [OPTIONS] +``` + +### Command Line Parameters + +| Option | Long Option | Description | Required | +|----------|-----------------|-------------------------------------------------------------|----------| +| -s | --source | Source configuration file path | Yes | +| -t | --target | Target configuration file path | Yes | +| -st | --source-type | Source configuration type (datax, default: datax) | No | +| -T | --template | Custom template file path | No | +| -r | --report | Conversion report file path | No | +| -c | --config | YAML configuration file path, containing source, target, report, template and other settings | No | +| -d | --directory | Batch conversion source directory | No | +| -o | --output-dir | Batch conversion output directory | No | +| -p | --pattern | File wildcard pattern (comma separated, e.g.: *.json,*.xml)| No | +| -R | --report-dir | Report output directory in batch mode, individual file reports and summary.md will be output to this directory | No | +| -v | --version | Show version information | No | +| -h | --help | Show help information | No | +| | --verbose | Enable verbose log output | No | + +```bash +# Example: View command line help +./bin/x2seatunnel.sh --help +``` + +### Supported Configuration Types + +#### Source Configuration Types +- **datax**: DataX configuration files (JSON format) - Default type + +#### Target Configuration Types +- **seatunnel**: SeaTunnel configuration files (HOCON format) + +## 🎨 Template System + +### Design Philosophy + +X2SeaTunnel adopts a DSL (Domain Specific Language) based template system, implementing rapid adaptation of different data sources and targets through configuration-driven approach. 
Core advantages:
+
+- **Configuration-driven**: All conversion logic is defined through YAML configuration files, with no need to modify Java code
+- **Easy to extend**: Adding a new data source type only requires adding template files and mapping configurations
+- **Unified syntax**: Uses Jinja2-style template syntax, easy to understand and maintain
+- **Intelligent mapping**: Implements complex parameter mapping logic through transformers
+
+### Template Syntax
+
+X2SeaTunnel supports a partially Jinja2-compatible template syntax and provides a rich set of filters for handling configuration conversion.
+
+```bash
+# Basic variable reference
+{{ datax.job.content[0].reader.parameter.username }}
+
+# Variables with filters
+{{ datax.job.content[0].reader.parameter.column | join(',') }}
+
+# Chained filters
+{{ datax.job.content[0].writer.parameter.path | split('/') | get(-2) | replace('.db','') }}
+```
+
+### 2. Filters
+
+| Filter | Syntax | Description | Example |
+|--------|--------|-------------|---------|
+| `join` | `{{ array \| join('separator') }}` | Array join | `{{ columns \| join(',') }}` |
+| `default` | `{{ value \| default('default_value') }}` | Default value | `{{ port \| default(3306) }}` |
+| `upper` | `{{ value \| upper }}` | Uppercase conversion | `{{ name \| upper }}` |
+| `lower` | `{{ value \| lower }}` | Lowercase conversion | `{{ name \| lower }}` |
+| `split` | `{{ string \| split('/') }}` | String split | `'a/b/c' → ['a','b','c']` |
+| `get` | `{{ array \| get(0) }}` | Get array element | `['a','b','c'] → 'a'` |
+| `replace` | `{{ string \| replace('old,new') }}` | String replace | `'hello' → 'hallo'` |
+| `regex_extract` | `{{ string \| regex_extract('pattern') }}` | Regex extract | Extract matching content |
+| `jdbc_driver_mapper` | `{{ jdbcUrl \| jdbc_driver_mapper }}` | JDBC driver mapping | Auto-infer driver class |
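+
+As a worked example of chaining these filters, take a writer path of `/user/hive/warehouse/test_ods.db/test_table` (illustrative): `split('/')` yields `['', 'user', 'hive', 'warehouse', 'test_ods.db', 'test_table']`, `get(-2)` selects `test_ods.db`, and `replace('.db','')` strips the suffix, leaving `test_ods`.
+
+### 3.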
Examples + +```bash +# join filter: Array join +query = "SELECT {{ datax.job.content[0].reader.parameter.column | join(',') }} FROM table" + +# default filter: Default value +partition_column = "{{ datax.job.content[0].reader.parameter.splitPk | default('') }}" +fetch_size = {{ datax.job.content[0].reader.parameter.fetchSize | default(1024) }} + +# String operations +driver = "{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] | upper }}" +``` + +```bash +# Chained filters: String split and get +{{ datax.job.content[0].writer.parameter.path | split('/') | get(-2) | replace('.db','') }} + +# Regular expression extraction +{{ jdbcUrl | regex_extract('jdbc:mysql://([^:]+):') }} + +# Transformer call: Intelligent parameter mapping +driver = "{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] | jdbc_driver_mapper }}" +``` + +```bash +# Intelligent query generation +query = "{{ datax.job.content[0].reader.parameter.querySql[0] | default('SELECT') }} {{ datax.job.content[0].reader.parameter.column | join(',') }} FROM {{ datax.job.content[0].reader.parameter.connection[0].table[0] }} WHERE {{ datax.job.content[0].reader.parameter.where | default('1=1') }}" + +# Path intelligent parsing: Extract Hive table name from HDFS path +# Path: /user/hive/warehouse/test_ods.db/test_table/partition=20240101 +database = "{{ datax.job.content[0].writer.parameter.path | split('/') | get(-3) | replace('.db','') }}" +table = "{{ datax.job.content[0].writer.parameter.path | split('/') | get(-2) }}" +table_name = "{{ database }}.{{ table }}" +``` + +```bash +# Auto infer database driver +{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] | jdbc_driver_mapper }} + +# Mapping relationships (configured in template-mapping.yaml): +# mysql -> com.mysql.cj.jdbc.Driver +# postgresql -> org.postgresql.Driver +# oracle -> oracle.jdbc.driver.OracleDriver +# sqlserver -> com.microsoft.sqlserver.jdbc.SQLServerDriver +``` + +### Custom Transformers + +Configure custom transformers through `templates/template-mapping.yaml`: + +```yaml +transformers: + # JDBC driver mapping + jdbc_driver_mapper: + mysql: "com.mysql.cj.jdbc.Driver" + postgresql: "org.postgresql.Driver" + oracle: "oracle.jdbc.driver.OracleDriver" + sqlserver: "com.microsoft.sqlserver.jdbc.SQLServerDriver" + + # File format mapping + file_format_mapper: + text: "text" + orc: "orc" + parquet: "parquet" + json: "json" +``` + +## Extending New Data Sources + +Adding new data source types requires only three steps: + +1. **Create template files**: Create new template files under `templates/datax/sources/` +2. **Configure mapping relationships**: Add mapping configurations in `template-mapping.yaml` +3. **Add transformers**: If special processing is needed, add corresponding transformer configurations + +No need to modify any Java code to support new data source types. 
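+
+As a concrete sketch of these three steps, suppose a hypothetical ClickHouse reader were to be added. Step 1 would create `templates/datax/sources/clickhouse-source.conf`; steps 2 and 3 might then look like the fragment below in `templates/template-mapping.yaml`. This sketch is illustrative only: the `sources` key layout and the template file name are assumptions (check the shipped `template-mapping.yaml` for the exact structure), while the `transformers` schema matches the Custom Transformers section above and the driver class is the standard ClickHouse JDBC driver.
+
+```yaml
+# Illustrative sketch: the "sources" key layout is assumed, not taken from the shipped file
+sources:
+  # Map the DataX reader name to the new source template (hypothetical entry)
+  clickhousereader: "datax/sources/clickhouse-source.conf"
+
+transformers:
+  # Extend the documented driver mapping so jdbc_driver_mapper can resolve ClickHouse URLs
+  jdbc_driver_mapper:
+    clickhouse: "com.clickhouse.jdbc.ClickHouseDriver"
+```
+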
+ +## 🌐 Supported Data Sources and Targets + +### Data Sources (Sources) + +| Data Source Type | DataX Reader | Template File | Support Status | +|------------------|-------------|---------------|----------------| +| **MySQL** | `mysqlreader` | `mysql-source.conf` | ✅ Support | +| **PostgreSQL** | `postgresqlreader` | `jdbc-source.conf` | ✅ Support | +| **Oracle** | `oraclereader` | `jdbc-source.conf` | ✅ Support | +| **SQL Server** | `sqlserverreader` | `jdbc-source.conf` | ✅ Support | +| **HDFS** | `hdfsreader` | `hdfs-source.conf` | ✅ Support | + +### Data Targets (Sinks) + +| Data Target Type | DataX Writer | Template File | Support Status | +|------------------|-------------|---------------|----------------| +| **MySQL** | `mysqlwriter` | `jdbc-sink.conf` | ✅ Support | +| **PostgreSQL** | `postgresqlwriter` | `jdbc-sink.conf` | ✅ Support | +| **Oracle** | `oraclewriter` | `jdbc-sink.conf` | ✅ Support | +| **SQL Server** | `sqlserverwriter` | `jdbc-sink.conf` | ✅ Support | +| **HDFS** | `hdfswriter` | `hdfs-sink.conf` | ✅ Support | +| **Doris** | `doriswriter` | `doris-sink.conf` | 📋 Planned | + +## Development Guide + +### Custom Configuration Templates + +You can customize configuration templates in the `templates/datax/custom/` directory, referring to the format and placeholder syntax of existing templates. + +### Code Structure + +``` +src/main/java/org/apache/seatunnel/tools/x2seatunnel/ +├── cli/ # Command line interface +├── core/ # Core conversion logic +├── template/ # Template processing +├── utils/ # Utility classes +└── X2SeaTunnelApplication.java # Main application class +``` + +### Changelog + +#### v1.0.0-SNAPSHOT (Current Version) +- ✅ **Core Features**: Support for basic DataX to SeaTunnel configuration conversion +- ✅ **Template System**: Jinja2-style DSL template language with configuration-driven extension support +- ✅ **Unified JDBC Support**: MySQL, PostgreSQL, Oracle, SQL Server and other relational databases +- ✅ **Intelligent Features**: + - Auto driver mapping (infer database driver based on jdbcUrl) + - Intelligent query generation (auto-generate SELECT statements based on column, table, where) + - Auto parameter mapping (splitPk→partition_column, fetchSize→fetch_size, etc.) 
+- ✅ **Template Syntax**: + - Basic variable access: `{{ datax.path.to.value }}` + - Filter support: `{{ array | join(',') }}`, `{{ value | default('default') }}` + - Custom transformers: `{{ url | jdbc_driver_mapper }}` +- ✅ **Batch Processing**: Support directory-level batch conversion and report generation +- ✅ **Complete Examples**: Complete DataX configuration examples for 4 JDBC data sources +- ✅ **Comprehensive Documentation**: Complete usage instructions and API documentation + +# Appendix 1: X2SeaTunnel Conversion Report + +## 📋 Basic Information + +| Item | Value | +|------|----| +| **Conversion Time** | 2025-08-04T14:01:00.628 | +| **Source File** | `examples/source/datax-mysql2hdfs.json` | +| **Target File** | `examples/target/mysql2hdfs-result2.conf` | +| **Source Type** | DATAX | +| **Target Type** | SeaTunnel | +| **Source Connector** | Jdbc (mysql) | +| **Target Connector** | HdfsFile | +| **Conversion Status** | ✅ Success | + +| **Tool Version** | 0.1 | + + + +## 📊 Conversion Statistics + +| Type | Count | Percentage | +|------|------|--------| +| ✅ **Direct Mapping** | 16 | 57.1% | +| 🔧 **Transform Mapping** | 2 | 7.1% | +| 🔄 **Default Values Used** | 8 | 28.6% | +| ❌ **Missing Fields** | 0 | 0.0% | +| ⚠️ **Unmapped** | 2 | 7.1% | +| **Total** | 28 | 100% | + +## ✅ Direct Mapped Fields + +| SeaTunnel Field | Value | DATAX Source Field | +|---------------|----|--------------| +| `env.parallelism` | `3` | `null` | +| `source.Jdbc.url` | `jdbc:mysql://localhost:3306/testdb` | `job.content[0].reader.parameter.connection[0].jdbcUrl[0]` | +| `source.Jdbc.driver` | `jdbc:mysql://localhost:3306/testdb` | `job.content[0].reader.parameter.connection[0].jdbcUrl[0]` | +| `source.Jdbc.user` | `root` | `job.content[0].reader.parameter.username` | +| `source.Jdbc.password` | `1234567` | `job.content[0].reader.parameter.password` | +| `source.Jdbc.partition_column` | `id` | `null` | +| `source.Jdbc.partition_num` | `3` | `null` | +| `sink.HdfsFile.fs.defaultFS` | `hdfs://localhost:9000` | `job.content[0].writer.parameter.defaultFS` | +| `sink.HdfsFile.path` | `/data/users` | `job.content[0].writer.parameter.path` | +| `sink.HdfsFile.file_format_type` | `text` | `null` | +| `sink.HdfsFile.field_delimiter` | ` ` | `null` | +| `sink.HdfsFile.row_delimiter` | ` +` | `null` | +| `sink.HdfsFile.compress_codec` | `gzip` | `job.content[0].writer.parameter.compress` | +| `sink.HdfsFile.compress_codec` | `gzip` | `null` | +| `sink.HdfsFile.encoding` | `UTF-8` | `null` | +| `sink.HdfsFile.batch_size` | `50000` | `null` | + + +## 🔧 Transform Mapped Fields + +| SeaTunnel Field | Value | DATAX Source Field | Filter Used | +|---------------|----|--------------|-----------| +| `source.Jdbc.driver` | `com.mysql.cj.jdbc.Driver` | `null` | jdbc_driver_mapper | +| `source.Jdbc.query` | `SELECT id,name,age,email,create_time FROM users WHERE 1=1` | `{{ datax.job.content[0].reader.parameter.querySql[0] \| default('SELECT') }} {{ datax.job.content[0].reader.parameter.column \| join(',') }} FROM {{ datax.job.content[0].reader.parameter.connection[0].table[0] }} WHERE {{ datax.job.content[0].reader.parameter.where \| default('1=1') }}` | default, join | + + +## 🔄 Fields Using Default Values + +| SeaTunnel Field | Default Value | +|---------------|--------| +| `env.job.mode` | `BATCH` | +| `source.Jdbc.connection_check_timeout_sec` | `60` | +| `source.Jdbc.max_retries` | `3` | +| `source.Jdbc.fetch_size` | `1024` | +| `source.Jdbc.plugin_output` | `jdbc_source_table` | +| `sink.HdfsFile.tmp_path` | 
`/tmp/seatunnel` | +| `sink.HdfsFile.is_enable_transaction` | `true` | +| `sink.HdfsFile.enable_header_write` | `false` | + + +## ❌ Missing Fields + +*No missing fields* 🎉 + + +## ⚠️ Unmapped Fields + +| DataX Field | Value | +|--------|------| +| `job.content[0].writer.parameter.fileName` | `users_export_${now}` | +| `job.content[0].writer.parameter.writeMode` | `append` | + +# Appendix 2: Batch Conversion Report + +## 📋 Conversion Overview + +| Item | Value | +|------|-------| +| **Start Time** | 2025-08-04 14:53:35 | +| **End Time** | 2025-08-04 14:53:36 | +| **Duration** | 1 seconds | +| **Source Directory** | `examples/source` | +| **Output Directory** | `examples/target2` | +| **Report Directory** | `examples/report2` | +| **File Pattern** | `*.json` | +| **Custom Template** | `Default template` | +| **Successful Conversions** | 10 files | +| **Failed Conversions** | 0 files | +| **Total** | 10 files | +| **Success Rate** | 100.0% | + +## ✅ Successful Conversions (10) + +| # | Source File | Target File | Report File | +|---|-------------|-------------|-------------| +| 1 | `examples/source/datax-hdfs2mysql.json` | `examples/target2/datax-hdfs2mysql.conf` | `examples/report2/datax-hdfs2mysql.md` | +| 2 | `examples/source/datax-mysql2hdfs-full.json` | `examples/target2/datax-mysql2hdfs-full.conf` | `examples/report2/datax-mysql2hdfs-full.md` | +| 3 | `examples/source/datax-mysql2hdfs.json` | `examples/target2/datax-mysql2hdfs.conf` | `examples/report2/datax-mysql2hdfs.md` | +| 4 | `examples/source/datax-mysql2hdfs2hive.json` | `examples/target2/datax-mysql2hdfs2hive.conf` | `examples/report2/datax-mysql2hdfs2hive.md` | +| 5 | `examples/source/datax-mysql2mysql-full.json` | `examples/target2/datax-mysql2mysql-full.conf` | `examples/report2/datax-mysql2mysql-full.md` | +| 6 | `examples/source/datax-mysql2mysql.json` | `examples/target2/datax-mysql2mysql.conf` | `examples/report2/datax-mysql2mysql.md` | +| 7 | `examples/source/datax-oracle2hdfs-full.json` | `examples/target2/datax-oracle2hdfs-full.conf` | `examples/report2/datax-oracle2hdfs-full.md` | +| 8 | `examples/source/datax-postgresql2hdfs-full.json` | `examples/target2/datax-postgresql2hdfs-full.conf` | `examples/report2/datax-postgresql2hdfs-full.md` | +| 9 | `examples/source/datax-postgresql2hdfs.json` | `examples/target2/datax-postgresql2hdfs.conf` | `examples/report2/datax-postgresql2hdfs.md` | +| 10 | `examples/source/datax-sqlserver2hdfs-full.json` | `examples/target2/datax-sqlserver2hdfs-full.conf` | `examples/report2/datax-sqlserver2hdfs-full.md` | + +## ❌ Failed Conversions (0) + +*No failed conversion files* + +--- +*Report generated at: 2025-08-04 14:53:36* +*Tool version: X2SeaTunnel v0.1* diff --git a/x2seatunnel/README_zh.md b/x2seatunnel/README_zh.md new file mode 100644 index 0000000..fa3cbbb --- /dev/null +++ b/x2seatunnel/README_zh.md @@ -0,0 +1,508 @@ +# X2SeaTunnel 配置转换工具 +X2SeaTunnel 是一个用于将 DataX 等配置文件转换为 SeaTunnel 配置文件的工具,旨在帮助用户快速从其它数据集成平台迁移到 SeaTunnel。 + +## 🚀 快速开始 + +### 前置条件 + +- Java 8 或更高版本 + +### 安装 + +#### 从源码编译 +```bash +# 在本仓库内编译 x2seatunnel 模块 +mvn clean package -pl x2seatunnel -DskipTests +``` +编译结束后,发布包位于 `x2seatunnel/target/x2seatunnel-*.zip`。 + +#### 使用发布包 +```bash +# 下载并解压发布包 +unzip x2seatunnel-*.zip +cd x2seatunnel-*/ +``` + +### 基本用法 + +```bash +# 标准转换:使用默认模板系统,内置常见的Source和Sink +./bin/x2seatunnel.sh -s examples/source/datax-mysql2hdfs.json -t examples/target/mysql2hdfs-result.conf -r examples/report/mysql2hdfs-report.md + +# 自定义任务: 通过自定义模板实现定制化转换需求 +# 场景:MySQL → Hive(DataX 没有 HiveWriter) +# 
DataX 配置:MySQL → HDFS 自定义任务:转换为 MySQL → Hive +./bin/x2seatunnel.sh -s examples/source/datax-mysql2hdfs2hive.json -t examples/target/mysql2hive-result.conf -r examples/report/mysql2hive-report.md -T templates/datax/custom/mysql-to-hive.conf + +# YAML 配置方式(等效于上述命令行参数) +./bin/x2seatunnel.sh -c examples/yaml/datax-mysql2hdfs2hive.yaml + +# 批量转换模式:按目录处理 +./bin/x2seatunnel.sh -d examples/source -o examples/target2 -R examples/report2 + +# 批量模式支持通配符过滤 +./bin/x2seatunnel.sh -d examples/source -o examples/target3 -R examples/report3 --pattern "*-full.json" --verbose + +# 查看帮助 +./bin/x2seatunnel.sh --help +``` + +### 转换报告 +转换完成后,查看生成的Markdown报告文件,包含: +- **基本信息**: 转换时间、源/目标文件路径、连接器类型、转换状态等 +- **转换统计**: 直接映射、智能转换、默认值使用、未映射字段的数量和百分比 +- **详细字段映射关系**: 每个字段的源值、目标值、使用的过滤器等 +- **默认值使用情况**: 列出所有使用默认值的字段 +- **未映射字段**: 显示DataX中存在但未转换的字段 +- **可能的错误和警告信息**: 转换过程中的问题提示 + +如果是批量转换,则会在批量生成转换报告的文件夹下,生成批量汇总报告 `summary.md`,包含: +- **转换概览**: 总体统计信息、成功率、耗时等 +- **成功转换列表**: 所有成功转换的文件清单 +- **失败转换列表**: 失败的文件及错误信息(如有) + + +### 日志文件 +```bash +# 查看日志文件 +tail -f logs/x2seatunnel.log +``` + + +## 🎯 功能特性 + +- ✅ **标准配置转换**: DataX → SeaTunnel 配置文件转换 +- ✅ **自定义模板转换**: 支持用户自定义转换模板 +- ✅ **详细转换报告**: 生成 Markdown 格式的转换报告 +- ✅ **支持正则表达式变量提取**: 从配置中正则提取变量,支持自定义场景 +- ✅ **批量转换模式**: 支持目录和文件通配符批量转换,自动生成报告和汇总报告 + +## 📁 目录结构 + +``` +x2seatunnel/ +├── bin/ # 可执行文件 +│ ├── x2seatunnel.sh # 启动脚本 +├── lib/ # JAR包文件 +│ └── x2seatunnel-*.jar # 核心JAR包 +├── config/ # 配置文件 +│ └── log4j2.xml # 日志配置 +├── templates/ # 模板文件 +│ ├── template-mapping.yaml # 模板映射配置 +│ ├── report-template.md # 报告模板 +│ └── datax/ # DataX相关模板 +│ ├── custom/ # 自定义模板 +│ ├── env/ # 环境配置模板 +│ ├── sources/ # 数据源模板 +│ └── sinks/ # 数据目标模板 +├── examples/ # 示例和测试 +│ ├── source/ # 示例源文件 +│ ├── target/ # 生成的目标文件 +│ └── report/ # 生成的报告 +├── logs/ # 日志文件 +├── LICENSE # 许可证 +└── README.md # 使用说明 +``` + +## 📖 使用说明 + +### 基本语法 + +```bash +x2seatunnel [OPTIONS] +``` + +### 命令行参数 + +| 选项 | 长选项 | 描述 | 必需 | +|----------|-----------------|------------------------------------------------------|------| +| -s | --source | 源配置文件路径 | 是 | +| -t | --target | 目标配置文件路径 | 是 | +| -st | --source-type | 源配置类型 (datax, 默认: datax) | 否 | +| -T | --template | 自定义模板文件路径 | 否 | +| -r | --report | 转换报告文件路径 | 否 | +| -c | --config | YAML 配置文件路径,包含 source, target, report, template 等设置 | 否 | +| -d | --directory | 批量转换源目录 | 否 | +| -o | --output-dir | 批量转换输出目录 | 否 | +| -p | --pattern | 文件通配符模式(逗号分隔,例如: *.json,*.xml) | 否 | +| -R | --report-dir | 批量模式下报告输出目录,单文件报告和汇总 summary.md 将输出到该目录 | 否 | +| -v | --version | 显示版本信息 | 否 | +| -h | --help | 显示帮助信息 | 否 | +| | --verbose | 启用详细日志输出 | 否 | + +```bash +# 示例:查看命令行帮助 +./bin/x2seatunnel.sh --help +``` + +### 支持的配置类型 + +#### 源配置类型 +- **datax**: DataX配置文件(JSON格式)- 默认类型 + +#### 目标配置类型 +- **seatunnel**: SeaTunnel配置文件(HOCON格式) + +## 🎨 模板系统 + +### 设计理念 + +X2SeaTunnel 采用基于 DSL (Domain Specific Language) 的模板系统,通过配置驱动的方式实现不同数据源和目标的快速适配。核心优势: + +- **配置驱动**:所有转换逻辑都通过 YAML 配置文件定义,无需修改 Java 代码 +- **易于扩展**:新增数据源类型只需添加模板文件和映射配置 +- **统一语法**:使用 Jinja2 风格的模板语法,易于理解和维护 +- **智能映射**:通过转换器(transformer)实现复杂的参数映射逻辑 + +### 模板语法 + +X2SeaTunnel 支持部分兼容 Jinja2 风格模板语法,提供丰富的过滤器功能来处理配置转换。 + +```bash +# 基本变量引用 +{{ datax.job.content[0].reader.parameter.username }} + +# 带过滤器的变量 +{{ datax.job.content[0].reader.parameter.column | join(',') }} + +# 链式过滤器 +{{ datax.job.content[0].writer.parameter.path | split('/') | get(-2) | replace('.db','') }} +``` + + +### 2. 
过滤器 + +| 过滤器 | 语法 | 描述 | 示例 | +|--------|------|------|------| +| `join` | `{{ array \| join('分隔符') }}` | 数组连接 | `{{ columns \| join(',') }}` | +| `default` | `{{ value \| default('默认值') }}` | 默认值 | `{{ port \| default(3306) }}` | +| `upper` | `{{ value \| upper }}` | 大写转换 | `{{ name \| upper }}` | +| `lower` | `{{ value \| lower }}` | 小写转换 | `{{ name \| lower }}` | +| `split` | `{{ string \| split('/') }}` | 字符串分割 | `'a/b/c' → ['a','b','c']` | +| `get` | `{{ array \| get(0) }}` | 获取数组元素 | `['a','b','c'] → 'a'` | +| `replace` | `{{ string \| replace('old,new') }}` | 字符串替换 | `'hello' → 'hallo'` | +| `regex_extract` | `{{ string \| regex_extract('pattern') }}` | 正则提取 | 提取匹配的内容 | +| `jdbc_driver_mapper` | `{{ jdbcUrl \| jdbc_driver_mapper }}` | JDBC 驱动映射 | 自动推断驱动类 | + +### 3. 样例 + +```bash +# join 过滤器:数组连接 +query = "SELECT {{ datax.job.content[0].reader.parameter.column | join(',') }} FROM table" + +# default 过滤器:默认值 +partition_column = "{{ datax.job.content[0].reader.parameter.splitPk | default('') }}" +fetch_size = {{ datax.job.content[0].reader.parameter.fetchSize | default(1024) }} + +# 字符串操作 +driver = "{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] | upper }}" +``` + +```bash +# 链式过滤器:字符串分割和获取 +{{ datax.job.content[0].writer.parameter.path | split('/') | get(-2) | replace('.db','') }} + +# 正则表达式提取 +{{ jdbcUrl | regex_extract('jdbc:mysql://([^:]+):') }} + +# 转换器调用:智能参数映射 +driver = "{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] | jdbc_driver_mapper }}" +``` + +```bash +# 智能查询生成 +query = "{{ datax.job.content[0].reader.parameter.querySql[0] | default('SELECT') }} {{ datax.job.content[0].reader.parameter.column | join(',') }} FROM {{ datax.job.content[0].reader.parameter.connection[0].table[0] }} WHERE {{ datax.job.content[0].reader.parameter.where | default('1=1') }}" + +# 路径智能解析:从 HDFS 路径提取 Hive 表名 +# 路径: /user/hive/warehouse/test_ods.db/test_table/partition=20240101 +database = "{{ datax.job.content[0].writer.parameter.path | split('/') | get(-3) | replace('.db','') }}" +table = "{{ datax.job.content[0].writer.parameter.path | split('/') | get(-2) }}" +table_name = "{{ database }}.{{ table }}" +``` + +```bash +# 自动推断数据库驱动 +{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] | jdbc_driver_mapper }} + +# 映射关系(在 template-mapping.yaml 中配置): +# mysql -> com.mysql.cj.jdbc.Driver +# postgresql -> org.postgresql.Driver +# oracle -> oracle.jdbc.driver.OracleDriver +# sqlserver -> com.microsoft.sqlserver.jdbc.SQLServerDriver +``` + +### 4. 
模板配置示例

```hocon
env {
  execution.parallelism = {{ datax.job.setting.speed.channel | default(1) }}
  job.mode = "BATCH"
}

source {
  Jdbc {
    url = "{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] }}"
    driver = "com.mysql.cj.jdbc.Driver"
    user = "{{ datax.job.content[0].reader.parameter.username }}"
    password = "{{ datax.job.content[0].reader.parameter.password }}"
    query = "{{ datax.job.content[0].reader.parameter.querySql[0] | default('SELECT') }} {{ datax.job.content[0].reader.parameter.column | join(',') }} FROM {{ datax.job.content[0].reader.parameter.connection[0].table[0] }}"
    plugin_output = "source_table"
  }
}

sink {
  Hive {
    # 从路径智能提取 Hive 表名
    # 使用 split 和 get 过滤器来提取数据库名和表名
    # 步骤1:分割路径
    # 步骤2:获取倒数第三个部分作为数据库名,去掉 .db 后缀
    # 步骤3:获取倒数第二个部分作为表名
    table_name = "{{ datax.job.content[0].writer.parameter.path | split('/') | get(-3) | replace('.db,') }}.{{ datax.job.content[0].writer.parameter.path | split('/') | get(-2) }}"

    # Hive Metastore配置
    metastore_uri = "{{ datax.job.content[0].writer.parameter.metastoreUri | default('thrift://localhost:9083') }}"

    # 压缩配置
    compress_codec = "{{ datax.job.content[0].writer.parameter.compress | default('none') }}"

    # Hadoop配置文件路径(可选)
    # hdfs_site_path = "/etc/hadoop/conf/hdfs-site.xml"
    # hive_site_path = "/etc/hadoop/conf/hive-site.xml"

    # Hadoop配置(可选)
    # hive.hadoop.conf = {
    #   "fs.defaultFS" = "{{ datax.job.content[0].writer.parameter.defaultFS | default('hdfs://localhost:9000') }}"
    # }

    # 结果表名
    plugin_input = "source_table"
  }
}
```

### 自定义转换器

通过 `templates/template-mapping.yaml` 配置自定义转换器:

```yaml
transformers:
  # JDBC 驱动映射
  jdbc_driver_mapper:
    mysql: "com.mysql.cj.jdbc.Driver"
    postgresql: "org.postgresql.Driver"
    oracle: "oracle.jdbc.driver.OracleDriver"
    sqlserver: "com.microsoft.sqlserver.jdbc.SQLServerDriver"

  # 文件格式映射
  file_format_mapper:
    text: "text"
    orc: "orc"
    parquet: "parquet"
    json: "json"
```

## 扩展新数据源

添加新数据源类型只需三步:

1. **创建模板文件**:在 `templates/datax/sources/` 下创建新的模板文件
2. **配置映射关系**:在 `template-mapping.yaml` 中添加映射配置
3.
**添加转换器**:如需特殊处理,添加对应的转换器配置 + +无需修改任何 Java 代码,即可支持新的数据源类型。 + + +## 🌐 支持的数据源和目标 + +### 数据源(Sources) + +| 数据源类型 | DataX Reader | 模板文件 | 支持状态 | +|-----------|-------------|----------|----------| +| **MySQL** | `mysqlreader` | `mysql-source.conf` | ✅ 支持 | +| **PostgreSQL** | `postgresqlreader` | `jdbc-source.conf` | ✅ 支持 | +| **Oracle** | `oraclereader` | `jdbc-source.conf` | ✅ 支持 | +| **SQL Server** | `sqlserverreader` | `jdbc-source.conf` | ✅ 支持 | +| **HDFS** | `hdfsreader` | `hdfs-source.conf` | 支持 | + +### 数据目标(Sinks) + +| 数据目标类型 | DataX Writer | 模板文件 | 支持状态 | +|-------------|-------------|----------|----------| +| **MySQL** | `mysqlwriter` | `jdbc-sink.conf` | ✅ 支持 | +| **PostgreSQL** | `postgresqlwriter` | `jdbc-sink.conf` | ✅ 支持 | +| **Oracle** | `oraclewriter` | `jdbc-sink.conf` | ✅ 支持 | +| **SQL Server** | `sqlserverwriter` | `jdbc-sink.conf` | ✅ 支持 | +| **HDFS** | `hdfswriter` | `hdfs-sink.conf` | ✅ 支持 | + + +## 开发指南 +### 自定义配置模板 + +可以在 `templates/datax/custom/` 目录下自定义配置模板,参考现有模板的格式和占位符语法。 + +### 代码结构 + +``` +src/main/java/org/apache/seatunnel/tools/x2seatunnel/ +├── cli/ # 命令行界面 +├── core/ # 核心转换逻辑 +├── template/ # 模板处理 +├── utils/ # 工具类 +└── X2SeaTunnelApplication.java # 主应用类 +``` + +### 限制和注意事项 +#### 版本兼容性 +- 支持 DataX 主流版本的配置格式 +- 生成的配置兼容 SeaTunnel 2.3.11+ 版本,旧版本大部分差异不大 +- 模板系统向后兼容 + +### 更新日志 + +#### v1.0.0-SNAPSHOT (当前版本) +- ✅ **核心功能**:支持DataX到SeaTunnel的基础配置转换 +- ✅ **模板系统**:基于Jinja2风格的DSL模板语言,支持配置驱动扩展 +- ✅ **JDBC统一支持**:MySQL、PostgreSQL、Oracle、SQL Server等关系型数据库 +- ✅ **智能特性**: + - 自动驱动映射(根据jdbcUrl推断数据库驱动) + - 智能查询生成(根据column、table、where自动拼接SELECT语句) + - 参数自动映射(splitPk→partition_column、fetchSize→fetch_size等) +- ✅ **模板语法**: + - 基础变量访问:`{{ datax.path.to.value }}` + - 过滤器支持:`{{ array | join(',') }}`、`{{ value | default('default') }}` + - 自定义转换器:`{{ url | jdbc_driver_mapper }}` +- ✅ **批量处理**:支持目录级别的批量转换和报告生成 +- ✅ **完整示例**:提供4种JDBC数据源的完整DataX配置样例 +- ✅ **详细文档**:完整的使用说明和API文档 + +--- + +# 附录1:X2SeaTunnel 转换报告样例 + +## 📋 Basic Information + +| Item | Value | +|------|----| +| **Conversion Time** | 2025-08-04T14:01:00.628 | +| **Source File** | `examples/source/datax-mysql2hdfs.json` | +| **Target File** | `examples/target/mysql2hdfs-result2.conf` | +| **Source Type** | DATAX | +| **Target Type** | SeaTunnel | +| **Source Connector** | Jdbc (mysql) | +| **Target Connector** | HdfsFile | +| **Conversion Status** | ✅ Success | + +| **Tool Version** | 0.1 | + + + +## 📊 Conversion Statistics + +| Type | Count | Percentage | +|------|------|--------| +| ✅ **Direct Mapping** | 16 | 57.1% | +| 🔧 **Transform Mapping** | 2 | 7.1% | +| 🔄 **Default Values Used** | 8 | 28.6% | +| ❌ **Missing Fields** | 0 | 0.0% | +| ⚠️ **Unmapped** | 2 | 7.1% | +| **Total** | 28 | 100% | + +## ✅ Direct Mapped Fields + +| SeaTunnel Field | Value | DATAX Source Field | +|---------------|----|--------------| +| `env.parallelism` | `3` | `null` | +| `source.Jdbc.url` | `jdbc:mysql://localhost:3306/testdb` | `job.content[0].reader.parameter.connection[0].jdbcUrl[0]` | +| `source.Jdbc.driver` | `jdbc:mysql://localhost:3306/testdb` | `job.content[0].reader.parameter.connection[0].jdbcUrl[0]` | +| `source.Jdbc.user` | `root` | `job.content[0].reader.parameter.username` | +| `source.Jdbc.password` | `1234567` | `job.content[0].reader.parameter.password` | +| `source.Jdbc.partition_column` | `id` | `null` | +| `source.Jdbc.partition_num` | `3` | `null` | +| `sink.HdfsFile.fs.defaultFS` | `hdfs://localhost:9000` | `job.content[0].writer.parameter.defaultFS` | +| `sink.HdfsFile.path` | `/data/users` | 
`job.content[0].writer.parameter.path` | +| `sink.HdfsFile.file_format_type` | `text` | `null` | +| `sink.HdfsFile.field_delimiter` | ` ` | `null` | +| `sink.HdfsFile.row_delimiter` | ` +` | `null` | +| `sink.HdfsFile.compress_codec` | `gzip` | `job.content[0].writer.parameter.compress` | +| `sink.HdfsFile.compress_codec` | `gzip` | `null` | +| `sink.HdfsFile.encoding` | `UTF-8` | `null` | +| `sink.HdfsFile.batch_size` | `50000` | `null` | + + +## 🔧 Transform Mapped Fields + +| SeaTunnel Field | Value | DATAX Source Field | Filter Used | +|---------------|----|--------------|-----------| +| `source.Jdbc.driver` | `com.mysql.cj.jdbc.Driver` | `null` | jdbc_driver_mapper | +| `source.Jdbc.query` | `SELECT id,name,age,email,create_time FROM users WHERE 1=1` | `{{ datax.job.content[0].reader.parameter.querySql[0] \| default('SELECT') }} {{ datax.job.content[0].reader.parameter.column \| join(',') }} FROM {{ datax.job.content[0].reader.parameter.connection[0].table[0] }} WHERE {{ datax.job.content[0].reader.parameter.where \| default('1=1') }}` | default, join | + + +## 🔄 Fields Using Default Values + +| SeaTunnel Field | Default Value | +|---------------|--------| +| `env.job.mode` | `BATCH` | +| `source.Jdbc.connection_check_timeout_sec` | `60` | +| `source.Jdbc.max_retries` | `3` | +| `source.Jdbc.fetch_size` | `1024` | +| `source.Jdbc.plugin_output` | `jdbc_source_table` | +| `sink.HdfsFile.tmp_path` | `/tmp/seatunnel` | +| `sink.HdfsFile.is_enable_transaction` | `true` | +| `sink.HdfsFile.enable_header_write` | `false` | + + +## ❌ Missing Fields + +*No missing fields* 🎉 + + +## ⚠️ Unmapped Fields + +| DataX Field | Value | +|--------|------| +| `job.content[0].writer.parameter.fileName` | `users_export_${now}` | +| `job.content[0].writer.parameter.writeMode` | `append` | + + +# 附录2: 批量汇总报告样例 + +## 📋 Conversion Overview + +| Item | Value | +|------|-------| +| **Start Time** | 2025-08-04 14:53:35 | +| **End Time** | 2025-08-04 14:53:36 | +| **Duration** | 1 seconds | +| **Source Directory** | `examples/source` | +| **Output Directory** | `examples/target2` | +| **Report Directory** | `examples/report2` | +| **File Pattern** | `*.json` | +| **Custom Template** | `Default template` | +| **Successful Conversions** | 10 files | +| **Failed Conversions** | 0 files | +| **Total** | 10 files | +| **Success Rate** | 100.0% | + +## ✅ Successful Conversions (10) + +| # | Source File | Target File | Report File | +|---|-------------|-------------|-------------| +| 1 | `examples/source/datax-hdfs2mysql.json` | `examples/target2/datax-hdfs2mysql.conf` | `examples/report2/datax-hdfs2mysql.md` | +| 2 | `examples/source/datax-mysql2hdfs-full.json` | `examples/target2/datax-mysql2hdfs-full.conf` | `examples/report2/datax-mysql2hdfs-full.md` | +| 3 | `examples/source/datax-mysql2hdfs.json` | `examples/target2/datax-mysql2hdfs.conf` | `examples/report2/datax-mysql2hdfs.md` | +| 4 | `examples/source/datax-mysql2hdfs2hive.json` | `examples/target2/datax-mysql2hdfs2hive.conf` | `examples/report2/datax-mysql2hdfs2hive.md` | +| 5 | `examples/source/datax-mysql2mysql-full.json` | `examples/target2/datax-mysql2mysql-full.conf` | `examples/report2/datax-mysql2mysql-full.md` | +| 6 | `examples/source/datax-mysql2mysql.json` | `examples/target2/datax-mysql2mysql.conf` | `examples/report2/datax-mysql2mysql.md` | +| 7 | `examples/source/datax-oracle2hdfs-full.json` | `examples/target2/datax-oracle2hdfs-full.conf` | `examples/report2/datax-oracle2hdfs-full.md` | +| 8 | `examples/source/datax-postgresql2hdfs-full.json` | 
`examples/target2/datax-postgresql2hdfs-full.conf` | `examples/report2/datax-postgresql2hdfs-full.md` | +| 9 | `examples/source/datax-postgresql2hdfs.json` | `examples/target2/datax-postgresql2hdfs.conf` | `examples/report2/datax-postgresql2hdfs.md` | +| 10 | `examples/source/datax-sqlserver2hdfs-full.json` | `examples/target2/datax-sqlserver2hdfs-full.conf` | `examples/report2/datax-sqlserver2hdfs-full.md` | + +## ❌ Failed Conversions (0) + +*No failed conversion files* + +--- +*Report generated at: 2025-08-04 14:53:36* +*Tool version: X2SeaTunnel v0.1* diff --git a/x2seatunnel/pom.xml b/x2seatunnel/pom.xml new file mode 100644 index 0000000..33307af --- /dev/null +++ b/x2seatunnel/pom.xml @@ -0,0 +1,238 @@ + + + + 4.0.0 + + + org.apache.seatunnel + seatunnel-tools + ${revision} + + + org.apache.seatunnel + x2seatunnel + ${revision} + jar + + X2SeaTunnel + X2SeaTunnel configuration conversion tool + + + yyyy-MM-dd HH:mm:ss + + + + + + org.slf4j + slf4j-api + ${slf4j.version} + + + org.apache.logging.log4j + log4j-slf4j-impl + ${log4j2.version} + + + org.apache.logging.log4j + log4j-api + ${log4j2.version} + + + org.apache.logging.log4j + log4j-core + ${log4j2.version} + + + + + + + org.apache.seatunnel + seatunnel-common + ${seatunnel.version} + + + + org.apache.logging.log4j + log4j-1.2-api + + + + + + commons-cli + commons-cli + ${commons.cli.version} + + + + org.apache.seatunnel + seatunnel-jackson + ${seatunnel.version} + optional + + + + org.apache.logging.log4j + log4j-1.2-api + + + + + + org.yaml + snakeyaml + ${snakeyaml.version} + + + + + org.slf4j + slf4j-api + + + org.apache.logging.log4j + log4j-api + + + org.apache.logging.log4j + log4j-core + + + org.apache.logging.log4j + log4j-slf4j-impl + + + + org.junit.jupiter + junit-jupiter-engine + ${junit5.version} + test + + + org.junit.jupiter + junit-jupiter-api + ${junit5.version} + test + + + junit + junit + ${junit4.version} + test + + + org.mockito + mockito-core + ${mockito.version} + test + + + + + + + + true + src/main/resources + + examples/target*/*.* + examples/report*/*.* + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + org.apache.maven.plugins + maven-shade-plugin + ${maven-shade-plugin.version} + + + false + + + org.apache.seatunnel.tools.x2seatunnel.cli.X2SeaTunnelCli + + + + META-INF/log4j-provider.properties + + + + META-INF/org/apache/logging/log4j/core/config/plugins/Log4j2Plugins.dat + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + org.apache.logging.log4j:log4j-1.2-api + + org.apache.logging.log4j:log4j-to-slf4j + + + + + + + shade + + package + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + src/main/assembly/x2seatunnel-standalone.xml + + posix + + + + make-assembly + + single + + package + + + + + + diff --git a/x2seatunnel/src/main/assembly/x2seatunnel-standalone.xml b/x2seatunnel/src/main/assembly/x2seatunnel-standalone.xml new file mode 100644 index 0000000..e7b1e23 --- /dev/null +++ b/x2seatunnel/src/main/assembly/x2seatunnel-standalone.xml @@ -0,0 +1,108 @@ + + + + + bin + + zip + + true + x2seatunnel + + + + + src/main/resources/bin + bin + 0755 + + *.sh + + + + + + target + lib + + x2seatunnel-*.jar + + + *-sources.jar + *-tests.jar + + + + + src/main/resources/config + config + + **/* + + + + + src/main/resources/templates + templates + + **/* + + + + + src/main/resources/examples + examples + + **/* + + + + + ../../../../ + . 
+ + LICENSE + NOTICE + + + + src/main/resources/logs + logs + + .gitkeep + + + + + + README.md + . + true + + + README_zh.md + . + true + + + +
diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/cli/CommandLineOptions.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/cli/CommandLineOptions.java
new file mode 100644
index 0000000..ae4e479
--- /dev/null
+++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/cli/CommandLineOptions.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.tools.x2seatunnel.cli;
+
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+
+/** X2SeaTunnel command line options configuration */
+public class CommandLineOptions {
+
+    /** Create command line options */
+    public static Options createOptions() {
+        Options options = new Options();
+
+        // Source file parameter
+        options.addOption(
+                Option.builder("s")
+                        .longOpt("source")
+                        .hasArg()
+                        .desc("Source configuration file path")
+                        .required(false)
+                        .build());
+
+        // Target file parameter
+        options.addOption(
+                Option.builder("t")
+                        .longOpt("target")
+                        .hasArg()
+                        .desc("Target configuration file path")
+                        .required(false)
+                        .build());
+
+        // Source type parameter
+        options.addOption(
+                Option.builder("st")
+                        .longOpt("source-type")
+                        .hasArg()
+                        .desc(
+                                "Source configuration type (datax, sqoop, flume, auto, default: datax)")
+                        .build());
+
+        // Custom template parameter
+        options.addOption(
+                Option.builder("T")
+                        .longOpt("template")
+                        .hasArg()
+                        .desc("Custom template file name")
+                        .build());
+
+        // Report file parameter
+        options.addOption(
+                Option.builder("r")
+                        .longOpt("report")
+                        .hasArg()
+                        .desc("Conversion report file path")
+                        .build());
+
+        // Report directory (output directory for individual file reports in batch mode)
+        options.addOption(
+                Option.builder("R")
+                        .longOpt("report-dir")
+                        .hasArg()
+                        .desc(
+                                "Report output directory in batch mode, individual file reports and summary.md will be output to this directory")
+                        .build());
+
+        // Version information
+        options.addOption(
+                Option.builder("v").longOpt("version").desc("Show version information").build());
+
+        // Help information
+        options.addOption(
+                Option.builder("h").longOpt("help").desc("Show help information").build());
+
+        // Verbose logging
+        options.addOption(
+                Option.builder().longOpt("verbose").desc("Enable verbose log output").build());
+
+        // YAML configuration file
+        options.addOption(
+                Option.builder("c")
+                        .longOpt("config")
+                        .hasArg()
+                        .desc(
+                                "YAML configuration file path, containing source, target, report, template and other settings")
+                        .required(false)
+                        .build());
+
+        // Batch conversion source directory
options.addOption( + Option.builder("d") + .longOpt("directory") + .hasArg() + .desc("Source file directory to be converted") + .required(false) + .build()); + + // Batch conversion output directory + options.addOption( + Option.builder("o") + .longOpt("output-dir") + .hasArg() + .desc("Batch conversion output directory") + .required(false) + .build()); + + // Batch conversion file matching pattern + options.addOption( + Option.builder("p") + .longOpt("pattern") + .hasArg() + .desc( + "Batch conversion file wildcard pattern, comma separated, e.g.: *.json,*.xml") + .build()); + + return options; + } +} diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/cli/X2SeaTunnelCli.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/cli/X2SeaTunnelCli.java new file mode 100644 index 0000000..9cca950 --- /dev/null +++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/cli/X2SeaTunnelCli.java @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.seatunnel.tools.x2seatunnel.cli;
+
+import org.apache.seatunnel.tools.x2seatunnel.core.ConversionEngine;
+import org.apache.seatunnel.tools.x2seatunnel.util.BatchConversionReport;
+import org.apache.seatunnel.tools.x2seatunnel.util.ConversionConfig;
+import org.apache.seatunnel.tools.x2seatunnel.util.DirectoryProcessor;
+import org.apache.seatunnel.tools.x2seatunnel.util.FilePattern;
+import org.apache.seatunnel.tools.x2seatunnel.util.FileUtils;
+import org.apache.seatunnel.tools.x2seatunnel.util.YamlConfigParser;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.file.Paths;
+import java.util.List;
+
+/** X2SeaTunnel command-line tool main class */
+public class X2SeaTunnelCli {
+
+    private static final Logger logger = LoggerFactory.getLogger(X2SeaTunnelCli.class);
+
+    private static final String TOOL_NAME = "x2seatunnel";
+
+    public static void main(String[] args) {
+        try {
+            X2SeaTunnelCli cli = new X2SeaTunnelCli();
+            cli.run(args);
+        } catch (Exception e) {
+            logger.error("Execution failed: {}", e.getMessage());
+            System.exit(1);
+        }
+    }
+
+    public void run(String[] args) {
+        Options options = CommandLineOptions.createOptions();
+
+        try {
+            CommandLineParser parser = new DefaultParser();
+            CommandLine cmd = parser.parse(options, args);
+
+            // Support YAML configuration file
+            ConversionConfig yamlConfig = null;
+            if (cmd.hasOption("c") || cmd.hasOption("config")) {
+                String configPath = cmd.getOptionValue("c", cmd.getOptionValue("config"));
+                yamlConfig = YamlConfigParser.parse(configPath);
+                logger.info("Loaded YAML configuration: {}", configPath);
+            }
+
+            // Read batch mode parameters in advance
+            String directory = null;
+            String outputDir = null;
+            String reportDir = null;
+            // Custom template for batch mode
+            String batchTemplate = null;
+            if (cmd.hasOption("d")) directory = cmd.getOptionValue("d");
+            if (cmd.hasOption("directory")) directory = cmd.getOptionValue("directory");
+            if (cmd.hasOption("o")) outputDir = cmd.getOptionValue("o");
+            if (cmd.hasOption("output-dir")) outputDir = cmd.getOptionValue("output-dir");
+            if (cmd.hasOption("R")) reportDir = cmd.getOptionValue("R");
+            if (cmd.hasOption("report-dir")) reportDir = cmd.getOptionValue("report-dir");
+            if (cmd.hasOption("T")) batchTemplate = cmd.getOptionValue("T");
+            if (cmd.hasOption("template")) batchTemplate = cmd.getOptionValue("template");
+
+            // If batch mode is specified, execute batch logic first and return directly
+            if (directory != null) {
+                if (outputDir == null) {
+                    logger.error("Batch conversion requires an output directory: -o/--output-dir");
+                    printUsage();
+                    System.exit(1);
+                }
+                logger.info(
+                        "Starting batch conversion, source directory={}, output directory={}",
+                        directory,
+                        outputDir);
+                FileUtils.createDirectory(outputDir);
+                if (reportDir != null) {
+                    logger.info("Report directory={}", reportDir);
+                    FileUtils.createDirectory(reportDir);
+                }
+                DirectoryProcessor dp = new DirectoryProcessor(directory, outputDir);
+                List<String> sources = dp.listSourceFiles();
+                String pattern = cmd.getOptionValue("p", cmd.getOptionValue("pattern"));
+                sources = FilePattern.filter(sources, pattern);
+                if (sources.isEmpty()) {
+                    logger.warn(
+                            "No files to convert found in source directory: {} with 
pattern: {}", + directory, + pattern); + } + ConversionEngine engine = new ConversionEngine(); + BatchConversionReport batchReport = new BatchConversionReport(); + + // Set batch conversion configuration information + batchReport.setConversionConfig( + directory, outputDir, reportDir, pattern, batchTemplate); + + int total = sources.size(); + for (int i = 0; i < total; i++) { + String src = sources.get(i); + String tgt = dp.resolveTargetPath(src); + String rpt; + if (reportDir != null) { + String name = FileUtils.getFileNameWithoutExtension(src); + rpt = Paths.get(reportDir, name + ".md").toString(); + } else { + rpt = cmd.getOptionValue("r", cmd.getOptionValue("report")); + if (rpt == null) { + rpt = dp.resolveReportPath(src); + } + } + logger.info("[{} / {}] Processing file: {}", i + 1, total, src); + try { + engine.convert(src, tgt, "datax", "seatunnel", batchTemplate, rpt); + batchReport.recordSuccess(src, tgt, rpt); + System.out.println( + String.format( + "[%d/%d] Conversion completed: %s -> %s", + i + 1, total, src, tgt)); + } catch (Exception e) { + logger.error( + "File conversion failed: {} -> {} , error: {}", + src, + tgt, + e.getMessage()); + batchReport.recordFailure(src, e.getMessage()); + } + } + String summary; + if (reportDir != null) { + summary = Paths.get(reportDir, "summary.md").toString(); + } else { + summary = cmd.getOptionValue("r", cmd.getOptionValue("report")); + if (summary == null) { + summary = Paths.get(outputDir, "summary.md").toString(); + } + } + batchReport.writeReport(summary); + System.out.println( + "Batch conversion completed! Output directory: " + + outputDir + + ", Report: " + + summary); + return; + } + + // Validate required parameters: only required to specify -s/-t in non-YAML and + // non-batch mode + if (yamlConfig == null && directory == null) { + if (!cmd.hasOption("s") && !cmd.hasOption("source")) { + logger.error("Missing required parameter: -s/--source"); + printUsage(); + System.exit(1); + } + if (!cmd.hasOption("t") && !cmd.hasOption("target")) { + logger.error("Missing required parameter: -t/--target"); + printUsage(); + System.exit(1); + } + } + + // Get parameter values, command line takes priority, then YAML + String sourceFile = yamlConfig != null ? yamlConfig.getSource() : null; + String targetFile = yamlConfig != null ? yamlConfig.getTarget() : null; + String sourceType = + yamlConfig != null && yamlConfig.getSourceType() != null + ? yamlConfig.getSourceType() + : "datax"; + String customTemplate = yamlConfig != null ? yamlConfig.getTemplate() : null; + String reportFile = yamlConfig != null ? 
yamlConfig.getReport() : null;
+            // Command line parameters override YAML configuration
+            if (cmd.hasOption("s")) sourceFile = cmd.getOptionValue("s");
+            if (cmd.hasOption("source")) sourceFile = cmd.getOptionValue("source");
+            if (cmd.hasOption("t")) targetFile = cmd.getOptionValue("t");
+            if (cmd.hasOption("target")) targetFile = cmd.getOptionValue("target");
+            if (cmd.hasOption("st")) sourceType = cmd.getOptionValue("st");
+            if (cmd.hasOption("source-type")) sourceType = cmd.getOptionValue("source-type");
+            if (cmd.hasOption("T")) customTemplate = cmd.getOptionValue("T");
+            if (cmd.hasOption("template")) customTemplate = cmd.getOptionValue("template");
+            if (cmd.hasOption("r")) reportFile = cmd.getOptionValue("r");
+            if (cmd.hasOption("report")) reportFile = cmd.getOptionValue("report");
+            String targetType = "seatunnel"; // Fixed as seatunnel
+
+            // Execute conversion
+            ConversionEngine engine = new ConversionEngine();
+            engine.convert(
+                    sourceFile, targetFile, sourceType, targetType, customTemplate, reportFile);
+
+            System.out.println("Configuration conversion completed!");
+            System.out.println("Source file: " + sourceFile);
+            System.out.println("Target file: " + targetFile);
+            if (reportFile != null) {
+                System.out.println("Conversion report: " + reportFile);
+            }
+
+        } catch (ParseException e) {
+            logger.error("Parameter parsing failed: {}", e.getMessage());
+            printHelp(options);
+            System.exit(1);
+        } catch (Exception e) {
+            logger.error("Error occurred during conversion: {}", e.getMessage());
+            System.exit(1);
+        }
+    }
+
+    private void printHelp(Options options) {
+        HelpFormatter formatter = new HelpFormatter();
+        formatter.printHelp(
+                TOOL_NAME,
+                "X2SeaTunnel configuration conversion tool",
+                options,
+                "\nExamples:\n"
+                        + "  "
+                        + TOOL_NAME
+                        + " -s datax.json -t seatunnel.conf\n"
+                        + "  "
+                        + TOOL_NAME
+                        + " --source datax.json --target seatunnel.conf --source-type datax --report report.md\n");
+    }
+
+    private void printUsage() {
+        System.out.println("Usage: x2seatunnel [OPTIONS]");
+        System.out.println(
+                "Common batch mode: x2seatunnel -d <source-dir> -o <output-dir> [-R <report-dir>] [-p <pattern>]");
+        System.out.println("Use -h or --help to view the complete help information");
+    }
+}
diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/core/ConversionEngine.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/core/ConversionEngine.java
new file mode 100644
index 0000000..a6d21a4
--- /dev/null
+++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/core/ConversionEngine.java
@@ -0,0 +1,368 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.seatunnel.tools.x2seatunnel.core; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.JsonNode; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.seatunnel.tools.x2seatunnel.model.MappingResult; +import org.apache.seatunnel.tools.x2seatunnel.model.MappingTracker; +import org.apache.seatunnel.tools.x2seatunnel.report.MarkdownReportGenerator; +import org.apache.seatunnel.tools.x2seatunnel.template.ConfigDrivenTemplateEngine; +import org.apache.seatunnel.tools.x2seatunnel.template.ConfigDrivenTemplateEngine.TemplateConversionResult; +import org.apache.seatunnel.tools.x2seatunnel.template.TemplateMappingManager; +import org.apache.seatunnel.tools.x2seatunnel.template.TemplateVariableResolver; +import org.apache.seatunnel.tools.x2seatunnel.util.FileUtils; +import org.apache.seatunnel.tools.x2seatunnel.util.PathResolver; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.util.List; +import java.util.Map; + +/** Core conversion engine */ +public class ConversionEngine { + + private static final Logger logger = LoggerFactory.getLogger(ConversionEngine.class); + + private final TemplateVariableResolver templateResolver; + private final ConfigDrivenTemplateEngine configDrivenEngine; + private final TemplateMappingManager templateMappingManager; + + public ConversionEngine() { + this.templateMappingManager = TemplateMappingManager.getInstance(); + this.templateResolver = new TemplateVariableResolver(templateMappingManager); + this.configDrivenEngine = new ConfigDrivenTemplateEngine(); + } + + /** + * Execute configuration conversion (standard conversion method) + * + * @param sourceFile Source file path + * @param targetFile Target file path + * @param sourceType Source type + * @param targetType Target type + * @param reportFile Report file path + */ + public void convert( + String sourceFile, + String targetFile, + String sourceType, + String targetType, + String reportFile) { + convert(sourceFile, targetFile, sourceType, targetType, null, reportFile); + } + + /** + * Execute configuration conversion (supports custom templates) + * + * @param sourceFile Source file path + * @param targetFile Target file path + * @param sourceType Source type + * @param targetType Target type + * @param customTemplate Custom template file name + * @param reportFile Report file path + */ + public void convert( + String sourceFile, + String targetFile, + String sourceType, + String targetType, + String customTemplate, + String reportFile) { + logger.info("Starting configuration conversion..."); + logger.info("Source file: {}", sourceFile); + logger.info("Target file: {}", targetFile); + logger.info("Source type: {}", sourceType); + logger.info("Target type: {}", targetType); + if (customTemplate != null) { + logger.info("Custom template: {}", customTemplate); + } + + try { + // Read source file + logger.info("Reading input file..."); + String sourceContent = FileUtils.readFile(sourceFile); + logger.info("File read successfully, size: {} bytes", sourceContent.length()); + + // Validate DataX configuration format + logger.info("Validating {} configuration format...", sourceType); + validateDataXFormat(sourceContent); + logger.info("Configuration validation completed"); + + String targetContent; + MappingResult mappingResult = null; + TemplateConversionResult templateResult = null; + + if (customTemplate != null && !customTemplate.trim().isEmpty()) { + // Use custom template for 
conversion (simplified approach) + logger.info("Using custom template for conversion: {}", customTemplate); + targetContent = convertWithCustomTemplate(customTemplate, sourceContent); + logger.info("Custom template conversion completed"); + } else { + // Use configuration-driven standard conversion process + logger.info("Using configuration-driven standard conversion process"); + + templateResult = configDrivenEngine.convertWithTemplate(sourceContent); + + if (!templateResult.isSuccess()) { + throw new RuntimeException( + "Configuration-driven template conversion failed: " + + templateResult.getErrorMessage()); + } + + targetContent = templateResult.getConfigContent(); + mappingResult = templateResult.getMappingResult(); + } + + // Generate report (if report file is specified) + if (reportFile != null && !reportFile.trim().isEmpty()) { + logger.info("Generating conversion report..."); + if (mappingResult != null && templateResult != null) { + // Detailed report for standard conversion + generateDetailedConversionReport( + mappingResult, + sourceFile, + targetFile, + sourceType, + customTemplate, + templateResult.getSourceTemplate(), + templateResult.getSinkTemplate(), + reportFile); + } else { + // Custom template conversion: analyze custom template to generate report data + logger.info("Generating report data for custom template conversion..."); + MappingResult customMappingResult = + analyzeCustomTemplate(customTemplate, sourceContent); + generateDetailedConversionReport( + customMappingResult, + sourceFile, + targetFile, + sourceType, + customTemplate, + customTemplate, // Custom template as source template + customTemplate, // Custom template as target template + reportFile); + } + logger.info("Conversion report generation completed: {}", reportFile); + } + + // Write target file + logger.info("Writing target file..."); + FileUtils.writeFile(targetFile, targetContent); + logger.info("Output file generation completed: {}", targetFile); + + } catch (Exception e) { + logger.error("Configuration conversion failed: {}", e.getMessage(), e); + throw new RuntimeException("Configuration conversion failed", e); + } + } + + /** + * Convert using custom template + * + * @param customTemplate Custom template file name + * @param sourceContent Original DataX JSON content + * @return Converted configuration content + */ + private String convertWithCustomTemplate(String customTemplate, String sourceContent) { + try { + // Load custom template + String templateContent = loadCustomTemplate(customTemplate); + + // Use template variable resolver for variable substitution (using original JSON + // content) + return templateResolver.resolve(templateContent, sourceContent); + + } catch (Exception e) { + logger.error("Custom template conversion failed: {}", e.getMessage(), e); + throw new RuntimeException("Custom template conversion failed: " + e.getMessage(), e); + } + } + + /** + * Load custom template file + * + * @param templatePath Template file path (supports absolute and relative paths) + * @return Template content + */ + private String loadCustomTemplate(String templatePath) { + logger.info("Loading custom template: {}", templatePath); + + // 1. Use intelligent path resolver to find template in file system + String resolvedPath = PathResolver.resolveTemplatePath(templatePath); + if (resolvedPath != null && PathResolver.exists(resolvedPath)) { + logger.info("Loading template from file system: {}", resolvedPath); + return FileUtils.readFile(resolvedPath); + } + + // 2. 
Load from classpath (built-in templates) + try { + String resourcePath = PathResolver.buildResourcePath(templatePath); + logger.info("Attempting to load template from classpath: {}", resourcePath); + + String content = FileUtils.readResourceFile(resourcePath); + if (content != null && !content.trim().isEmpty()) { + logger.info("Successfully loaded template from classpath: {}", resourcePath); + return content; + } + } catch (Exception e) { + logger.debug("Failed to load template from classpath: {}", e.getMessage()); + } + + // 3. Generate detailed error information to help users debug + String homePath = PathResolver.getHomePath(); + String configTemplatesDir = PathResolver.getConfigTemplatesDir(); + + throw new RuntimeException( + String.format( + "Custom template file not found: %s\n" + + "Search paths:\n" + + " 1. Current working directory: %s\n" + + " 2. Configuration template directory: %s\n" + + " 3. Development environment configuration: %s/config/x2seatunnel/templates/%s\n" + + " 4. Built-in resources: classpath:%s\n" + + "Hint: Please check if the template file exists, or use absolute path to specify template location", + templatePath, + new File(templatePath).getAbsolutePath(), + new File(configTemplatesDir, templatePath).getAbsolutePath(), + homePath, + templatePath, + PathResolver.buildResourcePath(templatePath))); + } + + /** Generate detailed conversion report */ + private void generateDetailedConversionReport( + MappingResult mappingResult, + String sourceFile, + String targetFile, + String sourceType, + String customTemplate, + String sourceTemplate, + String sinkTemplate, + String reportFile) { + MarkdownReportGenerator reportGenerator = new MarkdownReportGenerator(); + String reportContent = + reportGenerator.generateReport( + mappingResult, + sourceFile, + targetFile, + sourceType, + customTemplate, + sourceTemplate, + sinkTemplate); + FileUtils.writeFile(reportFile, reportContent); + } + + /** + * Validate DataX configuration format + * + * @param sourceContent DataX JSON content + * @throws IllegalArgumentException if configuration format is invalid + */ + private void validateDataXFormat(String sourceContent) { + try { + ObjectMapper objectMapper = new ObjectMapper(); + JsonNode rootNode = objectMapper.readTree(sourceContent); + + // Validate basic structure + if (!rootNode.has("job")) { + throw new IllegalArgumentException( + "DataX configuration missing required 'job' node"); + } + + JsonNode jobNode = rootNode.get("job"); + if (!jobNode.has("content")) { + throw new IllegalArgumentException( + "DataX configuration missing required 'content' node"); + } + + JsonNode contentNode = jobNode.get("content"); + if (!contentNode.isArray() || contentNode.size() == 0) { + throw new IllegalArgumentException( + "DataX configuration 'content' must be a non-empty array"); + } + + // Validate first content item has reader and writer + JsonNode firstContent = contentNode.get(0); + if (!firstContent.has("reader")) { + throw new IllegalArgumentException( + "DataX configuration missing required 'reader' configuration"); + } + if (!firstContent.has("writer")) { + throw new IllegalArgumentException( + "DataX configuration missing required 'writer' configuration"); + } + + } catch (Exception e) { + logger.error("DataX configuration validation failed: {}", e.getMessage()); + throw new IllegalArgumentException( + "Invalid DataX configuration format: " + e.getMessage(), e); + } + } + + /** Analyze custom template and generate mapping result */ + private MappingResult 
analyzeCustomTemplate(String customTemplate, String sourceContent) {
+        logger.info("Starting analysis of custom template: {}", customTemplate);
+
+        try {
+            // 1. Load custom template content
+            String templateContent = loadCustomTemplate(customTemplate);
+
+            // 2. Create dedicated mapping tracker and variable resolver
+            MappingTracker customTracker = new MappingTracker();
+            TemplateVariableResolver customResolver =
+                    new TemplateVariableResolver(templateMappingManager, customTracker);
+
+            // 3. Analyze template and extract field mapping relationships
+            logger.info("Analyzing field mapping relationships in custom template...");
+            Map<String, List<String>> fieldMappings =
+                    customResolver.analyzeTemplateFieldMappings(templateContent, "custom");
+            logger.info("Custom template contains {} field mappings", fieldMappings.size());
+
+            // 4. Parse template variables and trigger mapping tracking
+            logger.info("Parsing custom template variables...");
+            customResolver.resolveWithTemplateAnalysis(templateContent, "custom", sourceContent);
+
+            // 5. Generate mapping result
+            MappingResult result = customTracker.generateMappingResult();
+            result.setSuccess(true);
+
+            logger.info(
+                    "Custom template analysis completed: direct mappings({}), transform mappings({}), default values({}), missing({}), unmapped({})",
+                    result.getSuccessMappings().size(),
+                    result.getTransformMappings().size(),
+                    result.getDefaultValues().size(),
+                    result.getMissingRequiredFields().size(),
+                    result.getUnmappedFields().size());
+
+            return result;
+
+        } catch (Exception e) {
+            logger.error("Custom template analysis failed: {}", e.getMessage(), e);
+            // Return a basic success result to avoid report generation failure
+            MappingResult fallbackResult = new MappingResult();
+            fallbackResult.setSuccess(true);
+            fallbackResult.addDefaultValueField(
+                    "template.type", "custom", "Using custom template: " + customTemplate);
+            return fallbackResult;
+        }
+    }
+}
diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/model/MappingResult.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/model/MappingResult.java
new file mode 100644
index 0000000..a850b1f
--- /dev/null
+++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/model/MappingResult.java
@@ -0,0 +1,322 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.seatunnel.tools.x2seatunnel.model; + +import java.util.ArrayList; +import java.util.List; + +/** Mapping result data model */ +public class MappingResult { + + private boolean success = false; + private String errorMessage; + private SeaTunnelConfig seaTunnelConfig; + + // Basic information + private String sourceTemplate; + private String sinkTemplate; + private String readerType; + private String writerType; + + // Mapping result statistics + private List successMappings = new ArrayList<>(); + private List transformMappings = new ArrayList<>(); + private List defaultValues = new ArrayList<>(); + private List missingRequiredFields = new ArrayList<>(); + private List unmappedFields = new ArrayList<>(); + + /** Successfully mapped fields */ + public static class MappingItem { + private String sourceField; + private String targetField; + private String value; + + public MappingItem(String sourceField, String targetField, String value) { + this.sourceField = sourceField; + this.targetField = targetField; + this.value = value; + } + + // Getters + public String getSourceField() { + return sourceField; + } + + public String getTargetField() { + return targetField; + } + + public String getValue() { + return value; + } + + @Override + public String toString() { + return sourceField + " -> " + targetField + " = " + value; + } + } + + /** Transform mapped fields (using filters) */ + public static class TransformMapping { + private String sourceField; + private String targetField; + private String value; + private String filterName; + + public TransformMapping( + String sourceField, String targetField, String value, String filterName) { + this.sourceField = sourceField; + this.targetField = targetField; + this.value = value; + this.filterName = filterName; + } + + // Getters + public String getSourceField() { + return sourceField; + } + + public String getTargetField() { + return targetField; + } + + public String getValue() { + return value; + } + + public String getFilterName() { + return filterName; + } + + @Override + public String toString() { + return sourceField + + " -> " + + targetField + + " = " + + value + + " (filter: " + + filterName + + ")"; + } + } + + /** Fields using default values */ + public static class DefaultValueField { + private String fieldName; + private String value; + private String reason; + + public DefaultValueField(String fieldName, String value, String reason) { + this.fieldName = fieldName; + this.value = value; + this.reason = reason; + } + + // Getters + public String getFieldName() { + return fieldName; + } + + public String getValue() { + return value; + } + + public String getReason() { + return reason; + } + + @Override + public String toString() { + return fieldName + " = " + value + " (default: " + reason + ")"; + } + } + + /** Missing required fields */ + public static class MissingField { + private String fieldName; + private String reason; + + public MissingField(String fieldName, String reason) { + this.fieldName = fieldName; + this.reason = reason; + } + + // Getters + public String getFieldName() { + return fieldName; + } + + public String getReason() { + return reason; + } + + @Override + public String toString() { + return fieldName + " (reason: " + reason + ")"; + } + } + + /** Unmapped fields */ + public static class UnmappedField { + private String fieldName; + private String value; + private String reason; + + public UnmappedField(String fieldName, String value, String reason) { + this.fieldName = fieldName; + 
this.value = value; + this.reason = reason; + } + + // Getters + public String getFieldName() { + return fieldName; + } + + public String getValue() { + return value; + } + + public String getReason() { + return reason; + } + + @Override + public String toString() { + return fieldName + " = " + value + " (reason: " + reason + ")"; + } + } + + // Convenient methods for adding mapping results + public void addSuccessMapping(String sourceField, String targetField, String value) { + successMappings.add(new MappingItem(sourceField, targetField, value)); + } + + public void addTransformMapping( + String sourceField, String targetField, String value, String filterName) { + transformMappings.add(new TransformMapping(sourceField, targetField, value, filterName)); + } + + public void addDefaultValueField(String fieldName, String value, String reason) { + defaultValues.add(new DefaultValueField(fieldName, value, reason)); + } + + public void addMissingRequiredField(String fieldName, String reason) { + missingRequiredFields.add(new MissingField(fieldName, reason)); + } + + public void addUnmappedField(String fieldName, String value, String reason) { + unmappedFields.add(new UnmappedField(fieldName, value, reason)); + } + + // Getter and Setter methods + public boolean isSuccess() { + return success; + } + + public void setSuccess(boolean success) { + this.success = success; + } + + public String getErrorMessage() { + return errorMessage; + } + + public void setErrorMessage(String errorMessage) { + this.errorMessage = errorMessage; + } + + public SeaTunnelConfig getSeaTunnelConfig() { + return seaTunnelConfig; + } + + public void setSeaTunnelConfig(SeaTunnelConfig seaTunnelConfig) { + this.seaTunnelConfig = seaTunnelConfig; + } + + public String getSourceTemplate() { + return sourceTemplate; + } + + public void setSourceTemplate(String sourceTemplate) { + this.sourceTemplate = sourceTemplate; + } + + public String getSinkTemplate() { + return sinkTemplate; + } + + public void setSinkTemplate(String sinkTemplate) { + this.sinkTemplate = sinkTemplate; + } + + public String getReaderType() { + return readerType; + } + + public void setReaderType(String readerType) { + this.readerType = readerType; + } + + public String getWriterType() { + return writerType; + } + + public void setWriterType(String writerType) { + this.writerType = writerType; + } + + public List getSuccessMappings() { + return successMappings; + } + + public List getTransformMappings() { + return transformMappings; + } + + public List getDefaultValues() { + return defaultValues; + } + + public List getMissingRequiredFields() { + return missingRequiredFields; + } + + public List getUnmappedFields() { + return unmappedFields; + } + + @Override + public String toString() { + return "MappingResult{" + + "success=" + + success + + ", successMappings=" + + successMappings.size() + + ", transformMappings=" + + transformMappings.size() + + ", defaultValues=" + + defaultValues.size() + + ", missingRequiredFields=" + + missingRequiredFields.size() + + ", unmappedFields=" + + unmappedFields.size() + + '}'; + } +} diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/model/MappingTracker.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/model/MappingTracker.java new file mode 100644 index 0000000..9766d3f --- /dev/null +++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/model/MappingTracker.java @@ -0,0 +1,327 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more 
+ * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.tools.x2seatunnel.model; + +import org.apache.seatunnel.tools.x2seatunnel.util.DataXFieldExtractor; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** Mapping tracker - records field mapping process for generating detailed conversion reports */ +public class MappingTracker { + + private static final Logger logger = LoggerFactory.getLogger(MappingTracker.class); + + private final List directMappings = new ArrayList<>(); // Direct mappings + private final List transformMappings = + new ArrayList<>(); // Transform mappings (filters) + private final List defaultValues = new ArrayList<>(); // Default values used + private final List missingFields = new ArrayList<>(); // Missing fields + private final List unmappedFields = new ArrayList<>(); // Unmapped fields + + /** Record successful direct mapping */ + public void recordDirectMapping( + String sourcePath, String targetField, String value, String description) { + FieldMapping mapping = + new FieldMapping(sourcePath, targetField, value, description, MappingType.DIRECT); + directMappings.add(mapping); + logger.debug("Recording direct mapping: {} -> {} = {}", sourcePath, targetField, value); + } + + /** Record transform mapping fields (using filters) */ + public void recordTransformMapping( + String sourcePath, String targetField, String value, String filterName) { + FieldMapping mapping = + new FieldMapping(sourcePath, targetField, value, filterName, MappingType.TRANSFORM); + transformMappings.add(mapping); + logger.debug( + "Recording transform mapping: {} -> {} = {} (filter: {})", + sourcePath, + targetField, + value, + filterName); + } + + /** Record fields using default values */ + public void recordDefaultValue(String targetField, String value, String reason) { + FieldMapping mapping = + new FieldMapping(null, targetField, value, reason, MappingType.DEFAULT); + defaultValues.add(mapping); + logger.debug("Recording default value: {} = {} ({})", targetField, value, reason); + } + + /** Record missing required fields */ + public void recordMissingField(String sourcePath, String reason) { + FieldMapping mapping = + new FieldMapping(sourcePath, null, null, reason, MappingType.MISSING); + missingFields.add(mapping); + logger.debug("Recording missing field: {} ({})", sourcePath, reason); + } + + /** Record unmapped fields */ + public void recordUnmappedField(String sourcePath, String value, String reason) { + FieldMapping mapping = + new FieldMapping(sourcePath, null, value, reason, MappingType.UNMAPPED); + unmappedFields.add(mapping); + logger.debug("Recording unmapped field: {} = {} ({})", sourcePath, value, reason); + } + + /** Generate complete mapping result */ + public MappingResult 
generateMappingResult() { + MappingResult result = new MappingResult(); + + // Convert direct mappings + for (FieldMapping mapping : directMappings) { + result.addSuccessMapping( + mapping.getSourcePath(), mapping.getTargetField(), mapping.getValue()); + } + + // Convert transform mapping fields + for (FieldMapping mapping : transformMappings) { + result.addTransformMapping( + mapping.getSourcePath(), + mapping.getTargetField(), + mapping.getValue(), + mapping.getDescription()); + } + + // Convert default value fields - separate category + for (FieldMapping mapping : defaultValues) { + result.addDefaultValueField( + mapping.getTargetField(), mapping.getValue(), mapping.getDescription()); + } + + // Convert missing fields + for (FieldMapping mapping : missingFields) { + result.addMissingRequiredField(mapping.getSourcePath(), mapping.getDescription()); + } + + // Convert unmapped fields + for (FieldMapping mapping : unmappedFields) { + result.addUnmappedField( + mapping.getSourcePath(), mapping.getValue(), mapping.getDescription()); + } + + result.setSuccess(true); + + logger.info( + "Mapping tracking completed: direct mappings({}), transform mappings({}), default values({}), missing({}), unmapped({})", + directMappings.size(), + transformMappings.size(), + defaultValues.size(), + missingFields.size(), + unmappedFields.size()); + + return result; + } + + /** Reset mapping tracker state for new conversion process */ + public void reset() { + directMappings.clear(); + transformMappings.clear(); + defaultValues.clear(); + missingFields.clear(); + unmappedFields.clear(); + logger.info("Mapping tracker has been reset"); + } + + /** + * Calculate and record unmapped fields based on field reference tracker + * + * @param fieldReferenceTracker field reference tracker + */ + public void calculateUnmappedFieldsFromTracker( + DataXFieldExtractor.FieldReferenceTracker fieldReferenceTracker) { + try { + if (fieldReferenceTracker == null) { + logger.warn("Field reference tracker is null, skipping unmapped field calculation"); + return; + } + + // Get unreferenced fields + Map unreferencedFields = fieldReferenceTracker.getUnreferencedFields(); + + // Record unmapped fields (with actual values) + for (Map.Entry entry : unreferencedFields.entrySet()) { + String fieldPath = entry.getKey(); + String actualValue = entry.getValue(); + recordUnmappedField( + fieldPath, actualValue, "Exists in DataX but not referenced in template"); + } + + logger.info( + "Unmapped field calculation completed: total fields({}), referenced({}), unmapped({})", + fieldReferenceTracker.getTotalFields(), + fieldReferenceTracker.getReferencedFieldCount(), + fieldReferenceTracker.getUnreferencedFieldCount()); + + } catch (Exception e) { + logger.error("Failed to calculate unmapped fields: {}", e.getMessage(), e); + } + } + + /** + * Get brief description of statistics + * + * @return statistics string + */ + public String getStatisticsText() { + return String.format( + "Direct mappings: %d, Transform mappings: %d, Default values: %d, Missing: %d, Unmapped: %d", + directMappings.size(), + transformMappings.size(), + defaultValues.size(), + missingFields.size(), + unmappedFields.size()); + } + + /** Get statistics */ + public MappingStatistics getStatistics() { + return new MappingStatistics( + directMappings.size(), + transformMappings.size(), + defaultValues.size(), + missingFields.size(), + unmappedFields.size()); + } + + /** Field mapping data model */ + public static class FieldMapping { + private final String + sourcePath; // Source 
field path, e.g. job.content[0].reader.parameter.username + private final String targetField; // Target field name, e.g. source.Jdbc.user + private final String value; // Field value + private final String description; // Mapping description + private final MappingType type; // Mapping type + + public FieldMapping( + String sourcePath, + String targetField, + String value, + String description, + MappingType type) { + this.sourcePath = sourcePath; + this.targetField = targetField; + this.value = value; + this.description = description; + this.type = type; + } + + // Getters + public String getSourcePath() { + return sourcePath; + } + + public String getTargetField() { + return targetField; + } + + public String getValue() { + return value; + } + + public String getDescription() { + return description; + } + + public MappingType getType() { + return type; + } + + @Override + public String toString() { + return String.format( + "%s: %s -> %s = %s (%s)", type, sourcePath, targetField, value, description); + } + } + + /** Mapping type enumeration */ + public enum MappingType { + DIRECT, // Direct mapping + TRANSFORM, // Transform mapping (filters) + DEFAULT, // Default value + MISSING, // Missing field + UNMAPPED // Unmapped field + } + + /** Mapping statistics */ + public static class MappingStatistics { + private final int directMappings; + private final int transformMappings; + private final int defaultValues; + private final int missingFields; + private final int unmappedFields; + + public MappingStatistics( + int directMappings, + int transformMappings, + int defaultValues, + int missingFields, + int unmappedFields) { + this.directMappings = directMappings; + this.transformMappings = transformMappings; + this.defaultValues = defaultValues; + this.missingFields = missingFields; + this.unmappedFields = unmappedFields; + } + + public int getDirectMappings() { + return directMappings; + } + + public int getTransformMappings() { + return transformMappings; + } + + public int getDefaultValues() { + return defaultValues; + } + + public int getMissingFields() { + return missingFields; + } + + public int getUnmappedFields() { + return unmappedFields; + } + + public int getTotalFields() { + return directMappings + + transformMappings + + defaultValues + + missingFields + + unmappedFields; + } + + @Override + public String toString() { + return String.format( + "Direct mappings: %d, Transform mappings: %d, Default values: %d, Missing: %d, Unmapped: %d, Total: %d", + directMappings, + transformMappings, + defaultValues, + missingFields, + unmappedFields, + getTotalFields()); + } + } +} diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/model/SeaTunnelConfig.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/model/SeaTunnelConfig.java new file mode 100644 index 0000000..eb4bed6 --- /dev/null +++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/model/SeaTunnelConfig.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.tools.x2seatunnel.model; + +import java.util.HashMap; +import java.util.Map; + +/** SeaTunnel configuration data model */ +public class SeaTunnelConfig { + + // Environment configuration + private int parallelism = 1; + private String jobMode = "BATCH"; + + // Source configuration + private String sourceType; + private String sourceUrl; + private String sourceUser; + private String sourcePassword; + private String sourceDriver; + private String sourceQuery; + private Map sourceParams = new HashMap<>(); + + // Sink configuration + private String sinkType; + private String sinkPath; + private String sinkFileName; + private String sinkFieldDelimiter; + private String sinkFileFormat; + private String sinkTable; + private Map sinkParams = new HashMap<>(); + + // Getter and Setter methods + + public int getParallelism() { + return parallelism; + } + + public void setParallelism(int parallelism) { + this.parallelism = parallelism; + } + + public String getJobMode() { + return jobMode; + } + + public void setJobMode(String jobMode) { + this.jobMode = jobMode; + } + + public String getSourceType() { + return sourceType; + } + + public void setSourceType(String sourceType) { + this.sourceType = sourceType; + } + + public String getSourceUrl() { + return sourceUrl; + } + + public void setSourceUrl(String sourceUrl) { + this.sourceUrl = sourceUrl; + } + + public String getSourceUser() { + return sourceUser; + } + + public void setSourceUser(String sourceUser) { + this.sourceUser = sourceUser; + } + + public String getSourcePassword() { + return sourcePassword; + } + + public void setSourcePassword(String sourcePassword) { + this.sourcePassword = sourcePassword; + } + + public String getSourceDriver() { + return sourceDriver; + } + + public void setSourceDriver(String sourceDriver) { + this.sourceDriver = sourceDriver; + } + + public String getSourceQuery() { + return sourceQuery; + } + + public void setSourceQuery(String sourceQuery) { + this.sourceQuery = sourceQuery; + } + + public Map getSourceParams() { + return sourceParams; + } + + public void addSourceParam(String key, Object value) { + this.sourceParams.put(key, value); + } + + public String getSinkType() { + return sinkType; + } + + public void setSinkType(String sinkType) { + this.sinkType = sinkType; + } + + public String getSinkPath() { + return sinkPath; + } + + public void setSinkPath(String sinkPath) { + this.sinkPath = sinkPath; + } + + public String getSinkFileName() { + return sinkFileName; + } + + public void setSinkFileName(String sinkFileName) { + this.sinkFileName = sinkFileName; + } + + public String getSinkFieldDelimiter() { + return sinkFieldDelimiter; + } + + public void setSinkFieldDelimiter(String sinkFieldDelimiter) { + this.sinkFieldDelimiter = sinkFieldDelimiter; + } + + public String getSinkFileFormat() { + return sinkFileFormat; + } + + public void setSinkFileFormat(String sinkFileFormat) { + this.sinkFileFormat = sinkFileFormat; + } + + public String getSinkTable() { + return sinkTable; + } + + public void setSinkTable(String sinkTable) { + this.sinkTable = sinkTable; 
+ } + + public Map getSinkParams() { + return sinkParams; + } + + public void addSinkParam(String key, Object value) { + this.sinkParams.put(key, value); + } + + @Override + public String toString() { + return "SeaTunnelConfig{" + + "parallelism=" + + parallelism + + ", jobMode='" + + jobMode + + '\'' + + ", sourceType='" + + sourceType + + '\'' + + ", sourceUrl='" + + sourceUrl + + '\'' + + ", sourceUser='" + + sourceUser + + '\'' + + ", sinkType='" + + sinkType + + '\'' + + ", sinkPath='" + + sinkPath + + '\'' + + '}'; + } +} diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/report/MarkdownReportGenerator.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/report/MarkdownReportGenerator.java new file mode 100644 index 0000000..dd08274 --- /dev/null +++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/report/MarkdownReportGenerator.java @@ -0,0 +1,467 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.tools.x2seatunnel.report; + +import org.apache.seatunnel.tools.x2seatunnel.model.MappingResult; +import org.apache.seatunnel.tools.x2seatunnel.util.FileUtils; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.LocalDateTime; +import java.util.HashMap; +import java.util.Map; + +/** Markdown format conversion report generator */ +public class MarkdownReportGenerator { + private static final Logger logger = LoggerFactory.getLogger(MarkdownReportGenerator.class); + private static final String TEMPLATE_PATH = "/templates/report/report-template.md"; + + /** + * Generate Markdown format conversion report (standard conversion) + * + * @param result mapping result + * @param sourceFile source file path + * @param targetFile target file path + * @param sourceType source type + * @return Markdown report content + */ + public String generateReport( + MappingResult result, String sourceFile, String targetFile, String sourceType) { + return generateReport(result, sourceFile, targetFile, sourceType, null, "", ""); + } + + /** + * Generate Markdown format conversion report (supports custom templates) + * + * @param result mapping result + * @param sourceFile source file path + * @param targetFile target file path + * @param sourceType source type + * @param customTemplate custom template name (optional) + * @param sourceTemplate source template content (for extracting connector type) + * @param sinkTemplate sink template content (for extracting connector type) + * @return Markdown report content + */ + public String generateReport( + MappingResult result, + String sourceFile, + String targetFile, + String sourceType, + String customTemplate, + String sourceTemplate, + String sinkTemplate) { + logger.info("Generating Markdown 
conversion report"); + + // Load template + String template = loadTemplate(); + + // Build template variables + Map variables = + buildTemplateVariables( + result, + sourceFile, + targetFile, + sourceType, + customTemplate, + sourceTemplate, + sinkTemplate); + + // Replace template variables + return replaceTemplateVariables(template, variables); + } + + /** Load report template */ + private String loadTemplate() { + try { + return FileUtils.readResourceFile(TEMPLATE_PATH); + } catch (Exception e) { + logger.warn("Unable to load report template, using default format: {}", e.getMessage()); + return getDefaultTemplate(); + } + } + + /** Build template variables */ + private Map buildTemplateVariables( + MappingResult result, + String sourceFile, + String targetFile, + String sourceType, + String customTemplate, + String sourceTemplate, + String sinkTemplate) { + + Map variables = new HashMap<>(); + + // Basic information + variables.put("convertTime", LocalDateTime.now().toString()); + variables.put("sourceFile", formatFilePath(sourceFile)); + variables.put("targetFile", formatFilePath(targetFile)); + variables.put("sourceType", sourceType.toUpperCase()); + variables.put("sourceTypeName", sourceType.toUpperCase()); + variables.put("status", result.isSuccess() ? "✅ Success" : "❌ Failed"); + variables.put("generateTime", LocalDateTime.now().toString()); + + // Connector type identification + variables.put("sourceConnector", extractConnectorType(sourceTemplate, "Jdbc", result)); + variables.put("sinkConnector", extractConnectorType(sinkTemplate, "HdfsFile", result)); + + // Custom template information + if (customTemplate != null && !customTemplate.trim().isEmpty()) { + variables.put( + "customTemplateInfo", "| **Custom Template** | `" + customTemplate + "` |"); + } else { + variables.put("customTemplateInfo", ""); + } + + // Error information + if (!result.isSuccess() && result.getErrorMessage() != null) { + variables.put( + "errorInfo", + "### ⚠️ Error Information\n\n```\n" + result.getErrorMessage() + "\n```\n"); + } else { + variables.put("errorInfo", ""); + } + + // Statistics information + buildStatistics(variables, result); + + // Various tables + variables.put("directMappingTable", buildDirectMappingTable(result, sourceType)); + variables.put("transformMappingTable", buildTransformMappingTable(result, sourceType)); + variables.put("defaultValuesTable", buildDefaultValuesTable(result)); + variables.put("missingFieldsTable", buildMissingFieldsTable(result)); + variables.put("unmappedFieldsTable", buildUnmappedFieldsTable(result)); + + return variables; + } + + /** Build statistics information */ + private void buildStatistics(Map variables, MappingResult result) { + int directCount = result.getSuccessMappings().size(); + int transformCount = result.getTransformMappings().size(); + int defaultCount = result.getDefaultValues().size(); + int missingCount = result.getMissingRequiredFields().size(); + int unmappedCount = result.getUnmappedFields().size(); + int totalCount = directCount + transformCount + defaultCount + missingCount + unmappedCount; + + variables.put("directCount", String.valueOf(directCount)); + variables.put("transformCount", String.valueOf(transformCount)); + variables.put("defaultCount", String.valueOf(defaultCount)); + variables.put("missingCount", String.valueOf(missingCount)); + variables.put("unmappedCount", String.valueOf(unmappedCount)); + variables.put("totalCount", String.valueOf(totalCount)); + + if (totalCount > 0) { + variables.put( + "directPercent", + 
String.format("%.1f%%", (double) directCount / totalCount * 100));
+            variables.put(
+                    "transformPercent",
+                    String.format("%.1f%%", (double) transformCount / totalCount * 100));
+            variables.put(
+                    "defaultPercent",
+                    String.format("%.1f%%", (double) defaultCount / totalCount * 100));
+            variables.put(
+                    "missingPercent",
+                    String.format("%.1f%%", (double) missingCount / totalCount * 100));
+            variables.put(
+                    "unmappedPercent",
+                    String.format("%.1f%%", (double) unmappedCount / totalCount * 100));
+        } else {
+            variables.put("directPercent", "0%");
+            variables.put("transformPercent", "0%");
+            variables.put("defaultPercent", "0%");
+            variables.put("missingPercent", "0%");
+            variables.put("unmappedPercent", "0%");
+        }
+    }
+
+    /** Build direct mapping fields table */
+    private String buildDirectMappingTable(MappingResult result, String sourceType) {
+        if (result.getSuccessMappings().isEmpty()) {
+            return "*No direct mapped fields*\n";
+        }
+
+        StringBuilder table = new StringBuilder();
+        table.append("| SeaTunnel Field | Value | ")
+                .append(sourceType.toUpperCase())
+                .append(" Source Field |\n");
+        table.append("|---------------|----|--------------|\n");
+
+        for (MappingResult.MappingItem item : result.getSuccessMappings()) {
+            table.append("| `")
+                    .append(item.getTargetField())
+                    .append("` | `")
+                    .append(item.getValue())
+                    .append("` | `")
+                    .append(item.getSourceField())
+                    .append("` |\n");
+        }
+
+        return table.toString();
+    }
+
+    /** Build transform mapping fields table */
+    private String buildTransformMappingTable(MappingResult result, String sourceType) {
+        if (result.getTransformMappings().isEmpty()) {
+            return "*No transform mapped fields*\n";
+        }
+
+        StringBuilder table = new StringBuilder();
+        table.append("| SeaTunnel Field | Value | ")
+                .append(sourceType.toUpperCase())
+                .append(" Source Field | Filter Used |\n");
+        table.append("|---------------|----|--------------|-----------|\n");
+
+        for (MappingResult.TransformMapping item : result.getTransformMappings()) {
+            table.append("| `")
+                    .append(item.getTargetField())
+                    .append("` | `")
+                    .append(item.getValue())
+                    .append("` | `")
+                    .append(item.getSourceField())
+                    .append("` | ")
+                    .append(item.getFilterName())
+                    .append(" |\n");
+        }
+
+        return table.toString();
+    }
+
+    /** Build default value fields table */
+    private String buildDefaultValuesTable(MappingResult result) {
+        if (result.getDefaultValues().isEmpty()) {
+            return "*No fields using default values*\n";
+        }
+
+        StringBuilder table = new StringBuilder();
+        table.append("| SeaTunnel Field | Default Value |\n");
+        table.append("|---------------|--------|\n");
+
+        for (MappingResult.DefaultValueField field : result.getDefaultValues()) {
+            table.append("| `")
+                    .append(field.getFieldName())
+                    .append("` | `")
+                    .append(field.getValue())
+                    .append("` |\n");
+        }
+
+        return table.toString();
+    }
+
+    /** Build missing fields table */
+    private String buildMissingFieldsTable(MappingResult result) {
+        if (result.getMissingRequiredFields().isEmpty()) {
+            return "*No missing fields* 🎉\n";
+        }
+
+        StringBuilder table = new StringBuilder();
+        table.append(
+                "⚠️ **Note**: The following fields were not found in the source configuration; please add them manually:\n\n");
+        table.append("| SeaTunnel Field |\n");
+        table.append("|---------------|\n");
+
+        for (MappingResult.MissingField field : result.getMissingRequiredFields()) {
+            table.append("| `").append(field.getFieldName()).append("` |\n");
+        }
+
+        return table.toString();
+    }
+
+    /** Build unmapped fields table */
+    private String 
buildUnmappedFieldsTable(MappingResult result) { + if (result.getUnmappedFields().isEmpty()) { + return "*All fields are mapped* 🎉\n"; + } + + StringBuilder table = new StringBuilder(); + table.append("| DataX Field | Value |\n"); + table.append("|--------|------|\n"); + + for (MappingResult.UnmappedField field : result.getUnmappedFields()) { + table.append("| `") + .append(field.getFieldName()) + .append("` | `") + .append(field.getValue()) + .append("` |\n"); + } + + return table.toString(); + } + + /** Extract connector type from template content */ + private String extractConnectorType( + String templateContent, String defaultType, MappingResult result) { + if (templateContent == null || templateContent.trim().isEmpty()) { + logger.warn("Template content is empty, using default type: {}", defaultType); + return defaultType; + } + + logger.debug( + "Analyzing template content to extract connector type, template length: {}", + templateContent.length()); + logger.debug( + "Template content first 200 characters: {}", + templateContent.substring(0, Math.min(200, templateContent.length()))); + + // Find connector type in template (e.g. Jdbc {, HdfsFile {, Kafka {, etc.) + // Need to skip top-level source { and sink {, look for nested connector types + String[] lines = templateContent.split("\n"); + boolean inSourceOrSink = false; + + for (String line : lines) { + String trimmed = line.trim(); + + // Detect if entering source { or sink { block + if (trimmed.equals("source {") || trimmed.equals("sink {")) { + inSourceOrSink = true; + continue; + } + + // Look for connector type within source/sink block + if (inSourceOrSink && trimmed.matches("\\w+\\s*\\{")) { + String connectorType = trimmed.substring(0, trimmed.indexOf('{')).trim(); + logger.info("Found connector type: {}", connectorType); + + // Add database type identification (for JDBC connector) + if ("Jdbc".equals(connectorType)) { + String dbType = extractDatabaseTypeFromMappingResult(result); + if (dbType != null) { + logger.info("Identified database type: {}", dbType); + return connectorType + " (" + dbType + ")"; + } + } + return connectorType; + } + + // Detect if exiting source/sink block (encountering top-level }) + if (inSourceOrSink && trimmed.equals("}") && !line.startsWith(" ")) { + inSourceOrSink = false; + } + } + + logger.warn("Connector type not found, using default type: {}", defaultType); + return defaultType; + } + + /** Extract database type from mapping result */ + private String extractDatabaseTypeFromMappingResult(MappingResult result) { + if (result == null) { + return null; + } + + // Look for JDBC URL in successful mappings + for (MappingResult.MappingItem mapping : result.getSuccessMappings()) { + String targetField = mapping.getTargetField(); + String value = mapping.getValue(); + + // Look for fields containing .url with JDBC URL value + if (targetField != null + && targetField.contains(".url") + && value != null + && value.startsWith("jdbc:")) { + String dbType = extractDatabaseTypeFromUrl(value); + if (dbType != null) { + logger.debug( + "Identified database type from mapping result: {} -> {}", + value, + dbType); + return dbType; + } + } + } + + logger.debug("JDBC URL not found in mapping result"); + return null; + } + + /** Extract database type from JDBC URL (using regular expression) */ + private String extractDatabaseTypeFromUrl(String jdbcUrl) { + if (jdbcUrl == null || jdbcUrl.trim().isEmpty()) { + return null; + } + + try { + // Use regular expression to extract "mysql" from "jdbc:mysql://..." 
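+            // e.g. "jdbc:mysql://host:3306/db" yields "mysql",
+            //      "jdbc:postgresql://host:5432/db" yields "postgresql" (illustrative URLs)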
+            if (jdbcUrl.startsWith("jdbc:")) {
+                String dbType = jdbcUrl.replaceFirst("^jdbc:([^:]+):.*", "$1");
+                if (!dbType.equals(jdbcUrl)) { // Ensure regex match succeeded
+                    logger.debug("Identified database type via regex: {} -> {}", jdbcUrl, dbType);
+                    return dbType;
+                }
+            }
+        } catch (Exception e) {
+            logger.warn("Failed to extract database type via regex: {}", e.getMessage());
+        }
+
+        logger.debug("Unable to identify database type from URL: {}", jdbcUrl);
+        return null;
+    }
+
+    /** Replace template variables */
+    private String replaceTemplateVariables(String template, Map<String, String> variables) {
+        String result = template;
+        for (Map.Entry<String, String> entry : variables.entrySet()) {
+            String placeholder = "{{" + entry.getKey() + "}}";
+            result = result.replace(placeholder, entry.getValue());
+        }
+        return result;
+    }
+
+    /** Get default template (used when template file cannot be loaded) */
+    private String getDefaultTemplate() {
+        return "# X2SeaTunnel Conversion Report\n\n"
+                + "## 📋 Basic Information\n\n"
+                + "- **Conversion Time**: {{convertTime}}\n"
+                + "- **Source File**: {{sourceFile}}\n"
+                + "- **Target File**: {{targetFile}}\n"
+                + "- **Conversion Status**: {{status}}\n\n"
+                + "Conversion completed!";
+    }
+
+    /**
+     * Format file path, convert absolute path to relative path (based on current working directory)
+     */
+    private String formatFilePath(String filePath) {
+        if (filePath == null) {
+            return "";
+        }
+
+        try {
+            // Get current working directory
+            String currentDir = System.getProperty("user.dir");
+
+            // If it's an absolute path under current working directory, convert to relative path
+            if (filePath.startsWith(currentDir)) {
+                String relativePath = filePath.substring(currentDir.length());
+                // Remove leading separator
+                if (relativePath.startsWith("\\") || relativePath.startsWith("/")) {
+                    relativePath = relativePath.substring(1);
+                }
+                return relativePath.replace("\\", "/"); // Use forward slash uniformly
+            }
+
+            // Otherwise return original path
+            return filePath.replace("\\", "/"); // Use forward slash uniformly
+        } catch (Exception e) {
+            logger.warn("Failed to format file path: {}", e.getMessage());
+            return filePath;
+        }
+    }
+}
diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/template/ConfigDrivenTemplateEngine.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/template/ConfigDrivenTemplateEngine.java
new file mode 100644
index 0000000..00c5360
--- /dev/null
+++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/template/ConfigDrivenTemplateEngine.java
@@ -0,0 +1,368 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.seatunnel.tools.x2seatunnel.template; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.JsonNode; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.seatunnel.tools.x2seatunnel.model.MappingResult; +import org.apache.seatunnel.tools.x2seatunnel.model.MappingTracker; +import org.apache.seatunnel.tools.x2seatunnel.util.FileUtils; +import org.apache.seatunnel.tools.x2seatunnel.util.PathResolver; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Configuration-driven template conversion engine based on template-mapping.yaml configuration file + * to automatically select and apply templates + */ +public class ConfigDrivenTemplateEngine { + + private static final Logger logger = LoggerFactory.getLogger(ConfigDrivenTemplateEngine.class); + + private final TemplateMappingManager mappingManager; + private final TemplateVariableResolver variableResolver; + private final MappingTracker mappingTracker; // Added: mapping tracker + + public ConfigDrivenTemplateEngine() { + this.mappingManager = TemplateMappingManager.getInstance(); + this.mappingTracker = new MappingTracker(); // Initialize mapping tracker + this.variableResolver = + new TemplateVariableResolver(this.mappingManager, this.mappingTracker); + } + + /** + * Convert DataX configuration using configuration-driven approach + * + * @param sourceContent Original DataX JSON content + * @return Conversion result + */ + public TemplateConversionResult convertWithTemplate(String sourceContent) { + logger.info("Starting configuration-driven template conversion..."); + + TemplateConversionResult result = new TemplateConversionResult(); + + try { + // Reset mapping tracker state + mappingTracker.reset(); + logger.info("Mapping tracker has been reset, starting new conversion process"); + + // Create field reference tracker + org.apache.seatunnel.tools.x2seatunnel.util.DataXFieldExtractor dataXExtractor = + new org.apache.seatunnel.tools.x2seatunnel.util.DataXFieldExtractor(); + org.apache.seatunnel.tools.x2seatunnel.util.DataXFieldExtractor.FieldReferenceTracker + fieldTracker = dataXExtractor.createFieldReferenceTracker(sourceContent); + variableResolver.setFieldReferenceTracker(fieldTracker); + + // Extract reader and writer types from JSON + String readerType = extractReaderType(sourceContent); + String writerType = extractWriterType(sourceContent); + + // 1. Select source template based on reader type + String sourceTemplate = mappingManager.getSourceTemplate(readerType); + logger.info( + "Selected source template for reader type {}: {}", readerType, sourceTemplate); + + // 2. Select sink template based on writer type + String sinkTemplate = mappingManager.getSinkTemplate(writerType); + logger.info("Selected sink template for writer type {}: {}", writerType, sinkTemplate); + + // 3. Load template content + String sourceTemplateContent = loadTemplate(sourceTemplate); + String sinkTemplateContent = loadTemplate(sinkTemplate); + + // 4. Generate env configuration + String envConfig = generateEnvConfig(sourceContent); + + // 5. Validate and parse source template + if (!variableResolver.validateTemplate(sourceTemplateContent)) { + throw new RuntimeException( + "Source template format error, does not conform to Jinja2 syntax standard. 
Please check template file: " + + sourceTemplate); + } + logger.info("Using template analyzer to parse source template"); + String resolvedSourceConfig = + variableResolver.resolveWithTemplateAnalysis( + sourceTemplateContent, "source", sourceContent); + + // 6. Validate and parse sink template + if (!variableResolver.validateTemplate(sinkTemplateContent)) { + throw new RuntimeException( + "Sink template format error, does not conform to Jinja2 syntax standard. Please check template file: " + + sinkTemplate); + } + logger.info("Using template analyzer to parse sink template"); + String resolvedSinkConfig = + variableResolver.resolveWithTemplateAnalysis( + sinkTemplateContent, "sink", sourceContent); + + // 7. Assemble complete SeaTunnel configuration + String finalConfig = + assembleConfig(envConfig, resolvedSourceConfig, resolvedSinkConfig); + + // 8. Calculate unmapped fields (based on reference count) + mappingTracker.calculateUnmappedFieldsFromTracker(fieldTracker); + + // 9. Generate mapping result (for reporting) - now integrated with MappingTracker data + MappingResult mappingResult = + generateMappingResult(readerType, writerType, sourceTemplate, sinkTemplate); + + result.setSuccess(true); + result.setConfigContent(finalConfig); + result.setMappingResult(mappingResult); + result.setSourceTemplate( + sourceTemplateContent); // Pass template content instead of path + result.setSinkTemplate(sinkTemplateContent); // Pass template content instead of path + + logger.info("Configuration-driven template conversion completed"); + logger.info("Mapping tracking statistics: {}", mappingTracker.getStatisticsText()); + + } catch (Exception e) { + logger.error("Configuration-driven template conversion failed: {}", e.getMessage(), e); + result.setSuccess(false); + result.setErrorMessage(e.getMessage()); + } + + return result; + } + + /** Load template file content */ + private String loadTemplate(String templatePath) { + logger.debug("Loading template file: {}", templatePath); + + // 1. Try to load from file system + String resolvedPath = PathResolver.resolveTemplatePath(templatePath); + if (resolvedPath != null && PathResolver.exists(resolvedPath)) { + logger.debug("Loading template from file system: {}", resolvedPath); + return FileUtils.readFile(resolvedPath); + } + + // 2. 
Load from classpath (built-in templates) + try { + String resourcePath = PathResolver.buildResourcePath(templatePath); + logger.debug("Loading template from classpath: {}", resourcePath); + return FileUtils.readResourceFile(resourcePath); + } catch (Exception e) { + throw new RuntimeException("Unable to load template file: " + templatePath, e); + } + } + + /** Generate environment configuration section */ + private String generateEnvConfig(String sourceContent) { + // Dynamically select environment template based on job type (default is batch) + String jobType = "batch"; // DataX defaults to batch processing + String envTemplatePath = mappingManager.getEnvTemplate(jobType); + logger.info("Selected environment template for job type {}: {}", jobType, envTemplatePath); + + // Load environment configuration template + String envTemplate = loadTemplate(envTemplatePath); + + // Use template variable resolver to process environment configuration + String resolvedEnvConfig = + variableResolver.resolveWithTemplateAnalysis(envTemplate, "env", sourceContent); + + return resolvedEnvConfig; + } + + /** Assemble complete SeaTunnel configuration */ + private String assembleConfig(String envConfig, String sourceConfig, String sinkConfig) { + StringBuilder finalConfig = new StringBuilder(); + + // Add header comments + finalConfig.append("# SeaTunnel Configuration File\n"); + finalConfig.append("# Auto-generated by X2SeaTunnel Configuration-Driven Engine\n"); + finalConfig.append("# Generated at: ").append(java.time.LocalDateTime.now()).append("\n"); + finalConfig.append("\n"); + + // Add env configuration + finalConfig.append(envConfig).append("\n"); + + // Add source configuration + finalConfig.append(sourceConfig).append("\n"); + + // Add sink configuration + finalConfig.append(sinkConfig).append("\n"); + + return finalConfig.toString(); + } + + /** Generate mapping result (for report generation) */ + private MappingResult generateMappingResult( + String readerType, String writerType, String sourceTemplate, String sinkTemplate) { + + // First get basic mapping result from MappingTracker + MappingResult result = mappingTracker.generateMappingResult(); + + // Set template information (these are basic info, not field mappings) + result.setSourceTemplate(sourceTemplate); + result.setSinkTemplate(sinkTemplate); + result.setReaderType(readerType); + result.setWriterType(writerType); + + // All configurations are template-driven, no hardcoded configuration items in Java code + + // Check if the types are supported + if (!mappingManager.isReaderSupported(readerType)) { + result.addUnmappedField("reader.name", readerType, "Using default JDBC template"); + } + + if (!mappingManager.isWriterSupported(writerType)) { + result.addUnmappedField("writer.name", writerType, "Using default HDFS template"); + } + + result.setSuccess(true); + logger.info( + "Mapping result generation completed, total fields: success {}, default values {}, missing {}, unmapped {}", + result.getSuccessMappings().size(), + result.getDefaultValues().size(), + result.getMissingRequiredFields().size(), + result.getUnmappedFields().size()); + + return result; + } + + /** Check if the specified configuration combination is supported */ + public boolean isConfigurationSupported(String readerType, String writerType) { + return mappingManager.isReaderSupported(readerType) + && mappingManager.isWriterSupported(writerType); + } + + /** Get supported configuration information */ + public String getSupportedConfigInfo() { + StringBuilder info = new 
StringBuilder(); + info.append("Supported Reader types: "); + info.append(String.join(", ", mappingManager.getSupportedReaders())); + info.append("\n"); + info.append("Supported Writer types: "); + info.append(String.join(", ", mappingManager.getSupportedWriters())); + return info.toString(); + } + + public static class TemplateConversionResult { + private boolean success; + private String configContent; + private String errorMessage; + private MappingResult mappingResult; + private String sourceTemplate; + private String sinkTemplate; + + // Getters and setters + public boolean isSuccess() { + return success; + } + + public void setSuccess(boolean success) { + this.success = success; + } + + public String getConfigContent() { + return configContent; + } + + public void setConfigContent(String configContent) { + this.configContent = configContent; + } + + public String getErrorMessage() { + return errorMessage; + } + + public void setErrorMessage(String errorMessage) { + this.errorMessage = errorMessage; + } + + public MappingResult getMappingResult() { + return mappingResult; + } + + public void setMappingResult(MappingResult mappingResult) { + this.mappingResult = mappingResult; + } + + public String getSourceTemplate() { + return sourceTemplate; + } + + public void setSourceTemplate(String sourceTemplate) { + this.sourceTemplate = sourceTemplate; + } + + public String getSinkTemplate() { + return sinkTemplate; + } + + public void setSinkTemplate(String sinkTemplate) { + this.sinkTemplate = sinkTemplate; + } + } + + /** + * Extract reader type from DataX JSON configuration + * + * @param sourceContent DataX JSON content + * @return Reader type (e.g., "mysqlreader") + */ + private String extractReaderType(String sourceContent) { + try { + ObjectMapper objectMapper = new ObjectMapper(); + JsonNode rootNode = objectMapper.readTree(sourceContent); + + JsonNode contentNode = rootNode.path("job").path("content"); + if (contentNode.isArray() && contentNode.size() > 0) { + JsonNode readerNode = contentNode.get(0).path("reader"); + if (readerNode.has("name")) { + return readerNode.get("name").asText(); + } + } + + throw new IllegalArgumentException( + "Cannot extract reader type from DataX configuration"); + } catch (Exception e) { + logger.error("Failed to extract reader type: {}", e.getMessage()); + throw new RuntimeException("Failed to extract reader type from DataX configuration", e); + } + } + + /** + * Extract writer type from DataX JSON configuration + * + * @param sourceContent DataX JSON content + * @return Writer type (e.g., "mysqlwriter") + */ + private String extractWriterType(String sourceContent) { + try { + ObjectMapper objectMapper = new ObjectMapper(); + JsonNode rootNode = objectMapper.readTree(sourceContent); + + JsonNode contentNode = rootNode.path("job").path("content"); + if (contentNode.isArray() && contentNode.size() > 0) { + JsonNode writerNode = contentNode.get(0).path("writer"); + if (writerNode.has("name")) { + return writerNode.get("name").asText(); + } + } + + throw new IllegalArgumentException( + "Cannot extract writer type from DataX configuration"); + } catch (Exception e) { + logger.error("Failed to extract writer type: {}", e.getMessage()); + throw new RuntimeException("Failed to extract writer type from DataX configuration", e); + } + } +} diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateMappingManager.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateMappingManager.java new 
file mode 100644
index 0000000..255a776
--- /dev/null
+++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateMappingManager.java
@@ -0,0 +1,252 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.tools.x2seatunnel.template;
+
+import org.apache.seatunnel.tools.x2seatunnel.util.FileUtils;
+import org.apache.seatunnel.tools.x2seatunnel.util.PathResolver;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.yaml.snakeyaml.Yaml;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Template mapping configuration manager responsible for loading and managing template-mapping.yaml
+ * configuration file
+ */
+public class TemplateMappingManager {
+
+    private static final Logger logger = LoggerFactory.getLogger(TemplateMappingManager.class);
+
+    private static final String TEMPLATE_MAPPING_CONFIG = "template-mapping.yaml";
+
+    private static TemplateMappingManager instance;
+
+    private Map<String, Object> mappingConfig;
+    private Map<String, String> sourceMappings;
+    private Map<String, String> sinkMappings;
+    private Map<String, String> envMappings;
+    private Map<String, Object> transformers;
+
+    private TemplateMappingManager() {
+        loadMappingConfig();
+    }
+
+    public static synchronized TemplateMappingManager getInstance() {
+        if (instance == null) {
+            instance = new TemplateMappingManager();
+        }
+        return instance;
+    }
+
+    /** Load template mapping configuration */
+    @SuppressWarnings("unchecked")
+    private void loadMappingConfig() {
+        logger.info("Loading template mapping configuration...");
+
+        try {
+            // 1. Try to load from file system
+            String configPath = PathResolver.resolveTemplatePath(TEMPLATE_MAPPING_CONFIG);
+            if (configPath != null && PathResolver.exists(configPath)) {
+                logger.info(
+                        "Loading template mapping configuration from file system: {}", configPath);
+                String content = FileUtils.readFile(configPath);
+                parseMappingConfig(content);
+                return;
+            }
+
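+            // Illustrative sketch of the expected template-mapping.yaml shape (keys as
+            // read by parseMappingConfig below; the concrete values are examples only):
+            //   datax:
+            //     source_mappings:
+            //       mysqlreader: datax/sources/jdbc-source.conf
+            //     sink_mappings:
+            //       hdfswriter: datax/sinks/hdfs-sink.conf
+            //     env_mappings:
+            //       batch: datax/env/batch-env.conf
+            //   transformers:
+            //     jdbc_driver_mapper:
+            //       mysql: com.mysql.cj.jdbc.Driver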
+            // 2. Load from classpath (built-in configuration)
+            String resourcePath = "templates/" + TEMPLATE_MAPPING_CONFIG;
+            logger.info("Loading template mapping configuration from classpath: {}", resourcePath);
+            String content = FileUtils.readResourceFile(resourcePath);
+            parseMappingConfig(content);
+
+        } catch (Exception e) {
+            logger.error("Failed to load template mapping configuration: {}", e.getMessage(), e);
+            // Use default configuration
+            initDefaultMappings();
+        }
+    }
+
+    /** Parse mapping configuration content */
+    @SuppressWarnings("unchecked")
+    private void parseMappingConfig(String content) {
+        Yaml yaml = new Yaml();
+        mappingConfig = yaml.load(content);
+
+        if (mappingConfig != null && mappingConfig.containsKey("datax")) {
+            Map<String, Object> dataxConfig = (Map<String, Object>) mappingConfig.get("datax");
+
+            // Load source mappings
+            if (dataxConfig.containsKey("source_mappings")) {
+                sourceMappings = (Map<String, String>) dataxConfig.get("source_mappings");
+                logger.info("Loaded {} source mappings", sourceMappings.size());
+            }
+
+            // Load sink mappings
+            if (dataxConfig.containsKey("sink_mappings")) {
+                sinkMappings = (Map<String, String>) dataxConfig.get("sink_mappings");
+                logger.info("Loaded {} sink mappings", sinkMappings.size());
+            }
+
+            // Load environment mappings
+            if (dataxConfig.containsKey("env_mappings")) {
+                envMappings = (Map<String, String>) dataxConfig.get("env_mappings");
+                logger.info("Loaded {} environment mappings", envMappings.size());
+            }
+        }
+
+        // Load transformer configuration
+        if (mappingConfig != null && mappingConfig.containsKey("transformers")) {
+            transformers = (Map<String, Object>) mappingConfig.get("transformers");
+            logger.info("Loaded {} transformers", transformers.size());
+        }
+
+        logger.info("Template mapping configuration loading completed");
+    }
+
+    /** Initialize default mappings (fallback) - use built-in configuration file */
+    private void initDefaultMappings() {
+        logger.warn("Using built-in default template mapping configuration");
+
+        try {
+            // Try to load default configuration from built-in configuration file
+            String resourcePath = "templates/" + TEMPLATE_MAPPING_CONFIG;
+            String content = FileUtils.readResourceFile(resourcePath);
+            parseMappingConfig(content);
+            logger.info("Successfully loaded built-in default configuration");
+        } catch (Exception e) {
+            logger.error(
+                    "Failed to load built-in default configuration, system cannot work properly: {}",
+                    e.getMessage());
+            throw new RuntimeException(
+                    "Unable to load template mapping configuration file, please check if "
+                            + TEMPLATE_MAPPING_CONFIG
+                            + " file exists",
+                    e);
+        }
+    }
+
+    /** Get corresponding source template path based on reader type */
+    public String getSourceTemplate(String readerType) {
+        if (sourceMappings == null) {
+            logger.warn("Source mappings not initialized, using default template");
+            return "datax/sources/jdbc-source.conf";
+        }
+
+        String template = sourceMappings.get(readerType.toLowerCase());
+        if (template == null) {
+            logger.warn(
+                    "Template mapping not found for reader type {}, using default template",
+                    readerType);
+            return "datax/sources/jdbc-source.conf";
+        }
+
+        logger.debug("Selected template for reader type {}: {}", readerType, template);
+        return template;
+    }
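+    // Illustrative lookups (the reader/writer keys are examples; the actual keys
+    // come from template-mapping.yaml):
+    //   getSourceTemplate("mysqlreader") -> "datax/sources/jdbc-source.conf"
+    //   getSinkTemplate("hdfswriter")    -> "datax/sinks/hdfs-sink.conf"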
"Template mapping not found for writer type {}, using default template", + writerType); + return "datax/sinks/hdfs-sink.conf"; + } + + logger.debug("Selected template for writer type {}: {}", writerType, template); + return template; + } + + /** Get corresponding environment template path based on job type */ + public String getEnvTemplate(String jobType) { + if (envMappings == null) { + logger.warn("Environment mappings not initialized, using default template"); + return "datax/env/batch-env.conf"; + } + + String template = envMappings.get(jobType.toLowerCase()); + if (template == null) { + logger.warn( + "Environment template mapping not found for job type {}, using default template", + jobType); + return "datax/env/batch-env.conf"; + } + + logger.debug("Selected environment template for job type {}: {}", jobType, template); + return template; + } + + /** Get transformer configuration */ + @SuppressWarnings("unchecked") + public Map getTransformer(String transformerName) { + if (transformers == null) { + logger.warn("Transformer configuration not initialized"); + return new HashMap<>(); + } + + Object transformer = transformers.get(transformerName); + if (transformer instanceof Map) { + return (Map) transformer; + } + + logger.warn("Transformer not found: {}", transformerName); + return new HashMap<>(); + } + + /** Check if specified reader type is supported */ + public boolean isReaderSupported(String readerType) { + return sourceMappings != null && sourceMappings.containsKey(readerType.toLowerCase()); + } + + /** Check if specified writer type is supported */ + public boolean isWriterSupported(String writerType) { + return sinkMappings != null && sinkMappings.containsKey(writerType.toLowerCase()); + } + + /** Get all supported reader types */ + public String[] getSupportedReaders() { + if (sourceMappings == null) { + return new String[0]; + } + return sourceMappings.keySet().toArray(new String[0]); + } + + /** Get all supported writer types */ + public String[] getSupportedWriters() { + if (sinkMappings == null) { + return new String[0]; + } + return sinkMappings.keySet().toArray(new String[0]); + } + + /** Reload configuration (for dynamic updates) */ + public void reload() { + logger.info("Reloading template mapping configuration..."); + loadMappingConfig(); + } +} diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateVariableResolver.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateVariableResolver.java new file mode 100644 index 0000000..197c542 --- /dev/null +++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateVariableResolver.java @@ -0,0 +1,1653 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.tools.x2seatunnel.template; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.JsonNode; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.seatunnel.tools.x2seatunnel.model.MappingTracker; +import org.apache.seatunnel.tools.x2seatunnel.util.DataXFieldExtractor; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Template variable resolver - supports basic variables, default values, conditional mapping and + * transformer calls + */ +public class TemplateVariableResolver { + + private static final Logger logger = LoggerFactory.getLogger(TemplateVariableResolver.class); + + // Constant definitions + private static final String DATAX_PREFIX = "datax."; + private static final String DATAX_JOB_PREFIX = "datax.job."; + private static final int DATAX_PREFIX_LENGTH = 6; + private static final String JOB_PREFIX = "job."; + private static final int INDENT_SIZE = 2; + private static final int TAB_SIZE = 4; + private static final String DEFAULT_JOIN_SEPARATOR = ","; + private static final String DEFAULT_SPLIT_DELIMITER = "/"; + + // Common string constants + private static final String EMPTY_STRING = ""; + private static final String EQUALS_SIGN = "="; + private static final String PIPE_SYMBOL = "|"; + private static final String OPEN_BRACE = "{"; + private static final String CLOSE_BRACE = "}"; + private static final String COMMENT_PREFIX = "#"; + private static final String NEWLINE = "\n"; + private static final String QUOTE_DOUBLE = "\""; + private static final String QUOTE_SINGLE = "'"; + private static final String TEMPLATE_VAR_START = "{{"; + private static final String TEMPLATE_VAR_END = "}}"; + + // Log message constants + private static final String LOG_MSG_TEMPLATE_RESOLUTION_START = + "Starting template variable resolution"; + private static final String LOG_MSG_TEMPLATE_RESOLUTION_COMPLETE = + "Template variable resolution completed"; + private static final String LOG_MSG_JINJA2_RESOLUTION_COMPLETE = + "Jinja2 variable resolution completed"; + private static final String LOG_MSG_TEMPLATE_ANALYSIS_COMPLETE = + "Template analysis resolution completed, total fields: {}"; + + // Error message constants + private static final String ERROR_MSG_TEMPLATE_RESOLUTION_FAILED = + "Template variable resolution failed"; + private static final String ERROR_MSG_TEMPLATE_ANALYSIS_FAILED = + "Template analysis resolution failed"; + + // Jinja2 variable pattern: {{ datax.path.to.value }} + private static final Pattern JINJA2_VARIABLE_PATTERN = + Pattern.compile("\\{\\{\\s*([^}|]+)\\s*\\}\\}"); + + // Jinja2 filter pattern: {{ datax.path.to.value | filter }} + private static final Pattern JINJA2_FILTER_PATTERN = + Pattern.compile("\\{\\{\\s*([^}|]+)\\s*\\|\\s*([^}]+)\\s*\\}\\}"); + + // Other patterns + private static final Pattern SET_PATTERN = + Pattern.compile("\\{%\\s*set\\s+(\\w+)\\s*=\\s*(.*?)\\s*%\\}"); + private static final Pattern FILTER_PATTERN = + Pattern.compile("\\|\\s*([a-zA-Z_][a-zA-Z0-9_]*)"); + + private final ObjectMapper objectMapper; + private final TemplateMappingManager templateMappingManager; + private final MappingTracker mappingTracker; + + // Current parsing context: records the target field path being parsed + private String 
currentTargetContext = null;
+
+    // Flag: whether currently processing complex transformation (compound expressions containing
+    // filters)
+    private boolean processingComplexTransform = false;
+
+    // Flag: suppress missing field recording when encountering default filter
+    private boolean suppressMissing = false;
+
+    // Field reference tracker
+    private DataXFieldExtractor.FieldReferenceTracker fieldReferenceTracker;
+
+    /**
+     * Constructor - supports full functionality
+     *
+     * @param templateMappingManager template mapping manager, can be null
+     * @param mappingTracker mapping tracker, can be null
+     */
+    public TemplateVariableResolver(
+            TemplateMappingManager templateMappingManager, MappingTracker mappingTracker) {
+        this.objectMapper = createObjectMapper();
+        this.templateMappingManager = templateMappingManager;
+        this.mappingTracker = mappingTracker;
+    }
+
+    /**
+     * Constructor - supports template mapping manager only
+     *
+     * @param templateMappingManager template mapping manager, can be null
+     */
+    public TemplateVariableResolver(TemplateMappingManager templateMappingManager) {
+        this(templateMappingManager, null);
+    }
+
+    /** Default constructor - basic functionality */
+    public TemplateVariableResolver() {
+        this(null, null);
+    }
+
+    /**
+     * Create and configure ObjectMapper instance
+     *
+     * @return configured ObjectMapper instance
+     */
+    private static ObjectMapper createObjectMapper() {
+        return new ObjectMapper();
+    }
+
+    /**
+     * Check if template content is empty
+     *
+     * @param templateContent template content
+     * @return true if empty
+     */
+    private boolean isEmptyTemplate(String templateContent) {
+        return templateContent == null || templateContent.trim().isEmpty();
+    }
+
+    /**
+     * Core method for template resolution
+     *
+     * @param templateContent template content
+     * @param rootNode JSON root node
+     * @return resolved content
+     */
+    private String resolveTemplate(String templateContent, JsonNode rootNode) {
+        String result = templateContent;
+
+        // 1. Process {% set var = expr %} syntax (supports simple expressions only)
+        Map<String, String> localVars = processSetStatements(result, rootNode);
+        result = SET_PATTERN.matcher(result).replaceAll("");
+
+        // 2. Simple string replacement for local variables
+        result = replaceLocalVariables(result, localVars);
+
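+        // Illustrative template fragment (assumed for this sketch, not taken from the
+        // shipped templates):
+        //   {% set url = datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] %}
+        //   url = "{{ url }}"
+        // The right-hand side is evaluated once by processSetStatements() and every
+        // later {{ url }} occurrence is substituted by replaceLocalVariables().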
+        // 3. Use smart context resolution to handle all variables
+        result = resolveWithSmartContext(result, rootNode);
+
+        logger.debug(LOG_MSG_TEMPLATE_RESOLUTION_COMPLETE);
+        return result;
+    }
+
+    /**
+     * Process {% set var = expr %} statements
+     *
+     * @param content template content
+     * @param rootNode JSON root node
+     * @return local variable mapping
+     */
+    private Map<String, String> processSetStatements(String content, JsonNode rootNode) {
+        Map<String, String> localVars = new HashMap<>();
+        Matcher setMatcher = SET_PATTERN.matcher(content);
+
+        while (setMatcher.find()) {
+            String varName = setMatcher.group(1);
+            String expr = setMatcher.group(2);
+            String exprTemplate = "{{ " + expr + " }}";
+            String value =
+                    resolveJinja2FilterVariables(
+                            resolveJinja2Variables(exprTemplate, rootNode), rootNode);
+            localVars.put(varName, value);
+            logger.debug("Setting local variable: {} = {}", varName, value);
+        }
+
+        return localVars;
+    }
+
+    /**
+     * Replace local variables
+     *
+     * @param content template content
+     * @param localVars local variable mapping
+     * @return content after replacement
+     */
+    private String replaceLocalVariables(String content, Map<String, String> localVars) {
+        String result = content;
+        for (Map.Entry<String, String> entry : localVars.entrySet()) {
+            result = result.replace("{{ " + entry.getKey() + " }}", entry.getValue());
+        }
+        return result;
+    }
+
+    /**
+     * Normalize DataX path, remove datax prefix and convert to job prefix
+     *
+     * @param path original path
+     * @return normalized path
+     */
+    private String normalizeDataXPath(String path) {
+        if (path.startsWith(DATAX_JOB_PREFIX)) {
+            return path.substring(DATAX_PREFIX_LENGTH);
+        } else if (path.startsWith(DATAX_PREFIX)) {
+            return path.replace(DATAX_PREFIX, JOB_PREFIX);
+        }
+        return path;
+    }
+
+    /**
+     * Unified method for handling template resolution exceptions
+     *
+     * @param operation operation description
+     * @param e original exception
+     * @throws TemplateResolutionException wrapped exception
+     */
+    private void handleTemplateException(String operation, Exception e) {
+        String errorMsg = operation + ": " + e.getMessage();
+        logger.error(errorMsg, e);
+        throw new TemplateResolutionException(errorMsg, e);
+    }
+
+    /** Template resolution exception */
+    public static class TemplateResolutionException extends RuntimeException {
+        public TemplateResolutionException(String message, Throwable cause) {
+            super(message, cause);
+        }
+    }
+
+    /**
+     * Parse template variables (using raw JSON string)
+     *
+     * @param templateContent template content
+     * @param dataXJsonContent DataX JSON configuration content
+     * @return parsed content
+     */
+    public String resolve(String templateContent, String dataXJsonContent) {
+        if (isEmptyTemplate(templateContent)) {
+            return templateContent;
+        }
+
+        logger.debug(LOG_MSG_TEMPLATE_RESOLUTION_START);
+
+        try {
+            // Parse JSON string directly to JsonNode
+            JsonNode rootNode = objectMapper.readTree(dataXJsonContent);
+            return resolveWithSmartContext(templateContent, rootNode);
+
+        } catch (Exception e) {
+            handleTemplateException(ERROR_MSG_TEMPLATE_RESOLUTION_FAILED, e);
+            return null; // This line won't execute, but compiler needs it
+        }
+    }
+
+    /** Parse Jinja2 style basic variables: {{ datax.path.to.value }} */
+    private String resolveJinja2Variables(String content, JsonNode rootNode) {
+        logger.debug(
+                "Starting to parse Jinja2 variables, content length: {}, fieldReferenceTracker: {}",
+                content.length(),
+                fieldReferenceTracker != null ?
"set" : "not set"); + + Matcher matcher = JINJA2_VARIABLE_PATTERN.matcher(content); + StringBuffer sb = new StringBuffer(); + + while (matcher.find()) { + String path = matcher.group(1).trim(); + String value = extractValueFromJinja2Path(rootNode, path); + String resolvedValue = (value != null) ? value : EMPTY_STRING; + + logger.debug("Found variable: {}, resolved value: {}", path, resolvedValue); + + // Increment field reference count + if (fieldReferenceTracker != null && path.startsWith(DATAX_PREFIX)) { + String normalizedPath = normalizeDataXPath(path); + logger.debug( + "Incrementing reference count when resolving variable: {} -> {}", + path, + normalizedPath); + incrementFieldReference(normalizedPath); + } else { + logger.debug( + "Skipping reference count: fieldReferenceTracker={}, path={}", + fieldReferenceTracker != null ? "set" : "not set", + path); + } + + matcher.appendReplacement(sb, Matcher.quoteReplacement(resolvedValue)); + } + matcher.appendTail(sb); + + logger.debug(LOG_MSG_JINJA2_RESOLUTION_COMPLETE); + return sb.toString(); + } + + /** Parse Jinja2 style filter variables: {{ datax.path.to.value | filter }} */ + private String resolveJinja2FilterVariables(String content, JsonNode rootNode) { + logger.debug("Starting to resolve filter variables, content: {}", content.trim()); + Matcher matcher = JINJA2_FILTER_PATTERN.matcher(content); + StringBuffer sb = new StringBuffer(); + + while (matcher.find()) { + String path = matcher.group(1).trim(); + String filterExpression = matcher.group(2).trim(); + + logger.debug("Found filter variable: {}, filter: {}", path, filterExpression); + + // Increment field reference count + if (fieldReferenceTracker != null && path.startsWith(DATAX_PREFIX)) { + String normalizedPath = normalizeDataXPath(path); + logger.debug( + "Incrementing reference count for filter variable: {} -> {}", + path, + normalizedPath); + incrementFieldReference(normalizedPath); + } + + // Parse filter chain: filter1 | filter2 | filter3 + String[] filters = parseFilterChain(filterExpression); + // If the first filter is default, suppress missing field recording + boolean needSuppress = filters.length > 0 && filters[0].startsWith("default"); + if (needSuppress) { + this.suppressMissing = true; + } + // Extract original value + String value = extractValueFromJinja2Path(rootNode, path); + if (needSuppress) { + this.suppressMissing = false; + } + + Object resolvedValue = value; + + for (String filter : filters) { + // Add null check to prevent null pointer exception + if (resolvedValue == null) { + resolvedValue = EMPTY_STRING; + } + + // Apply filter uniformly + resolvedValue = applyFilter(resolvedValue, filter.trim()); + } + + String finalValue = + resolvedValue instanceof String + ? (String) resolvedValue + : (resolvedValue != null ? 
resolvedValue.toString() : EMPTY_STRING);
+            matcher.appendReplacement(sb, Matcher.quoteReplacement(finalValue));
+        }
+        matcher.appendTail(sb);
+
+        return sb.toString();
+    }
+
+    /** Intelligently parse filter chain, correctly handle pipe symbols within parentheses */
+    private String[] parseFilterChain(String filterExpression) {
+        List<String> filters = new ArrayList<>();
+        StringBuilder currentFilter = new StringBuilder();
+        int depth = 0;
+        boolean inQuotes = false;
+        char quoteChar = '\0';
+
+        for (int i = 0; i < filterExpression.length(); i++) {
+            char c = filterExpression.charAt(i);
+
+            if (!inQuotes && (c == '\'' || c == '"')) {
+                inQuotes = true;
+                quoteChar = c;
+                currentFilter.append(c);
+            } else if (inQuotes && c == quoteChar) {
+                inQuotes = false;
+                quoteChar = '\0';
+                currentFilter.append(c);
+            } else if (!inQuotes && c == '(') {
+                depth++;
+                currentFilter.append(c);
+            } else if (!inQuotes && c == ')') {
+                depth--;
+                currentFilter.append(c);
+            } else if (!inQuotes && c == '|' && depth == 0) {
+                filters.add(currentFilter.toString().trim());
+                currentFilter.setLength(0);
+            } else {
+                currentFilter.append(c);
+            }
+        }
+
+        if (currentFilter.length() > 0) {
+            filters.add(currentFilter.toString().trim());
+        }
+
+        return filters.toArray(new String[0]);
+    }
+
+    /** Extract value from Jinja2 style path: datax.job.content[0].reader.parameter.column */
+    private String extractValueFromJinja2Path(JsonNode rootNode, String path) {
+        try {
+            JsonNode currentNode = rootNode;
+
+            // Convert datax.job.content[0] to job.content[0] (remove datax prefix)
+            if (path.startsWith(DATAX_PREFIX)) {
+                path = path.substring(DATAX_PREFIX_LENGTH);
+            }
+
+            String[] pathParts = path.split("\\.");
+
+            for (String part : pathParts) {
+                if (currentNode == null) {
+                    // Record missing field
+                    if (mappingTracker != null && !suppressMissing) {
+                        mappingTracker.recordMissingField(
+                                path, "Field not found in DataX configuration");
+                    }
+                    return null;
+                }
+
+                // Handle array index, such as content[0]
+                if (part.contains("[") && part.contains("]")) {
+                    String arrayName = part.substring(0, part.indexOf("["));
+                    String indexStr = part.substring(part.indexOf("[") + 1, part.indexOf("]"));
+
+                    currentNode = currentNode.get(arrayName);
+                    if (currentNode != null && currentNode.isArray()) {
+                        try {
+                            int index = Integer.parseInt(indexStr);
+                            currentNode = currentNode.get(index);
+                        } catch (NumberFormatException e) {
+                            logger.warn("Invalid array index: {}", indexStr);
+                            if (mappingTracker != null && !suppressMissing) {
+                                mappingTracker.recordMissingField(
+                                        path, "Invalid array index: " + indexStr);
+                            }
+                            return null;
+                        }
+                    }
+                } else {
+                    currentNode = currentNode.get(part);
+                }
+            }
+
+            if (currentNode != null && !currentNode.isNull()) {
+                String value;
+                if (currentNode.isArray()) {
+                    // If it's an array, return all elements of the array
+                    StringBuilder result = new StringBuilder();
+                    for (int i = 0; i < currentNode.size(); i++) {
+                        if (i > 0) result.append(",");
+                        result.append(currentNode.get(i).asText());
+                    }
+                    value = result.toString();
+                } else {
+                    value = currentNode.asText();
+                }
+
+                // Record successful field extraction, unless suppressed or part of complex
+                // transformation
+                if (mappingTracker != null
+                        && !suppressMissing
+                        && value != null
+                        && !value.isEmpty()
+                        && !isPartOfComplexTransform()) {
+                    mappingTracker.recordDirectMapping(
+                            path, currentTargetContext, value, "Directly extracted from DataX");
+                }
+
+                return value;
+            } else {
+                // Record missing field
+                if (mappingTracker != null && !suppressMissing) {
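+                    // Illustrative case (example path, not from the original code): a
+                    // template reference such as
+                    // {{ datax.job.content[0].writer.parameter.compress }} whose node is
+                    // absent or null in the source JSON ends up here and is recorded as
+                    // missing for the conversion report.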
mappingTracker.recordMissingField( + path, "Field value is empty in DataX configuration"); + } + } + + } catch (Exception e) { + logger.warn("Failed to extract Jinja2 path value: {}", path, e); + if (mappingTracker != null && !suppressMissing) { + mappingTracker.recordMissingField(path, "Extraction failed: " + e.getMessage()); + } + } + + return null; + } + + /** Find matching right parenthesis position, handle nested parentheses */ + private int findMatchingCloseParen(String text, int openParenPos) { + int depth = 1; + for (int i = openParenPos + 1; i < text.length(); i++) { + char c = text.charAt(i); + if (c == '(') { + depth++; + } else if (c == ')') { + depth--; + if (depth == 0) { + return i; + } + } + } + return -1; // No matching right parenthesis found + } + + /** Unified filter application method - supports strings and arrays */ + private Object applyFilter(Object value, String filterExpression) { + if (value == null) { + value = EMPTY_STRING; + } + + // Parse filter: join(',') or join(', ') or default('SELECT * FROM table') + String filterName; + String filterArgs = EMPTY_STRING; + + if (filterExpression.contains("(") && filterExpression.contains(")")) { + filterName = filterExpression.substring(0, filterExpression.indexOf("(")).trim(); + + // Find correct right parenthesis position (handle nested parentheses) + int openParenPos = filterExpression.indexOf("("); + int closeParenPos = findMatchingCloseParen(filterExpression, openParenPos); + + if (closeParenPos != -1) { + filterArgs = filterExpression.substring(openParenPos + 1, closeParenPos).trim(); + // Remove quotes + if (filterArgs.startsWith(QUOTE_SINGLE) && filterArgs.endsWith(QUOTE_SINGLE)) { + filterArgs = filterArgs.substring(1, filterArgs.length() - 1); + } else if (filterArgs.startsWith(QUOTE_DOUBLE) + && filterArgs.endsWith(QUOTE_DOUBLE)) { + filterArgs = filterArgs.substring(1, filterArgs.length() - 1); + } + } else { + logger.warn("Unable to find matching closing parenthesis: {}", filterExpression); + } + } else { + filterName = filterExpression.trim(); + } + + // Record original value for comparison to see if transformation occurred + Object originalValue = value; + + // Apply filter + Object result; + switch (filterName) { + case "join": + if (value instanceof String[]) { + result = + applyJoinFilterOnArray( + (String[]) value, + filterArgs.isEmpty() ? DEFAULT_JOIN_SEPARATOR : filterArgs); + } else { + result = + applyJoinFilter( + value.toString(), + filterArgs.isEmpty() ? DEFAULT_JOIN_SEPARATOR : filterArgs); + } + break; + case "escape": + // Turn actual control characters into escaped sequences for config files + result = escapeControlChars(value.toString()); + break; + case "default": + String stringValue = value.toString(); + boolean usedDefaultValue = stringValue.isEmpty(); + result = usedDefaultValue ? 
filterArgs : stringValue;
+
+                // Record whether default value was used for subsequent mapping recording
+                if (mappingTracker != null && !isPartOfComplexTransform()) {
+                    if (usedDefaultValue) {
+                        // Used default value
+                        mappingTracker.recordDefaultValue(
+                                currentTargetContext,
+                                result.toString(),
+                                "Applied default value: " + filterArgs);
+                    } else {
+                        // Used original value, belongs to direct mapping
+                        mappingTracker.recordDirectMapping(
+                                null,
+                                currentTargetContext,
+                                result.toString(),
+                                "Used original value, default value not applied");
+                    }
+                }
+                break;
+            case "upper":
+                result = value.toString().toUpperCase();
+                break;
+            case "lower":
+                result = value.toString().toLowerCase();
+                break;
+            case "regex_extract":
+                {
+                    // Use original filterExpression to extract parameters, ensuring quotes and
+                    // commas are included
+                    int lpos = filterExpression.indexOf('(');
+                    int rpos = findMatchingCloseParen(filterExpression, lpos);
+                    String rawArgs = filterExpression.substring(lpos + 1, rpos);
+                    String extractedVal = applyRegexExtract(value.toString(), rawArgs);
+                    result = extractedVal;
+                    // Record regex extraction transformation, only once
+                    if (mappingTracker != null
+                            && !equals(originalValue, result)
+                            && !isPartOfComplexTransform()) {
+                        mappingTracker.recordTransformMapping(
+                                null, currentTargetContext, result.toString(), filterName);
+                    }
+                }
+                break;
+            case "jdbc_driver_mapper":
+                result = applyTransformer(value.toString(), "jdbc_driver_mapper");
+                break;
+            case "split":
+                result = applySplit(value.toString(), filterArgs);
+                break;
+            case "get":
+                result = applyGet(value, filterArgs);
+                break;
+            case "replace":
+                result = applyReplace(value.toString(), filterArgs);
+                break;
+            default:
+                // Check if it's a transformer call; note that getTransformer() returns an
+                // empty map (never null) for unknown names, so test for a non-empty mapping
+                if (templateMappingManager != null
+                        && !templateMappingManager.getTransformer(filterName).isEmpty()) {
+                    result = applyTransformer(value.toString(), filterName);
+                } else {
+                    logger.warn("Unsupported filter: {}", filterName);
+                    result = value;
+                }
+        }
+
+        // Record field transformation (if transformation occurred)
+        if (mappingTracker != null && !equals(originalValue, result)) {
+            if ("regex_extract".equals(filterName)) {
+                // Already recorded in regex_extract case, skip duplicate recording
+            } else if ("default".equals(filterName)) {
+                // Default filter mapping record already handled in case, skip duplicate recording
+            } else if (!isPartOfComplexTransform()) {
+                // Other filter transformations
+                mappingTracker.recordTransformMapping(
+                        null, currentTargetContext, result.toString(), filterName);
+            }
+        }
+
+        return result;
+    }
+
+    /** Determine if two objects are equal */
+    private boolean equals(Object obj1, Object obj2) {
+        if (obj1 == null && obj2 == null) return true;
+        if (obj1 == null || obj2 == null) return false;
+        return obj1.toString().equals(obj2.toString());
+    }
+
+    /** Apply transformer */
+    private String applyTransformer(String value, String transformerName) {
+        if (templateMappingManager == null) {
+            logger.warn(
+                    "TemplateMappingManager not initialized, cannot use transformer: {}",
+                    transformerName);
+            return value;
+        }
+
+        try {
+            Map<String, String> transformer =
+                    templateMappingManager.getTransformer(transformerName);
+            if (transformer == null || transformer.isEmpty()) {
+                logger.warn("Transformer does not exist: {}", transformerName);
+                return value;
+            }
+
+            logger.debug("Applying transformer {} to process value: {}", transformerName, value);
+            logger.debug("Transformer mapping table: {}", transformer);
+
+            // Find matching transformer rules
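+            // Illustrative match (assuming a jdbc_driver_mapper entry "mysql ->
+            // com.mysql.cj.jdbc.Driver" in template-mapping.yaml): the value
+            // "jdbc:mysql://host:3306/db" contains "mysql", so the driver class is returned.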
+            for (Map.Entry<String, String> entry : transformer.entrySet()) {
+                String pattern = entry.getKey();
+                String mappedValue = entry.getValue();
+
+                // Support contains matching
+                if (value.toLowerCase().contains(pattern.toLowerCase())) {
+                    logger.debug(
+                            "Transformer {} matched successfully: {} -> {}",
+                            transformerName,
+                            value,
+                            mappedValue);
+                    return mappedValue;
+                }
+            }
+
+            logger.debug(
+                    "Transformer {} found no match, returning original value: {}",
+                    transformerName,
+                    value);
+            return value;
+
+        } catch (Exception e) {
+            logger.error("Failed to apply transformer: {}", transformerName, e);
+            return value;
+        }
+    }
+
+    /** Apply join filter */
+    private String applyJoinFilter(String value, String separator) {
+        if (value == null || value.trim().isEmpty()) {
+            return "";
+        }
+
+        // If the value itself is a comma-separated string, directly join with specified separator
+        if (value.contains(",")) {
+            String[] parts = value.split(",");
+            StringBuilder result = new StringBuilder();
+            for (int i = 0; i < parts.length; i++) {
+                if (i > 0) result.append(separator);
+                result.append(parts[i].trim());
+            }
+            return result.toString();
+        }
+
+        return value;
+    }
+
+    /** Escape control characters to literal sequences, e.g., newline -> \n, tab -> \t */
+    private String escapeControlChars(String input) {
+        if (input == null) {
+            return EMPTY_STRING;
+        }
+        String out = input;
+        // Important: backslash must be escaped first to avoid double-processing
+        out = out.replace("\\", "\\\\");
+        out = out.replace("\n", "\\n");
+        out = out.replace("\r", "\\r");
+        out = out.replace("\t", "\\t");
+        // Keep quotes safe in properties-like files
+        out = out.replace("\"", "\\\"");
+        return out;
+    }
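+    // Illustrative behavior (not from the original comments): an input holding a real
+    // tab and newline, e.g. "a", TAB, "b", NEWLINE, becomes the six characters a\tb\n
+    // in the generated config; because backslashes are doubled first, text that is
+    // already escaped is not escaped a second time.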
+
+    /** Apply regular expression extraction filter */
+    private String applyRegexExtract(String value, String regexPattern) {
+        if (value == null
+                || value.trim().isEmpty()
+                || regexPattern == null
+                || regexPattern.trim().isEmpty()) {
+            return value;
+        }
+
+        try {
+            logger.debug(
+                    "Regular expression extraction: input value='{}', parameters='{}'",
+                    value,
+                    regexPattern);
+
+            // Support two formats:
+            // 1. Simple mode: regex_extract('pattern') - extract first matching group
+            // 2. Replacement mode: regex_extract('pattern', 'replacement') - use replacement
+            // pattern
+
+            // Parse parameters, considering commas within quotes should not be split
+            String[] parts = parseRegexArgs(regexPattern);
+            String pattern = parts[0].trim();
+            String replacement = parts.length > 1 ? parts[1].trim() : "$1";
+
+            logger.debug(
+                    "Regular expression extraction: pattern='{}', replacement='{}', input value='{}'",
+                    pattern,
+                    replacement,
+                    value);
+
+            java.util.regex.Pattern compiledPattern = java.util.regex.Pattern.compile(pattern);
+            java.util.regex.Matcher matcher = compiledPattern.matcher(value);
+
+            if (matcher.find()) {
+                // If replacement only contains group references, concatenate and return
+                // corresponding groups
+                if (replacement.matches("(\\$\\d+)(\\.\\$\\d+)*")) {
+                    String extracted = replacement;
+                    // Replace group references
+                    for (int i = 1; i <= matcher.groupCount(); i++) {
+                        extracted = extracted.replace("$" + i, matcher.group(i));
+                    }
+                    logger.debug(
+                            "Regular expression extraction successful: result='{}'", extracted);
+                    return extracted;
+                } else {
+                    String replaced = matcher.replaceFirst(replacement);
+                    logger.debug(
+                            "Regular expression replacement successful: result='{}'", replaced);
+                    return replaced;
+                }
+            } else {
+                logger.warn(
+                        "Regular expression extraction failed: pattern '{}' does not match input value '{}'",
+                        pattern,
+                        value);
+                return value;
+            }
+
+        } catch (Exception e) {
+            logger.error(
+                    "Regular expression extraction error: pattern='{}', value='{}'",
+                    regexPattern,
+                    value,
+                    e);
+            return value;
+        }
+    }
+
+    /** Parse regex_extract parameters, correctly handle commas within quotes */
+    private String[] parseRegexArgs(String args) {
+        if (args == null || args.trim().isEmpty()) {
+            return new String[0];
+        }
+
+        List<String> result = new ArrayList<>();
+        StringBuilder currentArg = new StringBuilder();
+        boolean inQuotes = false;
+        char quoteChar = '\0';
+
+        for (int i = 0; i < args.length(); i++) {
+            char c = args.charAt(i);
+
+            if (!inQuotes && (c == '\'' || c == '"')) {
+                inQuotes = true;
+                quoteChar = c;
+            } else if (inQuotes && c == quoteChar) {
+                inQuotes = false;
+                quoteChar = '\0';
+            } else if (!inQuotes && c == ',') {
+                result.add(currentArg.toString().trim());
+                currentArg.setLength(0);
+                continue;
+            }
+
+            currentArg.append(c);
+        }
+
+        if (currentArg.length() > 0) {
+            result.add(currentArg.toString().trim());
+        }
+
+        // Remove quotes from each parameter
+        for (int i = 0; i < result.size(); i++) {
+            String arg = result.get(i);
+            if ((arg.startsWith("'") && arg.endsWith("'"))
+                    || (arg.startsWith("\"") && arg.endsWith("\""))) {
+                result.set(i, arg.substring(1, arg.length() - 1));
+            }
+        }
+
+        return result.toArray(new String[0]);
+    }
+
+    /**
+     * Apply split filter - string splitting
+     *
+     * @param value input string
+     * @param delimiter delimiter, default is "/"
+     * @return split string array
+     */
+    private String[] applySplit(String value, String delimiter) {
+        if (value == null || value.trim().isEmpty()) {
+            return new String[0];
+        }
+
+        // If no delimiter is specified, use default delimiter
+        String actualDelimiter =
+                (delimiter != null && !delimiter.trim().isEmpty()) ?
delimiter.trim() + : DEFAULT_SPLIT_DELIMITER; + + logger.debug("String splitting: input value='{}', delimiter='{}'", value, actualDelimiter); + + String[] result = value.split(actualDelimiter); + logger.debug("Split result: {}", java.util.Arrays.toString(result)); + + return result; + } + + /** + * Apply get filter - get element at specified position in array + * + * @param value input value (may be string array) + * @param indexStr index string, supports negative index + * @return element at specified position + */ + private String applyGet(Object value, String indexStr) { + if (value == null) { + return ""; + } + + // If not a string array, return string form directly + if (!(value instanceof String[])) { + return value.toString(); + } + + String[] array = (String[]) value; + if (array.length == 0) { + return ""; + } + + try { + int index = Integer.parseInt(indexStr.trim()); + + // Support negative index + if (index < 0) { + index = array.length + index; + } + + if (index >= 0 && index < array.length) { + String result = array[index]; + logger.debug("Array get: index={}, result='{}'", indexStr, result); + return result; + } else { + logger.warn( + "Array index out of range: index={}, array length={}", + indexStr, + array.length); + return ""; + } + } catch (NumberFormatException e) { + logger.error("Invalid array index: {}", indexStr, e); + return ""; + } + } + + /** + * Apply replace filter - string replacement + * + * @param value input string + * @param args replacement parameters, format is "old,new" + * @return replaced string + */ + private String applyReplace(String value, String args) { + if (value == null) { + return ""; + } + + if (args == null || args.trim().isEmpty()) { + return value; + } + + // Parse replacement parameters, format is "old,new" + String[] parts = args.split(",", 2); + if (parts.length == 2) { + String oldStr = parts[0].trim(); + String newStr = parts[1].trim(); + + logger.debug( + "String replacement: input value='{}', replace '{}' -> '{}'", + value, + oldStr, + newStr); + + String result = value.replace(oldStr, newStr); + logger.debug("Replacement result: '{}'", result); + return result; + } else { + logger.warn( + "replace filter parameter format error, should be 'old,new', actual: {}", args); + return value; + } + } + + /** Apply join filter to array */ + private String applyJoinFilterOnArray(String[] value, String separator) { + if (value == null || value.length == 0) { + return ""; + } + + StringBuilder result = new StringBuilder(); + for (int i = 0; i < value.length; i++) { + if (i > 0) { + result.append(separator); + } + result.append(value[i] != null ? value[i].trim() : ""); + } + return result.toString(); + } + + /** + * Set current target context (for mapping tracking). 
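+     * For example, the engine might pass "source.Jdbc.url" while the source block is
+     * being resolved (an illustrative field path, not taken from the original code).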
This method can be called externally to
+     * set context when parsing specific configuration sections
+     */
+    public void setCurrentTargetContext(String targetContext) {
+        this.currentTargetContext = targetContext;
+    }
+
+    /** Clear current target context */
+    public void clearCurrentTargetContext() {
+        this.currentTargetContext = null;
+    }
+
+    /** Set field reference tracker */
+    public void setFieldReferenceTracker(DataXFieldExtractor.FieldReferenceTracker tracker) {
+        this.fieldReferenceTracker = tracker;
+    }
+
+    /** Get field reference tracker */
+    public DataXFieldExtractor.FieldReferenceTracker getFieldReferenceTracker() {
+        return this.fieldReferenceTracker;
+    }
+
+    /** Increment field reference count, supports intelligent matching of array fields */
+    private void incrementFieldReference(String normalizedPath) {
+        if (fieldReferenceTracker == null) {
+            return;
+        }
+
+        // Directly referenced field
+        fieldReferenceTracker.incrementReference(normalizedPath);
+        logger.debug("Field reference count: {}", normalizedPath);
+
+        // Handle bidirectional matching of array fields
+        Map<String, ?> allFields = fieldReferenceTracker.getAllFields();
+
+        // Case 1: If referencing an array field, all elements of the array should also be marked
+        // as referenced.
+        // For example: when referencing job.content[0].reader.parameter.connection[0].jdbcUrl,
+        // also mark job.content[0].reader.parameter.connection[0].jdbcUrl[0], jdbcUrl[1] etc. as
+        // referenced
+        for (String fieldPath : allFields.keySet()) {
+            if (isArrayElementOf(fieldPath, normalizedPath)) {
+                fieldReferenceTracker.incrementReference(fieldPath);
+                logger.debug(
+                        "Array element reference count: {} (from array reference: {})",
+                        fieldPath,
+                        normalizedPath);
+            }
+        }
+
+        // Case 2: If referencing an array element, the corresponding array itself should also be
+        // marked as referenced.
+        // For example: when referencing job.content[0].reader.parameter.connection[0].jdbcUrl[0],
+        // also mark job.content[0].reader.parameter.connection[0].jdbcUrl as referenced
+        String arrayFieldName = getArrayFieldNameFromElement(normalizedPath);
+        if (arrayFieldName != null && allFields.containsKey(arrayFieldName)) {
+            fieldReferenceTracker.incrementReference(arrayFieldName);
+            logger.debug(
+                    "Array field reference count: {} (from array element reference: {})",
+                    arrayFieldName,
+                    normalizedPath);
+        }
+    }
+
+    /**
+     * Determine if fieldPath is an array element of arrayPath. For example:
+     * job.content[0].reader.parameter.connection[0].jdbcUrl[0] is an element of
+     * job.content[0].reader.parameter.connection[0].jdbcUrl
+     */
+    private boolean isArrayElementOf(String fieldPath, String arrayPath) {
+        // Check if it's an array element pattern: arrayPath[index]
+        if (fieldPath.startsWith(arrayPath + "[") && fieldPath.endsWith("]")) {
+            // Extract index part, ensure it's a number
+            String indexPart = fieldPath.substring(arrayPath.length() + 1, fieldPath.length() - 1);
+            try {
+                Integer.parseInt(indexPart);
+                return true;
+            } catch (NumberFormatException e) {
+                return false;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Extract array field name from array element path.
For example: + * job.content[0].reader.parameter.connection[0].jdbcUrl[0] -> + * job.content[0].reader.parameter.connection[0].jdbcUrl + */ + private String getArrayFieldNameFromElement(String elementPath) { + // Check if it's an array element pattern: xxx[number] + if (elementPath.matches(".*\\[\\d+\\]$")) { + int lastBracket = elementPath.lastIndexOf('['); + return elementPath.substring(0, lastBracket); + } + return null; + } + + /** Check if line contains filters */ + private boolean containsFilters(String line) { + return line.contains(PIPE_SYMBOL) && containsVariable(line); + } + + /** Check if currently processing complex transformation */ + private boolean isPartOfComplexTransform() { + return processingComplexTransform; + } + + /** Check if it's a real complex transformation (multiple variables or complex expressions) */ + private boolean isReallyComplexTransform(String line) { + // Count number of variables + Pattern variablePattern = Pattern.compile("\\{\\{[^}]+\\}\\}"); + Matcher matcher = variablePattern.matcher(line); + int variableCount = 0; + while (matcher.find()) { + variableCount++; + } + + // If there are multiple variables, consider it a complex transformation + if (variableCount > 1) { + return true; + } + + // If there's only one variable, check if there's a complex filter chain (more than 2 + // filters) + if (variableCount == 1) { + matcher.reset(); + if (matcher.find()) { + String variable = matcher.group(); + // Count pipe symbols + long pipeCount = variable.chars().filter(ch -> ch == '|').count(); + // If there are more than 2 filters, consider it a complex transformation + return pipeCount > 2; + } + } + + return false; + } + + /** Record complex transformation mapping (lines containing multiple variables and filters) */ + private void recordComplexTransformMapping( + String originalLine, String resolvedLine, String targetContext) { + if (mappingTracker == null) { + return; + } + + // Extract original template expression + String templateExpression = extractTemplateExpression(originalLine); + + // Extract final value + String finalValue = extractFinalValue(resolvedLine); + + // Extract list of filters used + String filtersUsed = extractFiltersFromExpression(templateExpression); + + // Escape template expression for Markdown + String escapedTemplateExpression = escapeMarkdownTableContent(templateExpression); + + // Record as transformation mapping, using escaped template expression as source + mappingTracker.recordTransformMapping( + escapedTemplateExpression, targetContext, finalValue, filtersUsed); + + logger.debug( + "Record complex transformation mapping: {} -> {} = {}", + escapedTemplateExpression, + targetContext, + finalValue); + } + + /** Extract template expression */ + private String extractTemplateExpression(String line) { + // Extract part after =, remove quotes + if (line.contains("=")) { + String value = line.substring(line.indexOf("=") + 1).trim(); + if (value.startsWith("\"") && value.endsWith("\"")) { + value = value.substring(1, value.length() - 1); + } + return value; + } + return line.trim(); + } + + /** Extract final value */ + private String extractFinalValue(String resolvedLine) { + if (resolvedLine.contains("=")) { + String value = resolvedLine.substring(resolvedLine.indexOf("=") + 1).trim(); + if (value.startsWith("\"") && value.endsWith("\"")) { + value = value.substring(1, value.length() - 1); + } + return value; + } + return resolvedLine.trim(); + } + + /** Extract filter list from template expression */ + private String 
extractFiltersFromExpression(String templateExpression) {
+        if (templateExpression == null || !templateExpression.contains("|")) {
+            return "";
+        }
+
+        Set<String> filters = new HashSet<>();
+        Matcher matcher = FILTER_PATTERN.matcher(templateExpression);
+
+        while (matcher.find()) {
+            String filter = matcher.group(1);
+            filters.add(filter);
+        }
+
+        // Convert filter list to string, separated by commas
+        return String.join(", ", filters);
+    }
+
+    /** Escape Markdown table content */
+    private String escapeMarkdownTableContent(String content) {
+        if (content == null) {
+            return "";
+        }
+
+        // Escape special characters in Markdown table
+        return content.replace("|", "\\|") // Escape pipe symbol
+                .replace("\n", " ") // Replace newlines with spaces
+                .replace("\r", "") // Remove carriage returns
+                .trim();
+    }
+
+    /** Check if it's a hardcoded default value configuration line */
+    private boolean isHardcodedDefaultValue(String trimmedLine) {
+        if (trimmedLine.isEmpty()
+                || trimmedLine.startsWith(COMMENT_PREFIX)
+                || !trimmedLine.contains(EQUALS_SIGN)) {
+            return false;
+        }
+
+        // Exclude lines containing variables (these are already handled elsewhere)
+        if (containsVariable(trimmedLine)) {
+            return false;
+        }
+
+        // Exclude structural lines (such as "}" etc.)
+        if (trimmedLine.equals(CLOSE_BRACE) || trimmedLine.equals(OPEN_BRACE)) {
+            return false;
+        }
+
+        // General pattern: any key = value configuration line that doesn't contain variables is
+        // considered a hardcoded default value.
+        // This includes: numbers, booleans, quoted strings, etc.
+        return trimmedLine.matches(".*=\\s*(.+)\\s*$");
+    }
+
+    /** Record hardcoded default value */
+    private void recordHardcodedDefaultValue(String trimmedLine, String targetContext) {
+        if (mappingTracker == null) {
+            return;
+        }
+
+        // Extract configuration key and value
+        String[] parts = trimmedLine.split(EQUALS_SIGN, 2);
+        if (parts.length != 2) {
+            return;
+        }
+
+        String key = parts[0].trim();
+        String value = parts[1].trim();
+
+        // Remove quotes
+        if (value.startsWith(QUOTE_DOUBLE) && value.endsWith(QUOTE_DOUBLE)) {
+            value = value.substring(1, value.length() - 1);
+        }
+
+        // Record as default value
+        mappingTracker.recordDefaultValue(targetContext, value, "Template hardcoded default value");
+
+        logger.debug(
+                "Record hardcoded default value: {} = {} (path: {})", key, value, targetContext);
+    }
+
+    /**
+     * Smart context parsing - analyze template structure line by line, infer accurate target field
+     * paths
+     */
+    private String resolveWithSmartContext(String content, JsonNode rootNode) {
+        StringBuilder result = new StringBuilder();
+        String[] lines = content.split("\n");
+        List<String> configPath = new ArrayList<>(); // Current configuration path stack
+
+        for (String line : lines) {
+            String trimmedLine = line.trim();
+            int indentLevel = getIndentLevel(line);
+
+            // Update configuration path stack
+            updateConfigPath(configPath, trimmedLine, indentLevel);
+
+            if (containsVariable(line)) {
+                String resolvedLine = processVariableLine(line, trimmedLine, configPath, rootNode);
+                result.append(resolvedLine).append("\n");
+            } else {
+                processNonVariableLine(line, trimmedLine, configPath);
+                result.append(line).append("\n");
+            }
+        }
+
+        return removeTrailingNewline(result);
+    }
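+
+    // Illustrative walk-through (hypothetical template, values assumed): given the template
+    //     source {
+    //       Jdbc {
+    //         url = "{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] }}"
+    //       }
+    //     }
+    // the path stack grows [source] -> [source, Jdbc], so the variable line above is resolved
+    // with target context "source.Jdbc.url" and recorded under that path by the mapping tracker.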
+    /**
+     * Process lines containing variables
+     *
+     * @param line original line
+     * @param trimmedLine trimmed line
+     * @param configPath configuration path stack
+     * @param rootNode JSON root node
+     * @return parsed line
+     */
+    private String processVariableLine(
+            String line, String trimmedLine, List<String> configPath, JsonNode rootNode) {
+        logger.debug("Found line containing variables: {}", trimmedLine);
+        String targetContext = buildTargetContext(configPath, trimmedLine);
+        String previousContext = this.currentTargetContext;
+        this.currentTargetContext = targetContext;
+
+        try {
+            boolean hasFilters = containsFilters(line);
+            String originalLine = line;
+
+            // Check if it's a real complex transformation (multiple variables or complex
+            // expressions)
+            boolean isComplexTransform = hasFilters && isReallyComplexTransform(line);
+
+            // Only set complex transformation flag for truly complex transformations
+            if (isComplexTransform) {
+                processingComplexTransform = true;
+            }
+
+            // Parse variables in this line
+            String resolvedLine = resolveJinja2FilterVariables(line, rootNode);
+            resolvedLine = resolveJinja2Variables(resolvedLine, rootNode);
+
+            // Only record as complex transformation mapping for truly complex transformations
+            if (isComplexTransform && mappingTracker != null) {
+                recordComplexTransformMapping(originalLine, resolvedLine, targetContext);
+            }
+
+            return resolvedLine;
+        } finally {
+            // Restore previous context and flags
+            this.currentTargetContext = previousContext;
+            this.processingComplexTransform = false;
+        }
+    }
+
+    /**
+     * Process lines not containing variables
+     *
+     * @param line original line
+     * @param trimmedLine trimmed line
+     * @param configPath configuration path stack
+     */
+    private void processNonVariableLine(String line, String trimmedLine, List<String> configPath) {
+        // Check if it's a hardcoded default value configuration line
+        if (isHardcodedDefaultValue(trimmedLine)) {
+            String targetContext = buildTargetContext(configPath, trimmedLine);
+            recordHardcodedDefaultValue(trimmedLine, targetContext);
+        }
+    }
+
+    /**
+     * Remove trailing newline from result
+     *
+     * @param result string builder
+     * @return processed string
+     */
+    private String removeTrailingNewline(StringBuilder result) {
+        if (result.length() > 0) {
+            result.setLength(result.length() - 1);
+        }
+        return result.toString();
+    }
+
+    /** Check if line contains template variables */
+    private boolean containsVariable(String line) {
+        return line.contains(TEMPLATE_VAR_START) && line.contains(TEMPLATE_VAR_END);
+    }
+
+    /** Get indentation level of line */
+    private int getIndentLevel(String line) {
+        int indent = 0;
+        for (char c : line.toCharArray()) {
+            if (c == ' ') {
+                indent++;
+            } else if (c == '\t') {
+                indent += TAB_SIZE; // tab is considered as TAB_SIZE spaces
+            } else {
+                break;
+            }
+        }
+        return indent;
+    }
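+
+    // Example (illustrative; INDENT_SIZE and TAB_SIZE are constants declared elsewhere in this
+    // class, assumed here to be 2 and 4): a line indented with four spaces yields
+    // getIndentLevel(...) == 4, which updateConfigPath below maps to targetDepth 4 / 2 = 2,
+    // i.e. the line is treated as being nested two configuration blocks deep.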
+    /** Update configuration path stack */
+    private void updateConfigPath(List<String> configPath, String trimmedLine, int indentLevel) {
+        logger.debug(
+                "Update configuration path: indentLevel={}, current configPath={}, trimmedLine='{}'",
+                indentLevel,
+                configPath,
+                trimmedLine);
+
+        // Ignore empty lines and comment lines, don't let them affect configuration path
+        if (trimmedLine.isEmpty() || trimmedLine.startsWith(COMMENT_PREFIX)) {
+            logger.debug(
+                    "Ignore empty line or comment line, keep configPath unchanged: {}", configPath);
+            return;
+        }
+
+        // Adjust path depth based on indentation (every INDENT_SIZE spaces is one level)
+        int targetDepth = indentLevel / INDENT_SIZE;
+
+        logger.debug("Calculate target depth: targetDepth={}", targetDepth);
+
+        while (configPath.size() > targetDepth) {
+            String removed = configPath.remove(configPath.size() - 1);
+            logger.debug("Remove path element: {}, remaining configPath={}", removed, configPath);
+        }
+
+        // If this is the start of a configuration block, add to path
+        if (trimmedLine.endsWith(OPEN_BRACE)) {
+            String configKey = trimmedLine.substring(0, trimmedLine.indexOf(OPEN_BRACE)).trim();
+            if (!configKey.isEmpty()) {
+                configPath.add(configKey);
+                logger.debug("Add path element: {}, updated configPath={}", configKey, configPath);
+            }
+        }
+    }
+
+    /** Build target context path */
+    private String buildTargetContext(List<String> configPath, String trimmedLine) {
+        StringBuilder targetPath = new StringBuilder();
+
+        // Add configuration path
+        for (String pathPart : configPath) {
+            if (targetPath.length() > 0) {
+                targetPath.append(".");
+            }
+            targetPath.append(pathPart);
+        }
+
+        // If current line contains specific configuration item (key = value format), add
+        // configuration key
+        if (trimmedLine.contains(EQUALS_SIGN)) {
+            String configKey = extractConfigKey(trimmedLine);
+            if (configKey != null && !configKey.isEmpty()) {
+                if (targetPath.length() > 0) {
+                    targetPath.append(".");
+                }
+                targetPath.append(configKey);
+            }
+        }
+
+        String result = targetPath.toString();
+        logger.debug(
+                "Build target context: configPath={}, trimmedLine='{}', result='{}'",
+                configPath,
+                trimmedLine,
+                result);
+        return result;
+    }
+
+    /** Extract configuration key name */
+    private String extractConfigKey(String trimmedLine) {
+        if (trimmedLine.contains("=")) {
+            // key = value format
+            return trimmedLine.substring(0, trimmedLine.indexOf(EQUALS_SIGN)).trim();
+        }
+        return null;
+    }
+
+    /**
+     * Analyze template and extract field mapping relationships (alternative to HOCON parsing)
+     *
+     * @param templateContent template content
+     * @param templateType template type (source/sink)
+     * @return mapping from field paths to variable lists
+     */
+    public Map<String, List<String>> analyzeTemplateFieldMappings(
+            String templateContent, String templateType) {
+        Map<String, List<String>> fieldMappings = new HashMap<>();
+
+        if (templateContent == null || templateContent.trim().isEmpty()) {
+            return fieldMappings;
+        }
+
+        String[] lines = templateContent.split("\n");
+        List<String> configPath = new ArrayList<>();
+
+        for (String line : lines) {
+            String trimmedLine = line.trim();
+            int indentLevel = getIndentLevel(line);
+
+            // Update configuration path stack
+            updateConfigPath(configPath, trimmedLine, indentLevel);
+
+            // If this line contains variables, extract field path and variables
+            if (containsVariable(line)) {
+                String fieldPath = buildFieldPath(templateType, configPath, trimmedLine);
+                List<String> variables = extractVariablesFromLine(line);
+
+                if (!variables.isEmpty()) {
+                    fieldMappings.put(fieldPath, variables);
+                    logger.debug("Extract field mapping: {} -> {}", fieldPath, variables);
+                }
+            }
+        }
+
+        return fieldMappings;
+    }
+
+    /** Extract all template variables from line */
+    private List<String> extractVariablesFromLine(String line) {
+        List<String> variables = new ArrayList<>();
+
+        // Extract filter variables
+        Matcher filterMatcher = JINJA2_FILTER_PATTERN.matcher(line);
+        while (filterMatcher.find()) {
+            String path = filterMatcher.group(1).trim();
+            variables.add(path);
+        }
+
+        // Extract basic variables (excluding those already matched by filter pattern)
+        String lineAfterFilters = filterMatcher.replaceAll("");
+        Matcher variableMatcher = JINJA2_VARIABLE_PATTERN.matcher(lineAfterFilters);
+        while (variableMatcher.find()) {
+            String path = variableMatcher.group(1).trim();
+            variables.add(path);
+        }
+
+        return variables;
+    }
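+
+    // Illustrative result (hypothetical sink template line, exact mapping shape assumed): for
+    //     url = "{{ datax.job.content[0].writer.parameter.connection[0].jdbcUrl }}"
+    // analyzeTemplateFieldMappings(template, "sink") would yield roughly
+    //     "sink.url" -> [job.content[0].writer.parameter.connection[0].jdbcUrl]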
+    /** Build field path */
+    private String buildFieldPath(
+            String templateType, List<String> configPath, String trimmedLine) {
+        StringBuilder fieldPath = new StringBuilder();
+
+        // Add template type prefix
+        if (templateType != null && !templateType.isEmpty()) {
+            fieldPath.append(templateType);
+        }
+
+        // Add configuration path
+        for (String pathPart : configPath) {
+            if (fieldPath.length() > 0) {
+                fieldPath.append(".");
+            }
+            fieldPath.append(pathPart);
+        }
+
+        // If current line contains specific configuration item (key = value format), add
+        // configuration key
+        String configKey = extractConfigKey(trimmedLine);
+        if (configKey != null && !configKey.isEmpty()) {
+            if (fieldPath.length() > 0) {
+                fieldPath.append(".");
+            }
+            fieldPath.append(configKey);
+        }
+
+        return fieldPath.toString();
+    }
+
+    /**
+     * Use template analysis to parse template and track field mappings (using raw JSON string)
+     *
+     * @param templateContent template content
+     * @param templateType template type (source/sink)
+     * @param dataXJsonContent DataX JSON configuration content
+     * @return parsed content
+     */
+    public String resolveWithTemplateAnalysis(
+            String templateContent, String templateType, String dataXJsonContent) {
+        if (templateContent == null || templateContent.trim().isEmpty()) {
+            return templateContent;
+        }
+
+        logger.info("Using template analysis to parse template type: {}", templateType);
+
+        try {
+            // 1. Analyze template, extract field variable mappings
+            Map<String, List<String>> fieldVariables =
+                    analyzeTemplateFieldMappings(templateContent, templateType);
+
+            // 2. Parse JSON string directly to JsonNode
+            JsonNode rootNode = objectMapper.readTree(dataXJsonContent);
+
+            // 3. Use smart context parsing to handle all variables
+            String result = resolveWithSmartContext(templateContent, rootNode);
+
+            logger.info(LOG_MSG_TEMPLATE_ANALYSIS_COMPLETE, fieldVariables.size());
+            return result;
+
+        } catch (Exception e) {
+            handleTemplateException(ERROR_MSG_TEMPLATE_ANALYSIS_FAILED, e);
+            return null; // This line won't execute, but the compiler needs it
+        }
+    }
+
+    /** Validate template syntax (based on Jinja2 pattern) */
+    public boolean validateTemplate(String templateContent) {
+        if (templateContent == null || templateContent.trim().isEmpty()) {
+            return true;
+        }
+
+        try {
+            // Check for unclosed template variables
+            long openCount = templateContent.chars().filter(ch -> ch == '{').count();
+            long closeCount = templateContent.chars().filter(ch -> ch == '}').count();
+
+            if (openCount != closeCount) {
+                logger.warn("Template validation failed: mismatched braces");
+                return false;
+            }
+
+            // Check if variable syntax is correct
+            Matcher matcher = JINJA2_VARIABLE_PATTERN.matcher(templateContent);
+            while (matcher.find()) {
+                String variable = matcher.group(1).trim();
+                if (variable.isEmpty()) {
+                    logger.warn("Template validation failed: found empty variable");
+                    return false;
+                }
+            }

+            Matcher filterMatcher = JINJA2_FILTER_PATTERN.matcher(templateContent);
+            while (filterMatcher.find()) {
+                String variable = filterMatcher.group(1).trim();
+                String filter = filterMatcher.group(2).trim();
+                if (variable.isEmpty() || filter.isEmpty()) {
+                    logger.warn("Template validation failed: found empty variable or filter");
+                    return false;
+                }
+            }
+
+            return true;
+        } catch (Exception e) {
+            logger.error("Template validation exception: {}", e.getMessage(), e);
+            return false;
+        }
+    }
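+
+    // Illustrative behaviour (hypothetical inputs): a template containing
+    // "{{ datax.job.setting.speed.channel }}" passes the brace-balance check above, while one
+    // truncated to "{{ datax.job.setting" fails it (two '{' versus zero '}').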
+}
diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/BatchConversionReport.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/BatchConversionReport.java
new file mode 100644
index 0000000..ef00b51
--- /dev/null
+++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/BatchConversionReport.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.tools.x2seatunnel.util;
+
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/** Batch conversion report, records successful and failed entries and outputs a report file */
+public class BatchConversionReport {
+
+    private final List<ConversionRecord> successList = new ArrayList<>();
+    private final Map<String, String> failureMap = new LinkedHashMap<>();
+
+    private String sourceDirectory;
+    private String outputDirectory;
+    private String reportDirectory;
+    private String filePattern;
+    private String templatePath;
+    private LocalDateTime startTime;
+    private LocalDateTime endTime;
+
+    public static class ConversionRecord {
+        private final String sourceFile;
+        private final String targetFile;
+        private final String reportFile;
+        private final LocalDateTime convertTime;
+
+        public ConversionRecord(String sourceFile, String targetFile, String reportFile) {
+            this.sourceFile = sourceFile;
+            this.targetFile = targetFile;
+            this.reportFile = reportFile;
+            this.convertTime = LocalDateTime.now();
+        }
+
+        public String getSourceFile() {
+            return sourceFile;
+        }
+
+        public String getTargetFile() {
+            return targetFile;
+        }
+
+        public String getReportFile() {
+            return reportFile;
+        }
+
+        public LocalDateTime getConvertTime() {
+            return convertTime;
+        }
+    }
+
+    public void setConversionConfig(
+            String sourceDirectory,
+            String outputDirectory,
+            String reportDirectory,
+            String filePattern,
+            String templatePath) {
+        this.sourceDirectory = sourceDirectory;
+        this.outputDirectory = outputDirectory;
+        this.reportDirectory = reportDirectory;
+        this.filePattern = filePattern;
+        this.templatePath = templatePath;
+        this.startTime = LocalDateTime.now();
+    }
+
+    public void recordSuccess(String sourceFile, String targetFile, String reportFile) {
+        successList.add(new ConversionRecord(sourceFile, targetFile, reportFile));
+    }
+
+    public void recordSuccess(String source) {
+        // For backward compatibility, generate default target and report file paths
+        String targetFile = generateDefaultTargetPath(source);
+        String reportFile = generateDefaultReportPath(source);
+        recordSuccess(source, targetFile, reportFile);
+    }
+
+    public void recordFailure(String source, String reason) {
+        failureMap.put(source, reason);
+    }
+
+    public void finish() {
+        this.endTime = LocalDateTime.now();
+    }
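+
+    // Typical lifecycle (illustrative; the paths are made up):
+    //   BatchConversionReport report = new BatchConversionReport();
+    //   report.setConversionConfig("jobs", "out", "reports", "*.json", null);
+    //   report.recordSuccess("jobs/a.json", "out/a.conf", "reports/a.md");
+    //   report.recordFailure("jobs/b.json", "missing reader type");
+    //   report.finish();
+    //   report.writeReport("reports/batch-report.md");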
+
+    private String generateDefaultTargetPath(String sourceFile) {
+        if (outputDirectory != null) {
+            String fileName = FileUtils.getFileNameWithoutExtension(sourceFile);
+            return outputDirectory + "/" + fileName + ".conf";
+        }
+        return sourceFile.replace(".json", ".conf");
+    }
+
+    private String generateDefaultReportPath(String sourceFile) {
+        if (reportDirectory != null) {
+            String fileName = FileUtils.getFileNameWithoutExtension(sourceFile);
+            return reportDirectory + "/" + fileName + ".md";
+        }
+        return sourceFile.replace(".json", ".md");
+    }
+
+    /**
+     * Write report in Markdown format
+     *
+     * @param reportPath report file output path
+     */
+    public void writeReport(String reportPath) {
+        if (endTime == null) {
+            finish(); // If finish() was not called, complete automatically
+        }
+
+        StringBuilder sb = new StringBuilder();
+
+        // Title and basic information
+        sb.append("# Batch Conversion Report\n\n");
+        sb.append("## 📋 Conversion Overview\n\n");
+        sb.append("| Item | Value |\n");
+        sb.append("|------|-------|\n");
+        sb.append("| **Start Time** | ").append(formatDateTime(startTime)).append(" |\n");
+        sb.append("| **End Time** | ").append(formatDateTime(endTime)).append(" |\n");
+        sb.append("| **Duration** | ").append(calculateDuration()).append(" |\n");
+        sb.append("| **Source Directory** | `")
+                .append(sourceDirectory != null ? sourceDirectory : "Not specified")
+                .append("` |\n");
+        sb.append("| **Output Directory** | `")
+                .append(outputDirectory != null ? outputDirectory : "Not specified")
+                .append("` |\n");
+        sb.append("| **Report Directory** | `")
+                .append(reportDirectory != null ? reportDirectory : "Not specified")
+                .append("` |\n");
+        sb.append("| **File Pattern** | `")
+                .append(filePattern != null ? filePattern : "*.json")
+                .append("` |\n");
+        sb.append("| **Custom Template** | `")
+                .append(templatePath != null ? templatePath : "Default template")
+                .append("` |\n");
+        sb.append("| **Successful Conversions** | ")
+                .append(successList.size())
+                .append(" files |\n");
+        sb.append("| **Failed Conversions** | ").append(failureMap.size()).append(" files |\n");
+        sb.append("| **Total** | ")
+                .append(successList.size() + failureMap.size())
+                .append(" files |\n");
+        sb.append("| **Success Rate** | ").append(calculateSuccessRate()).append(" |\n\n");
+
+        // Successful conversion details
+        sb.append("## ✅ Successful Conversions (").append(successList.size()).append(")\n\n");
+        if (successList.isEmpty()) {
+            sb.append("*No successfully converted files*\n\n");
+        } else {
+            sb.append("| # | Source File | Target File | Report File |\n");
+            sb.append("|---|-------------|-------------|-------------|\n");
+            for (int i = 0; i < successList.size(); i++) {
+                ConversionRecord record = successList.get(i);
+                sb.append("| ").append(i + 1).append(" | ");
+                sb.append("`").append(record.getSourceFile()).append("` | ");
+                sb.append("`").append(record.getTargetFile()).append("` | ");
+                sb.append("`").append(record.getReportFile()).append("` |\n");
+            }
+            sb.append("\n");
+        }
+
+        // Failed conversion details
+        sb.append("## ❌ Failed Conversions (").append(failureMap.size()).append(")\n\n");
+        if (failureMap.isEmpty()) {
+            sb.append("*No failed conversion files*\n\n");
+        } else {
+            sb.append("| # | Source File | Failure Reason |\n");
+            sb.append("|---|-------------|----------------|\n");
+            int index = 1;
+            for (Map.Entry<String, String> entry : failureMap.entrySet()) {
+                sb.append("| ").append(index++).append(" | ");
+                sb.append("`").append(entry.getKey()).append("` | ");
+                sb.append(entry.getValue()).append(" |\n");
+            }
+            sb.append("\n");
+        }
+
+        // Add simple footer information
+        sb.append("---\n");
+        sb.append("*Report generated at: ")
+                .append(formatDateTime(LocalDateTime.now()))
.append("*\n"); + sb.append("*Tool version: X2SeaTunnel v0.1*\n"); + + // Write to file + FileUtils.writeFile(reportPath, sb.toString()); + } + + /** Format date time */ + private String formatDateTime(LocalDateTime dateTime) { + if (dateTime == null) { + return "Unknown"; + } + return dateTime.format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); + } + + /** Calculate conversion duration */ + private String calculateDuration() { + if (startTime == null || endTime == null) { + return "Unknown"; + } + + long seconds = java.time.Duration.between(startTime, endTime).getSeconds(); + if (seconds < 60) { + return seconds + " seconds"; + } else if (seconds < 3600) { + return (seconds / 60) + " minutes " + (seconds % 60) + " seconds"; + } else { + long hours = seconds / 3600; + long minutes = (seconds % 3600) / 60; + long remainingSeconds = seconds % 60; + return hours + " hours " + minutes + " minutes " + remainingSeconds + " seconds"; + } + } + + /** Calculate success rate */ + private String calculateSuccessRate() { + int total = successList.size() + failureMap.size(); + if (total == 0) { + return "0%"; + } + double rate = (double) successList.size() / total * 100; + return String.format("%.1f%%", rate); + } +} diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/ConversionConfig.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/ConversionConfig.java new file mode 100644 index 0000000..7d78249 --- /dev/null +++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/ConversionConfig.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.seatunnel.tools.x2seatunnel.util;
+
+/** Conversion configuration object, supporting mapping from YAML files or command-line arguments */
+public class ConversionConfig {
+    private String source;
+    private String target;
+    private String report;
+    private String template;
+    private String sourceType;
+    private boolean verbose;
+
+    public String getSource() {
+        return source;
+    }
+
+    public void setSource(String source) {
+        this.source = source;
+    }
+
+    public String getTarget() {
+        return target;
+    }
+
+    public void setTarget(String target) {
+        this.target = target;
+    }
+
+    public String getReport() {
+        return report;
+    }
+
+    public void setReport(String report) {
+        this.report = report;
+    }
+
+    public String getTemplate() {
+        return template;
+    }
+
+    public void setTemplate(String template) {
+        this.template = template;
+    }
+
+    public String getSourceType() {
+        return sourceType;
+    }
+
+    public void setSourceType(String sourceType) {
+        this.sourceType = sourceType;
+    }
+
+    public boolean isVerbose() {
+        return verbose;
+    }
+
+    public void setVerbose(boolean verbose) {
+        this.verbose = verbose;
+    }
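+
+    // Illustrative YAML this object may be populated from (keys mirror the setters above;
+    // the actual schema is defined by YamlConfigParser, and these values are made up):
+    //   source: examples/source/datax-mysql2hdfs.json
+    //   target: output/mysql2hdfs.conf
+    //   report: output/mysql2hdfs.md
+    //   sourceType: datax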
+}
diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/DataXFieldExtractor.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/DataXFieldExtractor.java
new file mode 100644
index 0000000..19c7d75
--- /dev/null
+++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/DataXFieldExtractor.java
@@ -0,0 +1,341 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.tools.x2seatunnel.util;
+
+import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.JsonNode;
+import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/** DataX field extractor - extract all field paths from DataX JSON configuration */
+public class DataXFieldExtractor {
+
+    private static final Logger logger = LoggerFactory.getLogger(DataXFieldExtractor.class);
+    private final ObjectMapper objectMapper = new ObjectMapper();
+
+    public Set<String> extractAllFields(String dataXJsonContent) {
+        Set<String> allFields = new HashSet<>();
+
+        try {
+            JsonNode rootNode = objectMapper.readTree(dataXJsonContent);
+            extractFieldsRecursively(rootNode, "", allFields);
+            return allFields;
+
+        } catch (Exception e) {
+            logger.error("Failed to extract DataX fields: {}", e.getMessage(), e);
+            return allFields;
+        }
+    }
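+
+    // Illustrative input/output (hypothetical DataX snippet): for
+    //   {"job":{"content":[{"reader":{"parameter":{"username":"root"}}}]}}
+    // extractAllFields returns a set containing the single path
+    //   job.content[0].reader.parameter.username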
+    /**
+     * Recursively extract all field paths from the JSON node
+     *
+     * @param node the current JSON node
+     * @param currentPath the current path
+     * @param allFields the set to collect all fields
+     */
+    private void extractFieldsRecursively(
+            JsonNode node, String currentPath, Set<String> allFields) {
+        if (node == null) {
+            return;
+        }
+
+        if (node.isObject()) {
+            // Process object node
+            Iterator<Map.Entry<String, JsonNode>> fields = node.fields();
+            while (fields.hasNext()) {
+                Map.Entry<String, JsonNode> field = fields.next();
+                String fieldName = field.getKey();
+                JsonNode fieldValue = field.getValue();
+                String fieldPath =
+                        currentPath.isEmpty() ? fieldName : currentPath + "." + fieldName;
+
+                if (fieldValue.isValueNode()) {
+                    // Leaf node, record the field path
+                    allFields.add(fieldPath);
+                    logger.debug("Extracted field: {} = {}", fieldPath, fieldValue.asText());
+                } else {
+                    // Continue recursion
+                    extractFieldsRecursively(fieldValue, fieldPath, allFields);
+                }
+            }
+        } else if (node.isArray()) {
+            // Process array node
+            for (int i = 0; i < node.size(); i++) {
+                JsonNode arrayElement = node.get(i);
+                String arrayPath = currentPath + "[" + i + "]";
+                extractFieldsRecursively(arrayElement, arrayPath, allFields);
+            }
+        } else if (node.isValueNode()) {
+            // Value node, record the field path
+            allFields.add(currentPath);
+            logger.debug("Extracted field: {} = {}", currentPath, node.asText());
+        }
+    }
+
+    /**
+     * Filter meaningful DataX fields (excluding system fields)
+     *
+     * @param allFields all fields
+     * @return filtered meaningful fields
+     */
+    public Set<String> filterMeaningfulFields(Set<String> allFields) {
+        Set<String> meaningfulFields = new HashSet<>();
+
+        for (String field : allFields) {
+            // Only keep reader and writer parameters under content, and configurations under
+            // setting
+            if (field.contains(".content[")
+                    && (field.contains(".reader.parameter.")
+                            || field.contains(".writer.parameter."))) {
+                meaningfulFields.add(field);
+            } else if (field.contains(".setting.")) {
+                meaningfulFields.add(field);
+            }
+            // More filtering rules can be added as needed
+        }
+
+        logger.debug("{} meaningful fields retained after filtering", meaningfulFields.size());
+        return meaningfulFields;
+    }
+
+    /**
+     * Extract mappings of all field paths and their values from DataX JSON string
+     *
+     * @param dataXJsonContent DataX JSON configuration content
+     * @return mappings from field paths to values
+     */
+    public Map<String, String> extractAllFieldsWithValues(String dataXJsonContent) {
+        Map<String, String> fieldValueMap = new HashMap<>();
+
+        try {
+            JsonNode rootNode = objectMapper.readTree(dataXJsonContent);
+            extractFieldsWithValuesRecursively(rootNode, "", fieldValueMap);
+
+            logger.debug(
+                    "Extracted {} fields with values from DataX configuration",
+                    fieldValueMap.size());
+            return fieldValueMap;
+
+        } catch (Exception e) {
+            logger.error("Failed to extract DataX fields and values: {}", e.getMessage(), e);
+            return fieldValueMap;
+        }
+    }
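+
+    // Illustrative filtering performed by the filterMeaningful* methods in this class
+    // (hypothetical paths): job.content[0].reader.parameter.username and
+    // job.setting.speed.channel are kept, while bookkeeping such as job.version is dropped
+    // because it matches neither the reader/writer parameter rule nor the setting rule.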
+    /**
+     * Recursively extract all field paths and their values from the JSON node
+     *
+     * @param node the current JSON node
+     * @param currentPath the current path
+     * @param fieldValueMap the map to collect field paths and values
+     */
+    private void extractFieldsWithValuesRecursively(
+            JsonNode node, String currentPath, Map<String, String> fieldValueMap) {
+        if (node == null) {
+            return;
+        }
+
+        if (node.isObject()) {
+            Iterator<Map.Entry<String, JsonNode>> fields = node.fields();
+            while (fields.hasNext()) {
+                Map.Entry<String, JsonNode> field = fields.next();
+                String fieldName = field.getKey();
+                JsonNode fieldValue = field.getValue();
+                String fieldPath =
+                        currentPath.isEmpty() ? fieldName : currentPath + "." + fieldName;
+
+                if (fieldValue.isValueNode()) {
+                    // Leaf node, record the field path and value
+                    String value = fieldValue.asText();
+                    fieldValueMap.put(fieldPath, value);
+                    logger.debug("Extracted field: {} = {}", fieldPath, value);
+                } else {
+                    extractFieldsWithValuesRecursively(fieldValue, fieldPath, fieldValueMap);
+                }
+            }
+        } else if (node.isArray()) {
+            for (int i = 0; i < node.size(); i++) {
+                JsonNode arrayElement = node.get(i);
+                String arrayPath = currentPath + "[" + i + "]";
+                extractFieldsWithValuesRecursively(arrayElement, arrayPath, fieldValueMap);
+            }
+        } else if (node.isValueNode()) {
+            // Value node, record the field path and value
+            String value = node.asText();
+            fieldValueMap.put(currentPath, value);
+            logger.debug("Extracted field: {} = {}", currentPath, value);
+        }
+    }
+
+    /**
+     * Filter meaningful DataX fields and their values
+     *
+     * @param allFieldsWithValues all fields and their values
+     * @return filtered meaningful fields and their values
+     */
+    public Map<String, String> filterMeaningfulFieldsWithValues(
+            Map<String, String> allFieldsWithValues) {
+        Map<String, String> meaningfulFields = new HashMap<>();
+        Set<String> arrayFieldsProcessed = new HashSet<>();
+
+        for (Map.Entry<String, String> entry : allFieldsWithValues.entrySet()) {
+            String field = entry.getKey();
+            String value = entry.getValue();
+
+            if (field.contains(".content[")
+                    && (field.contains(".reader.parameter.")
+                            || field.contains(".writer.parameter."))) {
+
+                String arrayField = getArrayFieldName(field);
+                if (arrayField != null) {
+                    // If it's an array element, only record the array itself, not each element
+                    if (!arrayFieldsProcessed.contains(arrayField)) {
+                        String arrayValues = collectArrayValues(allFieldsWithValues, arrayField);
+                        meaningfulFields.put(arrayField, arrayValues);
+                        arrayFieldsProcessed.add(arrayField);
+                        logger.debug("Processed array field: {} = {}", arrayField, arrayValues);
+                    }
+                } else {
+                    // Non-array field, add directly
+                    meaningfulFields.put(field, value);
+                }
+            } else if (field.contains(".setting.")) {
+                meaningfulFields.put(field, value);
+            }
+        }
+
+        logger.debug(
+                "Retained {} meaningful fields and their values after filtering (array fields merged)",
+                meaningfulFields.size());
+        return meaningfulFields;
+    }
+
+    /** Field reference tracker - track reference status of DataX fields */
+    public static class FieldReferenceTracker {
+        private final Map<String, String> fieldValues = new HashMap<>();
+        private final Map<String, Integer> referenceCount = new HashMap<>();
+
+        public void addField(String fieldPath, String value) {
+            fieldValues.put(fieldPath, value);
+            referenceCount.put(fieldPath, 0);
+        }
+
+        public void incrementReference(String fieldPath) {
+            referenceCount.put(fieldPath, referenceCount.getOrDefault(fieldPath, 0) + 1);
+        }
+
+        public Map<String, String> getUnreferencedFields() {
+            Map<String, String> unreferenced = new HashMap<>();
+            for (Map.Entry<String, Integer> entry : referenceCount.entrySet()) {
+                if (entry.getValue() == 0) {
+                    String fieldPath = entry.getKey();
+                    String value = fieldValues.get(fieldPath);
+                    unreferenced.put(fieldPath, value);
+                }
+            }
+            return unreferenced;
+        }
+
+        public int getTotalFields() {
+            return fieldValues.size();
+        }
+
+        public int getReferencedFieldCount() {
+            return (int) referenceCount.values().stream().filter(count -> count > 0).count();
+        }
+
+        public int getUnreferencedFieldCount() {
+            return (int) referenceCount.values().stream().filter(count -> count == 0).count();
+        }
+
+        public Map<String, String> getAllFields() {
+            return new HashMap<>(fieldValues);
+        }
+    }
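+
+    // Illustrative use of the tracker (variable names assumed):
+    //   FieldReferenceTracker tracker = extractor.createFieldReferenceTracker(dataXJson);
+    //   tracker.incrementReference("job.content[0].reader.parameter.username");
+    //   Map<String, String> unused = tracker.getUnreferencedFields(); // fields no template consumed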
+    /**
+     * Create a field reference tracker
+     *
+     * @param dataXJsonContent DataX JSON configuration content
+     * @return the field reference tracker
+     */
+    public FieldReferenceTracker createFieldReferenceTracker(String dataXJsonContent) {
+        FieldReferenceTracker tracker = new FieldReferenceTracker();
+
+        try {
+            Map<String, String> allFieldsWithValues = extractAllFieldsWithValues(dataXJsonContent);
+            Map<String, String> meaningfulFields =
+                    filterMeaningfulFieldsWithValues(allFieldsWithValues);
+
+            for (Map.Entry<String, String> entry : meaningfulFields.entrySet()) {
+                tracker.addField(entry.getKey(), entry.getValue());
+            }
+
+            logger.debug(
+                    "Created field reference tracker with {} fields", tracker.getTotalFields());
+            return tracker;
+
+        } catch (Exception e) {
+            logger.error("Failed to create field reference tracker: {}", e.getMessage(), e);
+            return tracker;
+        }
+    }
+
+    /**
+     * Check if a field is an array element. If so, return the array field name. For example:
+     * job.content[0].reader.parameter.column[1] -> job.content[0].reader.parameter.column
+     */
+    private String getArrayFieldName(String field) {
+        // Match pattern: xxx[number]
+        if (field.matches(".*\\[\\d+\\]$")) {
+            int lastBracket = field.lastIndexOf('[');
+            return field.substring(0, lastBracket);
+        }
+        return null;
+    }
+
+    /**
+     * Collect all values of an array field. For example: column[0]=id, column[1]=name -> "id,name"
+     */
+    private String collectArrayValues(Map<String, String> allFields, String arrayField) {
+        // Keep deterministic order by sorting values according to numeric index suffix
+        Pattern idx = Pattern.compile(Pattern.quote(arrayField) + "\\[(\\d+)\\]$");
+        TreeMap<Integer, String> ordered = new TreeMap<>();
+
+        for (Map.Entry<String, String> entry : allFields.entrySet()) {
+            Matcher m = idx.matcher(entry.getKey());
+            if (m.find()) {
+                int index = Integer.parseInt(m.group(1));
+                ordered.put(index, entry.getValue());
+            }
+        }
+
+        return String.join(",", ordered.values());
+    }
+}
diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/DirectoryProcessor.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/DirectoryProcessor.java
new file mode 100644
index 0000000..a197b1d
--- /dev/null
+++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/DirectoryProcessor.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.tools.x2seatunnel.util;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Batch processing directory scanning tool */
+public class DirectoryProcessor {
+    private final String inputDir;
+    private final String outputDir;
+
+    public DirectoryProcessor(String inputDir, String outputDir) {
+        this.inputDir = inputDir;
+        this.outputDir = outputDir;
+    }
+
+    /**
+     * Get all files to be converted, filtered by extension (JSON/XML/TXT)
+     *
+     * @return list of file paths
+     */
+    public List<String> listSourceFiles() {
+        List<String> result = new ArrayList<>();
+        try {
+            Files.walk(Paths.get(inputDir))
+                    .filter(Files::isRegularFile)
+                    .filter(
+                            path -> {
+                                String ext = FileUtils.getFileExtension(path.toString());
+                                return "json".equals(ext) || "xml".equals(ext) || "txt".equals(ext);
+                            })
+                    .forEach(path -> result.add(path.toString()));
+        } catch (IOException e) {
+            throw new RuntimeException("Failed to scan directory: " + inputDir, e);
+        }
+        return result;
+    }
+
+    /**
+     * Generate the target file path based on the source file path
+     *
+     * @param sourceFile the path of the source file
+     * @return the path of the target file
+     */
+    public String resolveTargetPath(String sourceFile) {
+        String name = FileUtils.getFileNameWithoutExtension(sourceFile);
+        return Paths.get(outputDir, name + ".conf").toString();
+    }
+
+    /**
+     * Generate the report file path based on the source file path
+     *
+     * @param sourceFile the path of the source file
+     * @return the path of the report file
+     */
+    public String resolveReportPath(String sourceFile) {
+        String name = FileUtils.getFileNameWithoutExtension(sourceFile);
+        return Paths.get(outputDir, name + ".md").toString();
+    }
+}
diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/FilePattern.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/FilePattern.java
new file mode 100644
index 0000000..fb0b2f6
--- /dev/null
+++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/FilePattern.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.tools.x2seatunnel.util;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+public class FilePattern {
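+
+    // Illustrative conversion performed by filter() below: the pattern "*.json" becomes the
+    // regex ".*\.json", so "jobs/a.json" matches while "jobs/a.xml" does not; a comma-separated
+    // list such as "*.json,*.xml" is OR-ed across the patterns.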
+
+    /**
+     * Filters the file list according to the wildcard patterns separated by commas.
+     *
+     * @param files The list of all file paths.
+     * @param patterns The wildcard patterns, such as "*.json,*.xml".
+     * @return The list of files that match the patterns.
+     */
+    public static List<String> filter(List<String> files, String patterns) {
+        if (patterns == null || patterns.trim().isEmpty()) {
+            return files;
+        }
+        String[] pats = patterns.split(",");
+        List<Pattern> regexList = new ArrayList<>();
+        for (String p : pats) {
+            String pat = p.trim().replace(".", "\\.").replace("*", ".*");
+            regexList.add(Pattern.compile(pat));
+        }
+        return files.stream()
+                .filter(f -> regexList.stream().anyMatch(r -> r.matcher(f).matches()))
+                .collect(Collectors.toList());
+    }
+}
diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/FileUtils.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/FileUtils.java
new file mode 100644
index 0000000..b2eedad
--- /dev/null
+++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/FileUtils.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.tools.x2seatunnel.util;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/** Utility class for file operations. */
+public class FileUtils {
+
+    private static final Logger logger = LoggerFactory.getLogger(FileUtils.class);
+
+    /**
+     * Read the content of a file.
+     *
+     * @param filePath The path to the file.
+     * @return The content of the file.
+     */
+    public static String readFile(String filePath) {
+        if (filePath == null || filePath.trim().isEmpty()) {
+            throw new RuntimeException("File path cannot be empty");
+        }
+
+        File file = new File(filePath);
+        if (!file.exists()) {
+            throw new RuntimeException("File does not exist: " + filePath);
+        }
+
+        if (!file.isFile()) {
+            throw new RuntimeException("Invalid file: " + filePath);
+        }
+
+        try {
+            logger.debug("Reading file: {}", filePath);
+            byte[] bytes = Files.readAllBytes(Paths.get(filePath));
+            String content = new String(bytes, StandardCharsets.UTF_8);
+            logger.debug("File read successfully, content length: {}", content.length());
+            return content;
+        } catch (IOException e) {
+            throw new RuntimeException("Failed to read file: " + filePath, e);
+        }
+    }
+
+    /**
+     * Write content to a file.
+     *
+     * @param filePath The path to the file.
+     * @param content The content to write.
+ */ + public static void writeFile(String filePath, String content) { + if (filePath == null || filePath.trim().isEmpty()) { + throw new RuntimeException("File path cannot be empty"); + } + + if (content == null) { + content = ""; + } + + try { + File file = new File(filePath); + // Create directory + File parentDir = file.getParentFile(); + if (parentDir != null && !parentDir.exists()) { + if (!parentDir.mkdirs()) { + throw new RuntimeException( + "Failed to create directory: " + parentDir.getAbsolutePath()); + } + } + logger.debug("Writing file: {}", filePath); + Files.write(Paths.get(filePath), content.getBytes(StandardCharsets.UTF_8)); + logger.debug("File written successfully, content length: {}", content.length()); + } catch (IOException e) { + throw new RuntimeException("Failed to write file: " + filePath, e); + } + } + + /** + * Check if a file exists. + * + * @param filePath The path to the file. + * @return True if the file exists, false otherwise. + */ + public static boolean exists(String filePath) { + if (filePath == null || filePath.trim().isEmpty()) { + return false; + } + return new File(filePath).exists(); + } + + /** + * Create a directory. + * + * @param dirPath The path to the directory. + */ + public static void createDirectory(String dirPath) { + if (dirPath == null || dirPath.trim().isEmpty()) { + throw new RuntimeException("Directory path cannot be empty"); + } + Path path = Paths.get(dirPath); + if (!Files.exists(path)) { + try { + Files.createDirectories(path); + logger.debug("Directory created successfully: {}", dirPath); + } catch (IOException e) { + throw new RuntimeException("Failed to create directory: " + dirPath, e); + } + } + } + + /** + * Get the file extension. + * + * @param filePath The path to the file. + * @return The file extension or an empty string if there is none. + */ + public static String getFileExtension(String filePath) { + if (filePath == null || filePath.trim().isEmpty()) { + return ""; + } + int lastDotIndex = filePath.lastIndexOf('.'); + if (lastDotIndex == -1 || lastDotIndex == filePath.length() - 1) { + return ""; + } + return filePath.substring(lastDotIndex + 1).toLowerCase(); + } + + /** + * Get the file name without the extension. + * + * @param filePath The path to the file. + * @return The file name without the extension. + */ + public static String getFileNameWithoutExtension(String filePath) { + if (filePath == null || filePath.trim().isEmpty()) { + return ""; + } + + String fileName = Paths.get(filePath).getFileName().toString(); + int lastDotIndex = fileName.lastIndexOf('.'); + if (lastDotIndex == -1) { + return fileName; + } + + return fileName.substring(0, lastDotIndex); + } + + /** + * Read a resource file from the classpath. + * + * @param resourcePath The path to the resource (relative to the classpath root). + * @return The content of the resource file, or null if the file does not exist. 
+ */ + public static String readResourceFile(String resourcePath) { + if (resourcePath == null || resourcePath.trim().isEmpty()) { + throw new RuntimeException("Resource path cannot be empty"); + } + + try { + logger.debug("Reading classpath resource: {}", resourcePath); + + // Get the resource input stream + InputStream inputStream = FileUtils.class.getResourceAsStream(resourcePath); + if (inputStream == null) { + logger.debug("Classpath resource does not exist: {}", resourcePath); + return null; + } + + // Read the stream content using a BufferedReader (Java 8 compatible) + try (java.io.BufferedReader reader = + new java.io.BufferedReader( + new java.io.InputStreamReader(inputStream, StandardCharsets.UTF_8))) { + + StringBuilder sb = new StringBuilder(); + String line; + while ((line = reader.readLine()) != null) { + if (sb.length() > 0) { + sb.append("\n"); + } + sb.append(line); + } + + String content = sb.toString(); + logger.debug( + "Resource file read successfully, content length: {}", content.length()); + return content; + } + + } catch (IOException e) { + logger.warn("Failed to read classpath resource: {}", resourcePath, e); + return null; + } + } +} diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/PathResolver.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/PathResolver.java new file mode 100644 index 0000000..d4d241d --- /dev/null +++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/PathResolver.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.tools.x2seatunnel.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.net.URL; +import java.nio.file.Paths; + +/** X2SeaTunnel Intelligent Path Resolver */ +public class PathResolver { + + private static final Logger logger = LoggerFactory.getLogger(PathResolver.class); + + private static final String X2SEATUNNEL_HOME_PROPERTY = "X2SEATUNNEL_HOME"; + private static final String CONFIG_TEMPLATES_DIR = "templates"; + private static final String RESOURCE_TEMPLATES_PREFIX = "/templates"; + + private static String cachedHomePath = null; + + public static String getHomePath() { + if (cachedHomePath != null) { + return cachedHomePath; + } + + // 1. Priority: use system property (set by script) + String homePath = System.getProperty(X2SEATUNNEL_HOME_PROPERTY); + if (homePath != null && !homePath.trim().isEmpty()) { + cachedHomePath = new File(homePath).getAbsolutePath(); + logger.info("Using system property X2SEATUNNEL_HOME: {}", cachedHomePath); + return cachedHomePath; + } + + // 2. 
Automatically detect the JAR location to infer the home directory + homePath = autoDetectHomePath(); + if (homePath != null) { + cachedHomePath = homePath; + logger.info("Auto-detected X2SEATUNNEL_HOME: {}", cachedHomePath); + return cachedHomePath; + } + + // 3. Fallback to the current working directory + cachedHomePath = System.getProperty("user.dir"); + logger.warn( + "Unable to detect X2SEATUNNEL_HOME, using current working directory: {}", + cachedHomePath); + return cachedHomePath; + } + + /** Automatically detect the home directory path (based on JAR location) */ + private static String autoDetectHomePath() { + try { + // Get the location of the JAR file where the current class is located + URL classUrl = PathResolver.class.getProtectionDomain().getCodeSource().getLocation(); + if (classUrl != null) { + File jarFile = new File(classUrl.toURI()); + if (jarFile.isFile() && jarFile.getName().endsWith(".jar")) { + File parentDir = jarFile.getParentFile(); // lib/ or bin/ + if (parentDir != null) { + if ("lib".equals(parentDir.getName()) + || "bin".equals(parentDir.getName())) { + return parentDir.getParentFile().getAbsolutePath(); // x2seatunnel/ + } + } + } + + // If it is a development environment (target/classes), find the root directory of + // the x2seatunnel module + if (jarFile.getPath().contains("target" + File.separator + "classes")) { + File current = jarFile; + while (current != null) { + // Find the root directory of the x2seatunnel module + if (isX2SeaTunnelModuleRoot(current)) { + return current.getAbsolutePath(); + } + current = current.getParentFile(); + } + } + } + } catch (Exception e) { + logger.debug("Failed to auto-detect home directory: {}", e.getMessage()); + } + + return null; + } + + private static boolean isX2SeaTunnelModuleRoot(File dir) { + if (dir == null || !dir.isDirectory()) { + return false; + } + + return new File(dir, "pom.xml").exists() + && new File(dir, "src").exists() + && (new File(dir, "config").exists() + || new File(dir, "examples").exists() + || dir.getName().equals("x2seatunnel")); + } + + /** + * Resolve the template file path + * + * @param templatePath The template file path (can be an absolute or relative path) + * @return The resolved full path + */ + public static String resolveTemplatePath(String templatePath) { + if (templatePath == null || templatePath.trim().isEmpty()) { + throw new IllegalArgumentException("Template path cannot be empty"); + } + + templatePath = templatePath.trim(); + + // 1. If it is an absolute path, return it directly + if (Paths.get(templatePath).isAbsolute()) { + return templatePath; + } + + // 2. Look for it relative to the current working directory + File currentDirFile = new File(templatePath); + if (currentDirFile.exists()) { + String absolutePath = currentDirFile.getAbsolutePath(); + logger.info("Found template from current directory: {}", absolutePath); + return absolutePath; + } + + // 3. Look for it relative to X2SEATUNNEL_HOME/templates + String homePath = getHomePath(); + String homeTemplatePath = + Paths.get(homePath, CONFIG_TEMPLATES_DIR, templatePath).toString(); + File homeTemplateFile = new File(homeTemplatePath); + if (homeTemplateFile.exists()) { + logger.info("Found template from home directory configuration: {}", homeTemplatePath); + return homeTemplatePath; + } + + // 4. 
Try the development environment path (seatunnel/config/x2seatunnel/templates) + String devTemplatePath = + Paths.get(homePath, "config/x2seatunnel/templates", templatePath).toString(); + File devTemplateFile = new File(devTemplatePath); + if (devTemplateFile.exists()) { + logger.info( + "Found template from development environment configuration: {}", + devTemplatePath); + return devTemplatePath; + } + + // 5. If not found, return null, let the caller handle classpath lookup + logger.warn("Template file not found in the file system: {}", templatePath); + return null; + } + + /** + * Build the resource path (for classpath lookup) + * + * @param templatePath The template path + * @return The classpath resource path + */ + public static String buildResourcePath(String templatePath) { + if (!templatePath.startsWith("/")) { + templatePath = "/" + templatePath; + } + + // If it already contains the full path, return it directly + if (templatePath.startsWith(RESOURCE_TEMPLATES_PREFIX)) { + return templatePath; + } + + // Otherwise, concatenate the standard prefix + return RESOURCE_TEMPLATES_PREFIX + templatePath; + } + + public static String getConfigTemplatesDir() { + return Paths.get(getHomePath(), CONFIG_TEMPLATES_DIR).toString(); + } + + public static boolean exists(String path) { + return path != null && new File(path).exists(); + } +} diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/TemplateFieldExtractor.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/TemplateFieldExtractor.java new file mode 100644 index 0000000..2ae5675 --- /dev/null +++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/TemplateFieldExtractor.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.seatunnel.tools.x2seatunnel.util;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/** Template field extractor - Extracts DataX field paths referenced in the template */
+public class TemplateFieldExtractor {
+
+    private static final Logger logger = LoggerFactory.getLogger(TemplateFieldExtractor.class);
+
+    // Regex for matching template variables: {{ datax.xxx }}
+    private static final Pattern DATAX_VARIABLE_PATTERN =
+            Pattern.compile("\\{\\{\\s*datax\\.([^}|\\s]+)(?:\\s*\\|[^}]*)?\\s*\\}\\}");
+
+    /**
+     * Extract all referenced DataX field paths from the template content
+     *
+     * @param templateContent The template content
+     * @return The set of referenced DataX field paths
+     */
+    public Set<String> extractReferencedFields(String templateContent) {
+        Set<String> referencedFields = new HashSet<>();
+
+        if (templateContent == null || templateContent.trim().isEmpty()) {
+            return referencedFields;
+        }
+
+        Matcher matcher = DATAX_VARIABLE_PATTERN.matcher(templateContent);
+
+        while (matcher.find()) {
+            String fieldPath = matcher.group(1); // Extract the part after datax.
+            String normalizedPath = normalizeFieldPath(fieldPath);
+            referencedFields.add(normalizedPath);
+
+            logger.debug(
+                    "Extracted template reference field: {} -> {}",
+                    matcher.group(0),
+                    normalizedPath);
+        }
+
+        logger.debug("Extracted {} referenced fields from the template", referencedFields.size());
+        return referencedFields;
+    }
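+
+    // Illustrative usage sketch (added commentary, not part of the original patch):
+    // given a template fragment, the extractor returns the DataX JSON paths it
+    // references, as captured by DATAX_VARIABLE_PATTERN above.
+    //
+    //   TemplateFieldExtractor extractor = new TemplateFieldExtractor();
+    //   Set<String> fields = extractor.extractReferencedFields(
+    //           "user = \"{{ datax.job.content[0].reader.parameter.username }}\"");
+    //   // fields -> ["job.content[0].reader.parameter.username"]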
+
+    /**
+     * Extract all referenced DataX field paths from multiple template contents
+     *
+     * @param templateContents Multiple template contents
+     * @return The set of referenced DataX field paths
+     */
+    public Set<String> extractReferencedFields(String... templateContents) {
+        Set<String> allReferencedFields = new HashSet<>();
+
+        for (String templateContent : templateContents) {
+            if (templateContent != null) {
+                Set<String> fields = extractReferencedFields(templateContent);
+                allReferencedFields.addAll(fields);
+            }
+        }
+
+        logger.debug(
+                "Extracted {} referenced fields from {} templates",
+                allReferencedFields.size(),
+                templateContents.length);
+        return allReferencedFields;
+    }
+
+    /**
+     * Normalize the field path, converting the template path format to a format consistent with
+     * DataX JSON paths
+     *
+     * @param fieldPath The original field path
+     * @return The normalized field path
+     */
+    private String normalizeFieldPath(String fieldPath) {
+        // Template paths such as job.content[0].reader.parameter.username already use
+        // the same notation as DataX JSON paths, so the path is returned unchanged.
+        return fieldPath;
+    }
+
+    /**
+     * Check if the template content contains DataX variable references
+     *
+     * @param templateContent The template content
+     * @return Whether it contains DataX variable references
+     */
+    public boolean containsDataXReferences(String templateContent) {
+        if (templateContent == null || templateContent.trim().isEmpty()) {
+            return false;
+        }
+
+        return DATAX_VARIABLE_PATTERN.matcher(templateContent).find();
+    }
+
+    /**
+     * Get detailed information of all DataX variables in the template (including filters)
+     *
+     * @param templateContent The template content
+     * @return The set of variable details
+     */
+    public Set<String> extractVariableDetails(String templateContent) {
+        Set<String> variableDetails = new HashSet<>();
+
+        if (templateContent == null || templateContent.trim().isEmpty()) {
+            return variableDetails;
+        }
+
+        Matcher matcher = DATAX_VARIABLE_PATTERN.matcher(templateContent);
+
+        while (matcher.find()) {
+            String fullVariable = matcher.group(0);
+            variableDetails.add(fullVariable);
+
+            logger.trace("Extracted variable details: {}", fullVariable);
+        }
+
+        return variableDetails;
+    }
+}
diff --git a/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/YamlConfigParser.java b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/YamlConfigParser.java
new file mode 100644
index 0000000..d333970
--- /dev/null
+++ b/x2seatunnel/src/main/java/org/apache/seatunnel/tools/x2seatunnel/util/YamlConfigParser.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.tools.x2seatunnel.util;
+
+import org.yaml.snakeyaml.Yaml;
+
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Map;
+
+/** Parse the YAML configuration file and map it to the ConversionConfig object */
+public class YamlConfigParser {
+    @SuppressWarnings("unchecked")
+    public static ConversionConfig parse(String yamlPath) {
+        try (InputStream in = Files.newInputStream(Paths.get(yamlPath))) {
+            Yaml yaml = new Yaml();
+            Map<String, Object> obj = yaml.load(in);
+            ConversionConfig config = new ConversionConfig();
+            if (obj.containsKey("source")) {
+                Object s = obj.get("source");
+                if (s instanceof Map) {
+                    config.setSource(((Map<String, String>) s).get("path"));
+                } else if (s instanceof String) {
+                    config.setSource((String) s);
+                }
+            }
+            if (obj.containsKey("target")) {
+                config.setTarget((String) obj.get("target"));
+            }
+            if (obj.containsKey("report")) {
+                config.setReport((String) obj.get("report"));
+            }
+            if (obj.containsKey("template")) {
+                config.setTemplate((String) obj.get("template"));
+            }
+            if (obj.containsKey("sourceType")) {
+                config.setSourceType((String) obj.get("sourceType"));
+            }
+            if (obj.containsKey("options")) {
+                Map<String, Object> opt = (Map<String, Object>) obj.get("options");
+                if (Boolean.TRUE.equals(opt.get("verbose"))) {
+                    config.setVerbose(true);
+                }
+            }
+            return config;
+        } catch (Exception e) {
+            throw new RuntimeException("Failed to load YAML configuration: " + e.getMessage(), e);
+        }
+    }
+}
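+
+// Illustrative usage sketch (added commentary, not from the original patch); the
+// getter names are assumed to mirror the setters used above:
+//
+//   ConversionConfig config =
+//           YamlConfigParser.parse("examples/yaml/datax-mysql2hdfs2hive.yaml");
+//   // config.getSource()   -> "examples/source/datax-mysql2hdfs2hive.json"
+//   // config.getTemplate() -> "datax/custom/mysql-to-hive.conf"
+//   // config.isVerbose()   -> true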
diff --git a/x2seatunnel/src/main/resources/bin/x2seatunnel.sh b/x2seatunnel/src/main/resources/bin/x2seatunnel.sh
new file mode 100644
index 0000000..fcfa05f
--- /dev/null
+++ b/x2seatunnel/src/main/resources/bin/x2seatunnel.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# X2SeaTunnel configuration conversion tool startup script
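+
+# Usage sketch (illustrative): the short option names follow CommandLineOptions
+# (-s source, -t target, -st source-type); the target path below is hypothetical.
+#   bin/x2seatunnel.sh -s examples/source/datax-mysql2hdfs.json \
+#       -t /tmp/mysql2hdfs.conf -st datax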
+
+set -e
+
+# Get script directory
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+X2SEATUNNEL_HOME="$(dirname "$SCRIPT_DIR")"
+
+# Set X2SeaTunnel related environment variables
+export X2SEATUNNEL_CONFIG_DIR="$X2SEATUNNEL_HOME/config"
+export X2SEATUNNEL_TEMPLATES_DIR="$X2SEATUNNEL_HOME/templates"
+
+# Find X2SeaTunnel JAR file
+find_jar() {
+    local jar_file=""
+
+    # 1. First search in packaged lib directory (production environment)
+    if [ -d "$X2SEATUNNEL_HOME/lib" ]; then
+        jar_file=$(find "$X2SEATUNNEL_HOME/lib" -name "x2seatunnel-*.jar" 2>/dev/null | head -1)
+    fi
+
+    # 2. Search in starter directory (SeaTunnel standard directory structure)
+    if [ -z "$jar_file" ] && [ -d "$X2SEATUNNEL_HOME/starter" ]; then
+        jar_file=$(find "$X2SEATUNNEL_HOME/starter" -name "x2seatunnel-*.jar" 2>/dev/null | head -1)
+    fi
+
+    # 3. If running in development environment resource directory, locate target directory of x2seatunnel module root
+    module_root="$(cd "$SCRIPT_DIR/../../../../" && pwd)"
+    if [ -z "$jar_file" ] && [ -d "$module_root/target" ]; then
+        jar_file=$(find "$module_root/target" -name "x2seatunnel-*.jar" 2>/dev/null | grep -v sources | head -1)
+    fi
+
+    if [ -z "$jar_file" ] || [ ! -f "$jar_file" ]; then
+        # Diagnostics go to stderr so they are not captured by command substitution
+        echo "Error: X2SeaTunnel JAR file not found" >&2
+        echo "Search paths:" >&2
+        echo "  - $X2SEATUNNEL_HOME/lib/" >&2
+        echo "  - $X2SEATUNNEL_HOME/starter/" >&2
+        echo "  - $module_root/target/" >&2
+        echo "" >&2
+        echo "If in development environment, please compile first: mvn clean package -pl seatunnel-tools/x2seatunnel -am" >&2
+        exit 1
+    fi
+
+    echo "$jar_file"
+}
+
+# Check Java environment
+check_java() {
+    if [ -n "$JAVA_HOME" ]; then
+        JAVA_CMD="$JAVA_HOME/bin/java"
+    else
+        JAVA_CMD="java"
+    fi
+
+    if ! command -v "$JAVA_CMD" > /dev/null 2>&1; then
+        echo "Error: Java not found, please ensure JAVA_HOME is set correctly or java is in PATH"
+        exit 1
+    fi
+
+    # Check Java version
+    java_version=$("$JAVA_CMD" -version 2>&1 | head -1 | cut -d'"' -f2)
+    case "$java_version" in
+        1.8*)
+            java_major_version=8
+            ;;
+        *)
+            java_major_version=$(echo "$java_version" | cut -d'.' -f1)
+            ;;
+    esac
+
+    if [ "$java_major_version" -lt 8 ]; then
+        echo "Error: Java 8 or higher is required, current version: $java_version"
+        exit 1
+    fi
+}
+
+# Main function
+main() {
+    echo "Starting X2SeaTunnel configuration conversion tool..."
+
+    # Check Java environment
+    check_java
+
+    # Find JAR file; abort if the lookup failed
+    CLI_JAR=$(find_jar) || exit 1
+    echo "Using JAR: $CLI_JAR"
+    echo "Java command: $JAVA_CMD"
+
+    # Set JVM parameters
+    JVM_OPTS="-Xms512m -Xmx1024m"
+
+    # Set log configuration file path
+    LOG4J2_CONFIG="$X2SEATUNNEL_CONFIG_DIR/log4j2.xml"
+    if [ -f "$LOG4J2_CONFIG" ]; then
+        JVM_OPTS="$JVM_OPTS -Dlog4j.configurationFile=$LOG4J2_CONFIG"
+        echo "Using log configuration: $LOG4J2_CONFIG"
+    else
+        echo "Warning: Log configuration file does not exist: $LOG4J2_CONFIG"
+    fi
+
+    # Set log directory
+    LOG_DIR="$X2SEATUNNEL_HOME/logs"
+    mkdir -p "$LOG_DIR"
+
+    # Run the CLI; quote "$@" so arguments containing spaces survive
+    echo
+    "$JAVA_CMD" $JVM_OPTS \
+        -DX2SEATUNNEL_HOME="$X2SEATUNNEL_HOME" \
+        -DX2SEATUNNEL_CONFIG_DIR="$X2SEATUNNEL_CONFIG_DIR" \
+        -DX2SEATUNNEL_TEMPLATES_DIR="$X2SEATUNNEL_TEMPLATES_DIR" \
+        -jar "$CLI_JAR" "$@"
+}
+
+# Run main function
+main "$@"
diff --git a/x2seatunnel/src/main/resources/config/log4j2.xml b/x2seatunnel/src/main/resources/config/log4j2.xml
new file mode 100644
index 0000000..2f3c380
--- /dev/null
+++ b/x2seatunnel/src/main/resources/config/log4j2.xml
@@ -0,0 +1,49 @@
+<!-- [The 49-line Log4j2 XML configuration is not recoverable from this patch text; the XML elements were stripped during extraction.] -->
diff --git a/x2seatunnel/src/main/resources/examples/report/.gitkeep b/x2seatunnel/src/main/resources/examples/report/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/x2seatunnel/src/main/resources/examples/source/datax-hdfs2mysql.json b/x2seatunnel/src/main/resources/examples/source/datax-hdfs2mysql.json
new file mode 100644
index 0000000..5531c54
--- /dev/null
+++ b/x2seatunnel/src/main/resources/examples/source/datax-hdfs2mysql.json
@@ -0,0 +1,38 @@
+{
+    "job": {
+        "setting": {
+            "speed": {
+                "channel": 4
+            }
+        },
+        "content": [
+            {
+                "reader": {
+                    "name": "hdfsreader",
+                    "parameter": {
+                        "defaultFS": "hdfs://localhost:9000",
+                        "path": "/data/logs/*.txt",
+                        "fileType": "text",
+                        "fieldDelimiter": "\t",
+                        "column": ["timestamp", "level", "service", "message"]
+                    }
+                },
+                "writer": {
"name": "mysqlwriter", + "parameter": { + "username": "root", + "password": "123456", + "connection": [ + { + "jdbcUrl": "jdbc:mysql://localhost:3306/logs", + "table": ["system_logs"] + } + ], + "column": ["log_time", "log_level", "service_name", "log_message"], + "writeMode": "insert" + } + } + } + ] + } +} diff --git a/x2seatunnel/src/main/resources/examples/source/datax-mysql2hdfs-full.json b/x2seatunnel/src/main/resources/examples/source/datax-mysql2hdfs-full.json new file mode 100644 index 0000000..80e4e28 --- /dev/null +++ b/x2seatunnel/src/main/resources/examples/source/datax-mysql2hdfs-full.json @@ -0,0 +1,75 @@ +{ + "job": { + "setting": { + "speed": { + "channel": 3 + } + }, + "content": [ + { + "reader": { + "name": "mysqlreader", + "parameter": { + "username": "root", + "password": "password", + "column": [ + "id", + "name", + "age", + "email", + "created_at" + ], + "splitPk": "id", + "connection": [ + { + "table": [ + "user_info" + ], + "jdbcUrl": [ + "jdbc:mysql://localhost:3306/test_db?useSSL=false&serverTimezone=UTC" + ] + } + ], + "fetchSize": 1000, + "where": "age > 18" + } + }, + "writer": { + "name": "hdfswriter", + "parameter": { + "defaultFS": "hdfs://localhost:9000", + "fileType": "text", + "path": "/user/seatunnel/output/mysql_data", + "fileName": "user_info", + "column": [ + { + "name": "id", + "type": "bigint" + }, + { + "name": "name", + "type": "string" + }, + { + "name": "age", + "type": "int" + }, + { + "name": "email", + "type": "string" + }, + { + "name": "created_at", + "type": "timestamp" + } + ], + "writeMode": "append", + "fieldDelimiter": "\t", + "compress": "none", + "encoding": "UTF-8" + } + } + } + ] + } +} diff --git a/x2seatunnel/src/main/resources/examples/source/datax-mysql2hdfs.json b/x2seatunnel/src/main/resources/examples/source/datax-mysql2hdfs.json new file mode 100644 index 0000000..485d47a --- /dev/null +++ b/x2seatunnel/src/main/resources/examples/source/datax-mysql2hdfs.json @@ -0,0 +1,43 @@ +{ + "job": { + "setting": { + "speed": { + "channel": 3 + } + }, + "content": [ + { + "reader": { + "name": "mysqlreader", + "parameter": { + "username": "root", + "password": "1234567", + "connection": [ + { + "jdbcUrl": ["jdbc:mysql://localhost:3306/testdb"], + "table": ["users"] + } + ], + "column": ["id", "name", "age", "email", "create_time"], + "splitPk": "id" + } + }, + "writer": { + "name": "hdfswriter", + "parameter": { + "defaultFS": "hdfs://localhost:9000", + "path": "/data/users", + "fileName": "users_export_${now}", + "fileType": "text", + "fieldDelimiter": "\t", + "rowDelimiter": "\n", + "writeMode": "append", + "compress": "gzip", + "encoding": "UTF-8", + "batchSize": 50000 + } + } + } + ] + } +} diff --git a/x2seatunnel/src/main/resources/examples/source/datax-mysql2hdfs2hive.json b/x2seatunnel/src/main/resources/examples/source/datax-mysql2hdfs2hive.json new file mode 100644 index 0000000..537af92 --- /dev/null +++ b/x2seatunnel/src/main/resources/examples/source/datax-mysql2hdfs2hive.json @@ -0,0 +1,94 @@ +{ + "job": { + "setting": { + "speed": { + "channel": 3 + }, + "errorLimit": { + "record": 0, + "percentage": 0.02 + } + }, + "content": [ + { + "reader": { + "name": "mysqlreader", + "parameter": { + "username": " ==", + "password": "a+ ==", + "column": [ + "`id`", + "`mainid`", + "`detail_signdate`", + "`detail_scheduletime`", + "`detail_attestatus`", + "`detail_signtime`", + "`detail_signtype`" + ], + "where": "", + "splitPk": "", + "connection": [ + { + "table": [ + "formtable_main_41_dt1" + ], + "jdbcUrl": [ + 
"jdbc:mysql://10.0.0.0:3306/ecology?useUnicode=true&characterEncoding=UTF-8&useSSL=false" + ] + } + ] + } + }, + "writer": { + "name": "hdfswriter", + "parameter": { + "defaultFS": "hdfs://nameservice1", + "fileType": "PAR", + "compress": "SNAPPY", + "path": "/user/hive/warehouse/test_ods.db/test_table/${partition}", + "fileName": "test_table", + "writeMode": "append", + "fieldDelimiter": "|", + "hadoopConfig": { + "dfs.nameservices": "nameservice1", + "dfs.ha.namenodes.nameservice1": "namenode1,namenode2", + "dfs.namenode.rpc-address.nameservice1.namenode1": "bi-prod-cdh-0001:8020", + "dfs.namenode.rpc-address.nameservice1.namenode2": "bi-prod-cdh-0002:8020", + "dfs.client.failover.proxy.provider.nameservice1": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" + }, + "column": [ + { + "name": "id", + "type": "int" + }, + { + "name": "mainid", + "type": "int" + }, + { + "name": "detail_signdate", + "type": "string" + }, + { + "name": "detail_scheduletime", + "type": "string" + }, + { + "name": "detail_attestatus", + "type": "string" + }, + { + "name": "detail_signtime", + "type": "string" + }, + { + "name": "detail_signtype", + "type": "int" + } + ] + } + } + } + ] + } +} \ No newline at end of file diff --git a/x2seatunnel/src/main/resources/examples/source/datax-mysql2mysql-full.json b/x2seatunnel/src/main/resources/examples/source/datax-mysql2mysql-full.json new file mode 100644 index 0000000..ffce013 --- /dev/null +++ b/x2seatunnel/src/main/resources/examples/source/datax-mysql2mysql-full.json @@ -0,0 +1,63 @@ +{ + "job": { + "setting": { + "speed": { + "channel": 3, + "record": 50000 + }, + "errorLimit": { + "record": 0, + "percentage": 0.02 + } + }, + "content": [ + { + "reader": { + "name": "mysqlreader", + "parameter": { + "username": "username", + "password": "password", + "connection": [ + { + "jdbcUrl": ["jdbc:mysql://192.168.1.1:3306/test?useUnicode=true&characterEncoding=UTF-8&serverTimezone=Asia/Shanghai&useSSL=false"], + "table": ["customer", "customer_profile"] + } + ], + "column": ["customer_id", "customer_name", "email", "phone", "region", "registration_date", "last_login", "status"], + "splitPk": "customer_id", + "where": "status IN ('active', 'premium') AND registration_date >= '2024-01-01'", + "fetchSize": 2000 + } + }, + "writer": { + "name": "mysqlwriter", + "parameter": { + "writeMode": "replace", + "username": "username", + "password": "password", + "column": ["customer_id", "customer_name", "email", "phone", "region", "registration_date", "last_login", "status"], + "session": [ + "set session sql_mode='STRICT_TRANS_TABLES'", + "set session innodb_lock_wait_timeout=120" + ], + "preSql": [ + "CREATE TABLE IF NOT EXISTS @table LIKE template_customer", + "TRUNCATE TABLE @table" + ], + "postSql": [ + "UPDATE @table SET sync_time = NOW() WHERE sync_time IS NULL", + "ANALYZE TABLE @table" + ], + "connection": [ + { + "jdbcUrl": "jdbc:mysql://192.168.1.200:3306/datawarehouse?useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true&yearIsDateType=false&zeroDateTimeBehavior=convertToNull&serverTimezone=Asia/Shanghai&useSSL=false", + "table": ["dw_customer_snapshot"] + } + ], + "batchSize": 2000 + } + } + } + ] + } +} diff --git a/x2seatunnel/src/main/resources/examples/source/datax-mysql2mysql.json b/x2seatunnel/src/main/resources/examples/source/datax-mysql2mysql.json new file mode 100644 index 0000000..8358911 --- /dev/null +++ b/x2seatunnel/src/main/resources/examples/source/datax-mysql2mysql.json @@ -0,0 +1,45 @@ +{ + "job": { + 
"setting": { + "speed": { + "channel": 2 + } + }, + "content": [ + { + "reader": { + "name": "mysqlreader", + "parameter": { + "username": "source_user", + "password": "source_password", + "connection": [ + { + "jdbcUrl": ["jdbc:mysql://source-db:3306/test_db?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC"], + "table": ["user_profile"] + } + ], + "column": ["id", "username", "email", "phone", "status", "created_at", "updated_at"], + "splitPk": "id", + "where": "status = 'active'" + } + }, + "writer": { + "name": "mysqlwriter", + "parameter": { + "writeMode": "insert", + "username": "target_user", + "password": "target_password", + "column": ["id", "username", "email", "phone", "status", "created_at", "updated_at"], + "connection": [ + { + "jdbcUrl": "jdbc:mysql://target-db:3306/warehouse_db?useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true&serverTimezone=UTC", + "table": ["dim_user_profile"] + } + ], + "batchSize": 1000 + } + } + } + ] + } +} diff --git a/x2seatunnel/src/main/resources/examples/source/datax-oracle2hdfs-full.json b/x2seatunnel/src/main/resources/examples/source/datax-oracle2hdfs-full.json new file mode 100644 index 0000000..be86c83 --- /dev/null +++ b/x2seatunnel/src/main/resources/examples/source/datax-oracle2hdfs-full.json @@ -0,0 +1,75 @@ +{ + "job": { + "setting": { + "speed": { + "channel": 2 + } + }, + "content": [ + { + "reader": { + "name": "oraclereader", + "parameter": { + "username": "scott", + "password": "tiger", + "column": [ + "EMP_ID", + "EMP_NAME", + "DEPARTMENT", + "SALARY", + "HIRE_DATE" + ], + "connection": [ + { + "table": [ + "EMPLOYEES" + ], + "jdbcUrl": [ + "jdbc:oracle:thin:@localhost:1521:orcl" + ] + } + ], + "fetchSize": 500, + "where": "SALARY > 5000", + "splitPk": "EMP_ID" + } + }, + "writer": { + "name": "hdfswriter", + "parameter": { + "defaultFS": "hdfs://localhost:9000", + "fileType": "text", + "path": "/user/seatunnel/output/oracle_data", + "fileName": "employees", + "column": [ + { + "name": "EMP_ID", + "type": "bigint" + }, + { + "name": "EMP_NAME", + "type": "string" + }, + { + "name": "DEPARTMENT", + "type": "string" + }, + { + "name": "SALARY", + "type": "decimal" + }, + { + "name": "HIRE_DATE", + "type": "date" + } + ], + "writeMode": "append", + "fieldDelimiter": "|", + "compress": "none", + "encoding": "UTF-8" + } + } + } + ] + } +} diff --git a/x2seatunnel/src/main/resources/examples/source/datax-postgresql2hdfs-full.json b/x2seatunnel/src/main/resources/examples/source/datax-postgresql2hdfs-full.json new file mode 100644 index 0000000..fdb7ff6 --- /dev/null +++ b/x2seatunnel/src/main/resources/examples/source/datax-postgresql2hdfs-full.json @@ -0,0 +1,75 @@ +{ + "job": { + "setting": { + "speed": { + "channel": 2 + } + }, + "content": [ + { + "reader": { + "name": "postgresqlreader", + "parameter": { + "username": "postgres", + "password": "password", + "column": [ + "id", + "product_name", + "price", + "category", + "created_date" + ], + "connection": [ + { + "table": [ + "products" + ], + "jdbcUrl": [ + "jdbc:postgresql://localhost:5432/ecommerce?useSSL=false" + ] + } + ], + "fetchSize": 2000, + "where": "price > 100", + "splitPk": "id" + } + }, + "writer": { + "name": "hdfswriter", + "parameter": { + "defaultFS": "hdfs://localhost:9000", + "fileType": "text", + "path": "/user/seatunnel/output/postgresql_data", + "fileName": "products", + "column": [ + { + "name": "id", + "type": "bigint" + }, + { + "name": "product_name", + "type": "string" + }, + { + "name": "price", + "type": "decimal" + }, + { 
+ "name": "category", + "type": "string" + }, + { + "name": "created_date", + "type": "date" + } + ], + "writeMode": "overwrite", + "fieldDelimiter": ",", + "compress": "gzip", + "encoding": "UTF-8" + } + } + } + ] + } +} diff --git a/x2seatunnel/src/main/resources/examples/source/datax-postgresql2hdfs.json b/x2seatunnel/src/main/resources/examples/source/datax-postgresql2hdfs.json new file mode 100644 index 0000000..b960c5e --- /dev/null +++ b/x2seatunnel/src/main/resources/examples/source/datax-postgresql2hdfs.json @@ -0,0 +1,40 @@ +{ + "job": { + "setting": { + "speed": { + "channel": 1 + } + }, + "content": [ + { + "reader": { + "name": "postgresqlreader", + "parameter": { + "username": "postgres", + "password": "postgres123", + "connection": [ + { + "jdbcUrl": ["jdbc:postgresql://localhost:5432/analytics"], + "table": ["user_behavior"] + } + ], + "column": ["user_id", "action", "timestamp", "ip_address", "user_agent"], + "splitPk": "user_id" + } + }, + "writer": { + "name": "hdfswriter", + "parameter": { + "defaultFS": "hdfs://localhost:9000", + "path": "/analytics/user_behavior", + "fileName": "behavior_export", + "fileType": "text", + "fieldDelimiter": ",", + "writeMode": "overwrite", + "compress": "gzip" + } + } + } + ] + } +} diff --git a/x2seatunnel/src/main/resources/examples/source/datax-sqlserver2hdfs-full.json b/x2seatunnel/src/main/resources/examples/source/datax-sqlserver2hdfs-full.json new file mode 100644 index 0000000..59042c4 --- /dev/null +++ b/x2seatunnel/src/main/resources/examples/source/datax-sqlserver2hdfs-full.json @@ -0,0 +1,75 @@ +{ + "job": { + "setting": { + "speed": { + "channel": 4 + } + }, + "content": [ + { + "reader": { + "name": "sqlserverreader", + "parameter": { + "username": "sa", + "password": "Password123", + "column": [ + "OrderID", + "CustomerID", + "OrderDate", + "TotalAmount", + "Status" + ], + "connection": [ + { + "table": [ + "Orders" + ], + "jdbcUrl": [ + "jdbc:sqlserver://localhost:1433;DatabaseName=SalesDB;encrypt=false" + ] + } + ], + "fetchSize": 1500, + "where": "TotalAmount > 1000", + "splitPk": "OrderID" + } + }, + "writer": { + "name": "hdfswriter", + "parameter": { + "defaultFS": "hdfs://localhost:9000", + "fileType": "text", + "path": "/user/seatunnel/output/sqlserver_data", + "fileName": "orders", + "column": [ + { + "name": "OrderID", + "type": "bigint" + }, + { + "name": "CustomerID", + "type": "string" + }, + { + "name": "OrderDate", + "type": "date" + }, + { + "name": "TotalAmount", + "type": "decimal" + }, + { + "name": "Status", + "type": "string" + } + ], + "writeMode": "overwrite", + "fieldDelimiter": "\t", + "compress": "snappy", + "encoding": "UTF-8" + } + } + } + ] + } +} diff --git a/x2seatunnel/src/main/resources/examples/yaml/datax-mysql2hdfs2hive.yaml b/x2seatunnel/src/main/resources/examples/yaml/datax-mysql2hdfs2hive.yaml new file mode 100644 index 0000000..1256202 --- /dev/null +++ b/x2seatunnel/src/main/resources/examples/yaml/datax-mysql2hdfs2hive.yaml @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+source: examples/source/datax-mysql2hdfs2hive.json
+sourceType: datax
+target: examples/target/mysql2hdfs2hive-result.conf
+report: examples/report/mysql2hdfs2hive-report.md
+template: datax/custom/mysql-to-hive.conf
+options:
+  verbose: true
diff --git a/x2seatunnel/src/main/resources/templates/datax/custom/mysql-to-hive.conf b/x2seatunnel/src/main/resources/templates/datax/custom/mysql-to-hive.conf
new file mode 100644
index 0000000..de760ad
--- /dev/null
+++ b/x2seatunnel/src/main/resources/templates/datax/custom/mysql-to-hive.conf
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Custom conversion template from MySQL to Hive
+# Supports extracting MySQL data source information from DataX and converting to Hive write configuration
+# Syntax: Jinja2 style
+# Version: 1.0
+
+env {
+    execution.parallelism = {{ datax.job.setting.speed.channel | default(1) }}
+    job.mode = "BATCH"
+}
+
+source {
+    Jdbc {
+        url = "{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] }}"
+        driver = "com.mysql.cj.jdbc.Driver"
+        user = "{{ datax.job.content[0].reader.parameter.username }}"
+        password = "{{ datax.job.content[0].reader.parameter.password }}"
+        query = "{{ datax.job.content[0].reader.parameter.querySql[0] | default('SELECT') }} {{ datax.job.content[0].reader.parameter.column | join(',') }} FROM {{ datax.job.content[0].reader.parameter.connection[0].table[0] }}"
+        plugin_output = "source_table"
+    }
+}
+
+sink {
+    Hive {
+        # Full table name, format: database.table_name
+        #
+        # Option 1: Direct specification (recommended)
+        # table_name = "test_ods.test_table"
+
+        # Option 2: Extract from DataX configuration (if available)
+        # table_name = "{{ datax.job.content[0].writer.parameter.database | default('default') }}.{{ datax.job.content[0].writer.parameter.table | default('target_table') }}"
+
+        # Option 3: Intelligently extract Hive table name from path
+        # Use split and get filters to extract database name and table name
+        # Step 1: Split the path on '/'
+        # Step 2: Take the third-to-last segment as the database name, stripping the .db suffix
+        # Step 3: Take the second-to-last segment as the table name (the last segment is the partition)
+        table_name = "{{ datax.job.content[0].writer.parameter.path | split('/') | get(-3) | replace('.db', '') }}.{{ datax.job.content[0].writer.parameter.path | split('/') | get(-2) }}"
+
+        # Hive Metastore configuration
+        metastore_uri = "{{ 
datax.job.content[0].writer.parameter.metastoreUri | default('thrift://localhost:9083') }}" + + # Compression configuration + compress_codec = "{{ datax.job.content[0].writer.parameter.compress | default('none') }}" + + # Hadoop configuration file paths (optional) + # hdfs_site_path = "/etc/hadoop/conf/hdfs-site.xml" + # hive_site_path = "/etc/hadoop/conf/hive-site.xml" + + # Hadoop configuration (optional) + # hive.hadoop.conf = { + # "fs.defaultFS" = "{{ datax.job.content[0].writer.parameter.defaultFS | default('hdfs://localhost:9000') }}" + # } + + # Source table name + plugin_input = "source_table" + } +} diff --git a/x2seatunnel/src/main/resources/templates/datax/env/batch-env.conf b/x2seatunnel/src/main/resources/templates/datax/env/batch-env.conf new file mode 100644 index 0000000..b56420c --- /dev/null +++ b/x2seatunnel/src/main/resources/templates/datax/env/batch-env.conf @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# DataX Batch Processing Environment Configuration Template +# For batch data processing scenarios +# Template Type: Batch Environment +# Version: 1.0 + +env { + # Parallelism configuration - mapped from DataX channel count + parallelism = {{ datax.job.setting.speed.channel | default(1) }} + + # Job mode: batch processing + job.mode = "BATCH" +} \ No newline at end of file diff --git a/x2seatunnel/src/main/resources/templates/datax/sinks/hdfs-sink.conf b/x2seatunnel/src/main/resources/templates/datax/sinks/hdfs-sink.conf new file mode 100644 index 0000000..d552d4f --- /dev/null +++ b/x2seatunnel/src/main/resources/templates/datax/sinks/hdfs-sink.conf @@ -0,0 +1,63 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# DataX HDFS Writer to SeaTunnel HdfsFile Sink Conversion Template +# Based on core parameter configuration from SeaTunnel official documentation +# Template Type: HDFS Sink +# Version: 2.1 +sink { + HdfsFile { + # ===== Required Parameters ===== + + # HDFS cluster address (required) + fs.defaultFS = "{{ datax.job.content[0].writer.parameter.defaultFS }}" + + # Output path (required) + path = "{{ datax.job.content[0].writer.parameter.path }}" + + # ===== Core Configuration Parameters ===== + + # File format type + file_format_type = "{{ datax.job.content[0].writer.parameter.fileType | default('text') }}" + + # Field delimiter (required for text/csv format only) + field_delimiter = "{{ datax.job.content[0].writer.parameter.fieldDelimiter | default('\t') }}" + + # Row delimiter (required for text format only) + row_delimiter = "{{ datax.job.content[0].writer.parameter.rowDelimiter | default('\n') }}" + + # Compression codec + compress_codec = "{{ datax.job.content[0].writer.parameter.compress | compress_mapper | default('none') }}" + + # File encoding + encoding = "{{ datax.job.content[0].writer.parameter.encoding | default('UTF-8') }}" + + # Batch processing size + batch_size = {{ datax.job.content[0].writer.parameter.batchSize | default(1000000) }} + + # ===== Optional Configuration Parameters ===== + + # Temporary path - for transactional writing + tmp_path = "/tmp/seatunnel" + + # Enable transaction to guarantee exactly-once semantics + is_enable_transaction = true + + # Whether to write header (text/csv format only) + enable_header_write = {{ datax.job.content[0].writer.parameter.header | default(false) }} + } +} \ No newline at end of file diff --git a/x2seatunnel/src/main/resources/templates/datax/sinks/jdbc-sink.conf b/x2seatunnel/src/main/resources/templates/datax/sinks/jdbc-sink.conf new file mode 100644 index 0000000..506ce16 --- /dev/null +++ b/x2seatunnel/src/main/resources/templates/datax/sinks/jdbc-sink.conf @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# DataX Universal JDBC Sink Connector Template +# Based on SeaTunnel official JDBC Sink documentation specifications +# Supports all JDBC databases: MySQL, PostgreSQL, Oracle, SQL Server, etc. 
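+#
+# Illustrative mapping sketch (added commentary, not part of the original patch):
+# with the transformers defined in template-mapping.yaml, a DataX mysqlwriter
+# block such as
+#
+#   "writeMode": "replace",
+#   "connection": [{ "jdbcUrl": "jdbc:mysql://host:3306/db", "table": ["t"] }]
+#
+# would render driver = "com.mysql.cj.jdbc.Driver" (via jdbc_driver_mapper) and
+# data_save_mode = "DROP_DATA" (via writemode_to_datasavemode_mapper).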
+# Template Type: JDBC Sink (Unified Template) +# Version: 0.1 +sink { + Jdbc { + # Required configuration: database connection + url = "{{ datax.job.content[0].writer.parameter.connection[0].jdbcUrl }}" + driver = "{{ datax.job.content[0].writer.parameter.connection[0].jdbcUrl | jdbc_driver_mapper }}" + user = "{{ datax.job.content[0].writer.parameter.username }}" + password = "{{ datax.job.content[0].writer.parameter.password }}" + + # Write configuration: database + table mode (recommended) + table = "{{ datax.job.content[0].writer.parameter.connection[0].table[0] }}" + + # Batch write configuration + batch_size = {{ datax.job.content[0].writer.parameter.batchSize | default(1000) }} + + # Transaction configuration + auto_commit = true + + # Schema and data processing configuration + schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST" + data_save_mode = "{{ datax.job.content[0].writer.parameter.writeMode | writemode_to_datasavemode_mapper | default('APPEND_DATA') }}" + } +} \ No newline at end of file diff --git a/x2seatunnel/src/main/resources/templates/datax/sources/hdfs-source.conf b/x2seatunnel/src/main/resources/templates/datax/sources/hdfs-source.conf new file mode 100644 index 0000000..1830ad4 --- /dev/null +++ b/x2seatunnel/src/main/resources/templates/datax/sources/hdfs-source.conf @@ -0,0 +1,105 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# DataX HDFS Source Connector Template (Jinja2) +# For reading data from HDFS distributed file system +# Generation time: {{ generation_time }} +# Template type: HDFS Source +# Version: 1.0 + +source { + HdfsFile { + # HDFS connection configuration + fs.defaultFS = "{{ datax.job.content[0].reader.parameter.defaultFS | default('hdfs://localhost:9000') }}" + + # File path configuration - supports wildcards + path = "{{ datax.job.content[0].reader.parameter.path }}" + + # File format configuration + file_format_type = "{{ datax.job.content[0].reader.parameter.fileType | file_type_mapper }}" + + # Field delimiter configuration + field_delimiter = "{{ datax.job.content[0].reader.parameter.fieldDelimiter | default('\t') }}" + + # Row delimiter configuration + row_delimiter = "{{ datax.job.content[0].reader.parameter.rowDelimiter | default('\n') }}" + + # File encoding configuration + encoding = "{{ datax.job.content[0].reader.parameter.encoding | default('UTF-8') }}" + + # Compression format configuration + compress_codec = "{{ datax.job.content[0].reader.parameter.compress | compress_mapper }}" + + # Skip header row count + skip_header_row_number = {{ datax.job.content[0].reader.parameter.skipHeader | default(0) }} + + # Result table name + plugin_output = "hdfs_source_table" + + # Hadoop configuration + hadoop_conf = { + "fs.defaultFS" = "{{ datax.job.content[0].reader.parameter.defaultFS | default('hdfs://localhost:9000') }}" + "dfs.client.failover.proxy.provider" = "{{ datax.job.content[0].reader.parameter.proxyProvider | default('') }}" + "dfs.nameservices" = "{{ datax.job.content[0].reader.parameter.nameservices | default('') }}" + "hadoop.security.authentication" = "{{ datax.job.content[0].reader.parameter.authentication | default('simple') }}" + } + + # Read configuration + read_config = { + # Maximum file size + "max_file_size" = "{{ datax.job.content[0].reader.parameter.maxFileSize | default('2GB') }}" + + # Batch read size + "batch_size" = {{ datax.job.content[0].reader.parameter.batchSize | default(1000) }} + + # Whether to recursively read subdirectories + "recursive" = {{ datax.job.content[0].reader.parameter.recursive | default(false) }} + + # File filter pattern + "file_filter_pattern" = "{{ datax.job.content[0].reader.parameter.fileFilter | default('') }}" + } + + # Schema configuration (for structured files) + schema = { + fields = [ + {{ datax.job.content[0].reader.parameter.column | column_schema_mapper }} + ] + } + + # Partition configuration (if supported) + partition_by = [{{ datax.job.content[0].reader.parameter.partition | default('') }}] + + # Error handling configuration + error_handling = { + # Skip error records + "skip_errors" = {{ datax.job.content[0].reader.parameter.skipErrors | default(false) }} + + # Maximum error record count + "max_error_count" = {{ datax.job.content[0].reader.parameter.maxErrorCount | default(0) }} + + # Error file path + "error_file_path" = "{{ datax.job.content[0].reader.parameter.errorFilePath | default('') }}" + } + } +} + +# Usage Instructions: +# 1. path supports wildcard patterns, e.g., /data/2023/*/*.txt +# 2. Recommend adjusting batch_size and max_file_size based on file size +# 3. For partitioned tables, set appropriate partition_by configuration +# 4. Production environments should enable error handling and monitoring +# 5. 
Adjust hadoop_conf parameters according to Hadoop cluster configuration diff --git a/x2seatunnel/src/main/resources/templates/datax/sources/jdbc-source.conf b/x2seatunnel/src/main/resources/templates/datax/sources/jdbc-source.conf new file mode 100644 index 0000000..92686e2 --- /dev/null +++ b/x2seatunnel/src/main/resources/templates/datax/sources/jdbc-source.conf @@ -0,0 +1,58 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# DataX Universal JDBC Source Template +# Supports all JDBC databases: MySQL, PostgreSQL, Oracle, SQL Server, etc. +# Template Type: JDBC Source (Unified Template) +# Version: 1.0 + +source { + Jdbc { + # ===== Required Parameters (SeaTunnel JdbcSourceConfig Requirements) ===== + # Database connection URL (required) - Source: DataX connection.jdbcUrl + url = "{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] }}" + + # Database driver class name (required) - Auto-inferred from jdbcUrl + driver = "{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] | jdbc_driver_mapper }}" + + # Database username (required) - Source: DataX username + user = "{{ datax.job.content[0].reader.parameter.username }}" + + # Database password (required) - Source: DataX password + password = "{{ datax.job.content[0].reader.parameter.password }}" + + # Query SQL (required) - Prefer querySql, otherwise generate from table+column+where + query = "{{ datax.job.content[0].reader.parameter.querySql[0] | default('SELECT') }} {{ datax.job.content[0].reader.parameter.column | join(',') }} FROM {{ datax.job.content[0].reader.parameter.connection[0].table[0] }} WHERE {{ datax.job.content[0].reader.parameter.where | default('1=1') }}" + + # ===== Optional Parameters ===== + # Data partitioning configuration - Improve parallelism + partition_column = "{{ datax.job.content[0].reader.parameter.splitPk | default('') }}" + partition_num = {{ datax.job.setting.speed.channel | default(1) }} + + # Connection configuration + connection_check_timeout_sec = 60 + max_retries = 3 + + # Batch reading configuration + fetch_size = {{ datax.job.content[0].reader.parameter.fetchSize | default(1024) }} + + # Result table name + plugin_output = "jdbc_source_table" + + } +} + diff --git a/x2seatunnel/src/main/resources/templates/datax/sources/localfile-source.conf b/x2seatunnel/src/main/resources/templates/datax/sources/localfile-source.conf new file mode 100644 index 0000000..bfc5f29 --- /dev/null +++ b/x2seatunnel/src/main/resources/templates/datax/sources/localfile-source.conf @@ -0,0 +1,103 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# DataX LocalFile Source Connector Template (Jinja2) +# For reading data from local file system +# Generation time: {{ generation_time }} +# Template type: LocalFile Source +# Version: 1.0 + +source { + LocalFile { + # File path configuration - supports wildcards + path = "{{ datax.job.content[0].reader.parameter.path }}" + + # File format configuration + file_format_type = "{{ datax.job.content[0].reader.parameter.fileType | file_type_mapper }}" + + # Field delimiter configuration + field_delimiter = "{{ datax.job.content[0].reader.parameter.fieldDelimiter | default('\t') }}" + + # Row delimiter configuration + row_delimiter = "{{ datax.job.content[0].reader.parameter.rowDelimiter | default('\n') }}" + + # File encoding configuration + encoding = "{{ datax.job.content[0].reader.parameter.encoding | default('UTF-8') }}" + + # Compression format configuration + compress_codec = "{{ datax.job.content[0].reader.parameter.compress | compress_mapper }}" + + # Skip header row count + skip_header_row_number = {{ datax.job.content[0].reader.parameter.skipHeader | default(0) }} + + # Result table name + plugin_output = "localfile_source_table" + + # Read configuration + read_config = { + # Maximum file size + "max_file_size" = "{{ datax.job.content[0].reader.parameter.maxFileSize | default('1GB') }}" + + # Batch read size + "batch_size" = {{ datax.job.content[0].reader.parameter.batchSize | default(1000) }} + + # Whether to recursively read subdirectories + "recursive" = {{ datax.job.content[0].reader.parameter.recursive | default(false) }} + + # File filter pattern + "file_filter_pattern" = "{{ datax.job.content[0].reader.parameter.fileFilter | default('') }}" + } + + # Schema configuration + schema = { + fields = [ + {{ datax.job.content[0].reader.parameter.column | column_schema_mapper }} + ] + } + + # Error handling configuration + error_handling = { + # Skip error records + "skip_errors" = {{ datax.job.content[0].reader.parameter.skipErrors | default(false) }} + + # Maximum error record count + "max_error_count" = {{ datax.job.content[0].reader.parameter.maxErrorCount | default(0) }} + + # Error file path + "error_file_path" = "{{ datax.job.content[0].reader.parameter.errorFilePath | default('') }}" + } + + # File monitoring configuration (real-time reading) + file_monitor = { + # Whether to enable file monitoring + "enable" = {{ datax.job.content[0].reader.parameter.enableMonitor | default(false) }} + + # Monitoring interval (seconds) + "interval_sec" = {{ datax.job.content[0].reader.parameter.monitorInterval | default(30) }} + + # Whether to delete file after processing + "delete_after_process" = {{ datax.job.content[0].reader.parameter.deleteAfterProcess | default(false) }} + } + } +} + +# Usage Instructions: +# 1. path supports wildcard patterns, e.g., /data/*.txt or /data/**/*.csv +# 2. For large files, recommend adjusting batch_size and max_file_size parameters +# 3. 
Supports multiple file formats: text, csv, json, xml, etc. +# 4. Real-time scenarios can enable file_monitor configuration +# 5. Pay attention to file permissions and path access permissions diff --git a/x2seatunnel/src/main/resources/templates/report/report-template-zh.md b/x2seatunnel/src/main/resources/templates/report/report-template-zh.md new file mode 100644 index 0000000..26ee316 --- /dev/null +++ b/x2seatunnel/src/main/resources/templates/report/report-template-zh.md @@ -0,0 +1,49 @@ +# X2SeaTunnel 转换报告 + +## 📋 基本信息 + +| 项目 | 值 | +|------|----| +| **转换时间** | {{convertTime}} | +| **源文件** | `{{sourceFile}}` | +| **目标文件** | `{{targetFile}}` | +| **源类型** | {{sourceType}} | +| **目标类型** | SeaTunnel | +| **源连接器** | {{sourceConnector}} | +| **目标连接器** | {{sinkConnector}} | +| **转换状态** | {{status}} | +{{customTemplateInfo}} +| **工具版本** | 0.1 | + +{{errorInfo}} + +## 📊 转换统计 + +| 类型 | 数量 | 百分比 | +|------|------|--------| +| ✅ **直接映射** | {{directCount}} | {{directPercent}} | +| 🔧 **转换映射** | {{transformCount}} | {{transformPercent}} | +| 🔄 **使用默认值** | {{defaultCount}} | {{defaultPercent}} | +| ❌ **缺失字段** | {{missingCount}} | {{missingPercent}} | +| ⚠️ **未映射** | {{unmappedCount}} | {{unmappedPercent}} | +| **总计** | {{totalCount}} | 100% | + +## ✅ 直接映射的字段 + +{{directMappingTable}} + +## 🔧 转换映射的字段 + +{{transformMappingTable}} + +## 🔄 使用默认值的字段 + +{{defaultValuesTable}} + +## ❌ 缺失的字段 + +{{missingFieldsTable}} + +## ⚠️ 未映射的字段 + +{{unmappedFieldsTable}} diff --git a/x2seatunnel/src/main/resources/templates/report/report-template.md b/x2seatunnel/src/main/resources/templates/report/report-template.md new file mode 100644 index 0000000..717662c --- /dev/null +++ b/x2seatunnel/src/main/resources/templates/report/report-template.md @@ -0,0 +1,49 @@ +# X2SeaTunnel Conversion Report + +## 📋 Basic Information + +| Item | Value | +|------|----| +| **Conversion Time** | {{convertTime}} | +| **Source File** | `{{sourceFile}}` | +| **Target File** | `{{targetFile}}` | +| **Source Type** | {{sourceType}} | +| **Target Type** | SeaTunnel | +| **Source Connector** | {{sourceConnector}} | +| **Target Connector** | {{sinkConnector}} | +| **Conversion Status** | {{status}} | +{{customTemplateInfo}} +| **Tool Version** | 0.1 | + +{{errorInfo}} + +## 📊 Conversion Statistics + +| Type | Count | Percentage | +|------|------|--------| +| ✅ **Direct Mapping** | {{directCount}} | {{directPercent}} | +| 🔧 **Transform Mapping** | {{transformCount}} | {{transformPercent}} | +| 🔄 **Default Values Used** | {{defaultCount}} | {{defaultPercent}} | +| ❌ **Missing Fields** | {{missingCount}} | {{missingPercent}} | +| ⚠️ **Unmapped** | {{unmappedCount}} | {{unmappedPercent}} | +| **Total** | {{totalCount}} | 100% | + +## ✅ Direct Mapped Fields + +{{directMappingTable}} + +## 🔧 Transform Mapped Fields + +{{transformMappingTable}} + +## 🔄 Fields Using Default Values + +{{defaultValuesTable}} + +## ❌ Missing Fields + +{{missingFieldsTable}} + +## ⚠️ Unmapped Fields + +{{unmappedFieldsTable}} diff --git a/x2seatunnel/src/main/resources/templates/template-mapping.yaml b/x2seatunnel/src/main/resources/templates/template-mapping.yaml new file mode 100644 index 0000000..3cb3adb --- /dev/null +++ b/x2seatunnel/src/main/resources/templates/template-mapping.yaml @@ -0,0 +1,108 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# X2SeaTunnel Template Mapping Configuration +# Defines mapping relationships from DataX connector types to SeaTunnel template files +# Created: July 9, 2025 +# Version: 1.0 + +# DataX Connector Mapping Configuration +datax: + # Environment configuration mapping + env_mappings: + # Only batch mode is currently supported and used + "batch": "datax/env/batch-env.conf" + + # DataX Reader to Source template mapping + source_mappings: + # Database Readers - Unified JDBC template strategy + "mysqlreader": "datax/sources/jdbc-source.conf" + "postgresqlreader": "datax/sources/jdbc-source.conf" + "oraclereader": "datax/sources/jdbc-source.conf" + "sqlserverreader": "datax/sources/jdbc-source.conf" + + # File Readers + "txtfilereader": "datax/sources/localfile-source.conf" + "hdfsreader": "datax/sources/hdfs-source.conf" + + # DataX Writer to Sink template mapping + sink_mappings: + # Database Writers (unified JDBC template) + "mysqlwriter": "datax/sinks/jdbc-sink.conf" + "postgresqlwriter": "datax/sinks/jdbc-sink.conf" + "oraclewriter": "datax/sinks/jdbc-sink.conf" + "sqlserverwriter": "datax/sinks/jdbc-sink.conf" + + # File Writers + "hdfswriter": "datax/sinks/hdfs-sink.conf" + + # Default template configuration + defaults: + source_template: "datax/sources/jdbc-source.conf" + sink_template: "datax/sinks/hdfs-sink.conf" + env_template: "datax/env/batch-env.conf" + +# Field mapping transformer configuration +transformers: + # JDBC driver mapping (actively used in templates) + jdbc_driver_mapper: + "mysql": "com.mysql.cj.jdbc.Driver" + "postgresql": "org.postgresql.Driver" + "oracle": "oracle.jdbc.driver.OracleDriver" + "sqlserver": "com.microsoft.sqlserver.jdbc.SQLServerDriver" + "clickhouse": "com.clickhouse.jdbc.ClickHouseDriver" + "db2": "com.ibm.db2.jcc.DB2Driver" + "sybase": "com.sybase.jdbc4.jdbc.SybDriver" + + # Compression format mapping (used in HDFS sink template) + compress_mapper: + "gzip": "gzip" + "bzip2": "bzip2" + "snappy": "snappy" + "lzo": "lzo" + "lz4": "lz4" + "zstd": "zstd" + + # DataX writeMode to SeaTunnel data_save_mode mapping (used in JDBC sink template) + writemode_to_datasavemode_mapper: + "insert": "APPEND_DATA" + "replace": "DROP_DATA" + "update": "UPSERT_DATA" + "append": "APPEND_DATA" + "overwrite": "DROP_DATA" + +# Template selection strategy configuration +selection_strategy: + # Priority order: source_mappings + sink_mappings > defaults + priority_order: + - "source_mappings" + - "sink_mappings" + - "defaults" + + # Enable fallback to default templates + enable_fallback: true + + # Strict mode: report an error if no matching template is found + strict_mode: false + +# Configuration file version and compatibility +metadata: + version: "1.1" + compatible_versions: ["1.0", "1.1"] + created_at: "2025-07-09" + updated_at: "2025-08-04" + description: "Optimized DataX to SeaTunnel template mapping configuration" \ No newline at end of file diff --git 
a/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/cli/CommandLineOptionsTest.java b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/cli/CommandLineOptionsTest.java new file mode 100644 index 0000000..885c574 --- /dev/null +++ b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/cli/CommandLineOptionsTest.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.tools.x2seatunnel.cli; + +import org.apache.commons.cli.Options; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class CommandLineOptionsTest { + + @Test + public void testCreateOptions() { + Options options = CommandLineOptions.createOptions(); + + // Verify basic options exist + Assertions.assertTrue(options.hasOption("s"), "Should have source option"); + Assertions.assertTrue(options.hasOption("t"), "Should have target option"); + Assertions.assertTrue(options.hasOption("st"), "Should have source-type option"); + Assertions.assertTrue(options.hasOption("h"), "Should have help option"); + } +} diff --git a/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/model/MappingTrackerTest.java b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/model/MappingTrackerTest.java new file mode 100644 index 0000000..3012725 --- /dev/null +++ b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/model/MappingTrackerTest.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.tools.x2seatunnel.model; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** MappingTracker unit tests */ +public class MappingTrackerTest { + + private MappingTracker mappingTracker; + + @BeforeEach + public void setUp() { + mappingTracker = new MappingTracker(); + } + + @Test + public void testRecordDirectMapping() { + // Test recording direct mapping + mappingTracker.recordDirectMapping( + "job.content[0].reader.parameter.username", + "source.Jdbc.user", + "root", + "Directly extracted from DataX"); + mappingTracker.recordDirectMapping( + "job.content[0].reader.parameter.password", + "source.Jdbc.password", + "123456", + "Directly extracted from DataX"); + + MappingResult result = mappingTracker.generateMappingResult(); + + assertEquals(2, result.getSuccessMappings().size()); + assertEquals( + "job.content[0].reader.parameter.username", + result.getSuccessMappings().get(0).getSourceField()); + assertEquals("source.Jdbc.user", result.getSuccessMappings().get(0).getTargetField()); + assertEquals("root", result.getSuccessMappings().get(0).getValue()); + } + + @Test + public void testRecordTransformMapping() { + // Test recording transform mapping fields + mappingTracker.recordTransformMapping( + "job.content[0].reader.parameter.connection[0].jdbcUrl[0]", + "source.Jdbc.driver", + "com.mysql.cj.jdbc.Driver", + "jdbc_driver_mapper"); + mappingTracker.recordTransformMapping( + "job.content[0].reader.parameter.username", "source.Jdbc.user", "ROOT", "upper"); + + MappingResult result = mappingTracker.generateMappingResult(); + + assertEquals(2, result.getTransformMappings().size()); + assertEquals("source.Jdbc.driver", result.getTransformMappings().get(0).getTargetField()); + assertEquals("com.mysql.cj.jdbc.Driver", result.getTransformMappings().get(0).getValue()); + assertEquals("jdbc_driver_mapper", result.getTransformMappings().get(0).getFilterName()); + } + + @Test + public void testRecordDefaultValue() { + // Test recording default value fields + mappingTracker.recordDefaultValue("env.parallelism", "1", "Using default parallelism"); + mappingTracker.recordDefaultValue( + "env.job.mode", "BATCH", "DataX defaults to batch processing mode"); + + MappingResult result = mappingTracker.generateMappingResult(); + + assertEquals(2, result.getDefaultValues().size()); + assertEquals("env.parallelism", result.getDefaultValues().get(0).getFieldName()); + assertEquals("1", result.getDefaultValues().get(0).getValue()); + assertEquals("Using default parallelism", result.getDefaultValues().get(0).getReason()); + } + + @Test + public void testRecordMissingField() { + // Test recording missing fields + mappingTracker.recordMissingField( + "job.content[0].reader.parameter.host", "Field not found in DataX configuration"); + mappingTracker.recordMissingField( + "job.content[0].reader.parameter.port", + "Field value is empty in DataX configuration"); + + MappingResult result = mappingTracker.generateMappingResult(); + + assertEquals(2, result.getMissingRequiredFields().size()); + assertEquals( + "job.content[0].reader.parameter.host", + result.getMissingRequiredFields().get(0).getFieldName()); + assertEquals( + "Field not found in DataX configuration", + result.getMissingRequiredFields().get(0).getReason()); + } + + @Test + public void testRecordUnmappedField() { + // Test recording unmapped fields + 
mappingTracker.recordUnmappedField( + "job.content[0].reader.parameter.fetchSize", + "1000", + "DataX specific configuration, not needed by SeaTunnel"); + + MappingResult result = mappingTracker.generateMappingResult(); + + assertEquals(1, result.getUnmappedFields().size()); + assertEquals( + "job.content[0].reader.parameter.fetchSize", + result.getUnmappedFields().get(0).getFieldName()); + assertEquals("1000", result.getUnmappedFields().get(0).getValue()); + assertEquals( + "DataX specific configuration, not needed by SeaTunnel", + result.getUnmappedFields().get(0).getReason()); + } + + @Test + public void testMixedMappingTypes() { + // Test mixed mapping types + mappingTracker.recordDirectMapping( + "job.content[0].reader.parameter.username", + "source.Jdbc.user", + "root", + "Direct mapping"); + mappingTracker.recordTransformMapping( + "job.content[0].reader.parameter.connection[0].jdbcUrl[0]", + "source.Jdbc.driver", + "com.mysql.cj.jdbc.Driver", + "jdbc_driver_mapper"); + mappingTracker.recordDefaultValue("env.parallelism", "1", "Default value"); + mappingTracker.recordMissingField("missing.field", "Missing field"); + mappingTracker.recordUnmappedField("unmapped.field", "value", "Unmapped"); + + MappingResult result = mappingTracker.generateMappingResult(); + + assertEquals(1, result.getSuccessMappings().size()); + assertEquals(1, result.getTransformMappings().size()); + assertEquals(1, result.getDefaultValues().size()); + assertEquals(1, result.getMissingRequiredFields().size()); + assertEquals(1, result.getUnmappedFields().size()); + assertTrue(result.isSuccess()); + } + + @Test + public void testReset() { + // Add some mapping records + mappingTracker.recordDirectMapping("test.field", "target.field", "value", "test"); + mappingTracker.recordTransformMapping( + "source.field", "target.field", "transformed.value", "upper"); + + // Verify records exist + MappingResult result1 = mappingTracker.generateMappingResult(); + assertEquals(1, result1.getSuccessMappings().size()); + assertEquals(1, result1.getTransformMappings().size()); + + // Verify cleared after reset + mappingTracker.reset(); + MappingResult result2 = mappingTracker.generateMappingResult(); + assertEquals(0, result2.getSuccessMappings().size()); + assertEquals(0, result2.getTransformMappings().size()); + assertEquals(0, result2.getDefaultValues().size()); + assertEquals(0, result2.getMissingRequiredFields().size()); + assertEquals(0, result2.getUnmappedFields().size()); + } + + @Test + public void testGetStatistics() { + // Add various types of mapping records + mappingTracker.recordDirectMapping("direct1", "target1", "value1", "test"); + mappingTracker.recordDirectMapping("direct2", "target2", "value2", "test"); + mappingTracker.recordTransformMapping("transform1", "target3", "transformValue1", "upper"); + mappingTracker.recordDefaultValue("default1", "defaultValue1", "default test"); + mappingTracker.recordMissingField("missing1", "missing test"); + mappingTracker.recordUnmappedField("unmapped1", "unmappedValue1", "unmapped test"); + + String statistics = mappingTracker.getStatisticsText(); + + assertTrue(statistics.contains("Direct mappings: 2")); + assertTrue(statistics.contains("Transform mappings: 1")); + assertTrue(statistics.contains("Default values: 1")); + assertTrue(statistics.contains("Missing: 1")); + assertTrue(statistics.contains("Unmapped: 1")); + + MappingTracker.MappingStatistics stats = mappingTracker.getStatistics(); + assertEquals(2, stats.getDirectMappings()); + assertEquals(1, 
stats.getTransformMappings()); + assertEquals(1, stats.getDefaultValues()); + assertEquals(1, stats.getMissingFields()); + assertEquals(1, stats.getUnmappedFields()); + } +} diff --git a/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/report/MarkdownReportGeneratorEnhancedTest.java b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/report/MarkdownReportGeneratorEnhancedTest.java new file mode 100644 index 0000000..ecdaea4 --- /dev/null +++ b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/report/MarkdownReportGeneratorEnhancedTest.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.tools.x2seatunnel.report; + +import org.apache.seatunnel.tools.x2seatunnel.model.MappingResult; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** MarkdownReportGenerator unit tests - verifying enhanced report functionality */ +public class MarkdownReportGeneratorEnhancedTest { + + private MarkdownReportGenerator reportGenerator; + private MappingResult mappingResult; + + @BeforeEach + public void setUp() { + reportGenerator = new MarkdownReportGenerator(); + mappingResult = new MappingResult(); + + // Set up test data: containing various types of mappings + setupTestMappingResult(); + } + + private void setupTestMappingResult() { + // Add successful mappings + mappingResult.addSuccessMapping( + "job.content[0].reader.parameter.username", "source.Jdbc.user", "root"); + mappingResult.addSuccessMapping( + "job.content[0].reader.parameter.password", "source.Jdbc.password", "123456"); + mappingResult.addSuccessMapping( + "job.content[0].reader.parameter.connection[0].jdbcUrl[0]", + "source.Jdbc.url", + "jdbc:mysql://localhost:3306/test"); + mappingResult.addSuccessMapping( + "job.content[0].reader.parameter.connection[0].table[0]", + "source.Jdbc.table", + "users"); + + mappingResult.addDefaultValueField( + "source.Jdbc.driver", + "com.mysql.cj.jdbc.Driver", + "Automatically inferred from JDBC URL"); + mappingResult.addDefaultValueField( + "source.Jdbc.query", + "SELECT * FROM users", + "Automatically generated from table name"); + + mappingResult.addDefaultValueField("env.parallelism", "1", "Using default parallelism"); + mappingResult.addDefaultValueField("env.job.mode", "BATCH", "DataX defaults to BATCH mode"); + mappingResult.addDefaultValueField( + "source.Jdbc.fetchSize", "1000", "Using default fetch size"); + + mappingResult.addMissingRequiredField( + "job.content[0].reader.parameter.host", "Field not found in DataX configuration"); + + mappingResult.addUnmappedField( + "job.content[0].reader.parameter.splitPk", + "id", + "DataX-specific configuration, not needed 
in SeaTunnel"); + mappingResult.addUnmappedField( + "job.content[0].reader.parameter.where", + "status=1", + "DataX-specific configuration, not needed in SeaTunnel"); + + mappingResult.setSuccess(true); + } + + @Test + public void testEmptyMappingResult() { + MappingResult emptyResult = new MappingResult(); + emptyResult.setSuccess(true); + + String report = + reportGenerator.generateReport( + emptyResult, + "examples/empty-datax.json", + "examples/empty-seatunnel.conf", + "datax"); + + // Verify that an empty result can generate a report, without testing the specific format + assertTrue(report.length() > 0, "An empty result should generate a report"); + assertTrue( + report.contains("0") || report.contains("none") || report.contains("empty"), + "Should reflect the empty state"); + } + + @Test + public void testFailedConversionReport() { + MappingResult failedResult = new MappingResult(); + failedResult.setSuccess(false); + failedResult.setErrorMessage("Template parsing failed: syntax error"); + + String report = + reportGenerator.generateReport( + failedResult, + "examples/error-datax.json", + "examples/error-seatunnel.conf", + "datax"); + + // Verify that a failure report can be generated, without testing the specific format + assertTrue(report.length() > 0, "A failed result should generate a report"); + assertTrue( + report.contains("Failed") + || report.contains("Error") + || report.contains("error") + || report.contains("fail"), + "Should reflect the failure state"); + assertTrue(report.contains("Template parsing failed"), "Should contain the error message"); + } + + @Test + public void testBasicReportGeneration() { + String report = + reportGenerator.generateReport( + mappingResult, + "examples/test-datax.json", + "examples/test-seatunnel.conf", + "datax"); + + // Test only basic functionality: ensures a report is generated and contains basic info + assertTrue(report.length() > 0, "Should be able to generate a report"); + assertTrue( + report.contains("X2SeaTunnel") + || report.contains("Conversion") + || report.contains("report"), + "Should contain tool-related information"); + assertTrue( + report.contains("datax") || report.contains("test"), + "Should contain input file information"); + } +} diff --git a/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateVariableResolverMappingTest.java b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateVariableResolverMappingTest.java new file mode 100644 index 0000000..b662fa1 --- /dev/null +++ b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateVariableResolverMappingTest.java @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.tools.x2seatunnel.template; + +import org.apache.seatunnel.tools.x2seatunnel.model.MappingResult; +import org.apache.seatunnel.tools.x2seatunnel.model.MappingTracker; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** TemplateVariableResolver and MappingTracker integration tests */ +public class TemplateVariableResolverMappingTest { + + private TemplateVariableResolver resolver; + private MappingTracker mappingTracker; + private String testDataXJson; + + @BeforeEach + public void setUp() { + mappingTracker = new MappingTracker(); + resolver = new TemplateVariableResolver(null, mappingTracker); + + // Test DataX configuration JSON + testDataXJson = + "{\n" + + " \"job\": {\n" + + " \"content\": [{\n" + + " \"reader\": {\n" + + " \"name\": \"mysqlreader\",\n" + + " \"parameter\": {\n" + + " \"username\": \"root\",\n" + + " \"password\": \"123456\",\n" + + " \"connection\": [{\n" + + " \"jdbcUrl\": [\"jdbc:mysql://localhost:3306/test_db\"],\n" + + " \"table\": [\"user_info\"]\n" + + " }]\n" + + " }\n" + + " },\n" + + " \"writer\": {\n" + + " \"name\": \"hdfswriter\",\n" + + " \"parameter\": {\n" + + " \"path\": \"/warehouse/test_ods/ods_user_info/\",\n" + + " \"fileType\": \"orc\"\n" + + " }\n" + + " }\n" + + " }],\n" + + " \"setting\": {\n" + + " \"speed\": {\n" + + " \"channel\": 3\n" + + " }\n" + + " }\n" + + " }\n" + + "}"; + } + + @Test + public void testBasicFieldExtraction() { + // Test basic field extraction and track the mapping process + String template = "user: {{ datax.job.content[0].reader.parameter.username }}"; + + String result = resolver.resolve(template, testDataXJson); + + Assertions.assertEquals("user: root", result); + + // Verify mapping tracking + MappingResult mappingResult = mappingTracker.generateMappingResult(); + Assertions.assertEquals(1, mappingResult.getSuccessMappings().size()); + Assertions.assertEquals( + "job.content[0].reader.parameter.username", + mappingResult.getSuccessMappings().get(0).getSourceField()); + Assertions.assertEquals("root", mappingResult.getSuccessMappings().get(0).getValue()); + } + + @Test + public void testDefaultValueUsage() { + // Test default value usage and tracking + String template = + "host: {{ datax.job.content[0].reader.parameter.host | default('localhost') }}"; + + String result = resolver.resolve(template, testDataXJson); + + Assertions.assertEquals("host: localhost", result); + + // Verify mapping tracking - default values should be recorded + MappingResult mappingResult = mappingTracker.generateMappingResult(); + Assertions.assertEquals(1, mappingResult.getDefaultValues().size()); + Assertions.assertEquals("localhost", mappingResult.getDefaultValues().get(0).getValue()); + Assertions.assertTrue( + mappingResult.getDefaultValues().get(0).getReason().contains("default value")); + } + + @Test + public void testMissingFieldTracking() { + // Test missing field tracking + String template = "host: {{ datax.job.content[0].reader.parameter.nonexistent }}"; + + String result = resolver.resolve(template, testDataXJson); + + // Missing field should return an empty string + Assertions.assertEquals("host: ", result); + + // Verify mapping tracking - missing fields should be recorded + MappingResult mappingResult = mappingTracker.generateMappingResult(); + Assertions.assertTrue(mappingResult.getMissingRequiredFields().size() >= 1); + + // Find the corresponding missing field + boolean foundMissingField = + 
mappingResult.getMissingRequiredFields().stream() + .anyMatch( + field -> + field.getFieldName() + .equals( + "job.content[0].reader.parameter.nonexistent")); + Assertions.assertTrue(foundMissingField); + } + + @Test + public void testFilterTransformationTracking() { + // Test filter transformation tracking + String template = "username: {{ datax.job.content[0].reader.parameter.username | upper }}"; + + String result = resolver.resolve(template, testDataXJson); + + Assertions.assertEquals("username: ROOT", result); + + // Verify mapping tracking - filter transformations should be recorded as transformation + // mappings + MappingResult mappingResult = mappingTracker.generateMappingResult(); + + // Original field extraction is recorded as a direct mapping + Assertions.assertTrue(mappingResult.getSuccessMappings().size() >= 1); + Assertions.assertEquals("root", mappingResult.getSuccessMappings().get(0).getValue()); + + // Filter transformation is recorded as a transformation mapping + Assertions.assertEquals(1, mappingResult.getTransformMappings().size()); + Assertions.assertEquals("ROOT", mappingResult.getTransformMappings().get(0).getValue()); + Assertions.assertTrue( + mappingResult.getTransformMappings().get(0).getFilterName().contains("upper")); + } + + @Test + public void testComplexTemplateWithMixedMappingTypes() { + // Test complex template with mixed mapping types + String template = + "source {\n" + + " Jdbc {\n" + + " url = \"{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] }}\"\n" + + " user = \"{{ datax.job.content[0].reader.parameter.username }}\"\n" + + " password = \"{{ datax.job.content[0].reader.parameter.password }}\"\n" + + " table = \"{{ datax.job.content[0].reader.parameter.connection[0].table[0] }}\"\n" + + " port = \"{{ datax.job.content[0].reader.parameter.port | default('3306') }}\"\n" + + " driver = \"{{ datax.job.content[0].reader.parameter.driver | default('com.mysql.cj.jdbc.Driver') }}\"\n" + + " fetchSize = \"{{ datax.job.content[0].reader.parameter.fetchSize }}\"\n" + + " }\n" + + "}"; + + String result = resolver.resolve(template, testDataXJson); + + // Verify parsing result + Assertions.assertTrue(result.contains("url = \"jdbc:mysql://localhost:3306/test_db\"")); + Assertions.assertTrue(result.contains("user = \"root\"")); + Assertions.assertTrue(result.contains("password = \"123456\"")); + Assertions.assertTrue(result.contains("table = \"user_info\"")); + Assertions.assertTrue(result.contains("port = \"3306\"")); + Assertions.assertTrue(result.contains("driver = \"com.mysql.cj.jdbc.Driver\"")); + Assertions.assertTrue(result.contains("fetchSize = \"\"")); + + // Verify mapping statistics + MappingResult mappingResult = mappingTracker.generateMappingResult(); + + // Direct mappings: url, user, password, table + Assertions.assertEquals(4, mappingResult.getSuccessMappings().size()); + + // Default values: port, driver + Assertions.assertEquals(2, mappingResult.getDefaultValues().size()); + + // Missing fields: fetchSize + Assertions.assertEquals(1, mappingResult.getMissingRequiredFields().size()); + + // Verify total count + int totalFields = + mappingResult.getSuccessMappings().size() + + mappingResult.getTransformMappings().size() + + mappingResult.getDefaultValues().size() + + mappingResult.getMissingRequiredFields().size() + + mappingResult.getUnmappedFields().size(); + + // Should match the number of fields in the template + Assertions.assertEquals(7, totalFields); + } + + @Test + public void testMappingTrackerReset() { + // Test 
MappingTracker reset functionality + String template1 = "user: {{ datax.job.content[0].reader.parameter.username }}"; + resolver.resolve(template1, testDataXJson); + + MappingResult result1 = mappingTracker.generateMappingResult(); + Assertions.assertEquals(1, result1.getSuccessMappings().size()); + + // Reset the tracker + mappingTracker.reset(); + + String template2 = "password: {{ datax.job.content[0].reader.parameter.password }}"; + resolver.resolve(template2, testDataXJson); + + MappingResult result2 = mappingTracker.generateMappingResult(); + Assertions.assertEquals(1, result2.getSuccessMappings().size()); + Assertions.assertEquals( + "job.content[0].reader.parameter.password", + result2.getSuccessMappings().get(0).getSourceField()); + } + + @Test + public void testRegexFilterWithMappingTracking() { + // Test regex filter with mapping tracking + String template = + "database: {{ datax.job.content[0].writer.parameter.path | regex_extract('/warehouse/([^/]+)/.*', '$1') | default('unknown') }}"; + + String result = resolver.resolve(template, testDataXJson); + + Assertions.assertEquals("database: test_ods", result); + + // Verify mapping tracking + MappingResult mappingResult = mappingTracker.generateMappingResult(); + + // Original path extraction is a direct mapping + Assertions.assertTrue(mappingResult.getSuccessMappings().size() >= 1); + Assertions.assertEquals( + "/warehouse/test_ods/ods_user_info/", + mappingResult.getSuccessMappings().get(0).getValue()); + + // Regex extraction is a transformation mapping + Assertions.assertEquals(1, mappingResult.getTransformMappings().size()); + Assertions.assertEquals("test_ods", mappingResult.getTransformMappings().get(0).getValue()); + Assertions.assertTrue( + mappingResult + .getTransformMappings() + .get(0) + .getFilterName() + .contains("regex_extract")); + } +} diff --git a/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateVariableResolverTest.java b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateVariableResolverTest.java new file mode 100644 index 0000000..b86ce58 --- /dev/null +++ b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/template/TemplateVariableResolverTest.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.tools.x2seatunnel.template; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** TemplateVariableResolver unit tests */ +public class TemplateVariableResolverTest { + + private TemplateVariableResolver resolver; + private String testDataXJson; + + @BeforeEach + public void setUp() { + resolver = new TemplateVariableResolver(); + + // Simplified DataX configuration JSON string + testDataXJson = + "{\n" + + " \"job\": {\n" + + " \"content\": [{\n" + + " \"reader\": {\n" + + " \"name\": \"mysqlreader\",\n" + + " \"parameter\": {\n" + + " \"username\": \"root\",\n" + + " \"connection\": [{\n" + + " \"jdbcUrl\": [\"jdbc:mysql://localhost:3306/test_db\"],\n" + + " \"table\": [\"user_info\"]\n" + + " }]\n" + + " }\n" + + " },\n" + + " \"writer\": {\n" + + " \"parameter\": {\n" + + " \"path\": \"/warehouse/test_ods/ods_user_info/\"\n" + + " }\n" + + " }\n" + + " }]\n" + + " }\n" + + "}"; + } + + @Test + public void testBasicVariableResolution() { + String template = "username: {{ datax.job.content[0].reader.parameter.username }}"; + String result = resolver.resolve(template, testDataXJson); + assertEquals("username: root", result); + } + + @Test + public void testRegexVariableResolution() { + String template = + "database: {{ datax.job.content[0].writer.parameter.path | regex_extract('/warehouse/([^/]+)/.*', '$1') | default('default_db') }}"; + String result = resolver.resolve(template, testDataXJson); + assertEquals("database: test_ods", result); + } + + @Test + public void testComplexTemplate() { + String template = + "source {\n" + + " Jdbc {\n" + + " url = \"{{ datax.job.content[0].reader.parameter.connection[0].jdbcUrl[0] }}\"\n" + + " user = \"{{ datax.job.content[0].reader.parameter.username }}\"\n" + + " table = \"{{ datax.job.content[0].reader.parameter.connection[0].table[0] }}\"\n" + + " }\n" + + "}"; + + String result = resolver.resolve(template, testDataXJson); + + assertTrue(result.contains("url = \"jdbc:mysql://localhost:3306/test_db\"")); + assertTrue(result.contains("user = \"root\"")); + assertTrue(result.contains("table = \"user_info\"")); + } + + @Test + public void testDefaultValue() { + String template = + "host: {{ datax.job.content[0].reader.parameter.host | default('localhost') }}"; + String result = resolver.resolve(template, testDataXJson); + assertEquals("host: localhost", result); + } +} diff --git a/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/util/FileUtilsTest.java b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/util/FileUtilsTest.java new file mode 100644 index 0000000..65b5f4b --- /dev/null +++ b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/util/FileUtilsTest.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.tools.x2seatunnel.util; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.io.IOException; + +public class FileUtilsTest { + + @Test + public void testBasicFileOperations() throws IOException { + String testFile = "target/test-file.txt"; + String testContent = "Hello, World!"; + + // Write file + FileUtils.writeFile(testFile, testContent); + + // Verify file exists + Assertions.assertTrue(FileUtils.exists(testFile)); + + // Read file + String content = FileUtils.readFile(testFile); + Assertions.assertEquals(testContent, content); + + // Cleanup + new File(testFile).delete(); + } +} diff --git a/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/util/YamlConfigParserTest.java b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/util/YamlConfigParserTest.java new file mode 100644 index 0000000..0cfa3ea --- /dev/null +++ b/x2seatunnel/src/test/java/org/apache/seatunnel/tools/x2seatunnel/util/YamlConfigParserTest.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.seatunnel.tools.x2seatunnel.util; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** Unit tests for YamlConfigParser, verifying YAML configuration mapping is correct */ +public class YamlConfigParserTest { + + @Test + public void testParseConversionYaml() { + // Example file located at resources/examples/datax-mysql2hdfs2hive.yaml + String yamlPath = "src/main/resources/examples/yaml/datax-mysql2hdfs2hive.yaml"; + ConversionConfig config = YamlConfigParser.parse(yamlPath); + Assertions.assertNotNull(config); + Assertions.assertEquals("examples/source/datax-mysql2hdfs2hive.json", config.getSource()); + Assertions.assertEquals("datax", config.getSourceType()); + Assertions.assertEquals("examples/target/mysql2hdfs2hive-result.conf", config.getTarget()); + Assertions.assertEquals("examples/report/mysql2hdfs2hive-report.md", config.getReport()); + Assertions.assertEquals("datax/custom/mysql-to-hive.conf", config.getTemplate()); + Assertions.assertTrue(config.isVerbose(), "YAML options.verbose should be true"); + } + + @Test + public void testParseSimpleYamlWithStringSource() { + // Dynamically create and parse simple YAML, containing only source field + String yamlContent = "source: foo.json\n" + "target: bar.conf\n" + "report: report.md\n"; + try { + java.nio.file.Path tempFile = java.nio.file.Files.createTempFile("test", ".yaml"); + java.nio.file.Files.write(tempFile, yamlContent.getBytes()); + ConversionConfig config = YamlConfigParser.parse(tempFile.toString()); + Assertions.assertEquals("foo.json", config.getSource()); + Assertions.assertEquals("bar.conf", config.getTarget()); + Assertions.assertEquals("report.md", config.getReport()); + // Default values + Assertions.assertNull(config.getTemplate()); + Assertions.assertFalse(config.isVerbose()); + } catch (Exception e) { + Assertions.fail("Failed to parse simple YAML: " + e.getMessage()); + } + } +} From db3d1917f72b749e8b85433ac7eff9d6ad7ad606 Mon Sep 17 00:00:00 2001 From: wangxiaogang Date: Thu, 21 Aug 2025 17:38:03 +0800 Subject: [PATCH 3/5] update readme --- README.md | 44 ++++++++----------------------------- README_zh.md | 62 +++++++++++++++++++++++----------------------------- 2 files changed, 36 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index c9181ae..1817050 100644 --- a/README.md +++ b/README.md @@ -7,20 +7,6 @@ This repository hosts auxiliary tools for Apache SeaTunnel. It focuses on develo More tools may be added in the future. For the main data integration engine, see the [Apache SeaTunnel](https://github.com/apache/seatunnel) project. -## Modules documentation - -- x2seatunnel - - English: [x2seatunnel/README.md](x2seatunnel/README.md) - - 中文: [x2seatunnel/README_zh.md](x2seatunnel/README_zh.md) - -## Build and Test - -Prerequisites: -- Java 8+ -- Maven 3.6+ - -Build the whole repository: - ## Tool 1 - SeaTunnel MCP Server What is MCP? @@ -37,28 +23,16 @@ SeaTunnel MCP Server For screenshots, demo video, features, installation and usage instructions, please refer to the README in the seatunnel-mcp directory. 
-Get the main project from [Apache SeaTunnel](https://github.com/apache/seatunnel)
-```bash
-mvn -T 1C -e -DskipIT clean verify
-```
-
-Build only a submodule (x2seatunnel as example):
+## Tool 2 - x2seatunnel
-```bash
-mvn -pl x2seatunnel -am -DskipTests clean package
-```
-
-Artifacts will be generated under `x2seatunnel/target/`:
-- Runnable JAR: `x2seatunnel-.jar`
-- Distribution ZIP: `x2seatunnel--bin.zip` (or similar)
-
-Unzip the distribution and follow the submodule README to run.
-
-## Versioning and Dependencies
-
-This repository depends on released Seatunnel artifacts (e.g., `seatunnel-common`, `seatunnel-jackson`).
-Versions are centrally managed via the `seatunnel.version` property in the root POM.
+What is x2seatunnel?
+- x2seatunnel is a configuration conversion tool that helps users migrate from other data integration tools (e.g., DataX) to SeaTunnel by converting existing configurations into SeaTunnel-compatible formats.
+- x2seatunnel
+  - English: [x2seatunnel/README.md](x2seatunnel/README.md)
+  - Chinese: [x2seatunnel/README_zh.md](x2seatunnel/README_zh.md)
 
 ## Contributing
 
-Issues and PRs are welcome.
\ No newline at end of file
+Issues and PRs are welcome.
+
+Get the main project from [Apache SeaTunnel](https://github.com/apache/seatunnel)
diff --git a/README_zh.md b/README_zh.md
index afb36e0..08ceff5 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -1,47 +1,38 @@
-# SeaTunnel Tools(工具集)
+# Apache SeaTunnel 工具
 
-本仓库用于沉淀与 Apache SeaTunnel 相关的周边工具,目标是提升配置生产力、迁移与运维体验。目前包含:
+本仓库托管 Apache SeaTunnel 的辅助工具,专注于配置、转换、打包和诊断等提升开发/运维效率的工具集。当前包含的模块:
 
-- x2seatunnel:将 DataX 等配置转换为 SeaTunnel 配置文件的工具。
+- x2seatunnel:将其它工具(例如 DataX)的配置转换为 SeaTunnel 配置文件。
 
-未来可能会新增更多模块;SeaTunnel 引擎本体请参考
-[Apache SeaTunnel](https://github.com/apache/seatunnel)。
+未来可能会添加更多工具。如需主数据集成引擎,请参见
+[Apache SeaTunnel](https://github.com/apache/seatunnel) 项目。
 
-## 模块文档导航
+## 工具 1 - SeaTunnel MCP Server
 
-- x2seatunnel
-  - 英文:[x2seatunnel/README.md](x2seatunnel/README.md)
-  - 中文:[x2seatunnel/README_zh.md](x2seatunnel/README_zh.md)
+什么是 MCP?
+- MCP(Model Context Protocol)是一种将 LLM 与工具、数据和系统连接的开放协议。通过 SeaTunnel MCP,你可以从基于 LLM 的界面直接操作 SeaTunnel,同时保持服务端逻辑安全且可审计。
+- 了解更多:https://github.com/modelcontextprotocol
 
-## 构建与测试
+SeaTunnel MCP Server
+- 源码目录: [seatunnel-mcp/](seatunnel-mcp/)
+- 英文 README: [seatunnel-mcp/README.md](seatunnel-mcp/README.md)
+- 中文 README: [seatunnel-mcp/README_CN.md](seatunnel-mcp/README_CN.md)
+- 快速开始: [seatunnel-mcp/docs/QUICK_START.md](seatunnel-mcp/docs/QUICK_START.md)
+- 用户指南: [seatunnel-mcp/docs/USER_GUIDE.md](seatunnel-mcp/docs/USER_GUIDE.md)
+- 开发者指南: [seatunnel-mcp/docs/DEVELOPER_GUIDE.md](seatunnel-mcp/docs/DEVELOPER_GUIDE.md)
 
-先决条件:
-- Java 8+
-- Maven 3.6+
+有关截图、演示视频、功能、安装与使用说明,请参阅 `seatunnel-mcp` 目录下的 README。
 
-构建整个仓库:
+## 工具 2 - x2seatunnel
 
-```bash
-mvn -T 1C -e -DskipIT clean verify
-```
+x2seatunnel 是什么?
+- x2seatunnel 是一个配置转换工具,帮助用户将来自其他数据集成工具(例如 DataX)的配置迁移到 SeaTunnel,通过自动转换现有配置生成 SeaTunnel 可识别的格式。
+- x2seatunnel 文档:
+  - 英文: [x2seatunnel/README.md](x2seatunnel/README.md)
+  - 中文: [x2seatunnel/README_zh.md](x2seatunnel/README_zh.md)
 
-仅构建某个子模块(例如 x2seatunnel):
-
-```bash
-mvn -pl x2seatunnel -am -DskipTests clean package
-```
-
-产物在 `x2seatunnel/target/`:
-- 可运行 JAR:`x2seatunnel-.jar`
-- 分发 ZIP:`x2seatunnel--bin.zip`(或类似命名)
-
-解压后参考子模块 README 进行运行。
-
-## 版本与依赖
-
-本仓库依赖已发布的 SeaTunnel 组件(如 `seatunnel-common`、`seatunnel-jackson`)。
-版本通过根 POM 的 `seatunnel.version` 统一管理(当前为 2.3.11)。
-
-## 贡献
+## 参与贡献
 
 欢迎提交 Issue 与 PR。
+
+可从 [Apache SeaTunnel](https://github.com/apache/seatunnel) 获取主项目源码。
\ No newline at end of file

From 7332e516b690239617b92999b1cda13d74552496 Mon Sep 17 00:00:00 2001
From: wangxiaogang
Date: Thu, 21 Aug 2025 19:04:22 +0800
Subject: [PATCH 4/5] add .licenserc.yaml

---
 .licenserc.yaml | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 .licenserc.yaml

diff --git a/.licenserc.yaml b/.licenserc.yaml
new file mode 100644
index 0000000..0fbe452
--- /dev/null
+++ b/.licenserc.yaml
@@ -0,0 +1,45 @@
+# Licensed to Apache Software Foundation (ASF) under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Apache Software Foundation (ASF) licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+header:
+  license:
+    spdx-id: Apache-2.0
+    copyright-owner: Apache Software Foundation
+
+  paths-ignore:
+    - NOTICE
+    - LICENSE
+    - DISCLAIMER
+    - mvnw.cmd
+    - .mvn
+    - .gitmodules
+    - .gitattributes
+    - .github/actions
+    - '**/*.md'
+    - '**/*.mdx'
+    - '**/*.json'
+    - '**/*.iml'
+    - '**/*.ini'
+    - '**/*.svg'
+    - '**/*.txt'
+    - '**/*.csv'
+    - '**/.gitignore'
+    - '**/LICENSE'
+    - '**/NOTICE'
+    - '**/.gitkeep'
+
+  comment: on-failure

From 842ba0f7f1fd53cb0eead1b327a88bcac0c1fb04 Mon Sep 17 00:00:00 2001
From: wangxiaogang
Date: Mon, 25 Aug 2025 13:57:45 +0800
Subject: [PATCH 5/5] add submodule seatunnel

---
 pom.xml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 1488ee7..834249d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -34,6 +34,7 @@ Production ready big data processing product based on Apache Spark and Apache Flink.
 
+        seatunnel
         x2seatunnel
 
@@ -47,7 +48,7 @@
 
-        2.3.11
+        2.3.12-SNAPSHOT
         1.7.36
         2.17.1
         4.13.2