[DOP-22141] Add logic for handling SFTP transfers
Ilyas Gasanov committed Jan 27, 2025
1 parent 85618f1 commit a1c8006
Showing 17 changed files with 1,163 additions and 10 deletions.
7 changes: 7 additions & 0 deletions .env.docker
@@ -108,6 +108,13 @@ TEST_HDFS_HOST=test-hive
TEST_HDFS_WEBHDFS_PORT=9870
TEST_HDFS_IPC_PORT=9820

TEST_SFTP_HOST_FOR_CONFTEST=localhost
TEST_SFTP_PORT_FOR_CONFTEST=2222
TEST_SFTP_HOST_FOR_WORKER=test-sftp
TEST_SFTP_PORT_FOR_WORKER=2222
TEST_SFTP_USER=syncmaster
TEST_SFTP_PASSWORD=AesujeifohgoaCu0Boosiet5aimeitho

SPARK_CONF_DIR=/app/tests/spark/hive/conf/
HADOOP_CONF_DIR=/app/tests/spark/hadoop/
HIVE_CONF_DIR=/app/tests/spark/hive/conf/
7 changes: 7 additions & 0 deletions .env.local
@@ -95,6 +95,13 @@ export TEST_HDFS_HOST=test-hive
export TEST_HDFS_WEBHDFS_PORT=9870
export TEST_HDFS_IPC_PORT=9820

export TEST_SFTP_HOST_FOR_CONFTEST=localhost
export TEST_SFTP_PORT_FOR_CONFTEST=2222
export TEST_SFTP_HOST_FOR_WORKER=test-sftp
export TEST_SFTP_PORT_FOR_WORKER=2222
export TEST_SFTP_USER=syncmaster
export TEST_SFTP_PASSWORD=AesujeifohgoaCu0Boosiet5aimeitho

export SPARK_CONF_DIR=./tests/spark/hive/conf/
export HADOOP_CONF_DIR=./tests/spark/hadoop/
export HIVE_CONF_DIR=./tests/spark/hive/conf/
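For context, the SFTP variables come in *_FOR_CONFTEST and *_FOR_WORKER pairs because pytest runs on the host (reaching the SFTP container through the port published on localhost), while the worker resolves the container by its compose service name. A minimal sketch of how a test fixture might consume these variables; the fixture names and dict layout are illustrative, not taken from this commit:

import os

import pytest


@pytest.fixture(scope="session")
def sftp_for_conftest():
    # Host-side connection parameters, read from .env.docker / .env.local
    return {
        "host": os.environ["TEST_SFTP_HOST_FOR_CONFTEST"],
        "port": int(os.environ["TEST_SFTP_PORT_FOR_CONFTEST"]),
        "user": os.environ["TEST_SFTP_USER"],
        "password": os.environ["TEST_SFTP_PASSWORD"],
    }


@pytest.fixture(scope="session")
def sftp_for_worker():
    # Parameters the worker container uses to reach the same server
    return {
        "host": os.environ["TEST_SFTP_HOST_FOR_WORKER"],
        "port": int(os.environ["TEST_SFTP_PORT_FOR_WORKER"]),
        "user": os.environ["TEST_SFTP_USER"],
        "password": os.environ["TEST_SFTP_PASSWORD"],
    }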
79 changes: 79 additions & 0 deletions .github/workflows/sftp-tests.yml
@@ -0,0 +1,79 @@
name: SFTP tests
on:
  workflow_call:

env:
  DEFAULT_PYTHON: '3.12'

jobs:
  test:
    name: Run SFTP tests
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Cache jars
        uses: actions/cache@v4
        with:
          path: ./cached_jars
          key: ${{ runner.os }}-python-${{ env.DEFAULT_PYTHON }}-test-sftp
          restore-keys: |
            ${{ runner.os }}-python-${{ env.DEFAULT_PYTHON }}-test-sftp
            ${{ runner.os }}-python-
      - name: Build Worker Image
        uses: docker/build-push-action@v6
        with:
          context: .
          tags: mtsrus/syncmaster-worker:${{ github.sha }}
          target: test
          file: docker/Dockerfile.worker
          load: true
          cache-from: mtsrus/syncmaster-worker:develop

      - name: Docker compose up
        run: |
          docker compose -f docker-compose.test.yml --profile all down -v --remove-orphans
          docker compose -f docker-compose.test.yml --profile sftp up -d --wait --wait-timeout 200
        env:
          WORKER_IMAGE_TAG: ${{ github.sha }}

      - name: Run SFTP Tests
        run: |
          docker compose -f ./docker-compose.test.yml --profile sftp exec -T worker coverage run -m pytest -vvv -s -m "worker and sftp"
      - name: Dump worker logs on failure
        if: failure()
        uses: jwalton/gh-docker-logs@v2
        with:
          images: mtsrus/syncmaster-worker
          dest: ./logs

      # This is important, as coverage is exported after receiving SIGTERM
      - name: Shutdown
        if: always()
        run: |
          docker compose -f docker-compose.test.yml --profile all down -v --remove-orphans
      - name: Upload worker logs
        uses: actions/upload-artifact@v4
        if: failure()
        with:
          name: worker-logs-sftp
          path: logs/*

      - name: Upload coverage results
        uses: actions/upload-artifact@v4
        with:
          name: coverage-sftp
          path: reports/*
          # https://github.com/actions/upload-artifact/issues/602
          include-hidden-files: true
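The workflow selects tests inside the worker container with pytest -m "worker and sftp", so the new SFTP tests must carry both markers. A rough sketch of what such a test module could look like, reusing the hypothetical fixture from the sketch above (the marker registration and test name are assumptions, not part of this commit):

import pytest

# Both markers must be registered (e.g. in pyproject.toml or pytest.ini)
# for -m "worker and sftp" to select this test without warnings.
pytestmark = [pytest.mark.worker, pytest.mark.sftp]


def test_sftp_server_parameters(sftp_for_conftest):
    # Selected by: pytest -vvv -s -m "worker and sftp"
    assert sftp_for_conftest["port"] == 2222
    assert sftp_for_conftest["user"] == "syncmaster"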
4 changes: 4 additions & 0 deletions .github/workflows/tests.yml
@@ -44,6 +44,10 @@ jobs:
name: S3 tests
uses: ./.github/workflows/s3-tests.yml

sftp_tests:
name: SFTP tests
uses: ./.github/workflows/sftp-tests.yml

scheduler_tests:
name: Scheduler tests
uses: ./.github/workflows/scheduler-tests.yml
1 change: 1 addition & 0 deletions README.rst
@@ -40,6 +40,7 @@ List of currently supported connections:
* MySQL
* HDFS
* S3
* SFTP

Current Data.SyncMaster implementation provides the following components:

18 changes: 16 additions & 2 deletions docker-compose.test.yml
@@ -125,7 +125,7 @@ services:
condition: service_completed_successfully
rabbitmq:
condition: service_healthy
profiles: [worker, scheduler, s3, oracle, hdfs, hive, clickhouse, mysql, mssql, all]
profiles: [worker, scheduler, s3, oracle, hdfs, hive, clickhouse, mysql, mssql, sftp, all]

test-postgres:
image: postgres
@@ -139,7 +139,7 @@
interval: 30s
timeout: 5s
retries: 3
profiles: [s3, oracle, clickhouse, mysql, mssql, hdfs, hive, all]
profiles: [s3, oracle, clickhouse, mysql, mssql, hdfs, hive, sftp, all]

test-s3:
image: bitnami/minio:latest
@@ -266,6 +266,20 @@ services:
# writing spark dataframe to s3 xml file fails without running hive metastore server
profiles: [hive, hdfs, s3, all]

  test-sftp:
    image: ${SFTP_IMAGE:-linuxserver/openssh-server}
    restart: unless-stopped
    ports:
      - 2222:2222
    environment:
      PUID: 1000
      PGID: 1000
      USER_NAME: syncmaster
      PASSWORD_ACCESS: true
      SUDO_ACCESS: true
      USER_PASSWORD: AesujeifohgoaCu0Boosiet5aimeitho
    profiles: [sftp, all]

volumes:
postgres_test_data:
rabbitmq_test_data:
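The test-sftp service runs linuxserver/openssh-server with password authentication enabled and publishes port 2222 on the host. A quick way to check that the container is reachable from the host is a short paramiko snippet; paramiko is only an assumption for this illustration, the worker code in this commit may use a different SFTP client:

import paramiko

client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect(
    hostname="localhost",  # TEST_SFTP_HOST_FOR_CONFTEST
    port=2222,             # port published by docker-compose.test.yml
    username="syncmaster",
    password="AesujeifohgoaCu0Boosiet5aimeitho",
)
sftp = client.open_sftp()
print(sftp.listdir("."))   # list the test user's home directory on the server
sftp.close()
client.close()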
1 change: 1 addition & 0 deletions docs/changelog/next_release/189.feature.rst
@@ -0,0 +1 @@
Add logic for handling SFTP transfers