Skip to content

Commit

Permalink
Merge branch 'main' into brian/concurrent_cdk_non_incremental_substreams
Browse files Browse the repository at this point in the history
  • Loading branch information
brianjlai committed Dec 2, 2024
2 parents b23831b + 9587d4e commit 42e29b0
Show file tree
Hide file tree
Showing 289 changed files with 2,352 additions and 1,149 deletions.
46 changes: 25 additions & 21 deletions .github/workflows/connector-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,29 +74,34 @@ jobs:
cdk_extra: n/a
- connector: source-chargebee
cdk_extra: n/a
# Currently not passing CI (unrelated)
# - connector: source-zendesk-support
# cdk_extra: n/a
- connector: source-s3
cdk_extra: file-based
- connector: destination-pinecone
cdk_extra: vector-db-based
- connector: destination-motherduck
cdk_extra: sql
# ZenDesk currently failing (as of 2024-12-02)
# TODO: Re-enable once fixed
# - connector: source-zendesk-support
# cdk_extra: n/a
# TODO: These are manifest connectors and won't work as expected until we
# add `--use-local-cdk` support for manifest connectors.
- connector: source-the-guardian-api
cdk_extra: n/a
- connector: source-pokeapi
cdk_extra: n/a
# - connector: source-the-guardian-api
# cdk_extra: n/a
# - connector: source-pokeapi
# cdk_extra: n/a

name: "Check: '${{matrix.connector}}' (skip=${{needs.cdk_changes.outputs[matrix.cdk_extra] == 'false'}})"
steps:
- name: Abort if extra not changed (${{matrix.cdk_extra}})
id: no_changes
if: ${{ matrix.cdk_extra != 'n/a' && needs.cdk_changes.outputs[matrix.cdk_extra] == 'false' }}
if: ${{ needs.cdk_changes.outputs['src'] == 'false' || matrix.cdk_extra != 'n/a' && needs.cdk_changes.outputs[matrix.cdk_extra] == 'false' }}
run: |
echo "Aborting job as specified extra not changed: ${{matrix.cdk_extra}} = ${{ needs.cdk_changes.outputs[matrix.cdk_extra] }}"
echo "Aborting job."
echo "Source code changed: ${{ needs.cdk_changes.outputs['src'] }}"
if [ "${{ matrix.cdk_extra }}" != "n/a" ]; then
echo "Extra not changed: ${{ matrix.cdk_extra }} = ${{ needs.cdk_changes.outputs[matrix.cdk_extra] }}"
fi
echo "> Skipped '${{matrix.connector}}' (no relevant changes)" >> $GITHUB_STEP_SUMMARY
echo "status=cancelled" >> $GITHUB_OUTPUT
exit 0
Expand All @@ -112,8 +117,7 @@ jobs:
if: steps.no_changes.outputs.status != 'cancelled'
with:
repository: airbytehq/airbyte
# TODO: Revert to `master` after Airbyte CI released:
ref: aj/airbyte-ci/update-python-local-cdk-code
ref: master
path: airbyte
- name: Test Connector
if: steps.no_changes.outputs.status != 'cancelled'
Expand All @@ -133,16 +137,7 @@ jobs:
--skip-step qa_checks \
--skip-step connector_live_tests
# Upload the job output to the artifacts
- name: Upload Job Output
id: upload_job_output
if: always() && steps.no_changes.outputs.status != 'cancelled'
uses: actions/upload-artifact@v4
with:
name: ${{matrix.connector}}-job-output
path: airbyte/airbyte-ci/connectors/pipelines/pipeline_reports

- name: Evaluate Job Output
- name: Evaluate Test Output
if: always() && steps.no_changes.outputs.status != 'cancelled'
run: |
# save job output json file as ci step output
Expand All @@ -162,3 +157,12 @@ jobs:
echo "::error::Test failed for connector '${{ matrix.connector }}' on step '${failed_step}'. Check the logs for more details."
exit 1
fi
# Upload the job output to the artifacts
- name: Upload Job Output
id: upload_job_output
if: always() && steps.no_changes.outputs.status != 'cancelled'
uses: actions/upload-artifact@v4
with:
name: ${{matrix.connector}}-job-output
path: airbyte/airbyte-ci/connectors/pipelines/pipeline_reports
25 changes: 17 additions & 8 deletions .github/workflows/pytest_matrix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@ on:
- 'poetry.lock'
- 'pyproject.toml'
pull_request:
paths:
- 'airbyte_cdk/**'
- 'unit_tests/**'
- 'poetry.lock'
- 'pyproject.toml'

jobs:
pytest:
Expand Down Expand Up @@ -52,21 +47,35 @@ jobs:
# Common steps:
- name: Checkout code
uses: actions/checkout@v4
- id: changes
uses: dorny/paths-filter@v3.0.2
with:
filters: |
src:
- 'airbyte_cdk/**'
- 'unit_tests/**'
- 'bin/**'
- 'poetry.lock'
- 'pyproject.toml'
- name: Set up Poetry
uses: Gr1N/setup-poetry@v9
if: steps.changes.outputs.src == 'true'
with:
poetry-version: "1.7.1"
- name: Set up Python
uses: actions/setup-python@v5
if: steps.changes.outputs.src == 'true'
with:
python-version: ${{ matrix.python-version }}
cache: "poetry"
- name: Install dependencies
if: steps.changes.outputs.src == 'true'
run: poetry install --all-extras

# Job-specific step(s):
- name: Run Pytest
timeout-minutes: 60
if: steps.changes.outputs.src == 'true'
env:
GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
run: >
Expand All @@ -75,17 +84,17 @@ jobs:
-m "not linting and not super_slow and not flaky"
- name: Print Coverage Report
if: always()
if: always() && steps.changes.outputs.src == 'true'
run: poetry run coverage report

- name: Create Coverage Artifacts
if: always()
if: always() && steps.changes.outputs.src == 'true'
run: |
poetry run coverage html -d htmlcov
poetry run coverage xml -o htmlcov/coverage.xml
- name: Upload coverage to GitHub Artifacts
if: always()
if: always() && steps.changes.outputs.src == 'true'
uses: actions/upload-artifact@v4
with:
name: py${{ matrix.python-version }}-${{ matrix.os }}-test-coverage
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ dist
.venv
.pytest_cache
.idea
.vscode
**/__pycache__
17 changes: 12 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,19 @@ COPY dist/*.whl ./dist/
RUN poetry config virtualenvs.create false \
&& poetry install --only main --no-interaction --no-ansi || true

# Copy source code
COPY airbyte_cdk ./airbyte_cdk

# Build and install the package
RUN pip install dist/*.whl

# Recreate the original structure
RUN mkdir -p source_declarative_manifest \
&& echo 'from source_declarative_manifest.run import run\n\nif __name__ == "__main__":\n run()' > main.py \
&& touch source_declarative_manifest/__init__.py \
&& cp /usr/local/lib/python3.10/site-packages/airbyte_cdk/cli/source_declarative_manifest/_run.py source_declarative_manifest/run.py \
&& cp /usr/local/lib/python3.10/site-packages/airbyte_cdk/cli/source_declarative_manifest/spec.json source_declarative_manifest/

# Remove unnecessary build files
RUN rm -rf dist/ pyproject.toml poetry.lock README.md

# Set the entrypoint
ENV AIRBYTE_ENTRYPOINT="source-declarative-manifest"
ENTRYPOINT ["source-declarative-manifest"]
ENV AIRBYTE_ENTRYPOINT="python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@ Installing all extras is required to run the full suite of unit tests.

To see all available scripts, run `poetry run poe`.

#### Formatting the code

- Iterate on the CDK code locally
- Run `poetry run ruff format` to format your changes.

To see all available `ruff` options, run `poetry run ruff`.

##### Autogenerated files

Low-code CDK models are generated from `sources/declarative/declarative_component_schema.yaml`. If
Expand Down
3 changes: 2 additions & 1 deletion airbyte_cdk/cli/source_declarative_manifest/_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
from pathlib import Path
from typing import Any, cast

from orjson import orjson

from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
from airbyte_cdk.models import (
AirbyteErrorTraceMessage,
Expand All @@ -42,7 +44,6 @@
)
from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
from airbyte_cdk.sources.source import TState
from orjson import orjson


class SourceLocalYaml(YamlDeclarativeSource):
Expand Down
3 changes: 2 additions & 1 deletion airbyte_cdk/config_observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from copy import copy
from typing import Any, List, MutableMapping

from orjson import orjson

from airbyte_cdk.models import (
AirbyteControlConnectorConfigMessage,
AirbyteControlMessage,
Expand All @@ -18,7 +20,6 @@
OrchestratorType,
Type,
)
from orjson import orjson


class ObservedDict(dict): # type: ignore # disallow_any_generics is set to True, and dict is equivalent to dict[Any]
Expand Down
1 change: 1 addition & 0 deletions airbyte_cdk/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from typing import Any, Generic, Mapping, Optional, Protocol, TypeVar

import yaml

from airbyte_cdk.models import (
AirbyteConnectionStatus,
ConnectorSpecification,
Expand Down
2 changes: 1 addition & 1 deletion airbyte_cdk/connector_builder/connector_builder_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
AirbyteRecordMessage,
AirbyteStateMessage,
ConfiguredAirbyteCatalog,
Type,
)
from airbyte_cdk.models import Type
from airbyte_cdk.models import Type as MessageType
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
Expand Down
3 changes: 2 additions & 1 deletion airbyte_cdk/connector_builder/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import sys
from typing import Any, List, Mapping, Optional, Tuple

from orjson import orjson

from airbyte_cdk.connector import BaseConnector
from airbyte_cdk.connector_builder.connector_builder_handler import (
TestReadLimits,
Expand All @@ -25,7 +27,6 @@
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
from airbyte_cdk.sources.source import Source
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
from orjson import orjson


def get_config_and_catalog_from_args(
Expand Down
3 changes: 2 additions & 1 deletion airbyte_cdk/destinations/destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from abc import ABC, abstractmethod
from typing import Any, Iterable, List, Mapping

from orjson import orjson

from airbyte_cdk.connector import Connector
from airbyte_cdk.exception_handler import init_uncaught_exception_handler
from airbyte_cdk.models import (
Expand All @@ -20,7 +22,6 @@
)
from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
from orjson import orjson

logger = logging.getLogger("airbyte")

Expand Down
3 changes: 2 additions & 1 deletion airbyte_cdk/destinations/vector_db_based/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from typing import Any, Dict, List, Literal, Optional, Union

import dpath
from pydantic.v1 import BaseModel, Field

from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
from airbyte_cdk.utils.spec_schema_transformations import resolve_refs
from pydantic.v1 import BaseModel, Field


class SeparatorSplitterConfigModel(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
from typing import Any, Dict, List, Mapping, Optional, Tuple

import dpath
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
from langchain.utils import stringify_dict
from langchain_core.documents.base import Document

from airbyte_cdk.destinations.vector_db_based.config import (
ProcessingConfigModel,
SeparatorSplitterConfigModel,
Expand All @@ -21,9 +25,6 @@
DestinationSyncMode,
)
from airbyte_cdk.utils.traced_exception import AirbyteTracedException, FailureType
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
from langchain.utils import stringify_dict
from langchain_core.documents.base import Document

METADATA_STREAM_FIELD = "_ab_stream"
METADATA_RECORD_ID_FIELD = "_ab_record_id"
Expand Down
9 changes: 5 additions & 4 deletions airbyte_cdk/destinations/vector_db_based/embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
from dataclasses import dataclass
from typing import List, Optional, Union, cast

from langchain.embeddings.cohere import CohereEmbeddings
from langchain.embeddings.fake import FakeEmbeddings
from langchain.embeddings.localai import LocalAIEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings

from airbyte_cdk.destinations.vector_db_based.config import (
AzureOpenAIEmbeddingConfigModel,
CohereEmbeddingConfigModel,
Expand All @@ -19,10 +24,6 @@
from airbyte_cdk.destinations.vector_db_based.utils import create_chunks, format_exception
from airbyte_cdk.models import AirbyteRecordMessage
from airbyte_cdk.utils.traced_exception import AirbyteTracedException, FailureType
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.embeddings.fake import FakeEmbeddings
from langchain.embeddings.localai import LocalAIEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings


@dataclass
Expand Down
5 changes: 3 additions & 2 deletions airbyte_cdk/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
from urllib.parse import urlparse

import requests
from orjson import orjson
from requests import PreparedRequest, Response, Session

from airbyte_cdk.connector import TConfig
from airbyte_cdk.exception_handler import init_uncaught_exception_handler
from airbyte_cdk.logger import init_logger
Expand All @@ -38,8 +41,6 @@
from airbyte_cdk.utils.airbyte_secrets_utils import get_secrets, update_secrets
from airbyte_cdk.utils.constants import ENV_REQUEST_CACHE_PATH
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
from orjson import orjson
from requests import PreparedRequest, Response, Session

logger = init_logger("airbyte")

Expand Down
3 changes: 2 additions & 1 deletion airbyte_cdk/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import logging.config
from typing import Any, Callable, Mapping, Optional, Tuple

from orjson import orjson

from airbyte_cdk.models import (
AirbyteLogMessage,
AirbyteMessage,
Expand All @@ -15,7 +17,6 @@
Type,
)
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
from orjson import orjson

LOGGING_CONFIG = {
"version": 1,
Expand Down
2 changes: 2 additions & 0 deletions airbyte_cdk/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@
FailureType,
Level,
OAuthConfigSpecification,
OauthConnectorInputSpecification,
OrchestratorType,
State,
Status,
StreamDescriptor,
SyncMode,
Expand Down
3 changes: 2 additions & 1 deletion airbyte_cdk/models/airbyte_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
from dataclasses import InitVar, dataclass
from typing import Annotated, Any, Dict, List, Mapping, Optional, Union

from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
from airbyte_protocol_dataclasses.models import * # noqa: F403 # Allow '*'
from serpyco_rs.metadata import Alias

from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage

# ruff: noqa: F405 # ignore fuzzy import issues with 'import *'


Expand Down
Loading

0 comments on commit 42e29b0

Please sign in to comment.