Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

📝 Add docstrings to aj/feat/accept-components-text-input #218

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions airbyte_cdk/cli/source_declarative_manifest/_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,11 +155,25 @@ def handle_remote_manifest_command(args: list[str]) -> None:
def create_declarative_source(
args: list[str],
) -> ConcurrentDeclarativeSource: # type: ignore [type-arg]
"""Creates the source with the injected config.

This essentially does what other low-code sources do at build time, but at runtime,
with a user-provided manifest in the config. This better reflects what happens in the
connector builder.
"""
Create a declarative source with an injected manifest configuration.

This function dynamically creates a ConcurrentDeclarativeSource at runtime using a user-provided manifest, similar to how low-code sources are built. It validates the configuration and prepares the source for execution.

Parameters:
args (list[str]): Command-line arguments containing configuration, catalog, and state information.

Returns:
ConcurrentDeclarativeSource: A configured declarative source ready for sync operations.

Raises:
ValueError: If the configuration is invalid or missing required manifest information.
Exception: For any unexpected errors during source creation, with detailed error tracing.

Notes:
- Requires a configuration with an '__injected_declarative_manifest' key
- The manifest must be a dictionary
- Provides structured error reporting for configuration issues
"""
try:
config: Mapping[str, Any] | None
Expand All @@ -171,6 +185,12 @@ def create_declarative_source(
"Invalid config: `__injected_declarative_manifest` should be provided at the root "
f"of the config but config only has keys: {list(config.keys() if config else [])}"
)
if not isinstance(config["__injected_declarative_manifest"], dict):
raise ValueError(
"Invalid config: `__injected_declarative_manifest` should be a dictionary, "
f"but got type: {type(config['__injected_declarative_manifest'])}"
)

return ConcurrentDeclarativeSource(
config=config,
catalog=catalog,
Expand Down
138 changes: 125 additions & 13 deletions airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from __future__ import annotations

import datetime
import importlib
import inspect
import re
import sys
import types
from functools import partial
from typing import (
Any,
Expand Down Expand Up @@ -980,14 +981,32 @@ def create_cursor_pagination(

def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any:
"""
Generically creates a custom component based on the model type and a class_name reference to the custom Python class being
instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor
:param model: The Pydantic model of the custom component being created
:param config: The custom defined connector config
:return: The declarative component built from the Pydantic model to be used at runtime
Create a custom component from a Pydantic model with dynamic class instantiation.

This method dynamically creates a custom component by loading a class from a specified module and instantiating it with appropriate arguments. It handles complex scenarios such as nested components, type inference, and argument passing.

Parameters:
model (Any): A Pydantic model representing the custom component configuration.
config (Config): The connector configuration used for module and component resolution.
**kwargs (Any): Additional keyword arguments to override or supplement model arguments.

Returns:
Any: An instantiated custom component with resolved nested components and configurations.

Raises:
ValueError: If the component class cannot be loaded or instantiated.
TypeError: If arguments do not match the component's constructor signature.

Notes:
- Supports nested component creation
- Performs type inference for component fields
- Handles both dictionary and list-based component configurations
- Prioritizes kwargs over model arguments in case of field collisions
"""

custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
custom_component_class = self._get_class_from_fully_qualified_class_name(
full_qualified_class_name=model.class_name,
components_module=self._get_components_module_object(config=config),
)
component_fields = get_type_hints(custom_component_class)
model_args = model.dict()
model_args["config"] = config
Expand Down Expand Up @@ -1040,17 +1059,110 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) ->
return custom_component_class(**kwargs)

@staticmethod
def _get_class_from_fully_qualified_class_name(full_qualified_class_name: str) -> Any:
def _get_components_module_object(
config: Config,
) -> types.ModuleType:
"""
Get a components module object based on the provided configuration.

This method dynamically creates a module for custom Python components defined in the configuration. It ensures that custom components are defined in a module named 'components' and allows runtime module creation and execution.

Parameters:
config (Config): A configuration object containing the custom components definition.

Returns:
types.ModuleType: A dynamically created module containing the custom components.

Raises:
ValueError: If no custom components are provided or if the components are not defined in a module named 'components'.

Notes:
- Uses the special key '__injected_components_py' to retrieve custom component code
- Creates a new module dynamically using types.ModuleType
- Executes the provided Python code within the new module's namespace
- Registers the module in sys.modules for future imports
"""
INJECTED_COMPONENTS_PY = "__injected_components_py"
COMPONENTS_MODULE_NAME = "components"

components_module: types.ModuleType
if not INJECTED_COMPONENTS_PY in config:
raise ValueError(
"Custom components must be defined in a module named `components`. Please provide a custom components module."
)

# Create a new module object and execute the provided Python code text within it
components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME)
python_text = config[INJECTED_COMPONENTS_PY]
exec(python_text, components_module.__dict__)
sys.modules[COMPONENTS_MODULE_NAME] = components_module
return components_module

@staticmethod
def _get_class_from_fully_qualified_class_name(
full_qualified_class_name: str,
components_module: types.ModuleType,
) -> Any:
"""
Retrieve a class from its fully qualified name within a predefined components module.

Parameters:
full_qualified_class_name (str): The complete dot-separated path to the class (e.g., "source_declarative_manifest.components.ClassName").
components_module (types.ModuleType): The pre-parsed module containing custom components.

Returns:
Any: The requested class object.

Raises:
ValueError: If the class cannot be loaded or does not meet module naming conventions.
- Raised when the module is not named "components"
- Raised when the full module path is not "source_declarative_manifest.components"
- Raised when the specific class cannot be found in the module

Notes:
- Enforces strict naming conventions for custom component modules
- Provides detailed error messages for debugging component loading issues
"""
split = full_qualified_class_name.split(".")
module = ".".join(split[:-1])
module_name_full = ".".join(split[:-1])
module_name = split[-2]
class_name = split[-1]

if module_name != "components":
raise ValueError(
"Custom components must be defined in a module named "
f"`components`. Found `{module_name}` instead."
)
if module_name_full != "source_declarative_manifest.components":
raise ValueError(
"Custom components must be defined in a module named "
f"`source_declarative_manifest.components`. Found `{module_name_full}` instead."
)

try:
return getattr(importlib.import_module(module), class_name)
except AttributeError:
raise ValueError(f"Could not load class {full_qualified_class_name}.")
return getattr(components_module, class_name)
except (AttributeError, ModuleNotFoundError) as e:
raise ValueError(f"Could not load class {full_qualified_class_name}.") from e

@staticmethod
def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
"""
Derive the component type name from type hints by unwrapping nested generic types.

This method extracts the underlying type from potentially nested generic type hints,
such as List[T], Optional[List[T]], etc., and returns the type name if it's a non-builtin type.

Parameters:
field_type (Any): The type hint to analyze for component type extraction.

Returns:
Optional[str]: The name of the underlying type if it's a non-builtin type, otherwise None.

Examples:
- List[str] returns None
- List[CustomType] returns "CustomType"
- Optional[List[CustomType]] returns "CustomType"
"""
interface = field_type
while True:
origin = get_origin(interface)
Expand Down
79 changes: 73 additions & 6 deletions airbyte_cdk/test/utils/manifest_only_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@


import importlib.util
import types
from pathlib import Path
from types import ModuleType
from typing import Optional

import pytest

Expand All @@ -30,10 +30,29 @@ def connector_dir(request: pytest.FixtureRequest) -> Path:


@pytest.fixture(scope="session")
def components_module(connector_dir: Path) -> Optional[ModuleType]:
"""Load and return the components module from the connector directory.

This assumes the components module is located at <connector_dir>/components.py.
def components_module(connector_dir: Path) -> ModuleType | None:
"""
Load and return the components module from the connector directory.

This function attempts to load the 'components.py' module from the specified connector directory. It handles various potential failure scenarios during module loading.

Parameters:
connector_dir (Path): The root directory of the connector containing the components module.

Returns:
ModuleType | None: The loaded components module if successful, or None if:
- The components.py file does not exist
- The module specification cannot be created
- The module loader is unavailable

Raises:
No explicit exceptions are raised; returns None on failure.

Example:
components = components_module(Path('/path/to/connector'))
if components:
# Use the loaded module
some_component = components.SomeComponent()
"""
components_path = connector_dir / "components.py"
if not components_path.exists():
Expand All @@ -51,9 +70,57 @@ def components_module(connector_dir: Path) -> Optional[ModuleType]:
return components_module


def components_module_from_string(components_py_text: str) -> ModuleType:
    """
    Build a module named `components` by executing Python source text.

    Parameters:
        components_py_text (str): Python source code to execute as the module body.

    Returns:
        ModuleType: A new module object whose namespace holds everything the
            executed code defined. This function never returns None.

    Raises:
        Exception: Any error raised while compiling or executing the text
            (e.g. SyntaxError) propagates to the caller unchanged.

    Example:
        components_code = '''
        def sample_component():
            return "Hello, World!"
        '''
        module = components_module_from_string(textwrap.dedent(components_code))
        result = module.sample_component()  # Returns "Hello, World!"
    """
    module_name = "components"

    # Create a new, empty module object
    components_module = types.ModuleType(name=module_name)

    # Execute the module text in the module's namespace
    exec(components_py_text, components_module.__dict__)

    # The module is only handed back to the caller; it is not registered in
    # `sys.modules`, so `import components` will not find it.
    return components_module


@pytest.fixture(scope="session")
def manifest_path(connector_dir: Path) -> Path:
"""Return the path to the connector's manifest file."""
"""
Return the path to the connector's manifest file.

Parameters:
connector_dir (Path): The root directory of the connector.

Returns:
Path: The absolute path to the manifest.yaml file.

Raises:
FileNotFoundError: If the manifest.yaml file does not exist in the specified connector directory.

Example:
manifest_file = manifest_path(Path('/path/to/connector'))
# Returns Path('/path/to/connector/manifest.yaml')
"""
path = connector_dir / "manifest.yaml"
if not path.exists():
raise FileNotFoundError(f"Manifest file not found at {path}")
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ select = ["I"]
[tool.poe.tasks]
# Installation
install = { shell = "poetry install --all-extras" }
lock = { shell = "poetry lock --no-update" }

# Build tasks
assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."}
Expand Down
44 changes: 43 additions & 1 deletion unit_tests/source_declarative_manifest/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,55 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
#

import hashlib
import os
from typing import Literal

import pytest
import yaml


def get_fixture_path(file_name):
def hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> str:
    """
    Compute the hex digest of the input text using the specified hashing algorithm.

    Parameters:
        input_text (str): The text to be hashed. It is encoded as UTF-8 before hashing.
        hash_type (Literal["md5", "sha256"], optional): The hashing algorithm to use.
            Defaults to "md5".

    Returns:
        str: The hexadecimal digest of the hashed input text.

    Raises:
        KeyError: If `hash_type` is not one of the supported algorithm names.

    Examples:
        >>> hash_text("hello world")
        '5eb63bbbe01eeed093cb22bb8f5acdc3'
        >>> hash_text("hello world", hash_type="sha256")
        'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9'
    """
    hashers = {
        "md5": hashlib.md5,
        "sha256": hashlib.sha256,
    }
    try:
        hasher_factory = hashers[hash_type]
    except KeyError:
        # Same exception type as a plain dict lookup, but with a message that says
        # what went wrong and which values are accepted.
        raise KeyError(
            f"Unsupported hash type {hash_type!r}; expected one of {sorted(hashers)}"
        ) from None
    return hasher_factory(input_text.encode("utf-8")).hexdigest()


def get_fixture_path(file_name: str) -> str:
    """
    Build the path to a fixture file that lives in the same directory as this module.

    The file is not required to exist; the path is only constructed, not checked.

    Parameters:
        file_name (str): The name (or relative path) of the fixture file.

    Returns:
        str: The directory of this module (taken from `__file__`) joined with `file_name`.

    Example:
        >>> get_fixture_path('config.json')
        '/path/to/this/directory/config.json'
    """
    fixtures_dir = os.path.dirname(__file__)
    return os.path.join(fixtures_dir, file_name)


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
secrets*
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# The Guardian API Tests

For these tests to work, you'll need to create a `secrets.yaml` file in this directory that looks like this:

```yml
api_key: ******
```

The `.gitignore` file in this directory should ensure your file is not committed to git, but it's good practice to double-check. 👀
Loading
Loading