From 99739556c6e607b5254b1ab14ecc0448db550623 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 13 Dec 2024 15:42:38 -0800 Subject: [PATCH 01/42] skeleton: components module from dynamic text input --- .../parsers/model_to_component_factory.py | 55 ++++++++++++++++--- .../test/utils/manifest_only_fixtures.py | 20 +++++++ 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 215d6fff9..65b1e0ea2 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -8,6 +8,7 @@ import importlib import inspect import re +import types from functools import partial from typing import ( Any, @@ -986,8 +987,25 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> :param config: The custom defined connector config :return: The declarative component built from the Pydantic model to be used at runtime """ + INJECTED_COMPONENTS_PY = "__injected_components_py" - custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) + components_module: Optional[types.ModuleType] = None + if INJECTED_COMPONENTS_PY in config: + # declares a dynamic module `components` from provided text + python_text = config[INJECTED_COMPONENTS_PY] + module_name = "components" + + # Create a new module object + components_module = types.ModuleType(module_name) + # Execute the module text in the module's namespace + exec(python_text, components_module.__dict__) + # Skip insert the module into sys.modules because we pass by reference below + # sys.modules[module_name] = components_module + + custom_component_class = self._get_class_from_fully_qualified_class_name( + full_qualified_class_name=model.class_name, + components_module=components_module, + ) component_fields = get_type_hints(custom_component_class) model_args = model.dict() model_args["config"] = config @@ -1039,15 +1057,38 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> } return custom_component_class(**kwargs) - @staticmethod - def _get_class_from_fully_qualified_class_name(full_qualified_class_name: str) -> Any: + def _get_class_from_fully_qualified_class_name( + full_qualified_class_name: str, + components_module: Optional[types.ModuleType] = None, + ) -> Any: + """ + Get a class from its fully qualified name, optionally using a pre-parsed module. + + Args: + full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). + components_module (Optional[ModuleType]): An optional pre-parsed module. + + Returns: + Any: The class object. + + Raises: + ValueError: If the class cannot be loaded. + """ split = full_qualified_class_name.split(".") - module = ".".join(split[:-1]) + module_name = ".".join(split[:-1]) class_name = split[-1] + try: - return getattr(importlib.import_module(module), class_name) - except AttributeError: - raise ValueError(f"Could not load class {full_qualified_class_name}.") + # Use the provided module if available and if module name matches + if components_module and components_module.__name__ == module_name: + return getattr(components_module, class_name) + + # Fallback to importing the module dynamically + module = importlib.import_module(module_name) + return getattr(module, class_name) + + except (AttributeError, ModuleNotFoundError) as e: + raise ValueError(f"Could not load class {full_qualified_class_name}.") from e @staticmethod def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: diff --git a/airbyte_cdk/test/utils/manifest_only_fixtures.py b/airbyte_cdk/test/utils/manifest_only_fixtures.py index 47620e7c1..01b2b393d 100644 --- a/airbyte_cdk/test/utils/manifest_only_fixtures.py +++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py @@ -2,6 +2,7 @@ import importlib.util +import types from pathlib import Path from types import ModuleType from typing import Optional @@ -51,6 +52,25 @@ def components_module(connector_dir: Path) -> Optional[ModuleType]: return components_module +def components_module_from_string(components_py_text: str) -> Optional[ModuleType]: + """Load and return the components module from a provided string containing the python code. + + This assumes the components module is located at /components.py. + + TODO: Make new unit test to leverage this fixture + """ + module_name = "components" + + # Create a new module object + components_module = types.ModuleType(name=module_name) + + # Execute the module text in the module's namespace + exec(components_py_text, components_module.__dict__) + + # Now you can import and use the module + return components_module + + @pytest.fixture(scope="session") def manifest_path(connector_dir: Path) -> Path: """Return the path to the connector's manifest file.""" From 8309f7910c223568a8b516ae1336f9a20a80bd9e Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 16 Dec 2024 08:52:17 -0800 Subject: [PATCH 02/42] refactor / clean up --- .../parsers/model_to_component_factory.py | 57 +++++++++++-------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 65b1e0ea2..8d4ded84f 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -987,21 +987,7 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> :param config: The custom defined connector config :return: The declarative component built from the Pydantic model to be used at runtime """ - INJECTED_COMPONENTS_PY = "__injected_components_py" - - components_module: Optional[types.ModuleType] = None - if INJECTED_COMPONENTS_PY in config: - # declares a dynamic module `components` from provided text - python_text = config[INJECTED_COMPONENTS_PY] - module_name = "components" - - # Create a new module object - components_module = types.ModuleType(module_name) - # Execute the module text in the module's namespace - exec(python_text, components_module.__dict__) - # Skip insert the module into sys.modules because we pass by reference below - # sys.modules[module_name] = components_module - + components_module = self._get_components_module_object(config=config) custom_component_class = self._get_class_from_fully_qualified_class_name( full_qualified_class_name=model.class_name, components_module=components_module, @@ -1057,9 +1043,31 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> } return custom_component_class(**kwargs) + def _get_components_module_object( + config: Config, + ) -> None: + """Get a components module object based on the provided config. + + If custom python components is provided, this will be loaded. Otherwise, we will + attempt to load from the `components` module already imported. + """ + INJECTED_COMPONENTS_PY = "__injected_components_py" + COMPONENTS_MODULE_NAME = "components" + + components_module: types.ModuleType + if INJECTED_COMPONENTS_PY in config: + # Create a new module object and execute the provided Python code text within it + components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME) + python_text = config[INJECTED_COMPONENTS_PY] + exec(python_text, components_module.__dict__) + # Skip insert the module into sys.modules because we pass by reference below + # sys.modules[module_name] = components_module + else: + components_module = importlib.import_module(name=COMPONENTS_MODULE_NAME) + def _get_class_from_fully_qualified_class_name( full_qualified_class_name: str, - components_module: Optional[types.ModuleType] = None, + components_module: types.ModuleType, ) -> Any: """ Get a class from its fully qualified name, optionally using a pre-parsed module. @@ -1075,18 +1083,17 @@ def _get_class_from_fully_qualified_class_name( ValueError: If the class cannot be loaded. """ split = full_qualified_class_name.split(".") - module_name = ".".join(split[:-1]) + module_name_full = ".".join(split[:-1]) + module_name = split[:-2] class_name = split[-1] - try: - # Use the provided module if available and if module name matches - if components_module and components_module.__name__ == module_name: - return getattr(components_module, class_name) - - # Fallback to importing the module dynamically - module = importlib.import_module(module_name) - return getattr(module, class_name) + if module_name != "components": + raise ValueError( + f"Custom components must be defined in a module named `components`. Found {module_name} instead." + ) + try: + return getattr(components_module, class_name) except (AttributeError, ModuleNotFoundError) as e: raise ValueError(f"Could not load class {full_qualified_class_name}.") from e From 399dd7ba5a36d93891ff1592158b0ef46a87716c Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 18 Dec 2024 16:07:47 -0800 Subject: [PATCH 03/42] add test resource for py_components unit test --- .../resources/valid_py_components_code.py | 15 + .../resources/valid_py_components_config.json | 3 + .../valid_py_components_manifest.yaml | 1368 +++++++++++++++++ 3 files changed, 1386 insertions(+) create mode 100644 unit_tests/source_declarative_manifest/resources/valid_py_components_code.py create mode 100644 unit_tests/source_declarative_manifest/resources/valid_py_components_config.json create mode 100644 unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_code.py b/unit_tests/source_declarative_manifest/resources/valid_py_components_code.py new file mode 100644 index 000000000..06c95e78a --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/valid_py_components_code.py @@ -0,0 +1,15 @@ +"""Custom Python components.py file for testing. + +This file is mostly a no-op (for now) but should trigger a failure if code file is not +correctly parsed. +""" + +from airbyte_cdk.sources.declarative.models import DeclarativeStream + + +class CustomDeclarativeStream(DeclarativeStream): + """Custom declarative stream class. + + We don't change anything from the base class, but this should still be enough to confirm + that the components.py file is correctly parsed. + """ diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_config.json b/unit_tests/source_declarative_manifest/resources/valid_py_components_config.json new file mode 100644 index 000000000..214fc684f --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/valid_py_components_config.json @@ -0,0 +1,3 @@ +{ + "pokemon_name": "blastoise" +} diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml b/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml new file mode 100644 index 000000000..bf15e3138 --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml @@ -0,0 +1,1368 @@ +version: 3.9.6 + +type: DeclarativeSource + +description: This is just a test, with custom Python components enabled. Copied from Pokemon example. + +check: + type: CheckStream + stream_names: + - pokemon + +definitions: + streams: + pokemon: + type: components.CustomDeclarativeStream + name: pokemon + retriever: + type: SimpleRetriever + requester: + $ref: "#/definitions/base_requester" + path: /{{config['pokemon_name']}} + http_method: GET + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: [] + primary_key: + - id + schema_loader: + type: InlineSchemaLoader + schema: + $ref: "#/schemas/pokemon" + base_requester: + type: HttpRequester + url_base: https://pokeapi.co/api/v2/pokemon + +streams: + - $ref: "#/definitions/streams/pokemon" + +spec: + type: Spec + connection_specification: + type: object + $schema: http://json-schema.org/draft-07/schema# + required: + - pokemon_name + properties: + pokemon_name: + type: string + description: Pokemon requested from the API. + enum: + - bulbasaur + - ivysaur + - venusaur + - charmander + - charmeleon + - charizard + - squirtle + - wartortle + - blastoise + - caterpie + - metapod + - butterfree + - weedle + - kakuna + - beedrill + - pidgey + - pidgeotto + - pidgeot + - rattata + - raticate + - spearow + - fearow + - ekans + - arbok + - pikachu + - raichu + - sandshrew + - sandslash + - nidoranf + - nidorina + - nidoqueen + - nidoranm + - nidorino + - nidoking + - clefairy + - clefable + - vulpix + - ninetales + - jigglypuff + - wigglytuff + - zubat + - golbat + - oddish + - gloom + - vileplume + - paras + - parasect + - venonat + - venomoth + - diglett + - dugtrio + - meowth + - persian + - psyduck + - golduck + - mankey + - primeape + - growlithe + - arcanine + - poliwag + - poliwhirl + - poliwrath + - abra + - kadabra + - alakazam + - machop + - machoke + - machamp + - bellsprout + - weepinbell + - victreebel + - tentacool + - tentacruel + - geodude + - graveler + - golem + - ponyta + - rapidash + - slowpoke + - slowbro + - magnemite + - magneton + - farfetchd + - doduo + - dodrio + - seel + - dewgong + - grimer + - muk + - shellder + - cloyster + - gastly + - haunter + - gengar + - onix + - drowzee + - hypno + - krabby + - kingler + - voltorb + - electrode + - exeggcute + - exeggutor + - cubone + - marowak + - hitmonlee + - hitmonchan + - lickitung + - koffing + - weezing + - rhyhorn + - rhydon + - chansey + - tangela + - kangaskhan + - horsea + - seadra + - goldeen + - seaking + - staryu + - starmie + - mrmime + - scyther + - jynx + - electabuzz + - magmar + - pinsir + - tauros + - magikarp + - gyarados + - lapras + - ditto + - eevee + - vaporeon + - jolteon + - flareon + - porygon + - omanyte + - omastar + - kabuto + - kabutops + - aerodactyl + - snorlax + - articuno + - zapdos + - moltres + - dratini + - dragonair + - dragonite + - mewtwo + - mew + - chikorita + - bayleef + - meganium + - cyndaquil + - quilava + - typhlosion + - totodile + - croconaw + - feraligatr + - sentret + - furret + - hoothoot + - noctowl + - ledyba + - ledian + - spinarak + - ariados + - crobat + - chinchou + - lanturn + - pichu + - cleffa + - igglybuff + - togepi + - togetic + - natu + - xatu + - mareep + - flaaffy + - ampharos + - bellossom + - marill + - azumarill + - sudowoodo + - politoed + - hoppip + - skiploom + - jumpluff + - aipom + - sunkern + - sunflora + - yanma + - wooper + - quagsire + - espeon + - umbreon + - murkrow + - slowking + - misdreavus + - unown + - wobbuffet + - girafarig + - pineco + - forretress + - dunsparce + - gligar + - steelix + - snubbull + - granbull + - qwilfish + - scizor + - shuckle + - heracross + - sneasel + - teddiursa + - ursaring + - slugma + - magcargo + - swinub + - piloswine + - corsola + - remoraid + - octillery + - delibird + - mantine + - skarmory + - houndour + - houndoom + - kingdra + - phanpy + - donphan + - porygon2 + - stantler + - smeargle + - tyrogue + - hitmontop + - smoochum + - elekid + - magby + - miltank + - blissey + - raikou + - entei + - suicune + - larvitar + - pupitar + - tyranitar + - lugia + - ho-oh + - celebi + - treecko + - grovyle + - sceptile + - torchic + - combusken + - blaziken + - mudkip + - marshtomp + - swampert + - poochyena + - mightyena + - zigzagoon + - linoone + - wurmple + - silcoon + - beautifly + - cascoon + - dustox + - lotad + - lombre + - ludicolo + - seedot + - nuzleaf + - shiftry + - taillow + - swellow + - wingull + - pelipper + - ralts + - kirlia + - gardevoir + - surskit + - masquerain + - shroomish + - breloom + - slakoth + - vigoroth + - slaking + - nincada + - ninjask + - shedinja + - whismur + - loudred + - exploud + - makuhita + - hariyama + - azurill + - nosepass + - skitty + - delcatty + - sableye + - mawile + - aron + - lairon + - aggron + - meditite + - medicham + - electrike + - manectric + - plusle + - minun + - volbeat + - illumise + - roselia + - gulpin + - swalot + - carvanha + - sharpedo + - wailmer + - wailord + - numel + - camerupt + - torkoal + - spoink + - grumpig + - spinda + - trapinch + - vibrava + - flygon + - cacnea + - cacturne + - swablu + - altaria + - zangoose + - seviper + - lunatone + - solrock + - barboach + - whiscash + - corphish + - crawdaunt + - baltoy + - claydol + - lileep + - cradily + - anorith + - armaldo + - feebas + - milotic + - castform + - kecleon + - shuppet + - banette + - duskull + - dusclops + - tropius + - chimecho + - absol + - wynaut + - snorunt + - glalie + - spheal + - sealeo + - walrein + - clamperl + - huntail + - gorebyss + - relicanth + - luvdisc + - bagon + - shelgon + - salamence + - beldum + - metang + - metagross + - regirock + - regice + - registeel + - latias + - latios + - kyogre + - groudon + - rayquaza + - jirachi + - deoxys + - turtwig + - grotle + - torterra + - chimchar + - monferno + - infernape + - piplup + - prinplup + - empoleon + - starly + - staravia + - staraptor + - bidoof + - bibarel + - kricketot + - kricketune + - shinx + - luxio + - luxray + - budew + - roserade + - cranidos + - rampardos + - shieldon + - bastiodon + - burmy + - wormadam + - mothim + - combee + - vespiquen + - pachirisu + - buizel + - floatzel + - cherubi + - cherrim + - shellos + - gastrodon + - ambipom + - drifloon + - drifblim + - buneary + - lopunny + - mismagius + - honchkrow + - glameow + - purugly + - chingling + - stunky + - skuntank + - bronzor + - bronzong + - bonsly + - mimejr + - happiny + - chatot + - spiritomb + - gible + - gabite + - garchomp + - munchlax + - riolu + - lucario + - hippopotas + - hippowdon + - skorupi + - drapion + - croagunk + - toxicroak + - carnivine + - finneon + - lumineon + - mantyke + - snover + - abomasnow + - weavile + - magnezone + - lickilicky + - rhyperior + - tangrowth + - electivire + - magmortar + - togekiss + - yanmega + - leafeon + - glaceon + - gliscor + - mamoswine + - porygon-z + - gallade + - probopass + - dusknoir + - froslass + - rotom + - uxie + - mesprit + - azelf + - dialga + - palkia + - heatran + - regigigas + - giratina + - cresselia + - phione + - manaphy + - darkrai + - shaymin + - arceus + - victini + - snivy + - servine + - serperior + - tepig + - pignite + - emboar + - oshawott + - dewott + - samurott + - patrat + - watchog + - lillipup + - herdier + - stoutland + - purrloin + - liepard + - pansage + - simisage + - pansear + - simisear + - panpour + - simipour + - munna + - musharna + - pidove + - tranquill + - unfezant + - blitzle + - zebstrika + - roggenrola + - boldore + - gigalith + - woobat + - swoobat + - drilbur + - excadrill + - audino + - timburr + - gurdurr + - conkeldurr + - tympole + - palpitoad + - seismitoad + - throh + - sawk + - sewaddle + - swadloon + - leavanny + - venipede + - whirlipede + - scolipede + - cottonee + - whimsicott + - petilil + - lilligant + - basculin + - sandile + - krokorok + - krookodile + - darumaka + - darmanitan + - maractus + - dwebble + - crustle + - scraggy + - scrafty + - sigilyph + - yamask + - cofagrigus + - tirtouga + - carracosta + - archen + - archeops + - trubbish + - garbodor + - zorua + - zoroark + - minccino + - cinccino + - gothita + - gothorita + - gothitelle + - solosis + - duosion + - reuniclus + - ducklett + - swanna + - vanillite + - vanillish + - vanilluxe + - deerling + - sawsbuck + - emolga + - karrablast + - escavalier + - foongus + - amoonguss + - frillish + - jellicent + - alomomola + - joltik + - galvantula + - ferroseed + - ferrothorn + - klink + - klang + - klinklang + - tynamo + - eelektrik + - eelektross + - elgyem + - beheeyem + - litwick + - lampent + - chandelure + - axew + - fraxure + - haxorus + - cubchoo + - beartic + - cryogonal + - shelmet + - accelgor + - stunfisk + - mienfoo + - mienshao + - druddigon + - golett + - golurk + - pawniard + - bisharp + - bouffalant + - rufflet + - braviary + - vullaby + - mandibuzz + - heatmor + - durant + - deino + - zweilous + - hydreigon + - larvesta + - volcarona + - cobalion + - terrakion + - virizion + - tornadus + - thundurus + - reshiram + - zekrom + - landorus + - kyurem + - keldeo + - meloetta + - genesect + - chespin + - quilladin + - chesnaught + - fennekin + - braixen + - delphox + - froakie + - frogadier + - greninja + - bunnelby + - diggersby + - fletchling + - fletchinder + - talonflame + - scatterbug + - spewpa + - vivillon + - litleo + - pyroar + - flabebe + - floette + - florges + - skiddo + - gogoat + - pancham + - pangoro + - furfrou + - espurr + - meowstic + - honedge + - doublade + - aegislash + - spritzee + - aromatisse + - swirlix + - slurpuff + - inkay + - malamar + - binacle + - barbaracle + - skrelp + - dragalge + - clauncher + - clawitzer + - helioptile + - heliolisk + - tyrunt + - tyrantrum + - amaura + - aurorus + - sylveon + - hawlucha + - dedenne + - carbink + - goomy + - sliggoo + - goodra + - klefki + - phantump + - trevenant + - pumpkaboo + - gourgeist + - bergmite + - avalugg + - noibat + - noivern + - xerneas + - yveltal + - zygarde + - diancie + - hoopa + - volcanion + - rowlet + - dartrix + - decidueye + - litten + - torracat + - incineroar + - popplio + - brionne + - primarina + - pikipek + - trumbeak + - toucannon + - yungoos + - gumshoos + - grubbin + - charjabug + - vikavolt + - crabrawler + - crabominable + - oricorio + - cutiefly + - ribombee + - rockruff + - lycanroc + - wishiwashi + - mareanie + - toxapex + - mudbray + - mudsdale + - dewpider + - araquanid + - fomantis + - lurantis + - morelull + - shiinotic + - salandit + - salazzle + - stufful + - bewear + - bounsweet + - steenee + - tsareena + - comfey + - oranguru + - passimian + - wimpod + - golisopod + - sandygast + - palossand + - pyukumuku + - typenull + - silvally + - minior + - komala + - turtonator + - togedemaru + - mimikyu + - bruxish + - drampa + - dhelmise + - jangmo-o + - hakamo-o + - kommo-o + - tapukoko + - tapulele + - tapubulu + - tapufini + - cosmog + - cosmoem + - solgaleo + - lunala + - nihilego + - buzzwole + - pheromosa + - xurkitree + - celesteela + - kartana + - guzzlord + - necrozma + - magearna + - marshadow + - poipole + - naganadel + - stakataka + - blacephalon + - zeraora + - meltan + - melmetal + - grookey + - thwackey + - rillaboom + - scorbunny + - raboot + - cinderace + - sobble + - drizzile + - inteleon + - skwovet + - greedent + - rookidee + - corvisquire + - corviknight + - blipbug + - dottler + - orbeetle + - nickit + - thievul + - gossifleur + - eldegoss + - wooloo + - dubwool + - chewtle + - drednaw + - yamper + - boltund + - rolycoly + - carkol + - coalossal + - applin + - flapple + - appletun + - silicobra + - sandaconda + - cramorant + - arrokuda + - barraskewda + - toxel + - toxtricity + - sizzlipede + - centiskorch + - clobbopus + - grapploct + - sinistea + - polteageist + - hatenna + - hattrem + - hatterene + - impidimp + - morgrem + - grimmsnarl + - obstagoon + - perrserker + - cursola + - sirfetchd + - mrrime + - runerigus + - milcery + - alcremie + - falinks + - pincurchin + - snom + - frosmoth + - stonjourner + - eiscue + - indeedee + - morpeko + - cufant + - copperajah + - dracozolt + - arctozolt + - dracovish + - arctovish + - duraludon + - dreepy + - drakloak + - dragapult + - zacian + - zamazenta + - eternatus + - kubfu + - urshifu + - zarude + - regieleki + - regidrago + - glastrier + - spectrier + - calyrex + order: 0 + title: Pokemon Name + pattern: ^[a-z0-9_\-]+$ + examples: + - ditto + - luxray + - snorlax + additionalProperties: true + +metadata: + testedStreams: + pokemon: + hasRecords: true + streamHash: f619395f8c7a553f51cec2a7274a4ce517ab46c8 + hasResponse: true + primaryKeysAreUnique: true + primaryKeysArePresent: true + responsesAreSuccessful: true + autoImportSchema: + pokemon: false + +schemas: + pokemon: + type: object + $schema: http://json-schema.org/draft-07/schema# + properties: + id: + type: + - "null" + - integer + name: + type: + - "null" + - string + forms: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + moves: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + move: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + version_group_details: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + version_group: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + level_learned_at: + type: + - "null" + - integer + move_learn_method: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + additionalProperties: true + additionalProperties: true + order: + type: + - "null" + - integer + stats: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + stat: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + effort: + type: + - "null" + - integer + base_stat: + type: + - "null" + - integer + additionalProperties: true + types: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + type: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + slot: + type: + - "null" + - integer + additionalProperties: true + height: + type: + - "null" + - integer + weight: + type: + - "null" + - integer + species: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + sprites: + type: + - "null" + - object + properties: + back_shiny: + type: + - "null" + - string + back_female: + type: + - "null" + - string + front_shiny: + type: + - "null" + - string + back_default: + type: + - "null" + - string + front_female: + type: + - "null" + - string + front_default: + type: + - "null" + - string + back_shiny_female: + type: + - "null" + - string + front_shiny_female: + type: + - "null" + - string + additionalProperties: true + abilities: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + slot: + type: + - "null" + - integer + ability: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + is_hidden: + type: + - "null" + - boolean + additionalProperties: true + held_items: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + item: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + version_details: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + version: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + rarity: + type: + - "null" + - integer + additionalProperties: true + additionalProperties: true + is_default: + type: + - "null" + - boolean + past_types: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + types: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + type: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + slot: + type: + - "null" + - integer + additionalProperties: true + generation: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + additionalProperties: true + game_indices: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + version: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + game_index: + type: + - "null" + - integer + additionalProperties: true + base_experience: + type: + - "null" + - integer + location_area_encounters: + type: + - "null" + - string + additionalProperties: true From 9115757b137524cc33df870e756ed26ed2f2fa61 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 18 Dec 2024 16:29:17 -0800 Subject: [PATCH 04/42] add fixture for custom py components scenario --- .../source_declarative_manifest/conftest.py | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/unit_tests/source_declarative_manifest/conftest.py b/unit_tests/source_declarative_manifest/conftest.py index 3d61e65e8..a2598822d 100644 --- a/unit_tests/source_declarative_manifest/conftest.py +++ b/unit_tests/source_declarative_manifest/conftest.py @@ -2,13 +2,26 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # +import hashlib import os +from pathlib import Path +from typing import Any, Literal import pytest import yaml -def get_fixture_path(file_name): +def hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> str: + hashers = { + "md5": hashlib.md5, + "sha256": hashlib.sha256, + } + hash_object = hashers[hash_type]() + hash_object.update(input_text.encode()) + return hash_object.hexdigest() + + +def get_fixture_path(file_name) -> str: return os.path.join(os.path.dirname(__file__), file_name) @@ -52,3 +65,21 @@ def valid_local_config_file(): @pytest.fixture def invalid_local_config_file(): return get_fixture_path("resources/invalid_local_pokeapi_config.json") + + +@pytest.fixture +def py_components_config_dict() -> dict[str, Any]: + manifest_dict = yaml.safe_load( + get_fixture_path("resources/valid_py_components.yaml"), + ) + custom_py_code_path = get_fixture_path("resources/valid_py_components_code.py") + custom_py_code = Path(custom_py_code_path).read_text() + combined_config_dict = { + "__injected_declarative_manifest": manifest_dict, + "__injected_components_py": custom_py_code, + "__injected_components_py_checksum": { + "md5": hash_text(custom_py_code, "md5"), + "sha256": hash_text(custom_py_code, "sha256"), + }, + } + return combined_config_dict From 5dc664c95b3bea0eab71646527f39e74352224df Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 18 Dec 2024 16:47:52 -0800 Subject: [PATCH 05/42] add test --- ..._source_declarative_w_custom_components.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py new file mode 100644 index 000000000..c3ea2059d --- /dev/null +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -0,0 +1,24 @@ +# +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. +# + +import json +from tempfile import NamedTemporaryFile +from typing import Any + +from airbyte_cdk.cli.source_declarative_manifest._run import ( + create_declarative_source, +) +from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource + + +def test_given_injected_declarative_manifest_and_py_components_then_return_declarative_manifest( + py_components_config_dict: dict[str, Any], +): + with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: + json.dump(py_components_config_dict, temp_config_file) + temp_config_file.flush() + source = create_declarative_source( + ["check", "--config", temp_config_file.name], + ) + assert isinstance(source, ManifestDeclarativeSource) From 5be084f053bcf313d43cda750a7292c049f2b4a0 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 13:52:25 -0800 Subject: [PATCH 06/42] chore: add missing guard statement --- airbyte_cdk/cli/source_declarative_manifest/_run.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index 232ac302f..5def00602 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -171,6 +171,12 @@ def create_declarative_source( "Invalid config: `__injected_declarative_manifest` should be provided at the root " f"of the config but config only has keys: {list(config.keys() if config else [])}" ) + if not isinstance(config["__injected_declarative_manifest"], dict): + raise ValueError( + "Invalid config: `__injected_declarative_manifest` should be a dictionary, " + f"but got type: {type(config['__injected_declarative_manifest'])}" + ) + return ConcurrentDeclarativeSource( config=config, catalog=catalog, From 7379eeadd1902cfc39a26f6cb8d06b9c313a3836 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 13:53:35 -0800 Subject: [PATCH 07/42] chore: remove stale comment --- airbyte_cdk/test/utils/manifest_only_fixtures.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/airbyte_cdk/test/utils/manifest_only_fixtures.py b/airbyte_cdk/test/utils/manifest_only_fixtures.py index 01b2b393d..bd53e2081 100644 --- a/airbyte_cdk/test/utils/manifest_only_fixtures.py +++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py @@ -56,8 +56,6 @@ def components_module_from_string(components_py_text: str) -> Optional[ModuleTyp """Load and return the components module from a provided string containing the python code. This assumes the components module is located at /components.py. - - TODO: Make new unit test to leverage this fixture """ module_name = "components" From 51cbcbd98c5ab8b25ea909416ff7abc2021d8359 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 13:55:03 -0800 Subject: [PATCH 08/42] checkpoint: passing tests with pokeapi --- .../parsers/model_to_component_factory.py | 3 +- .../source_declarative_manifest/conftest.py | 18 ----- .../valid_py_components_manifest.yaml | 2 +- ..._source_declarative_w_custom_components.py | 77 +++++++++++++++++-- 4 files changed, 74 insertions(+), 26 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 8d4ded84f..1668bc50c 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -987,10 +987,9 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> :param config: The custom defined connector config :return: The declarative component built from the Pydantic model to be used at runtime """ - components_module = self._get_components_module_object(config=config) custom_component_class = self._get_class_from_fully_qualified_class_name( full_qualified_class_name=model.class_name, - components_module=components_module, + components_module=self._get_components_module_object(config=config), ) component_fields = get_type_hints(custom_component_class) model_args = model.dict() diff --git a/unit_tests/source_declarative_manifest/conftest.py b/unit_tests/source_declarative_manifest/conftest.py index a2598822d..8aafe924a 100644 --- a/unit_tests/source_declarative_manifest/conftest.py +++ b/unit_tests/source_declarative_manifest/conftest.py @@ -65,21 +65,3 @@ def valid_local_config_file(): @pytest.fixture def invalid_local_config_file(): return get_fixture_path("resources/invalid_local_pokeapi_config.json") - - -@pytest.fixture -def py_components_config_dict() -> dict[str, Any]: - manifest_dict = yaml.safe_load( - get_fixture_path("resources/valid_py_components.yaml"), - ) - custom_py_code_path = get_fixture_path("resources/valid_py_components_code.py") - custom_py_code = Path(custom_py_code_path).read_text() - combined_config_dict = { - "__injected_declarative_manifest": manifest_dict, - "__injected_components_py": custom_py_code, - "__injected_components_py_checksum": { - "md5": hash_text(custom_py_code, "md5"), - "sha256": hash_text(custom_py_code, "sha256"), - }, - } - return combined_config_dict diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml b/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml index bf15e3138..2ffcd2be5 100644 --- a/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml +++ b/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml @@ -12,7 +12,7 @@ check: definitions: streams: pokemon: - type: components.CustomDeclarativeStream + type: DeclarativeStream name: pokemon retriever: type: SimpleRetriever diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index c3ea2059d..89eaa5807 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -3,22 +3,89 @@ # import json +import os +import types +from collections.abc import Mapping +from pathlib import Path from tempfile import NamedTemporaryFile from typing import Any +import yaml + from airbyte_cdk.cli.source_declarative_manifest._run import ( create_declarative_source, ) from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource +from airbyte_cdk.test.utils.manifest_only_fixtures import components_module_from_string +from unit_tests.source_declarative_manifest.conftest import hash_text + +SAMPLE_COMPONENTS_PY_TEXT = """ +def sample_function() -> str: + return "Hello, World!" + +class SimpleClass: + def sample_method(self) -> str: + return sample_function() +""" + + +def get_fixture_path(file_name) -> str: + return os.path.join(os.path.dirname(__file__), file_name) + + +def test_components_module_from_string() -> None: + # Call the function to get the module + components_module: types.ModuleType = components_module_from_string(SAMPLE_COMPONENTS_PY_TEXT) + + # Check that the module is created and is of the correct type + assert isinstance(components_module, types.ModuleType) + + # Check that the function is correctly defined in the module + assert hasattr(components_module, "sample_function") + + # Check that simple functions are callable + assert components_module.sample_function() == "Hello, World!" + + # Check class definitions work as expected + assert isinstance(components_module.SimpleClass, type) + obj = components_module.SimpleClass() + assert isinstance(obj, components_module.SimpleClass) + assert obj.sample_method() == "Hello, World!" + + +def get_py_components_config_dict() -> dict[str, Any]: + manifest_dict = yaml.safe_load( + Path(get_fixture_path("resources/valid_py_components_manifest.yaml")).read_text(), + ) + assert manifest_dict, "Failed to load the manifest file." + assert isinstance( + manifest_dict, Mapping + ), f"Manifest file is type {type(manifest_dict).__name__}, not a mapping: {manifest_dict}" + + custom_py_code_path = get_fixture_path("resources/valid_py_components_code.py") + custom_py_code = Path(custom_py_code_path).read_text() + combined_config_dict = { + "__injected_declarative_manifest": manifest_dict, + "__injected_components_py": custom_py_code, + "__injected_components_py_checksum": { + "md5": hash_text(custom_py_code, "md5"), + "sha256": hash_text(custom_py_code, "sha256"), + }, + } + return combined_config_dict -def test_given_injected_declarative_manifest_and_py_components_then_return_declarative_manifest( - py_components_config_dict: dict[str, Any], -): +def test_given_injected_declarative_manifest_and_py_components() -> None: + py_components_config_dict = get_py_components_config_dict() + assert isinstance(py_components_config_dict, dict) + assert "__injected_declarative_manifest" in py_components_config_dict + assert "__injected_components_py" in py_components_config_dict with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: - json.dump(py_components_config_dict, temp_config_file) + json_str = json.dumps(py_components_config_dict) + Path(temp_config_file.name).write_text(json_str) temp_config_file.flush() source = create_declarative_source( ["check", "--config", temp_config_file.name], ) - assert isinstance(source, ManifestDeclarativeSource) + assert isinstance(source, ManifestDeclarativeSource) + source.check(logger=None, config=source._source_config) From aaef28508be73a98c78449dbb66672313c5fca40 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 13:56:02 -0800 Subject: [PATCH 09/42] chore: add `poe lock` task definition --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index fbc7ad7af..8e3bfb0fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -126,6 +126,7 @@ select = ["I"] [tool.poe.tasks] # Installation install = { shell = "poetry install --all-extras" } +lock = { shell = "poetry lock --no-update" } # Build tasks assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."} From e7c3eae77ffad22da87468422204b6664bb55b55 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 14:30:26 -0800 Subject: [PATCH 10/42] add 'source_the_guardian_api' test resources --- .../source_the_guardian_api/components.py | 36 ++ .../source_the_guardian_api/manifest.yaml | 376 ++++++++++++++++++ 2 files changed, 412 insertions(+) create mode 100644 unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py create mode 100644 unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py new file mode 100644 index 000000000..db5b07971 --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py @@ -0,0 +1,36 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from dataclasses import dataclass +from typing import Any, Mapping, Optional + +import requests + +from airbyte_cdk.sources.declarative.requesters.paginators.strategies.page_increment import ( + PageIncrement, +) + + +@dataclass +class CustomPageIncrement(PageIncrement): + """ + Starts page from 1 instead of the default value that is 0. Stops Pagination when currentPage is equal to totalPages. + """ + + def next_page_token(self, response: requests.Response, *args) -> Optional[Any]: + res = response.json().get("response") + currPage = res.get("currentPage") + totalPages = res.get("pages") + if currPage < totalPages: + self._page += 1 + return self._page + else: + return None + + def __post_init__(self, parameters: Mapping[str, Any]): + super().__post_init__(parameters) + self._page = 1 + + def reset(self): + self._page = 1 diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml new file mode 100644 index 000000000..7b440631f --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml @@ -0,0 +1,376 @@ +version: "4.3.2" +definitions: + selector: + extractor: + field_path: + - response + - results + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + incremental_sync: + type: DatetimeBasedCursor + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + end_datetime: + datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" + datetime_format: "%Y-%m-%d" + step: "P7D" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" + cursor_field: "webPublicationDate" + start_time_option: + field_name: "from-date" + inject_into: "request_parameter" + end_time_option: + field_name: "to-date" + inject_into: "request_parameter" + retriever: + record_selector: + extractor: + field_path: + - response + - results + paginator: + type: DefaultPaginator + pagination_strategy: + type: CustomPaginationStrategy + class_name: "source_declarative_manifest.components.CustomPageIncrement" + page_size: 10 + page_token_option: + type: RequestOption + inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + base_stream: + incremental_sync: + type: DatetimeBasedCursor + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + end_datetime: + datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" + datetime_format: "%Y-%m-%d" + step: "P7D" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" + cursor_field: "webPublicationDate" + start_time_option: + field_name: "from-date" + inject_into: "request_parameter" + end_time_option: + field_name: "to-date" + inject_into: "request_parameter" + retriever: + record_selector: + extractor: + field_path: + - response + - results + paginator: + type: DefaultPaginator + pagination_strategy: + type: CustomPaginationStrategy + class_name: "source_declarative_manifest.components.CustomPageIncrement" + page_size: 10 + page_token_option: + type: RequestOption + inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + content_stream: + incremental_sync: + type: DatetimeBasedCursor + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + end_datetime: + datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" + datetime_format: "%Y-%m-%d" + step: "P7D" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" + cursor_field: "webPublicationDate" + start_time_option: + field_name: "from-date" + inject_into: "request_parameter" + end_time_option: + field_name: "to-date" + inject_into: "request_parameter" + retriever: + record_selector: + extractor: + field_path: + - response + - results + paginator: + type: "DefaultPaginator" + pagination_strategy: + type: CustomPaginationStrategy + class_name: "source_declarative_manifest.components.CustomPageIncrement" + page_size: 10 + page_token_option: + type: RequestOption + inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + schema_loader: + type: InlineSchemaLoader + schema: + $schema: http://json-schema.org/draft-04/schema# + type: object + properties: + id: + type: string + type: + type: string + sectionId: + type: string + sectionName: + type: string + webPublicationDate: + type: string + webTitle: + type: string + webUrl: + type: string + apiUrl: + type: string + isHosted: + type: boolean + pillarId: + type: string + pillarName: + type: string + required: + - id + - type + - sectionId + - sectionName + - webPublicationDate + - webTitle + - webUrl + - apiUrl + - isHosted + - pillarId + - pillarName +streams: + - incremental_sync: + type: DatetimeBasedCursor + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + type: MinMaxDatetime + end_datetime: + datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" + datetime_format: "%Y-%m-%d" + type: MinMaxDatetime + step: "P7D" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" + cursor_field: "webPublicationDate" + start_time_option: + field_name: "from-date" + inject_into: "request_parameter" + type: RequestOption + end_time_option: + field_name: "to-date" + inject_into: "request_parameter" + type: RequestOption + retriever: + record_selector: + extractor: + field_path: + - response + - results + type: DpathExtractor + type: RecordSelector + paginator: + type: "DefaultPaginator" + pagination_strategy: + class_name: source_declarative_manifest.components.CustomPageIncrement + page_size: 10 + type: CustomPaginationStrategy + page_token_option: + type: RequestOption + inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" + type: RequestOption + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + type: HttpRequester + path: "/search" + type: SimpleRetriever + schema_loader: + type: InlineSchemaLoader + schema: + $schema: http://json-schema.org/draft-04/schema# + type: object + properties: + id: + type: string + type: + type: string + sectionId: + type: string + sectionName: + type: string + webPublicationDate: + type: string + webTitle: + type: string + webUrl: + type: string + apiUrl: + type: string + isHosted: + type: boolean + pillarId: + type: string + pillarName: + type: string + required: + - id + - type + - sectionId + - sectionName + - webPublicationDate + - webTitle + - webUrl + - apiUrl + - isHosted + - pillarId + - pillarName + type: DeclarativeStream + name: "content" + primary_key: "id" +check: + stream_names: + - "content" + type: CheckStream +type: DeclarativeSource +spec: + type: Spec + documentation_url: https://docs.airbyte.com/integrations/sources/the-guardian-api + connection_specification: + $schema: http://json-schema.org/draft-07/schema# + title: The Guardian Api Spec + type: object + required: + - api_key + - start_date + additionalProperties: true + properties: + api_key: + title: API Key + type: string + description: + Your API Key. See here. + The key is case sensitive. + airbyte_secret: true + start_date: + title: Start Date + type: string + description: + Use this to set the minimum date (YYYY-MM-DD) of the results. + Results older than the start_date will not be shown. + pattern: ^([1-9][0-9]{3})\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01])$ + examples: + - YYYY-MM-DD + query: + title: Query + type: string + description: + (Optional) The query (q) parameter filters the results to only + those that include that search term. The q parameter supports AND, OR and + NOT operators. + examples: + - environment AND NOT water + - environment AND political + - amusement park + - political + tag: + title: Tag + type: string + description: + (Optional) A tag is a piece of data that is used by The Guardian + to categorise content. Use this parameter to filter results by showing only + the ones matching the entered tag. See here + for a list of all tags, and here + for the tags endpoint documentation. + examples: + - environment/recycling + - environment/plasticbags + - environment/energyefficiency + section: + title: Section + type: string + description: + (Optional) Use this to filter the results by a particular section. + See here + for a list of all sections, and here + for the sections endpoint documentation. + examples: + - media + - technology + - housing-network + end_date: + title: End Date + type: string + description: + (Optional) Use this to set the maximum date (YYYY-MM-DD) of the + results. Results newer than the end_date will not be shown. Default is set + to the current date (today) for incremental syncs. + pattern: ^([1-9][0-9]{3})\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01])$ + examples: + - YYYY-MM-DD From 2300f7a7d28e5a5718ee7d5b5711fd0c89db9a97 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 14:32:05 -0800 Subject: [PATCH 11/42] checkpoint: working `check` --- .../parsers/model_to_component_factory.py | 28 +++++++++++-------- ..._source_declarative_w_custom_components.py | 14 ++++++---- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 1668bc50c..8c70c2b88 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -5,9 +5,9 @@ from __future__ import annotations import datetime -import importlib import inspect import re +import sys import types from functools import partial from typing import ( @@ -1043,8 +1043,9 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> return custom_component_class(**kwargs) def _get_components_module_object( + self, config: Config, - ) -> None: + ) -> types.ModuleType: """Get a components module object based on the provided config. If custom python components is provided, this will be loaded. Otherwise, we will @@ -1054,17 +1055,20 @@ def _get_components_module_object( COMPONENTS_MODULE_NAME = "components" components_module: types.ModuleType - if INJECTED_COMPONENTS_PY in config: - # Create a new module object and execute the provided Python code text within it - components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME) - python_text = config[INJECTED_COMPONENTS_PY] - exec(python_text, components_module.__dict__) - # Skip insert the module into sys.modules because we pass by reference below - # sys.modules[module_name] = components_module - else: - components_module = importlib.import_module(name=COMPONENTS_MODULE_NAME) + if not INJECTED_COMPONENTS_PY in config: + raise ValueError( + "Custom components must be defined in a module named `components`. Please provide a custom components module." + ) + + # Create a new module object and execute the provided Python code text within it + components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME) + python_text = config[INJECTED_COMPONENTS_PY] + exec(python_text, components_module.__dict__) + sys.modules[COMPONENTS_MODULE_NAME] = components_module + return components_module def _get_class_from_fully_qualified_class_name( + self, full_qualified_class_name: str, components_module: types.ModuleType, ) -> Any: @@ -1086,7 +1090,7 @@ def _get_class_from_fully_qualified_class_name( module_name = split[:-2] class_name = split[-1] - if module_name != "components": + if "components" not in split: raise ValueError( f"Custom components must be defined in a module named `components`. Found {module_name} instead." ) diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 89eaa5807..7afb24bff 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -3,6 +3,7 @@ # import json +import logging import os import types from collections.abc import Mapping @@ -11,6 +12,7 @@ from typing import Any import yaml +from airbyte_protocol_dataclasses.models.airbyte_protocol import AirbyteCatalog from airbyte_cdk.cli.source_declarative_manifest._run import ( create_declarative_source, @@ -54,16 +56,16 @@ def test_components_module_from_string() -> None: def get_py_components_config_dict() -> dict[str, Any]: - manifest_dict = yaml.safe_load( - Path(get_fixture_path("resources/valid_py_components_manifest.yaml")).read_text(), - ) + connector_dir = Path(get_fixture_path("resources/source_the_guardian_api")) + manifest_yml_path: Path = connector_dir / "manifest.yaml" + custom_py_code_path: Path = connector_dir / "components.py" + manifest_dict = yaml.safe_load(manifest_yml_path.read_text()) assert manifest_dict, "Failed to load the manifest file." assert isinstance( manifest_dict, Mapping ), f"Manifest file is type {type(manifest_dict).__name__}, not a mapping: {manifest_dict}" - custom_py_code_path = get_fixture_path("resources/valid_py_components_code.py") - custom_py_code = Path(custom_py_code_path).read_text() + custom_py_code = custom_py_code_path.read_text() combined_config_dict = { "__injected_declarative_manifest": manifest_dict, "__injected_components_py": custom_py_code, @@ -88,4 +90,4 @@ def test_given_injected_declarative_manifest_and_py_components() -> None: ["check", "--config", temp_config_file.name], ) assert isinstance(source, ManifestDeclarativeSource) - source.check(logger=None, config=source._source_config) + source.check(logger=logging.getLogger(), config=py_components_config_dict) From 4efcd4032abe0ae33890334599b64fb2d56e8a1d Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 14:51:11 -0800 Subject: [PATCH 12/42] checkpoint: working discover --- .../resources/source_the_guardian_api/.gitignore | 1 + .../resources/source_the_guardian_api/README.md | 9 +++++++++ .../source_the_guardian_api/valid_config.yaml | 3 +++ .../test_source_declarative_w_custom_components.py | 14 ++++++++++++++ 4 files changed, 27 insertions(+) create mode 100644 unit_tests/source_declarative_manifest/resources/source_the_guardian_api/.gitignore create mode 100644 unit_tests/source_declarative_manifest/resources/source_the_guardian_api/README.md create mode 100644 unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/.gitignore b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/.gitignore new file mode 100644 index 000000000..c4ab49a30 --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/.gitignore @@ -0,0 +1 @@ +secrets* diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/README.md b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/README.md new file mode 100644 index 000000000..403a4baba --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/README.md @@ -0,0 +1,9 @@ +# The Guardian API Tests + +For these tests to work, you'll need to create a `secrets.yaml` file in this directory that looks like this: + +```yml +api_key: ****** +``` + +The `.gitignore` file in this directory should ensure your file is not committed to git, but it's a good practice to double-check. 👀 diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml new file mode 100644 index 000000000..e31112780 --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml @@ -0,0 +1,3 @@ +{ + "start_date": "2024-01-01", +} diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 7afb24bff..7605b4531 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -59,6 +59,9 @@ def get_py_components_config_dict() -> dict[str, Any]: connector_dir = Path(get_fixture_path("resources/source_the_guardian_api")) manifest_yml_path: Path = connector_dir / "manifest.yaml" custom_py_code_path: Path = connector_dir / "components.py" + config_yaml_path: Path = connector_dir / "valid_config.yaml" + secrets_yaml_path: Path = connector_dir / "secrets.yaml" + manifest_dict = yaml.safe_load(manifest_yml_path.read_text()) assert manifest_dict, "Failed to load the manifest file." assert isinstance( @@ -74,6 +77,8 @@ def get_py_components_config_dict() -> dict[str, Any]: "sha256": hash_text(custom_py_code, "sha256"), }, } + combined_config_dict.update(yaml.safe_load(config_yaml_path.read_text())) + combined_config_dict.update(yaml.safe_load(secrets_yaml_path.read_text())) return combined_config_dict @@ -82,6 +87,7 @@ def test_given_injected_declarative_manifest_and_py_components() -> None: assert isinstance(py_components_config_dict, dict) assert "__injected_declarative_manifest" in py_components_config_dict assert "__injected_components_py" in py_components_config_dict + with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: json_str = json.dumps(py_components_config_dict) Path(temp_config_file.name).write_text(json_str) @@ -91,3 +97,11 @@ def test_given_injected_declarative_manifest_and_py_components() -> None: ) assert isinstance(source, ManifestDeclarativeSource) source.check(logger=logging.getLogger(), config=py_components_config_dict) + catalog: AirbyteCatalog = source.discover( + logger=logging.getLogger(), config=py_components_config_dict + ) + assert isinstance(catalog, AirbyteCatalog) + + # source.read( + # logger=logging.getLogger(), config=py_components_config_dict, catalog=None, state=None + # ) From cb6a4ab10e875219fdc373f46d5a6d0254a24f2c Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 15:06:02 -0800 Subject: [PATCH 13/42] checkpoint: working sync --- ..._source_declarative_w_custom_components.py | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 7605b4531..55da8fbb6 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -2,6 +2,7 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # +import datetime import json import logging import os @@ -17,8 +18,10 @@ from airbyte_cdk.cli.source_declarative_manifest._run import ( create_declarative_source, ) +from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.test.utils.manifest_only_fixtures import components_module_from_string +from unit_tests.connector_builder.test_connector_builder_handler import configured_catalog from unit_tests.source_declarative_manifest.conftest import hash_text SAMPLE_COMPONENTS_PY_TEXT = """ @@ -84,6 +87,10 @@ def get_py_components_config_dict() -> dict[str, Any]: def test_given_injected_declarative_manifest_and_py_components() -> None: py_components_config_dict = get_py_components_config_dict() + # Truncate the start_date to speed up tests + py_components_config_dict["start_date"] = ( + datetime.datetime.now() - datetime.timedelta(days=2) + ).strftime("%Y-%m-%d") assert isinstance(py_components_config_dict, dict) assert "__injected_declarative_manifest" in py_components_config_dict assert "__injected_components_py" in py_components_config_dict @@ -101,7 +108,22 @@ def test_given_injected_declarative_manifest_and_py_components() -> None: logger=logging.getLogger(), config=py_components_config_dict ) assert isinstance(catalog, AirbyteCatalog) + configured_catalog = ConfiguredAirbyteCatalog( + streams=[ + ConfiguredAirbyteStream( + stream=stream, + sync_mode="full_refresh", + destination_sync_mode="overwrite", + ) + for stream in catalog.streams + ] + ) - # source.read( - # logger=logging.getLogger(), config=py_components_config_dict, catalog=None, state=None - # ) + msg_iterator = source.read( + logger=logging.getLogger(), + config=py_components_config_dict, + catalog=configured_catalog, + state=None, + ) + for msg in msg_iterator: + assert msg From 051c57bad129eb0d9a858f013af331c792b08c52 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 15:10:18 -0800 Subject: [PATCH 14/42] improve module name parsing --- .../sources/declarative/parsers/model_to_component_factory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 8c70c2b88..86ff2ca89 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -1087,10 +1087,10 @@ def _get_class_from_fully_qualified_class_name( """ split = full_qualified_class_name.split(".") module_name_full = ".".join(split[:-1]) - module_name = split[:-2] + module_name = split[-2] class_name = split[-1] - if "components" not in split: + if module_name != "components": raise ValueError( f"Custom components must be defined in a module named `components`. Found {module_name} instead." ) From e511a2b49a168ffed285b21f9fa89eadc658d253 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 15:12:23 -0800 Subject: [PATCH 15/42] remove unused files --- .../source_declarative_manifest/conftest.py | 3 +- .../resources/valid_py_components_code.py | 15 - .../resources/valid_py_components_config.json | 3 - .../valid_py_components_manifest.yaml | 1368 ----------------- 4 files changed, 1 insertion(+), 1388 deletions(-) delete mode 100644 unit_tests/source_declarative_manifest/resources/valid_py_components_code.py delete mode 100644 unit_tests/source_declarative_manifest/resources/valid_py_components_config.json delete mode 100644 unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml diff --git a/unit_tests/source_declarative_manifest/conftest.py b/unit_tests/source_declarative_manifest/conftest.py index 8aafe924a..d4c67a33e 100644 --- a/unit_tests/source_declarative_manifest/conftest.py +++ b/unit_tests/source_declarative_manifest/conftest.py @@ -4,8 +4,7 @@ import hashlib import os -from pathlib import Path -from typing import Any, Literal +from typing import Literal import pytest import yaml diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_code.py b/unit_tests/source_declarative_manifest/resources/valid_py_components_code.py deleted file mode 100644 index 06c95e78a..000000000 --- a/unit_tests/source_declarative_manifest/resources/valid_py_components_code.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Custom Python components.py file for testing. - -This file is mostly a no-op (for now) but should trigger a failure if code file is not -correctly parsed. -""" - -from airbyte_cdk.sources.declarative.models import DeclarativeStream - - -class CustomDeclarativeStream(DeclarativeStream): - """Custom declarative stream class. - - We don't change anything from the base class, but this should still be enough to confirm - that the components.py file is correctly parsed. - """ diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_config.json b/unit_tests/source_declarative_manifest/resources/valid_py_components_config.json deleted file mode 100644 index 214fc684f..000000000 --- a/unit_tests/source_declarative_manifest/resources/valid_py_components_config.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "pokemon_name": "blastoise" -} diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml b/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml deleted file mode 100644 index 2ffcd2be5..000000000 --- a/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml +++ /dev/null @@ -1,1368 +0,0 @@ -version: 3.9.6 - -type: DeclarativeSource - -description: This is just a test, with custom Python components enabled. Copied from Pokemon example. - -check: - type: CheckStream - stream_names: - - pokemon - -definitions: - streams: - pokemon: - type: DeclarativeStream - name: pokemon - retriever: - type: SimpleRetriever - requester: - $ref: "#/definitions/base_requester" - path: /{{config['pokemon_name']}} - http_method: GET - record_selector: - type: RecordSelector - extractor: - type: DpathExtractor - field_path: [] - primary_key: - - id - schema_loader: - type: InlineSchemaLoader - schema: - $ref: "#/schemas/pokemon" - base_requester: - type: HttpRequester - url_base: https://pokeapi.co/api/v2/pokemon - -streams: - - $ref: "#/definitions/streams/pokemon" - -spec: - type: Spec - connection_specification: - type: object - $schema: http://json-schema.org/draft-07/schema# - required: - - pokemon_name - properties: - pokemon_name: - type: string - description: Pokemon requested from the API. - enum: - - bulbasaur - - ivysaur - - venusaur - - charmander - - charmeleon - - charizard - - squirtle - - wartortle - - blastoise - - caterpie - - metapod - - butterfree - - weedle - - kakuna - - beedrill - - pidgey - - pidgeotto - - pidgeot - - rattata - - raticate - - spearow - - fearow - - ekans - - arbok - - pikachu - - raichu - - sandshrew - - sandslash - - nidoranf - - nidorina - - nidoqueen - - nidoranm - - nidorino - - nidoking - - clefairy - - clefable - - vulpix - - ninetales - - jigglypuff - - wigglytuff - - zubat - - golbat - - oddish - - gloom - - vileplume - - paras - - parasect - - venonat - - venomoth - - diglett - - dugtrio - - meowth - - persian - - psyduck - - golduck - - mankey - - primeape - - growlithe - - arcanine - - poliwag - - poliwhirl - - poliwrath - - abra - - kadabra - - alakazam - - machop - - machoke - - machamp - - bellsprout - - weepinbell - - victreebel - - tentacool - - tentacruel - - geodude - - graveler - - golem - - ponyta - - rapidash - - slowpoke - - slowbro - - magnemite - - magneton - - farfetchd - - doduo - - dodrio - - seel - - dewgong - - grimer - - muk - - shellder - - cloyster - - gastly - - haunter - - gengar - - onix - - drowzee - - hypno - - krabby - - kingler - - voltorb - - electrode - - exeggcute - - exeggutor - - cubone - - marowak - - hitmonlee - - hitmonchan - - lickitung - - koffing - - weezing - - rhyhorn - - rhydon - - chansey - - tangela - - kangaskhan - - horsea - - seadra - - goldeen - - seaking - - staryu - - starmie - - mrmime - - scyther - - jynx - - electabuzz - - magmar - - pinsir - - tauros - - magikarp - - gyarados - - lapras - - ditto - - eevee - - vaporeon - - jolteon - - flareon - - porygon - - omanyte - - omastar - - kabuto - - kabutops - - aerodactyl - - snorlax - - articuno - - zapdos - - moltres - - dratini - - dragonair - - dragonite - - mewtwo - - mew - - chikorita - - bayleef - - meganium - - cyndaquil - - quilava - - typhlosion - - totodile - - croconaw - - feraligatr - - sentret - - furret - - hoothoot - - noctowl - - ledyba - - ledian - - spinarak - - ariados - - crobat - - chinchou - - lanturn - - pichu - - cleffa - - igglybuff - - togepi - - togetic - - natu - - xatu - - mareep - - flaaffy - - ampharos - - bellossom - - marill - - azumarill - - sudowoodo - - politoed - - hoppip - - skiploom - - jumpluff - - aipom - - sunkern - - sunflora - - yanma - - wooper - - quagsire - - espeon - - umbreon - - murkrow - - slowking - - misdreavus - - unown - - wobbuffet - - girafarig - - pineco - - forretress - - dunsparce - - gligar - - steelix - - snubbull - - granbull - - qwilfish - - scizor - - shuckle - - heracross - - sneasel - - teddiursa - - ursaring - - slugma - - magcargo - - swinub - - piloswine - - corsola - - remoraid - - octillery - - delibird - - mantine - - skarmory - - houndour - - houndoom - - kingdra - - phanpy - - donphan - - porygon2 - - stantler - - smeargle - - tyrogue - - hitmontop - - smoochum - - elekid - - magby - - miltank - - blissey - - raikou - - entei - - suicune - - larvitar - - pupitar - - tyranitar - - lugia - - ho-oh - - celebi - - treecko - - grovyle - - sceptile - - torchic - - combusken - - blaziken - - mudkip - - marshtomp - - swampert - - poochyena - - mightyena - - zigzagoon - - linoone - - wurmple - - silcoon - - beautifly - - cascoon - - dustox - - lotad - - lombre - - ludicolo - - seedot - - nuzleaf - - shiftry - - taillow - - swellow - - wingull - - pelipper - - ralts - - kirlia - - gardevoir - - surskit - - masquerain - - shroomish - - breloom - - slakoth - - vigoroth - - slaking - - nincada - - ninjask - - shedinja - - whismur - - loudred - - exploud - - makuhita - - hariyama - - azurill - - nosepass - - skitty - - delcatty - - sableye - - mawile - - aron - - lairon - - aggron - - meditite - - medicham - - electrike - - manectric - - plusle - - minun - - volbeat - - illumise - - roselia - - gulpin - - swalot - - carvanha - - sharpedo - - wailmer - - wailord - - numel - - camerupt - - torkoal - - spoink - - grumpig - - spinda - - trapinch - - vibrava - - flygon - - cacnea - - cacturne - - swablu - - altaria - - zangoose - - seviper - - lunatone - - solrock - - barboach - - whiscash - - corphish - - crawdaunt - - baltoy - - claydol - - lileep - - cradily - - anorith - - armaldo - - feebas - - milotic - - castform - - kecleon - - shuppet - - banette - - duskull - - dusclops - - tropius - - chimecho - - absol - - wynaut - - snorunt - - glalie - - spheal - - sealeo - - walrein - - clamperl - - huntail - - gorebyss - - relicanth - - luvdisc - - bagon - - shelgon - - salamence - - beldum - - metang - - metagross - - regirock - - regice - - registeel - - latias - - latios - - kyogre - - groudon - - rayquaza - - jirachi - - deoxys - - turtwig - - grotle - - torterra - - chimchar - - monferno - - infernape - - piplup - - prinplup - - empoleon - - starly - - staravia - - staraptor - - bidoof - - bibarel - - kricketot - - kricketune - - shinx - - luxio - - luxray - - budew - - roserade - - cranidos - - rampardos - - shieldon - - bastiodon - - burmy - - wormadam - - mothim - - combee - - vespiquen - - pachirisu - - buizel - - floatzel - - cherubi - - cherrim - - shellos - - gastrodon - - ambipom - - drifloon - - drifblim - - buneary - - lopunny - - mismagius - - honchkrow - - glameow - - purugly - - chingling - - stunky - - skuntank - - bronzor - - bronzong - - bonsly - - mimejr - - happiny - - chatot - - spiritomb - - gible - - gabite - - garchomp - - munchlax - - riolu - - lucario - - hippopotas - - hippowdon - - skorupi - - drapion - - croagunk - - toxicroak - - carnivine - - finneon - - lumineon - - mantyke - - snover - - abomasnow - - weavile - - magnezone - - lickilicky - - rhyperior - - tangrowth - - electivire - - magmortar - - togekiss - - yanmega - - leafeon - - glaceon - - gliscor - - mamoswine - - porygon-z - - gallade - - probopass - - dusknoir - - froslass - - rotom - - uxie - - mesprit - - azelf - - dialga - - palkia - - heatran - - regigigas - - giratina - - cresselia - - phione - - manaphy - - darkrai - - shaymin - - arceus - - victini - - snivy - - servine - - serperior - - tepig - - pignite - - emboar - - oshawott - - dewott - - samurott - - patrat - - watchog - - lillipup - - herdier - - stoutland - - purrloin - - liepard - - pansage - - simisage - - pansear - - simisear - - panpour - - simipour - - munna - - musharna - - pidove - - tranquill - - unfezant - - blitzle - - zebstrika - - roggenrola - - boldore - - gigalith - - woobat - - swoobat - - drilbur - - excadrill - - audino - - timburr - - gurdurr - - conkeldurr - - tympole - - palpitoad - - seismitoad - - throh - - sawk - - sewaddle - - swadloon - - leavanny - - venipede - - whirlipede - - scolipede - - cottonee - - whimsicott - - petilil - - lilligant - - basculin - - sandile - - krokorok - - krookodile - - darumaka - - darmanitan - - maractus - - dwebble - - crustle - - scraggy - - scrafty - - sigilyph - - yamask - - cofagrigus - - tirtouga - - carracosta - - archen - - archeops - - trubbish - - garbodor - - zorua - - zoroark - - minccino - - cinccino - - gothita - - gothorita - - gothitelle - - solosis - - duosion - - reuniclus - - ducklett - - swanna - - vanillite - - vanillish - - vanilluxe - - deerling - - sawsbuck - - emolga - - karrablast - - escavalier - - foongus - - amoonguss - - frillish - - jellicent - - alomomola - - joltik - - galvantula - - ferroseed - - ferrothorn - - klink - - klang - - klinklang - - tynamo - - eelektrik - - eelektross - - elgyem - - beheeyem - - litwick - - lampent - - chandelure - - axew - - fraxure - - haxorus - - cubchoo - - beartic - - cryogonal - - shelmet - - accelgor - - stunfisk - - mienfoo - - mienshao - - druddigon - - golett - - golurk - - pawniard - - bisharp - - bouffalant - - rufflet - - braviary - - vullaby - - mandibuzz - - heatmor - - durant - - deino - - zweilous - - hydreigon - - larvesta - - volcarona - - cobalion - - terrakion - - virizion - - tornadus - - thundurus - - reshiram - - zekrom - - landorus - - kyurem - - keldeo - - meloetta - - genesect - - chespin - - quilladin - - chesnaught - - fennekin - - braixen - - delphox - - froakie - - frogadier - - greninja - - bunnelby - - diggersby - - fletchling - - fletchinder - - talonflame - - scatterbug - - spewpa - - vivillon - - litleo - - pyroar - - flabebe - - floette - - florges - - skiddo - - gogoat - - pancham - - pangoro - - furfrou - - espurr - - meowstic - - honedge - - doublade - - aegislash - - spritzee - - aromatisse - - swirlix - - slurpuff - - inkay - - malamar - - binacle - - barbaracle - - skrelp - - dragalge - - clauncher - - clawitzer - - helioptile - - heliolisk - - tyrunt - - tyrantrum - - amaura - - aurorus - - sylveon - - hawlucha - - dedenne - - carbink - - goomy - - sliggoo - - goodra - - klefki - - phantump - - trevenant - - pumpkaboo - - gourgeist - - bergmite - - avalugg - - noibat - - noivern - - xerneas - - yveltal - - zygarde - - diancie - - hoopa - - volcanion - - rowlet - - dartrix - - decidueye - - litten - - torracat - - incineroar - - popplio - - brionne - - primarina - - pikipek - - trumbeak - - toucannon - - yungoos - - gumshoos - - grubbin - - charjabug - - vikavolt - - crabrawler - - crabominable - - oricorio - - cutiefly - - ribombee - - rockruff - - lycanroc - - wishiwashi - - mareanie - - toxapex - - mudbray - - mudsdale - - dewpider - - araquanid - - fomantis - - lurantis - - morelull - - shiinotic - - salandit - - salazzle - - stufful - - bewear - - bounsweet - - steenee - - tsareena - - comfey - - oranguru - - passimian - - wimpod - - golisopod - - sandygast - - palossand - - pyukumuku - - typenull - - silvally - - minior - - komala - - turtonator - - togedemaru - - mimikyu - - bruxish - - drampa - - dhelmise - - jangmo-o - - hakamo-o - - kommo-o - - tapukoko - - tapulele - - tapubulu - - tapufini - - cosmog - - cosmoem - - solgaleo - - lunala - - nihilego - - buzzwole - - pheromosa - - xurkitree - - celesteela - - kartana - - guzzlord - - necrozma - - magearna - - marshadow - - poipole - - naganadel - - stakataka - - blacephalon - - zeraora - - meltan - - melmetal - - grookey - - thwackey - - rillaboom - - scorbunny - - raboot - - cinderace - - sobble - - drizzile - - inteleon - - skwovet - - greedent - - rookidee - - corvisquire - - corviknight - - blipbug - - dottler - - orbeetle - - nickit - - thievul - - gossifleur - - eldegoss - - wooloo - - dubwool - - chewtle - - drednaw - - yamper - - boltund - - rolycoly - - carkol - - coalossal - - applin - - flapple - - appletun - - silicobra - - sandaconda - - cramorant - - arrokuda - - barraskewda - - toxel - - toxtricity - - sizzlipede - - centiskorch - - clobbopus - - grapploct - - sinistea - - polteageist - - hatenna - - hattrem - - hatterene - - impidimp - - morgrem - - grimmsnarl - - obstagoon - - perrserker - - cursola - - sirfetchd - - mrrime - - runerigus - - milcery - - alcremie - - falinks - - pincurchin - - snom - - frosmoth - - stonjourner - - eiscue - - indeedee - - morpeko - - cufant - - copperajah - - dracozolt - - arctozolt - - dracovish - - arctovish - - duraludon - - dreepy - - drakloak - - dragapult - - zacian - - zamazenta - - eternatus - - kubfu - - urshifu - - zarude - - regieleki - - regidrago - - glastrier - - spectrier - - calyrex - order: 0 - title: Pokemon Name - pattern: ^[a-z0-9_\-]+$ - examples: - - ditto - - luxray - - snorlax - additionalProperties: true - -metadata: - testedStreams: - pokemon: - hasRecords: true - streamHash: f619395f8c7a553f51cec2a7274a4ce517ab46c8 - hasResponse: true - primaryKeysAreUnique: true - primaryKeysArePresent: true - responsesAreSuccessful: true - autoImportSchema: - pokemon: false - -schemas: - pokemon: - type: object - $schema: http://json-schema.org/draft-07/schema# - properties: - id: - type: - - "null" - - integer - name: - type: - - "null" - - string - forms: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - moves: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - move: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - version_group_details: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - version_group: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - level_learned_at: - type: - - "null" - - integer - move_learn_method: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - additionalProperties: true - additionalProperties: true - order: - type: - - "null" - - integer - stats: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - stat: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - effort: - type: - - "null" - - integer - base_stat: - type: - - "null" - - integer - additionalProperties: true - types: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - type: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - slot: - type: - - "null" - - integer - additionalProperties: true - height: - type: - - "null" - - integer - weight: - type: - - "null" - - integer - species: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - sprites: - type: - - "null" - - object - properties: - back_shiny: - type: - - "null" - - string - back_female: - type: - - "null" - - string - front_shiny: - type: - - "null" - - string - back_default: - type: - - "null" - - string - front_female: - type: - - "null" - - string - front_default: - type: - - "null" - - string - back_shiny_female: - type: - - "null" - - string - front_shiny_female: - type: - - "null" - - string - additionalProperties: true - abilities: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - slot: - type: - - "null" - - integer - ability: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - is_hidden: - type: - - "null" - - boolean - additionalProperties: true - held_items: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - item: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - version_details: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - version: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - rarity: - type: - - "null" - - integer - additionalProperties: true - additionalProperties: true - is_default: - type: - - "null" - - boolean - past_types: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - types: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - type: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - slot: - type: - - "null" - - integer - additionalProperties: true - generation: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - additionalProperties: true - game_indices: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - version: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - game_index: - type: - - "null" - - integer - additionalProperties: true - base_experience: - type: - - "null" - - integer - location_area_encounters: - type: - - "null" - - string - additionalProperties: true From a19b5c16abba382329a931187700a0e13bbb220e Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 15:22:04 -0800 Subject: [PATCH 16/42] tidy up --- .../parsers/model_to_component_factory.py | 12 +++++++++--- airbyte_cdk/test/utils/manifest_only_fixtures.py | 5 ++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 86ff2ca89..adb126b5c 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -1042,8 +1042,8 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> } return custom_component_class(**kwargs) + @staticmethod def _get_components_module_object( - self, config: Config, ) -> types.ModuleType: """Get a components module object based on the provided config. @@ -1067,8 +1067,8 @@ def _get_components_module_object( sys.modules[COMPONENTS_MODULE_NAME] = components_module return components_module + @staticmethod def _get_class_from_fully_qualified_class_name( - self, full_qualified_class_name: str, components_module: types.ModuleType, ) -> Any: @@ -1092,7 +1092,13 @@ def _get_class_from_fully_qualified_class_name( if module_name != "components": raise ValueError( - f"Custom components must be defined in a module named `components`. Found {module_name} instead." + "Custom components must be defined in a module named " + f"`components`. Found `{module_name}` instead." + ) + if module_name_full != "source_declarative_manifest.components": + raise ValueError( + "Custom components must be defined in a module named " + f"`source_declarative_manifest.components`. Found `{module_name_full}` instead." ) try: diff --git a/airbyte_cdk/test/utils/manifest_only_fixtures.py b/airbyte_cdk/test/utils/manifest_only_fixtures.py index bd53e2081..43e90a2c4 100644 --- a/airbyte_cdk/test/utils/manifest_only_fixtures.py +++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py @@ -5,7 +5,6 @@ import types from pathlib import Path from types import ModuleType -from typing import Optional import pytest @@ -31,7 +30,7 @@ def connector_dir(request: pytest.FixtureRequest) -> Path: @pytest.fixture(scope="session") -def components_module(connector_dir: Path) -> Optional[ModuleType]: +def components_module(connector_dir: Path) -> ModuleType | None: """Load and return the components module from the connector directory. This assumes the components module is located at /components.py. @@ -52,7 +51,7 @@ def components_module(connector_dir: Path) -> Optional[ModuleType]: return components_module -def components_module_from_string(components_py_text: str) -> Optional[ModuleType]: +def components_module_from_string(components_py_text: str) -> ModuleType | None: """Load and return the components module from a provided string containing the python code. This assumes the components module is located at /components.py. From c837745bc410f522431674339253b31282620e69 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 15:32:03 -0800 Subject: [PATCH 17/42] skip if no creds --- .../test_source_declarative_w_custom_components.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 55da8fbb6..42880baca 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -12,6 +12,7 @@ from tempfile import NamedTemporaryFile from typing import Any +import pytest import yaml from airbyte_protocol_dataclasses.models.airbyte_protocol import AirbyteCatalog @@ -85,6 +86,10 @@ def get_py_components_config_dict() -> dict[str, Any]: return combined_config_dict +@pytest.mark.skipif( + condition=not Path(get_fixture_path("resources/source_the_guardian_api/secrets.yaml")).exists(), + reason="Skipped due to missing 'secrets.yaml'.", +) def test_given_injected_declarative_manifest_and_py_components() -> None: py_components_config_dict = get_py_components_config_dict() # Truncate the start_date to speed up tests From c54a73d0f16021f6ec725b5b425857234711e685 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 14 Jan 2025 17:58:58 -0800 Subject: [PATCH 18/42] cosmetic: cleaner diff --- .../parsers/model_to_component_factory.py | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index adb126b5c..a604c5c8d 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -1042,31 +1042,6 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> } return custom_component_class(**kwargs) - @staticmethod - def _get_components_module_object( - config: Config, - ) -> types.ModuleType: - """Get a components module object based on the provided config. - - If custom python components is provided, this will be loaded. Otherwise, we will - attempt to load from the `components` module already imported. - """ - INJECTED_COMPONENTS_PY = "__injected_components_py" - COMPONENTS_MODULE_NAME = "components" - - components_module: types.ModuleType - if not INJECTED_COMPONENTS_PY in config: - raise ValueError( - "Custom components must be defined in a module named `components`. Please provide a custom components module." - ) - - # Create a new module object and execute the provided Python code text within it - components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME) - python_text = config[INJECTED_COMPONENTS_PY] - exec(python_text, components_module.__dict__) - sys.modules[COMPONENTS_MODULE_NAME] = components_module - return components_module - @staticmethod def _get_class_from_fully_qualified_class_name( full_qualified_class_name: str, @@ -1106,6 +1081,31 @@ def _get_class_from_fully_qualified_class_name( except (AttributeError, ModuleNotFoundError) as e: raise ValueError(f"Could not load class {full_qualified_class_name}.") from e + @staticmethod + def _get_components_module_object( + config: Config, + ) -> types.ModuleType: + """Get a components module object based on the provided config. + + If custom python components is provided, this will be loaded. Otherwise, we will + attempt to load from the `components` module already imported. + """ + INJECTED_COMPONENTS_PY = "__injected_components_py" + COMPONENTS_MODULE_NAME = "components" + + components_module: types.ModuleType + if not INJECTED_COMPONENTS_PY in config: + raise ValueError( + "Custom components must be defined in a module named `components`. Please provide a custom components module." + ) + + # Create a new module object and execute the provided Python code text within it + components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME) + python_text = config[INJECTED_COMPONENTS_PY] + exec(python_text, components_module.__dict__) + sys.modules[COMPONENTS_MODULE_NAME] = components_module + return components_module + @staticmethod def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: interface = field_type From 3f66c46ec4322873b1fe078e96421a390cf3d933 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 14 Jan 2025 18:03:46 -0800 Subject: [PATCH 19/42] don't fail when custom components.py is already grafted into filesystem --- .../declarative/parsers/model_to_component_factory.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index a604c5c8d..6ba8caab4 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -1095,9 +1095,9 @@ def _get_components_module_object( components_module: types.ModuleType if not INJECTED_COMPONENTS_PY in config: - raise ValueError( - "Custom components must be defined in a module named `components`. Please provide a custom components module." - ) + # Use the existing components module. We expect this to be already grafted into the + # connector module. + components_module = sys.modules.get(COMPONENTS_MODULE_NAME) # Create a new module object and execute the provided Python code text within it components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME) From 75332e89c8772c8b77b1f7712f2621b48435e72c Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 14 Jan 2025 19:07:05 -0800 Subject: [PATCH 20/42] clean up import code --- .../parsers/model_to_component_factory.py | 15 +++++++++++---- airbyte_cdk/test/utils/manifest_only_fixtures.py | 6 ++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 6ba8caab4..af044b8db 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -404,6 +404,10 @@ from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction from airbyte_cdk.sources.types import Config from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer +from airbyte_cdk.test.utils.manifest_only_fixtures import ( + COMPONENTS_MODULE_NAME, + components_module_from_string, +) ComponentDefinition = Mapping[str, Any] @@ -1091,19 +1095,22 @@ def _get_components_module_object( attempt to load from the `components` module already imported. """ INJECTED_COMPONENTS_PY = "__injected_components_py" - COMPONENTS_MODULE_NAME = "components" components_module: types.ModuleType if not INJECTED_COMPONENTS_PY in config: # Use the existing components module. We expect this to be already grafted into the # connector module. components_module = sys.modules.get(COMPONENTS_MODULE_NAME) + if not components_module: + raise ValueError( + f"Could not find module '{COMPONENTS_MODULE_NAME}' in `sys.modules` " + f"and '{INJECTED_COMPONENTS_PY}' was not provided in config" + ) + return components_module # Create a new module object and execute the provided Python code text within it - components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME) python_text = config[INJECTED_COMPONENTS_PY] - exec(python_text, components_module.__dict__) - sys.modules[COMPONENTS_MODULE_NAME] = components_module + components_module = components_module_from_string(components_py_text=python_text) return components_module @staticmethod diff --git a/airbyte_cdk/test/utils/manifest_only_fixtures.py b/airbyte_cdk/test/utils/manifest_only_fixtures.py index 43e90a2c4..4d286246c 100644 --- a/airbyte_cdk/test/utils/manifest_only_fixtures.py +++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py @@ -2,12 +2,15 @@ import importlib.util +import sys import types from pathlib import Path from types import ModuleType import pytest +COMPONENTS_MODULE_NAME = "components" + # The following fixtures are used to load a manifest-only connector's components module and manifest file. # They can be accessed from any test file in the connector's unit_tests directory by importing them as follows: @@ -64,6 +67,9 @@ def components_module_from_string(components_py_text: str) -> ModuleType | None: # Execute the module text in the module's namespace exec(components_py_text, components_module.__dict__) + # Add the module to sys.modules so it can be imported + sys.modules[COMPONENTS_MODULE_NAME] = components_module + # Now you can import and use the module return components_module From 67b84a0c5f5d4e8b3639a2766e28d6b40a7946de Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 15 Jan 2025 14:23:53 -0800 Subject: [PATCH 21/42] clean up imports, implement safety mechanisms and blocked-by-default behavior --- .../cli/source_declarative_manifest/_run.py | 17 ++- .../parsers/custom_code_compiler.py | 76 +++++++++++ .../parsers/model_to_component_factory.py | 14 +- .../test/utils/manifest_only_fixtures.py | 22 ---- .../source_declarative_manifest/conftest.py | 15 +-- .../source_the_guardian_api/valid_config.yaml | 4 +- ..._source_declarative_w_custom_components.py | 121 ++++++++++++++++-- 7 files changed, 218 insertions(+), 51 deletions(-) create mode 100644 airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index 5def00602..d3c018ae0 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -17,6 +17,7 @@ from __future__ import annotations import json +import os import pkgutil import sys import traceback @@ -42,6 +43,11 @@ from airbyte_cdk.sources.declarative.concurrent_declarative_source import ( ConcurrentDeclarativeSource, ) +from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import ( + ENV_VAR_ALLOW_CUSTOM_CODE, + INJECTED_COMPONENTS_PY, + INJECTED_MANIFEST, +) from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource from airbyte_cdk.sources.source import TState @@ -176,12 +182,19 @@ def create_declarative_source( "Invalid config: `__injected_declarative_manifest` should be a dictionary, " f"but got type: {type(config['__injected_declarative_manifest'])}" ) - + if ( + INJECTED_COMPONENTS_PY in config + and os.environ.get(ENV_VAR_ALLOW_CUSTOM_CODE, "").lower() != "true" + ): + raise RuntimeError( + "Custom connector code is not allowed in this environment. " + "Set the `AIRBYTE_ALLOW_CUSTOM_CODE` environment variable to 'true' to enable custom code." + ) return ConcurrentDeclarativeSource( config=config, catalog=catalog, state=state, - source_config=cast(dict[str, Any], config["__injected_declarative_manifest"]), + source_config=cast(dict[str, Any], config[INJECTED_MANIFEST]), ) except Exception as error: print( diff --git a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py new file mode 100644 index 000000000..5efa36b84 --- /dev/null +++ b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py @@ -0,0 +1,76 @@ +"""Contains functions to compile custom code from text.""" + +import hashlib +import sys +from types import ModuleType + +from typing_extensions import Literal + +ChecksumType = Literal["md5", "sha256"] +CHECKSUM_FUNCTIONS = { + "md5": hashlib.md5, + "sha256": hashlib.sha256, +} +COMPONENTS_MODULE_NAME = "components" +INJECTED_MANIFEST = "__injected_declarative_manifest" +INJECTED_COMPONENTS_PY = "__injected_components_py" +INJECTED_COMPONENTS_PY_CHECKSUMS = "__injected_components_py_checksums" +ENV_VAR_ALLOW_CUSTOM_CODE = "AIRBYTE_ALLOW_CUSTOM_CODE" + + +def _hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> str: + hash_object = CHECKSUM_FUNCTIONS[hash_type]() + hash_object.update(input_text.encode()) + return hash_object.hexdigest() + + +class AirbyteCodeTamperedError(Exception): + """Raised when the connector's components module does not match its checksum. + + This is a fatal error, as it can be a sign of code tampering. + """ + + +def validate_python_code( + code_text: str, + checksums: dict[ChecksumType, str] | None, +) -> None: + """""" + if not checksums: + raise ValueError(f"A checksum is required to validate the code. Received: {checksums}") + + for checksum_type, checksum in checksums.items(): + if checksum_type not in CHECKSUM_FUNCTIONS: + raise ValueError( + f"Unsupported checksum type: {checksum_type}. Supported checksum types are: {CHECKSUM_FUNCTIONS.keys()}" + ) + + if checksum_type == "md5": + if _hash_text(code_text, "md5") != checksum: + raise AirbyteCodeTamperedError("MD5 checksum does not match.") + continue + + if checksum_type == "sha256": + if _hash_text(code_text, "sha256") != checksum: + raise AirbyteCodeTamperedError("SHA256 checksum does not match.") + continue + + +def components_module_from_string(components_py_text: str) -> ModuleType | None: + """Load and return the components module from a provided string containing the python code. + + This assumes the components module is located at /components.py. + """ + module_name = "components" + + # Create a new module object + components_module = ModuleType(name=module_name) + + # Execute the module text in the module's namespace + exec(components_py_text, components_module.__dict__) + + # Add the module to sys.modules so it can be imported + sys.modules[COMPONENTS_MODULE_NAME] = components_module + + # Now you can import and use the module + return components_module diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index af044b8db..d2becc934 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -317,6 +317,14 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( XmlDecoder as XmlDecoderModel, ) +from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import ( + COMPONENTS_MODULE_NAME, + INJECTED_COMPONENTS_PY, + INJECTED_COMPONENTS_PY_CHECKSUMS, + AirbyteCodeTamperedError, + components_module_from_string, + validate_python_code, +) from airbyte_cdk.sources.declarative.partition_routers import ( CartesianProductStreamSlicer, ListPartitionRouter, @@ -404,10 +412,6 @@ from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction from airbyte_cdk.sources.types import Config from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer -from airbyte_cdk.test.utils.manifest_only_fixtures import ( - COMPONENTS_MODULE_NAME, - components_module_from_string, -) ComponentDefinition = Mapping[str, Any] @@ -1094,7 +1098,6 @@ def _get_components_module_object( If custom python components is provided, this will be loaded. Otherwise, we will attempt to load from the `components` module already imported. """ - INJECTED_COMPONENTS_PY = "__injected_components_py" components_module: types.ModuleType if not INJECTED_COMPONENTS_PY in config: @@ -1110,6 +1113,7 @@ def _get_components_module_object( # Create a new module object and execute the provided Python code text within it python_text = config[INJECTED_COMPONENTS_PY] + validate_python_code(python_text, config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None)) components_module = components_module_from_string(components_py_text=python_text) return components_module diff --git a/airbyte_cdk/test/utils/manifest_only_fixtures.py b/airbyte_cdk/test/utils/manifest_only_fixtures.py index 4d286246c..452705d64 100644 --- a/airbyte_cdk/test/utils/manifest_only_fixtures.py +++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py @@ -9,8 +9,6 @@ import pytest -COMPONENTS_MODULE_NAME = "components" - # The following fixtures are used to load a manifest-only connector's components module and manifest file. # They can be accessed from any test file in the connector's unit_tests directory by importing them as follows: @@ -54,26 +52,6 @@ def components_module(connector_dir: Path) -> ModuleType | None: return components_module -def components_module_from_string(components_py_text: str) -> ModuleType | None: - """Load and return the components module from a provided string containing the python code. - - This assumes the components module is located at /components.py. - """ - module_name = "components" - - # Create a new module object - components_module = types.ModuleType(name=module_name) - - # Execute the module text in the module's namespace - exec(components_py_text, components_module.__dict__) - - # Add the module to sys.modules so it can be imported - sys.modules[COMPONENTS_MODULE_NAME] = components_module - - # Now you can import and use the module - return components_module - - @pytest.fixture(scope="session") def manifest_path(connector_dir: Path) -> Path: """Return the path to the connector's manifest file.""" diff --git a/unit_tests/source_declarative_manifest/conftest.py b/unit_tests/source_declarative_manifest/conftest.py index d4c67a33e..f84696680 100644 --- a/unit_tests/source_declarative_manifest/conftest.py +++ b/unit_tests/source_declarative_manifest/conftest.py @@ -2,22 +2,15 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # -import hashlib import os -from typing import Literal import pytest import yaml - -def hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> str: - hashers = { - "md5": hashlib.md5, - "sha256": hashlib.sha256, - } - hash_object = hashers[hash_type]() - hash_object.update(input_text.encode()) - return hash_object.hexdigest() +from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import ( + # CustomCodeCompiler, + _hash_text, +) def get_fixture_path(file_name) -> str: diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml index e31112780..b2f752ea1 100644 --- a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml @@ -1,3 +1 @@ -{ - "start_date": "2024-01-01", -} +{ "start_date": "2024-01-01" } diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 42880baca..3a18b11ee 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -21,9 +21,15 @@ ) from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from airbyte_cdk.test.utils.manifest_only_fixtures import components_module_from_string -from unit_tests.connector_builder.test_connector_builder_handler import configured_catalog -from unit_tests.source_declarative_manifest.conftest import hash_text +from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import ( + ENV_VAR_ALLOW_CUSTOM_CODE, + INJECTED_COMPONENTS_PY, + INJECTED_COMPONENTS_PY_CHECKSUMS, + INJECTED_MANIFEST, + AirbyteCodeTamperedError, + _hash_text, + components_module_from_string, +) SAMPLE_COMPONENTS_PY_TEXT = """ def sample_function() -> str: @@ -74,11 +80,11 @@ def get_py_components_config_dict() -> dict[str, Any]: custom_py_code = custom_py_code_path.read_text() combined_config_dict = { - "__injected_declarative_manifest": manifest_dict, - "__injected_components_py": custom_py_code, - "__injected_components_py_checksum": { - "md5": hash_text(custom_py_code, "md5"), - "sha256": hash_text(custom_py_code, "sha256"), + INJECTED_MANIFEST: manifest_dict, + INJECTED_COMPONENTS_PY: custom_py_code, + INJECTED_COMPONENTS_PY_CHECKSUMS: { + "md5": _hash_text(custom_py_code, "md5"), + "sha256": _hash_text(custom_py_code, "sha256"), }, } combined_config_dict.update(yaml.safe_load(config_yaml_path.read_text())) @@ -99,6 +105,7 @@ def test_given_injected_declarative_manifest_and_py_components() -> None: assert isinstance(py_components_config_dict, dict) assert "__injected_declarative_manifest" in py_components_config_dict assert "__injected_components_py" in py_components_config_dict + assert "__injected_components_py_checksums" in py_components_config_dict with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: json_str = json.dumps(py_components_config_dict) @@ -132,3 +139,101 @@ def test_given_injected_declarative_manifest_and_py_components() -> None: ) for msg in msg_iterator: assert msg + +def test_missing_checksum_fails_to_run() -> None: + """Assert that missing checksum in the config will raise an error.""" + py_components_config_dict = get_py_components_config_dict() + # Truncate the start_date to speed up tests + py_components_config_dict["start_date"] = ( + datetime.datetime.now() - datetime.timedelta(days=2) + ).strftime("%Y-%m-%d") + + py_components_config_dict.pop("__injected_components_py_checksums") + + with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: + json_str = json.dumps(py_components_config_dict) + Path(temp_config_file.name).write_text(json_str) + temp_config_file.flush() + source = create_declarative_source( + ["check", "--config", temp_config_file.name], + ) + assert isinstance(source, ManifestDeclarativeSource) + with pytest.raises(ValueError): + source.check(logger=logging.getLogger(), config=py_components_config_dict) + + +@pytest.mark.parametrize( + "hash_type", + [ + "md5", + "sha256", + ], +) +def test_invalid_checksum_fails_to_run(hash_type: str) -> None: + """Assert that an invalid checksum in the config will raise an error.""" + py_components_config_dict = get_py_components_config_dict() + # Truncate the start_date to speed up tests + py_components_config_dict["start_date"] = ( + datetime.datetime.now() - datetime.timedelta(days=2) + ).strftime("%Y-%m-%d") + + py_components_config_dict["__injected_components_py_checksums"][hash_type] = "invalid_checksum" + + with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: + json_str = json.dumps(py_components_config_dict) + Path(temp_config_file.name).write_text(json_str) + temp_config_file.flush() + source = create_declarative_source( + ["check", "--config", temp_config_file.name], + ) + assert isinstance(source, ManifestDeclarativeSource) + with pytest.raises(AirbyteCodeTamperedError): + source.check(logger=logging.getLogger(), config=py_components_config_dict) + + +@pytest.mark.parametrize( + "env_value, should_raise", + [ + ("true", False), + ("True", False), + ("TRUE", False), + ("1", True), + ("false", True), + ("False", True), + ("", True), + ("0", True), + ("True", True), + ], +) +def test_fail_unless_custom_code_enabled_explicitly( + env_value: Any, + should_raise: bool, +) -> None: + """Fails if the environment variable to allow custom code is not set. + + A missing value should fail. + Any value other than "true" (case insensitive) should fail. + """ + os.environ.pop(ENV_VAR_ALLOW_CUSTOM_CODE, None) + if env_value is not None: + os.environ[ENV_VAR_ALLOW_CUSTOM_CODE] = env_value + + py_components_config_dict = get_py_components_config_dict() + # Truncate the start_date to speed up tests + py_components_config_dict["start_date"] = ( + datetime.datetime.now() - datetime.timedelta(days=2) + ).strftime("%Y-%m-%d") + + with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: + json_str = json.dumps(py_components_config_dict) + Path(temp_config_file.name).write_text(json_str) + temp_config_file.flush() + try: + source = create_declarative_source( + ["check", "--config", temp_config_file.name], + ) + except: + if should_raise: + return # Success + + raise From 58056496bcc0648fbd1f8015189bc4f75ab9001d Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 15 Jan 2025 14:31:40 -0800 Subject: [PATCH 22/42] fix mypy issues --- .../sources/declarative/parsers/custom_code_compiler.py | 2 +- .../declarative/parsers/model_to_component_factory.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py index 5efa36b84..14b1e89a3 100644 --- a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +++ b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py @@ -56,7 +56,7 @@ def validate_python_code( continue -def components_module_from_string(components_py_text: str) -> ModuleType | None: +def components_module_from_string(components_py_text: str) -> ModuleType: """Load and return the components module from a provided string containing the python code. This assumes the components module is located at /components.py. diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index d2becc934..613a0260d 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -20,6 +20,7 @@ Optional, Type, Union, + cast, get_args, get_origin, get_type_hints, @@ -1099,16 +1100,18 @@ def _get_components_module_object( attempt to load from the `components` module already imported. """ - components_module: types.ModuleType + components_module: types.ModuleType | None if not INJECTED_COMPONENTS_PY in config: # Use the existing components module. We expect this to be already grafted into the # connector module. - components_module = sys.modules.get(COMPONENTS_MODULE_NAME) - if not components_module: + if COMPONENTS_MODULE_NAME not in sys.modules: raise ValueError( f"Could not find module '{COMPONENTS_MODULE_NAME}' in `sys.modules` " f"and '{INJECTED_COMPONENTS_PY}' was not provided in config" ) + + # We now know this is not `None` + components_module = cast(types.ModuleType, sys.modules.get(COMPONENTS_MODULE_NAME)) return components_module # Create a new module object and execute the provided Python code text within it From 3251e5cd49ffeb15ed8ff6ee9e1f52a53088044b Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Wed, 15 Jan 2025 14:34:00 -0800 Subject: [PATCH 23/42] Update unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py --- .../test_source_declarative_w_custom_components.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 3a18b11ee..afc506287 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -197,12 +197,11 @@ def test_invalid_checksum_fails_to_run(hash_type: str) -> None: ("true", False), ("True", False), ("TRUE", False), - ("1", True), + ("1", True), # Not accepted as truthy as of now ("false", True), ("False", True), ("", True), ("0", True), - ("True", True), ], ) def test_fail_unless_custom_code_enabled_explicitly( From 877d721314291430de8b86df693373f2ed0ce56f Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 15 Jan 2025 14:47:37 -0800 Subject: [PATCH 24/42] more clean up --- .../cli/source_declarative_manifest/_run.py | 13 ++++----- .../parsers/custom_code_compiler.py | 28 +++++++++++++++++-- ..._source_declarative_w_custom_components.py | 3 ++ 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index d3c018ae0..45e722d2c 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -47,6 +47,8 @@ ENV_VAR_ALLOW_CUSTOM_CODE, INJECTED_COMPONENTS_PY, INJECTED_MANIFEST, + AirbyteCustomCodeNotPermittedError, + custom_code_execution_permitted, ) from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource from airbyte_cdk.sources.source import TState @@ -182,14 +184,9 @@ def create_declarative_source( "Invalid config: `__injected_declarative_manifest` should be a dictionary, " f"but got type: {type(config['__injected_declarative_manifest'])}" ) - if ( - INJECTED_COMPONENTS_PY in config - and os.environ.get(ENV_VAR_ALLOW_CUSTOM_CODE, "").lower() != "true" - ): - raise RuntimeError( - "Custom connector code is not allowed in this environment. " - "Set the `AIRBYTE_ALLOW_CUSTOM_CODE` environment variable to 'true' to enable custom code." - ) + if INJECTED_COMPONENTS_PY in config and not custom_code_execution_permitted(): + raise AirbyteCustomCodeNotPermittedError + return ConcurrentDeclarativeSource( config=config, catalog=catalog, diff --git a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py index 14b1e89a3..b67b0cd4e 100644 --- a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +++ b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py @@ -1,6 +1,7 @@ """Contains functions to compile custom code from text.""" import hashlib +import os import sys from types import ModuleType @@ -18,17 +19,38 @@ ENV_VAR_ALLOW_CUSTOM_CODE = "AIRBYTE_ALLOW_CUSTOM_CODE" +class AirbyteCodeTamperedError(Exception): + """Raised when the connector's components module does not match its checksum. + + This is a fatal error, as it can be a sign of code tampering. + """ + + +class AirbyteCustomCodeNotPermittedError(Exception): + """Raised when custom code is attempted to be run in an environment that does not support it.""" + + def __init__(self) -> None: + super().__init__( + "Custom connector code is not permitted in this environment. " + "If you need to run custom code, please ask your administrator to set the `AIRBYTE_ALLOW_CUSTOM_CODE` " + "environment variable to 'true' in your Airbyte environment. " + "If you see this message in Airbyte Cloud, your workspace does not allow executing " + "custom connector code." + ) + + def _hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> str: hash_object = CHECKSUM_FUNCTIONS[hash_type]() hash_object.update(input_text.encode()) return hash_object.hexdigest() -class AirbyteCodeTamperedError(Exception): - """Raised when the connector's components module does not match its checksum. +def custom_code_execution_permitted() -> bool: + """Return `True` if custom code execution is permitted, otherwise `False`. - This is a fatal error, as it can be a sign of code tampering. + Custom code execution is permitted if the `AIRBYTE_ALLOW_CUSTOM_CODE` environment variable is set to 'true'. """ + return os.environ.get(ENV_VAR_ALLOW_CUSTOM_CODE, "").lower() == "true" def validate_python_code( diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index afc506287..121b7a518 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -29,6 +29,7 @@ AirbyteCodeTamperedError, _hash_text, components_module_from_string, + custom_code_execution_permitted, ) SAMPLE_COMPONENTS_PY_TEXT = """ @@ -217,6 +218,8 @@ def test_fail_unless_custom_code_enabled_explicitly( if env_value is not None: os.environ[ENV_VAR_ALLOW_CUSTOM_CODE] = env_value + assert custom_code_execution_permitted() == (not should_raise) + py_components_config_dict = get_py_components_config_dict() # Truncate the start_date to speed up tests py_components_config_dict["start_date"] = ( From 7531ed0bf87d8aa01d5d34ef3a03dba961e82aa3 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 15 Jan 2025 15:12:42 -0800 Subject: [PATCH 25/42] fix ruff format issue --- .../test_source_declarative_w_custom_components.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 121b7a518..ea6cd701d 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -141,6 +141,7 @@ def test_given_injected_declarative_manifest_and_py_components() -> None: for msg in msg_iterator: assert msg + def test_missing_checksum_fails_to_run() -> None: """Assert that missing checksum in the config will raise an error.""" py_components_config_dict = get_py_components_config_dict() From 5e7e826eb70ffe25859c63356288da838ec3b717 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 15 Jan 2025 15:26:16 -0800 Subject: [PATCH 26/42] add intentionally failing use case --- .../components_failing.py | 33 +++++++++++++ ..._source_declarative_w_custom_components.py | 49 +++++++++++++------ 2 files changed, 68 insertions(+), 14 deletions(-) create mode 100644 unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components_failing.py diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components_failing.py b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components_failing.py new file mode 100644 index 000000000..bca3a0bb0 --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components_failing.py @@ -0,0 +1,33 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from dataclasses import dataclass +from typing import Any, Mapping, Optional + +import requests + +from airbyte_cdk.sources.declarative.requesters.paginators.strategies.page_increment import ( + PageIncrement, +) + + +class IntentionalException(Exception): + """This exception is raised intentionally in order to test error handling.""" + + +@dataclass +class CustomPageIncrement(PageIncrement): + """ + Starts page from 1 instead of the default value that is 0. Stops Pagination when currentPage is equal to totalPages. + """ + + def next_page_token(self, response: requests.Response, *args) -> Optional[Any]: + raise IntentionalException() + + def __post_init__(self, parameters: Mapping[str, Any]): + super().__post_init__(parameters) + self._page = 1 + + def reset(self): + self._page = 1 diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index ea6cd701d..8d8e238a0 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -7,7 +7,7 @@ import logging import os import types -from collections.abc import Mapping +from collections.abc import Callable, Mapping from pathlib import Path from tempfile import NamedTemporaryFile from typing import Any @@ -27,6 +27,7 @@ INJECTED_COMPONENTS_PY_CHECKSUMS, INJECTED_MANIFEST, AirbyteCodeTamperedError, + AirbyteCustomCodeNotPermittedError, _hash_text, components_module_from_string, custom_code_execution_permitted, @@ -66,10 +67,12 @@ def test_components_module_from_string() -> None: assert obj.sample_method() == "Hello, World!" -def get_py_components_config_dict() -> dict[str, Any]: +def get_py_components_config_dict(failing_components: bool = False) -> dict[str, Any]: connector_dir = Path(get_fixture_path("resources/source_the_guardian_api")) manifest_yml_path: Path = connector_dir / "manifest.yaml" - custom_py_code_path: Path = connector_dir / "components.py" + custom_py_code_path: Path = connector_dir / ( + "components.py" if not failing_components else "components_failing.py" + ) config_yaml_path: Path = connector_dir / "valid_config.yaml" secrets_yaml_path: Path = connector_dir / "secrets.yaml" @@ -97,8 +100,19 @@ def get_py_components_config_dict() -> dict[str, Any]: condition=not Path(get_fixture_path("resources/source_the_guardian_api/secrets.yaml")).exists(), reason="Skipped due to missing 'secrets.yaml'.", ) -def test_given_injected_declarative_manifest_and_py_components() -> None: - py_components_config_dict = get_py_components_config_dict() +@pytest.mark.parametrize( + "failing_components", + [ + False, + True, + ], +) +def test_given_injected_declarative_manifest_and_py_components( + failing_components: bool, +) -> None: + os.environ[ENV_VAR_ALLOW_CUSTOM_CODE] = "true" + + py_components_config_dict = get_py_components_config_dict(failing_components) # Truncate the start_date to speed up tests py_components_config_dict["start_date"] = ( datetime.datetime.now() - datetime.timedelta(days=2) @@ -138,6 +152,12 @@ def test_given_injected_declarative_manifest_and_py_components() -> None: catalog=configured_catalog, state=None, ) + if failing_components: + with pytest.raises(Exception): + for msg in msg_iterator: + assert msg + return + for msg in msg_iterator: assert msg @@ -231,12 +251,13 @@ def test_fail_unless_custom_code_enabled_explicitly( json_str = json.dumps(py_components_config_dict) Path(temp_config_file.name).write_text(json_str) temp_config_file.flush() - try: - source = create_declarative_source( - ["check", "--config", temp_config_file.name], - ) - except: - if should_raise: - return # Success - - raise + fn: Callable = lambda: create_declarative_source( + ["check", "--config", temp_config_file.name], + ) + if should_raise: + with pytest.raises(AirbyteCustomCodeNotPermittedError): + fn() + + return # Success + + fn() From c654ef5ce578161ac6c965bfbaa29d106f7d2772 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 15 Jan 2025 15:30:14 -0800 Subject: [PATCH 27/42] validate input text --- .../sources/declarative/parsers/custom_code_compiler.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py index b67b0cd4e..76e028ce8 100644 --- a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +++ b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py @@ -40,6 +40,10 @@ def __init__(self) -> None: def _hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> str: + """Return the hash of the input text using the specified hash type.""" + if not input_text: + raise ValueError("Input text cannot be empty.") + hash_object = CHECKSUM_FUNCTIONS[hash_type]() hash_object.update(input_text.encode()) return hash_object.hexdigest() From 6badf7eb3d35a2816d7e0864291cb0fa35a1381a Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 15 Jan 2025 15:51:06 -0800 Subject: [PATCH 28/42] clean up module name parsing --- .../declarative/parsers/model_to_component_factory.py | 8 +++++--- .../resources/source_the_guardian_api/manifest.yaml | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 613a0260d..58b4eb9ee 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -1071,7 +1071,9 @@ def _get_class_from_fully_qualified_class_name( """ split = full_qualified_class_name.split(".") module_name_full = ".".join(split[:-1]) - module_name = split[-2] + module_name = ( # If bare class name passed, assume "components" module name + split[-2] or "components" + ) class_name = split[-1] if module_name != "components": @@ -1079,9 +1081,9 @@ def _get_class_from_fully_qualified_class_name( "Custom components must be defined in a module named " f"`components`. Found `{module_name}` instead." ) - if module_name_full != "source_declarative_manifest.components": + if module_name_full not in {"components", "source_declarative_manifest.components"}: raise ValueError( - "Custom components must be defined in a module named " + "Custom components must be defined in a module named `components` or " f"`source_declarative_manifest.components`. Found `{module_name_full}` instead." ) diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml index 7b440631f..a42e0ebba 100644 --- a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml @@ -42,7 +42,7 @@ definitions: type: DefaultPaginator pagination_strategy: type: CustomPaginationStrategy - class_name: "source_declarative_manifest.components.CustomPageIncrement" + class_name: "CustomPageIncrement" page_size: 10 page_token_option: type: RequestOption @@ -89,7 +89,7 @@ definitions: type: DefaultPaginator pagination_strategy: type: CustomPaginationStrategy - class_name: "source_declarative_manifest.components.CustomPageIncrement" + class_name: "CustomPageIncrement" page_size: 10 page_token_option: type: RequestOption @@ -136,7 +136,7 @@ definitions: type: "DefaultPaginator" pagination_strategy: type: CustomPaginationStrategy - class_name: "source_declarative_manifest.components.CustomPageIncrement" + class_name: "components.CustomPageIncrement" page_size: 10 page_token_option: type: RequestOption @@ -228,7 +228,7 @@ streams: paginator: type: "DefaultPaginator" pagination_strategy: - class_name: source_declarative_manifest.components.CustomPageIncrement + class_name: components.CustomPageIncrement page_size: 10 type: CustomPaginationStrategy page_token_option: From b81ca33ab770be22dee0b78c0a1131b183de10e6 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 16 Jan 2025 10:15:02 -0800 Subject: [PATCH 29/42] refactor and clean up interfaces --- .../cli/source_declarative_manifest/_run.py | 2 - .../concurrent_declarative_source.py | 1 + .../manifest_declarative_source.py | 17 ++++-- .../parsers/custom_code_compiler.py | 55 +++++++++++++++-- .../parsers/model_to_component_factory.py | 60 +++---------------- ..._source_declarative_w_custom_components.py | 20 ++++++- 6 files changed, 92 insertions(+), 63 deletions(-) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index 45e722d2c..df8d12994 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -184,8 +184,6 @@ def create_declarative_source( "Invalid config: `__injected_declarative_manifest` should be a dictionary, " f"but got type: {type(config['__injected_declarative_manifest'])}" ) - if INJECTED_COMPONENTS_PY in config and not custom_code_execution_permitted(): - raise AirbyteCustomCodeNotPermittedError return ConcurrentDeclarativeSource( config=config, diff --git a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py index aa3cea705..85ab1e82b 100644 --- a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py +++ b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py @@ -81,6 +81,7 @@ def __init__( ) super().__init__( + config=config, source_config=source_config, debug=debug, emit_connector_builder_messages=emit_connector_builder_messages, diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index 83c5fa5f3..9ae14452d 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -7,6 +7,7 @@ import pkgutil from copy import deepcopy from importlib import metadata +from types import ModuleType from typing import Any, Dict, Iterator, List, Mapping, Optional, Set import yaml @@ -31,6 +32,9 @@ DeclarativeStream as DeclarativeStreamModel, ) from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel +from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import ( + get_registered_components_module, +) from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import ( ManifestComponentTransformer, ) @@ -57,23 +61,28 @@ class ManifestDeclarativeSource(DeclarativeSource): def __init__( self, + config: dict[str, Any], source_config: ConnectionDefinition, debug: bool = False, emit_connector_builder_messages: bool = False, component_factory: Optional[ModelToComponentFactory] = None, ): """ - :param source_config(Mapping[str, Any]): The manifest of low-code components that describe the source connector - :param debug(bool): True if debug mode is enabled - :param component_factory(ModelToComponentFactory): optional factory if ModelToComponentFactory's default behaviour needs to be tweaked + Args: + config: The provided config dict. + source_config: The manifest of low-code components that describe the source connector + debug: True if debug mode is enabled + component_factory: optional factory if ModelToComponentFactory's default behaviour needs to be tweaked """ self.logger = logging.getLogger(f"airbyte.{self.name}") - # For ease of use we don't require the type to be specified at the top level manifest, but it should be included during processing manifest = dict(source_config) if "type" not in manifest: manifest["type"] = "DeclarativeSource" + # If custom components are needed, locate and/or register them. + self.components_module: ModuleType | None = get_registered_components_module(config=config) + resolved_source_config = ManifestReferenceResolver().preprocess_manifest(manifest) propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters( "", resolved_source_config, {} diff --git a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py index 76e028ce8..06422cd1b 100644 --- a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +++ b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py @@ -4,6 +4,7 @@ import os import sys from types import ModuleType +from typing import Any, cast from typing_extensions import Literal @@ -13,6 +14,7 @@ "sha256": hashlib.sha256, } COMPONENTS_MODULE_NAME = "components" +SDM_COMPONENTS_MODULE_NAME = "source_declarative_manifest.components" INJECTED_MANIFEST = "__injected_declarative_manifest" INJECTED_COMPONENTS_PY = "__injected_components_py" INJECTED_COMPONENTS_PY_CHECKSUMS = "__injected_components_py_checksums" @@ -82,20 +84,65 @@ def validate_python_code( continue -def components_module_from_string(components_py_text: str) -> ModuleType: +def get_registered_components_module( + config: dict, +) -> ModuleType | None: + """Get a components module object based on the provided config. + + If custom python components is provided, this will be loaded. Otherwise, we will + attempt to load from the `components` module already imported/registered in sys.modules. + + If custom `components.py` text is provided in config, it will be registered with sys.modules + so that it can be later imported by manifest declarations which reference the provided classes. + + Returns `None` if no components is provided and the `components` module is not found. + """ + if INJECTED_COMPONENTS_PY in config: + if not custom_code_execution_permitted(): + raise AirbyteCustomCodeNotPermittedError + + # Create a new module object and execute the provided Python code text within it + python_text = config[INJECTED_COMPONENTS_PY] + return register_components_module_from_string( + components_py_text=python_text, + checksums=config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None), + ) + + # Check for `components` or `source_declarative_manifest.components`. + if SDM_COMPONENTS_MODULE_NAME in sys.modules: + return cast(ModuleType, sys.modules.get(SDM_COMPONENTS_MODULE_NAME)) + + if COMPONENTS_MODULE_NAME in sys.modules: + return cast(ModuleType, sys.modules.get(COMPONENTS_MODULE_NAME)) + + # Could not find module 'components' in `sys.modules` + # and INJECTED_COMPONENTS_PY was not provided in config. + return None + + +def register_components_module_from_string( + components_py_text: str, + checksums: dict[str, Any] | None, +) -> ModuleType: """Load and return the components module from a provided string containing the python code. This assumes the components module is located at /components.py. """ - module_name = "components" + # First validate the code + validate_python_code( + code_text=components_py_text, + checksums=checksums, + ) # Create a new module object - components_module = ModuleType(name=module_name) + components_module = ModuleType(name=COMPONENTS_MODULE_NAME) # Execute the module text in the module's namespace exec(components_py_text, components_module.__dict__) - # Add the module to sys.modules so it can be imported + # Register the module in `sys.modules`` so it can be imported as + # `source_declarative_manifest.components` and/or `components`. + sys.modules[SDM_COMPONENTS_MODULE_NAME] = components_module sys.modules[COMPONENTS_MODULE_NAME] = components_module # Now you can import and use the module diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 58b4eb9ee..8ec8051f6 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -320,11 +320,7 @@ ) from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import ( COMPONENTS_MODULE_NAME, - INJECTED_COMPONENTS_PY, - INJECTED_COMPONENTS_PY_CHECKSUMS, - AirbyteCodeTamperedError, - components_module_from_string, - validate_python_code, + SDM_COMPONENTS_MODULE_NAME, ) from airbyte_cdk.sources.declarative.partition_routers import ( CartesianProductStreamSlicer, @@ -998,7 +994,6 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> """ custom_component_class = self._get_class_from_fully_qualified_class_name( full_qualified_class_name=model.class_name, - components_module=self._get_components_module_object(config=config), ) component_fields = get_type_hints(custom_component_class) model_args = model.dict() @@ -1054,14 +1049,14 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> @staticmethod def _get_class_from_fully_qualified_class_name( full_qualified_class_name: str, - components_module: types.ModuleType, ) -> Any: - """ - Get a class from its fully qualified name, optionally using a pre-parsed module. + """Get a class from its fully qualified name. + + If a custom components module is needed, we assume it is already registered - probably + as `source_declarative_manifest.components` or `components`. Args: full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). - components_module (Optional[ModuleType]): An optional pre-parsed module. Returns: Any: The class object. @@ -1076,52 +1071,15 @@ def _get_class_from_fully_qualified_class_name( ) class_name = split[-1] - if module_name != "components": - raise ValueError( - "Custom components must be defined in a module named " - f"`components`. Found `{module_name}` instead." - ) - if module_name_full not in {"components", "source_declarative_manifest.components"}: - raise ValueError( - "Custom components must be defined in a module named `components` or " - f"`source_declarative_manifest.components`. Found `{module_name_full}` instead." - ) + if module_name_full == COMPONENTS_MODULE_NAME: + # Assume "components" on its own means "source_declarative_manifest.components" + module_name_full = SDM_COMPONENTS_MODULE_NAME try: - return getattr(components_module, class_name) + return getattr(sys.modules[module_name_full], class_name) except (AttributeError, ModuleNotFoundError) as e: raise ValueError(f"Could not load class {full_qualified_class_name}.") from e - @staticmethod - def _get_components_module_object( - config: Config, - ) -> types.ModuleType: - """Get a components module object based on the provided config. - - If custom python components is provided, this will be loaded. Otherwise, we will - attempt to load from the `components` module already imported. - """ - - components_module: types.ModuleType | None - if not INJECTED_COMPONENTS_PY in config: - # Use the existing components module. We expect this to be already grafted into the - # connector module. - if COMPONENTS_MODULE_NAME not in sys.modules: - raise ValueError( - f"Could not find module '{COMPONENTS_MODULE_NAME}' in `sys.modules` " - f"and '{INJECTED_COMPONENTS_PY}' was not provided in config" - ) - - # We now know this is not `None` - components_module = cast(types.ModuleType, sys.modules.get(COMPONENTS_MODULE_NAME)) - return components_module - - # Create a new module object and execute the provided Python code text within it - python_text = config[INJECTED_COMPONENTS_PY] - validate_python_code(python_text, config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None)) - components_module = components_module_from_string(components_py_text=python_text) - return components_module - @staticmethod def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: interface = field_type diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 8d8e238a0..7813586a9 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -6,6 +6,7 @@ import json import logging import os +import sys import types from collections.abc import Callable, Mapping from pathlib import Path @@ -29,8 +30,8 @@ AirbyteCodeTamperedError, AirbyteCustomCodeNotPermittedError, _hash_text, - components_module_from_string, custom_code_execution_permitted, + register_components_module_from_string, ) SAMPLE_COMPONENTS_PY_TEXT = """ @@ -49,7 +50,9 @@ def get_fixture_path(file_name) -> str: def test_components_module_from_string() -> None: # Call the function to get the module - components_module: types.ModuleType = components_module_from_string(SAMPLE_COMPONENTS_PY_TEXT) + components_module: types.ModuleType = register_components_module_from_string( + components_py_text=SAMPLE_COMPONENTS_PY_TEXT, + ) # Check that the module is created and is of the correct type assert isinstance(components_module, types.ModuleType) @@ -66,6 +69,19 @@ def test_components_module_from_string() -> None: assert isinstance(obj, components_module.SimpleClass) assert obj.sample_method() == "Hello, World!" + # Check we can get the class definition from sys.modules + module_lookup = sys.modules[components_module.__name__] + class_lookup = getattr(sys.modules[components_module.__name__], "SimpleClass") + + assert module_lookup == components_module + assert class_lookup == components_module.SimpleClass + assert class_lookup().sample_method() == "Hello, World!" + + # Check we can import the module by name + from source_declarative_manifest.components import sample_function as imported_sample_function # type: ignore [import] # noqa: I001 + + assert imported_sample_function() == "Hello, World!" + def get_py_components_config_dict(failing_components: bool = False) -> dict[str, Any]: connector_dir = Path(get_fixture_path("resources/source_the_guardian_api")) From ceab6fd3504fd38b1e6fffbe20103238a8416926 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 16 Jan 2025 10:22:23 -0800 Subject: [PATCH 30/42] use monkeypatch for setting env vars --- ..._source_declarative_w_custom_components.py | 40 ++++++++++++------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 7813586a9..e9f95ede8 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -52,6 +52,9 @@ def test_components_module_from_string() -> None: # Call the function to get the module components_module: types.ModuleType = register_components_module_from_string( components_py_text=SAMPLE_COMPONENTS_PY_TEXT, + checksums={ + "md5": _hash_text(SAMPLE_COMPONENTS_PY_TEXT, "md5"), + }, ) # Check that the module is created and is of the correct type @@ -125,8 +128,9 @@ def get_py_components_config_dict(failing_components: bool = False) -> dict[str, ) def test_given_injected_declarative_manifest_and_py_components( failing_components: bool, + monkeypatch: pytest.MonkeyPatch, ) -> None: - os.environ[ENV_VAR_ALLOW_CUSTOM_CODE] = "true" + monkeypatch.setenv(ENV_VAR_ALLOW_CUSTOM_CODE, "true") py_components_config_dict = get_py_components_config_dict(failing_components) # Truncate the start_date to speed up tests @@ -178,8 +182,12 @@ def test_given_injected_declarative_manifest_and_py_components( assert msg -def test_missing_checksum_fails_to_run() -> None: +def test_missing_checksum_fails_to_run( + monkeypatch: pytest.MonkeyPatch, +) -> None: """Assert that missing checksum in the config will raise an error.""" + monkeypatch.setenv(ENV_VAR_ALLOW_CUSTOM_CODE, "true") + py_components_config_dict = get_py_components_config_dict() # Truncate the start_date to speed up tests py_components_config_dict["start_date"] = ( @@ -192,12 +200,10 @@ def test_missing_checksum_fails_to_run() -> None: json_str = json.dumps(py_components_config_dict) Path(temp_config_file.name).write_text(json_str) temp_config_file.flush() - source = create_declarative_source( - ["check", "--config", temp_config_file.name], - ) - assert isinstance(source, ManifestDeclarativeSource) with pytest.raises(ValueError): - source.check(logger=logging.getLogger(), config=py_components_config_dict) + source = create_declarative_source( + ["check", "--config", temp_config_file.name], + ) @pytest.mark.parametrize( @@ -207,8 +213,13 @@ def test_missing_checksum_fails_to_run() -> None: "sha256", ], ) -def test_invalid_checksum_fails_to_run(hash_type: str) -> None: +def test_invalid_checksum_fails_to_run( + hash_type: str, + monkeypatch: pytest.MonkeyPatch, +) -> None: """Assert that an invalid checksum in the config will raise an error.""" + monkeypatch.setenv(ENV_VAR_ALLOW_CUSTOM_CODE, "true") + py_components_config_dict = get_py_components_config_dict() # Truncate the start_date to speed up tests py_components_config_dict["start_date"] = ( @@ -221,12 +232,10 @@ def test_invalid_checksum_fails_to_run(hash_type: str) -> None: json_str = json.dumps(py_components_config_dict) Path(temp_config_file.name).write_text(json_str) temp_config_file.flush() - source = create_declarative_source( - ["check", "--config", temp_config_file.name], - ) - assert isinstance(source, ManifestDeclarativeSource) with pytest.raises(AirbyteCodeTamperedError): - source.check(logger=logging.getLogger(), config=py_components_config_dict) + source = create_declarative_source( + ["check", "--config", temp_config_file.name], + ) @pytest.mark.parametrize( @@ -245,15 +254,16 @@ def test_invalid_checksum_fails_to_run(hash_type: str) -> None: def test_fail_unless_custom_code_enabled_explicitly( env_value: Any, should_raise: bool, + monkeypatch: pytest.MonkeyPatch, ) -> None: """Fails if the environment variable to allow custom code is not set. A missing value should fail. Any value other than "true" (case insensitive) should fail. """ - os.environ.pop(ENV_VAR_ALLOW_CUSTOM_CODE, None) + monkeypatch.delenv(ENV_VAR_ALLOW_CUSTOM_CODE, raising=False) if env_value is not None: - os.environ[ENV_VAR_ALLOW_CUSTOM_CODE] = env_value + monkeypatch.setenv(ENV_VAR_ALLOW_CUSTOM_CODE, env_value) assert custom_code_execution_permitted() == (not should_raise) From 714360c7e6532f83530de4d6a2e3a7adc54b9001 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 16 Jan 2025 10:32:33 -0800 Subject: [PATCH 31/42] full code review and cleanup --- airbyte_cdk/cli/source_declarative_manifest/_run.py | 10 +--------- .../connector_builder/connector_builder_handler.py | 1 + .../sources/declarative/manifest_declarative_source.py | 9 +++++---- .../declarative/parsers/custom_code_compiler.py | 9 ++++++--- .../declarative/parsers/model_to_component_factory.py | 5 ----- airbyte_cdk/test/utils/manifest_only_fixtures.py | 2 -- unit_tests/source_declarative_manifest/conftest.py | 7 +------ 7 files changed, 14 insertions(+), 29 deletions(-) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index df8d12994..5def00602 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -17,7 +17,6 @@ from __future__ import annotations import json -import os import pkgutil import sys import traceback @@ -43,13 +42,6 @@ from airbyte_cdk.sources.declarative.concurrent_declarative_source import ( ConcurrentDeclarativeSource, ) -from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import ( - ENV_VAR_ALLOW_CUSTOM_CODE, - INJECTED_COMPONENTS_PY, - INJECTED_MANIFEST, - AirbyteCustomCodeNotPermittedError, - custom_code_execution_permitted, -) from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource from airbyte_cdk.sources.source import TState @@ -189,7 +181,7 @@ def create_declarative_source( config=config, catalog=catalog, state=state, - source_config=cast(dict[str, Any], config[INJECTED_MANIFEST]), + source_config=cast(dict[str, Any], config["__injected_declarative_manifest"]), ) except Exception as error: print( diff --git a/airbyte_cdk/connector_builder/connector_builder_handler.py b/airbyte_cdk/connector_builder/connector_builder_handler.py index 2c241f6fb..b2a728570 100644 --- a/airbyte_cdk/connector_builder/connector_builder_handler.py +++ b/airbyte_cdk/connector_builder/connector_builder_handler.py @@ -52,6 +52,7 @@ def get_limits(config: Mapping[str, Any]) -> TestReadLimits: def create_source(config: Mapping[str, Any], limits: TestReadLimits) -> ManifestDeclarativeSource: manifest = config["__injected_declarative_manifest"] return ManifestDeclarativeSource( + config=config, emit_connector_builder_messages=True, source_config=manifest, component_factory=ModelToComponentFactory( diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index 9ae14452d..cd4f4a96c 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -61,7 +61,7 @@ class ManifestDeclarativeSource(DeclarativeSource): def __init__( self, - config: dict[str, Any], + config: dict[str, Any] | None, source_config: ConnectionDefinition, debug: bool = False, emit_connector_builder_messages: bool = False, @@ -70,9 +70,10 @@ def __init__( """ Args: config: The provided config dict. - source_config: The manifest of low-code components that describe the source connector - debug: True if debug mode is enabled - component_factory: optional factory if ModelToComponentFactory's default behaviour needs to be tweaked + source_config: The manifest of low-code components that describe the source connector. + debug: True if debug mode is enabled. + emit_connector_builder_messages: True if messages should be emitted to the connector builder. + component_factory: optional factory if ModelToComponentFactory's default behavior needs to be tweaked. """ self.logger = logging.getLogger(f"airbyte.{self.name}") # For ease of use we don't require the type to be specified at the top level manifest, but it should be included during processing diff --git a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py index 06422cd1b..f55772a93 100644 --- a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +++ b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py @@ -61,9 +61,12 @@ def custom_code_execution_permitted() -> bool: def validate_python_code( code_text: str, - checksums: dict[ChecksumType, str] | None, + checksums: dict[str, str] | None, ) -> None: - """""" + """Validate the provided Python code text against the provided checksums. + + Currently we fail if no checksums are provided, although this may change in the future. + """ if not checksums: raise ValueError(f"A checksum is required to validate the code. Received: {checksums}") @@ -85,7 +88,7 @@ def validate_python_code( def get_registered_components_module( - config: dict, + config: dict[str, Any], ) -> ModuleType | None: """Get a components module object based on the provided config. diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 8ec8051f6..8b84d1d0b 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -8,7 +8,6 @@ import inspect import re import sys -import types from functools import partial from typing import ( Any, @@ -20,7 +19,6 @@ Optional, Type, Union, - cast, get_args, get_origin, get_type_hints, @@ -1066,9 +1064,6 @@ def _get_class_from_fully_qualified_class_name( """ split = full_qualified_class_name.split(".") module_name_full = ".".join(split[:-1]) - module_name = ( # If bare class name passed, assume "components" module name - split[-2] or "components" - ) class_name = split[-1] if module_name_full == COMPONENTS_MODULE_NAME: diff --git a/airbyte_cdk/test/utils/manifest_only_fixtures.py b/airbyte_cdk/test/utils/manifest_only_fixtures.py index 452705d64..28015d05b 100644 --- a/airbyte_cdk/test/utils/manifest_only_fixtures.py +++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py @@ -2,8 +2,6 @@ import importlib.util -import sys -import types from pathlib import Path from types import ModuleType diff --git a/unit_tests/source_declarative_manifest/conftest.py b/unit_tests/source_declarative_manifest/conftest.py index f84696680..3d61e65e8 100644 --- a/unit_tests/source_declarative_manifest/conftest.py +++ b/unit_tests/source_declarative_manifest/conftest.py @@ -7,13 +7,8 @@ import pytest import yaml -from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import ( - # CustomCodeCompiler, - _hash_text, -) - -def get_fixture_path(file_name) -> str: +def get_fixture_path(file_name): return os.path.join(os.path.dirname(__file__), file_name) From c8de81a13f0708830e045e41fd52ed737b07e010 Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Thu, 16 Jan 2025 11:31:38 -0800 Subject: [PATCH 32/42] apply suggestion --- .../sources/declarative/parsers/model_to_component_factory.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 8b84d1d0b..59d49d154 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -990,9 +990,7 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> :param config: The custom defined connector config :return: The declarative component built from the Pydantic model to be used at runtime """ - custom_component_class = self._get_class_from_fully_qualified_class_name( - full_qualified_class_name=model.class_name, - ) + custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) component_fields = get_type_hints(custom_component_class) model_args = model.dict() model_args["config"] = config From a084e7aab563902921e3126378c5a3f8f0ffd2de Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Thu, 16 Jan 2025 11:34:42 -0800 Subject: [PATCH 33/42] apply suggestion --- .../declarative/parsers/custom_code_compiler.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py index f55772a93..cdaa88516 100644 --- a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +++ b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py @@ -76,15 +76,8 @@ def validate_python_code( f"Unsupported checksum type: {checksum_type}. Supported checksum types are: {CHECKSUM_FUNCTIONS.keys()}" ) - if checksum_type == "md5": - if _hash_text(code_text, "md5") != checksum: - raise AirbyteCodeTamperedError("MD5 checksum does not match.") - continue - - if checksum_type == "sha256": - if _hash_text(code_text, "sha256") != checksum: - raise AirbyteCodeTamperedError("SHA256 checksum does not match.") - continue + if _hash_text(code_text, checksum_type) != checksum: + raise AirbyteCodeTamperedError(f"{checksum_type} checksum does not match.") def get_registered_components_module( From 0491b99bc3841114e8620bdf1bff5d10efc148d1 Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Thu, 16 Jan 2025 11:36:14 -0800 Subject: [PATCH 34/42] apply suggestion --- .../sources/declarative/parsers/custom_code_compiler.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py index cdaa88516..f98162d94 100644 --- a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +++ b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py @@ -120,10 +120,7 @@ def register_components_module_from_string( components_py_text: str, checksums: dict[str, Any] | None, ) -> ModuleType: - """Load and return the components module from a provided string containing the python code. - - This assumes the components module is located at /components.py. - """ + """Load and return the components module from a provided string containing the python code.""" # First validate the code validate_python_code( code_text=components_py_text, From 7134340afca736cddf69d33492a050d977757de3 Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Thu, 16 Jan 2025 11:38:51 -0800 Subject: [PATCH 35/42] apply suggestion --- .../test_source_declarative_w_custom_components.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index e9f95ede8..ec083ddec 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -256,7 +256,7 @@ def test_fail_unless_custom_code_enabled_explicitly( should_raise: bool, monkeypatch: pytest.MonkeyPatch, ) -> None: - """Fails if the environment variable to allow custom code is not set. + """Assert that we properly fail if the environment variable to allow custom code is not set. A missing value should fail. Any value other than "true" (case insensitive) should fail. From bff4dc42d2075f54f0f25fe044797e23dda1e9b4 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 21 Jan 2025 08:03:33 -0800 Subject: [PATCH 36/42] fix lint issues --- .../sources/declarative/manifest_declarative_source.py | 2 +- .../sources/declarative/parsers/custom_code_compiler.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index cd4f4a96c..7f3959ae2 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -61,7 +61,7 @@ class ManifestDeclarativeSource(DeclarativeSource): def __init__( self, - config: dict[str, Any] | None, + config: Mapping[str, Any] | None, source_config: ConnectionDefinition, debug: bool = False, emit_connector_builder_messages: bool = False, diff --git a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py index f98162d94..8a6638fad 100644 --- a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +++ b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py @@ -3,6 +3,7 @@ import hashlib import os import sys +from collections.abc import Mapping from types import ModuleType from typing import Any, cast @@ -41,7 +42,7 @@ def __init__(self) -> None: ) -def _hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> str: +def _hash_text(input_text: str, hash_type: str = "md5") -> str: """Return the hash of the input text using the specified hash type.""" if not input_text: raise ValueError("Input text cannot be empty.") @@ -81,7 +82,7 @@ def validate_python_code( def get_registered_components_module( - config: dict[str, Any], + config: Mapping[str, Any] | None, ) -> ModuleType | None: """Get a components module object based on the provided config. @@ -93,12 +94,12 @@ def get_registered_components_module( Returns `None` if no components is provided and the `components` module is not found. """ - if INJECTED_COMPONENTS_PY in config: + if config and INJECTED_COMPONENTS_PY in config: if not custom_code_execution_permitted(): raise AirbyteCustomCodeNotPermittedError # Create a new module object and execute the provided Python code text within it - python_text = config[INJECTED_COMPONENTS_PY] + python_text: str = config[INJECTED_COMPONENTS_PY] return register_components_module_from_string( components_py_text=python_text, checksums=config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None), From 15cd254c483e24f68fdd35524cd439e7ef567c9f Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 21 Jan 2025 08:20:20 -0800 Subject: [PATCH 37/42] clean up tests --- ..._source_declarative_w_custom_components.py | 161 ++++++++++-------- 1 file changed, 88 insertions(+), 73 deletions(-) diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index ec083ddec..83f69005b 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -86,7 +86,11 @@ def test_components_module_from_string() -> None: assert imported_sample_function() == "Hello, World!" -def get_py_components_config_dict(failing_components: bool = False) -> dict[str, Any]: +def get_py_components_config_dict( + *, + failing_components: bool = False, + needs_secrets: bool = True, +) -> dict[str, Any]: connector_dir = Path(get_fixture_path("resources/source_the_guardian_api")) manifest_yml_path: Path = connector_dir / "manifest.yaml" custom_py_code_path: Path = connector_dir / ( @@ -111,75 +115,10 @@ def get_py_components_config_dict(failing_components: bool = False) -> dict[str, }, } combined_config_dict.update(yaml.safe_load(config_yaml_path.read_text())) - combined_config_dict.update(yaml.safe_load(secrets_yaml_path.read_text())) - return combined_config_dict - - -@pytest.mark.skipif( - condition=not Path(get_fixture_path("resources/source_the_guardian_api/secrets.yaml")).exists(), - reason="Skipped due to missing 'secrets.yaml'.", -) -@pytest.mark.parametrize( - "failing_components", - [ - False, - True, - ], -) -def test_given_injected_declarative_manifest_and_py_components( - failing_components: bool, - monkeypatch: pytest.MonkeyPatch, -) -> None: - monkeypatch.setenv(ENV_VAR_ALLOW_CUSTOM_CODE, "true") - - py_components_config_dict = get_py_components_config_dict(failing_components) - # Truncate the start_date to speed up tests - py_components_config_dict["start_date"] = ( - datetime.datetime.now() - datetime.timedelta(days=2) - ).strftime("%Y-%m-%d") - assert isinstance(py_components_config_dict, dict) - assert "__injected_declarative_manifest" in py_components_config_dict - assert "__injected_components_py" in py_components_config_dict - assert "__injected_components_py_checksums" in py_components_config_dict - - with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: - json_str = json.dumps(py_components_config_dict) - Path(temp_config_file.name).write_text(json_str) - temp_config_file.flush() - source = create_declarative_source( - ["check", "--config", temp_config_file.name], - ) - assert isinstance(source, ManifestDeclarativeSource) - source.check(logger=logging.getLogger(), config=py_components_config_dict) - catalog: AirbyteCatalog = source.discover( - logger=logging.getLogger(), config=py_components_config_dict - ) - assert isinstance(catalog, AirbyteCatalog) - configured_catalog = ConfiguredAirbyteCatalog( - streams=[ - ConfiguredAirbyteStream( - stream=stream, - sync_mode="full_refresh", - destination_sync_mode="overwrite", - ) - for stream in catalog.streams - ] - ) + if needs_secrets: + combined_config_dict.update(yaml.safe_load(secrets_yaml_path.read_text())) - msg_iterator = source.read( - logger=logging.getLogger(), - config=py_components_config_dict, - catalog=configured_catalog, - state=None, - ) - if failing_components: - with pytest.raises(Exception): - for msg in msg_iterator: - assert msg - return - - for msg in msg_iterator: - assert msg + return combined_config_dict def test_missing_checksum_fails_to_run( @@ -188,7 +127,9 @@ def test_missing_checksum_fails_to_run( """Assert that missing checksum in the config will raise an error.""" monkeypatch.setenv(ENV_VAR_ALLOW_CUSTOM_CODE, "true") - py_components_config_dict = get_py_components_config_dict() + py_components_config_dict = get_py_components_config_dict( + needs_secrets=False, + ) # Truncate the start_date to speed up tests py_components_config_dict["start_date"] = ( datetime.datetime.now() - datetime.timedelta(days=2) @@ -220,7 +161,9 @@ def test_invalid_checksum_fails_to_run( """Assert that an invalid checksum in the config will raise an error.""" monkeypatch.setenv(ENV_VAR_ALLOW_CUSTOM_CODE, "true") - py_components_config_dict = get_py_components_config_dict() + py_components_config_dict = get_py_components_config_dict( + needs_secrets=False, + ) # Truncate the start_date to speed up tests py_components_config_dict["start_date"] = ( datetime.datetime.now() - datetime.timedelta(days=2) @@ -252,7 +195,7 @@ def test_invalid_checksum_fails_to_run( ], ) def test_fail_unless_custom_code_enabled_explicitly( - env_value: Any, + env_value: str | None, should_raise: bool, monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -267,7 +210,9 @@ def test_fail_unless_custom_code_enabled_explicitly( assert custom_code_execution_permitted() == (not should_raise) - py_components_config_dict = get_py_components_config_dict() + py_components_config_dict = get_py_components_config_dict( + needs_secrets=False, + ) # Truncate the start_date to speed up tests py_components_config_dict["start_date"] = ( datetime.datetime.now() - datetime.timedelta(days=2) @@ -287,3 +232,73 @@ def test_fail_unless_custom_code_enabled_explicitly( return # Success fn() + + +# TODO: Create a new test source that doesn't require credentials to run. +@pytest.mark.skipif( + condition=not Path(get_fixture_path("resources/source_the_guardian_api/secrets.yaml")).exists(), + reason="Skipped due to missing 'secrets.yaml'.", +) +@pytest.mark.parametrize( + "failing_components", + [ + False, + True, + ], +) +def test_sync_with_injected_py_components( + failing_components: bool, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv(ENV_VAR_ALLOW_CUSTOM_CODE, "true") + + py_components_config_dict = get_py_components_config_dict( + failing_components=failing_components, + ) + # Truncate the start_date to speed up tests + py_components_config_dict["start_date"] = ( + datetime.datetime.now() - datetime.timedelta(days=2) + ).strftime("%Y-%m-%d") + assert isinstance(py_components_config_dict, dict) + assert "__injected_declarative_manifest" in py_components_config_dict + assert "__injected_components_py" in py_components_config_dict + assert "__injected_components_py_checksums" in py_components_config_dict + + with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: + json_str = json.dumps(py_components_config_dict) + Path(temp_config_file.name).write_text(json_str) + temp_config_file.flush() + source = create_declarative_source( + ["check", "--config", temp_config_file.name], + ) + assert isinstance(source, ManifestDeclarativeSource) + source.check(logger=logging.getLogger(), config=py_components_config_dict) + catalog: AirbyteCatalog = source.discover( + logger=logging.getLogger(), config=py_components_config_dict + ) + assert isinstance(catalog, AirbyteCatalog) + configured_catalog = ConfiguredAirbyteCatalog( + streams=[ + ConfiguredAirbyteStream( + stream=stream, + sync_mode="full_refresh", + destination_sync_mode="overwrite", + ) + for stream in catalog.streams + ] + ) + + msg_iterator = source.read( + logger=logging.getLogger(), + config=py_components_config_dict, + catalog=configured_catalog, + state=None, + ) + if failing_components: + with pytest.raises(Exception): + for msg in msg_iterator: + assert msg + return + + for msg in msg_iterator: + assert msg From 6c4e01fa50c50c150c08d0243ede6fca51b43ca1 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 21 Jan 2025 08:37:53 -0800 Subject: [PATCH 38/42] autofix lint issue --- .../sources/declarative/parsers/model_to_component_factory.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index bfcc9f8d9..3fcb0928e 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -359,6 +359,8 @@ ) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( XmlDecoder as XmlDecoderModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( ZipfileDecoder as ZipfileDecoderModel, ) from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import ( From 6c811154afbd70adc86f648839c35a86c4eecb23 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 21 Jan 2025 09:04:23 -0800 Subject: [PATCH 39/42] fix tests --- .../concurrent_declarative_source.py | 2 +- .../manifest_declarative_source.py | 3 +- .../test_connector_builder_handler.py | 6 +- .../source_the_guardian_api/components.py | 61 +++++++++++++------ .../components_failing.py | 8 ++- 5 files changed, 56 insertions(+), 24 deletions(-) diff --git a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py index 1ba44cac2..5db0b0909 100644 --- a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py +++ b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py @@ -76,8 +76,8 @@ def __init__( ) super().__init__( - config=config, source_config=source_config, + config=config, debug=debug, emit_connector_builder_messages=emit_connector_builder_messages, component_factory=component_factory, diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index b7d967e3d..deef5a3be 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -62,8 +62,9 @@ class ManifestDeclarativeSource(DeclarativeSource): def __init__( self, - config: Mapping[str, Any] | None, source_config: ConnectionDefinition, + *, + config: Mapping[str, Any] | None = None, debug: bool = False, emit_connector_builder_messages: bool = False, component_factory: Optional[ModelToComponentFactory] = None, diff --git a/unit_tests/connector_builder/test_connector_builder_handler.py b/unit_tests/connector_builder/test_connector_builder_handler.py index aac00a889..c00a7e2f1 100644 --- a/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/unit_tests/connector_builder/test_connector_builder_handler.py @@ -344,7 +344,7 @@ def test_resolve_manifest(valid_resolve_manifest_config_file): config = copy.deepcopy(RESOLVE_MANIFEST_CONFIG) command = "resolve_manifest" config["__command"] = command - source = ManifestDeclarativeSource(MANIFEST) + source = ManifestDeclarativeSource(source_config=MANIFEST) limits = TestReadLimits() resolved_manifest = handle_connector_builder_request( source, command, config, create_configured_catalog("dummy_stream"), _A_STATE, limits @@ -505,7 +505,7 @@ def resolved_manifest(self): def test_read(): config = TEST_READ_CONFIG - source = ManifestDeclarativeSource(MANIFEST) + source = ManifestDeclarativeSource(source_config=MANIFEST) real_record = AirbyteRecordMessage( data={"id": "1234", "key": "value"}, emitted_at=1, stream=_stream_name @@ -592,7 +592,7 @@ def test_config_update() -> None: "client_secret": "a client secret", "refresh_token": "a refresh token", } - source = ManifestDeclarativeSource(manifest) + source = ManifestDeclarativeSource(source_config=manifest) refresh_request_response = { "access_token": "an updated access token", diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py index db5b07971..98a9f7ad5 100644 --- a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py @@ -2,35 +2,60 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from dataclasses import dataclass -from typing import Any, Mapping, Optional +from dataclasses import InitVar, dataclass +from typing import Any, Mapping, Optional, Union import requests -from airbyte_cdk.sources.declarative.requesters.paginators.strategies.page_increment import ( - PageIncrement, -) +from airbyte_cdk.sources.declarative.interpolation import InterpolatedString +from airbyte_cdk.sources.declarative.requesters.paginators import PaginationStrategy +from airbyte_cdk.sources.declarative.types import Config, Record @dataclass -class CustomPageIncrement(PageIncrement): +class CustomPageIncrement(PaginationStrategy): """ Starts page from 1 instead of the default value that is 0. Stops Pagination when currentPage is equal to totalPages. """ - def next_page_token(self, response: requests.Response, *args) -> Optional[Any]: + config: Config + page_size: Optional[Union[str, int]] + parameters: InitVar[Mapping[str, Any]] + start_from_page: int = 0 + inject_on_first_request: bool = False + + def __post_init__(self, parameters: Mapping[str, Any]) -> None: + if isinstance(self.page_size, int) or (self.page_size is None): + self._page_size = self.page_size + else: + page_size = InterpolatedString(self.page_size, parameters=parameters).eval(self.config) + if not isinstance(page_size, int): + raise Exception(f"{page_size} is of type {type(page_size)}. Expected {int}") + self._page_size = page_size + + @property + def initial_token(self) -> Optional[Any]: + if self.inject_on_first_request: + return self.start_from_page + return None + + def next_page_token( + self, + response: requests.Response, + last_page_size: int, + last_record: Optional[Record], + last_page_token_value: Optional[Any], + ) -> Optional[Any]: res = response.json().get("response") - currPage = res.get("currentPage") - totalPages = res.get("pages") - if currPage < totalPages: - self._page += 1 - return self._page + current_page = res.get("currentPage") + total_pages = res.get("pages") + + # The first request to the API does not include the page_token, so it comes in as None when determing whether to paginate + last_page_token_value = last_page_token_value or 0 + if current_page < total_pages: + return last_page_token_value + 1 else: return None - def __post_init__(self, parameters: Mapping[str, Any]): - super().__post_init__(parameters) - self._page = 1 - - def reset(self): - self._page = 1 + def get_page_size(self) -> Optional[int]: + return self._page_size diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components_failing.py b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components_failing.py index bca3a0bb0..f5988a814 100644 --- a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components_failing.py +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components_failing.py @@ -22,7 +22,13 @@ class CustomPageIncrement(PageIncrement): Starts page from 1 instead of the default value that is 0. Stops Pagination when currentPage is equal to totalPages. """ - def next_page_token(self, response: requests.Response, *args) -> Optional[Any]: + def next_page_token( + self, + response: requests.Response, + last_page_size: int, + last_record: Optional[Record], + last_page_token_value: Optional[Any], + ) -> Optional[Mapping[str, Any]]: raise IntentionalException() def __post_init__(self, parameters: Mapping[str, Any]): From 1c355771397c3e57770eea7c282600272b5411c0 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 21 Jan 2025 09:15:05 -0800 Subject: [PATCH 40/42] fix another test --- .../components_failing.py | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components_failing.py b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components_failing.py index f5988a814..8655bdf2d 100644 --- a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components_failing.py +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components_failing.py @@ -2,14 +2,14 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from dataclasses import dataclass -from typing import Any, Mapping, Optional +from dataclasses import InitVar, dataclass +from typing import Any, Mapping, Optional, Union import requests -from airbyte_cdk.sources.declarative.requesters.paginators.strategies.page_increment import ( - PageIncrement, -) +from airbyte_cdk.sources.declarative.interpolation import InterpolatedString +from airbyte_cdk.sources.declarative.requesters.paginators import PaginationStrategy +from airbyte_cdk.sources.declarative.types import Config, Record class IntentionalException(Exception): @@ -17,23 +17,38 @@ class IntentionalException(Exception): @dataclass -class CustomPageIncrement(PageIncrement): +class CustomPageIncrement(PaginationStrategy): """ Starts page from 1 instead of the default value that is 0. Stops Pagination when currentPage is equal to totalPages. """ + config: Config + page_size: Optional[Union[str, int]] + parameters: InitVar[Mapping[str, Any]] + start_from_page: int = 0 + inject_on_first_request: bool = False + + def __post_init__(self, parameters: Mapping[str, Any]) -> None: + if isinstance(self.page_size, int) or (self.page_size is None): + self._page_size = self.page_size + else: + page_size = InterpolatedString(self.page_size, parameters=parameters).eval(self.config) + if not isinstance(page_size, int): + raise Exception(f"{page_size} is of type {type(page_size)}. Expected {int}") + self._page_size = page_size + + @property + def initial_token(self) -> Optional[Any]: + raise IntentionalException() + def next_page_token( self, response: requests.Response, last_page_size: int, last_record: Optional[Record], last_page_token_value: Optional[Any], - ) -> Optional[Mapping[str, Any]]: + ) -> Optional[Any]: raise IntentionalException() - def __post_init__(self, parameters: Mapping[str, Any]): - super().__post_init__(parameters) - self._page = 1 - - def reset(self): - self._page = 1 + def get_page_size(self) -> Optional[int]: + return self._page_size From e6b28b60aefa1cb0a4f558391ffa45b549c4d87b Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 21 Jan 2025 17:53:01 -0800 Subject: [PATCH 41/42] fix failing test --- .../sources/declarative/test_manifest_declarative_source.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/unit_tests/sources/declarative/test_manifest_declarative_source.py b/unit_tests/sources/declarative/test_manifest_declarative_source.py index b3c9ab4bb..b20adaa93 100644 --- a/unit_tests/sources/declarative/test_manifest_declarative_source.py +++ b/unit_tests/sources/declarative/test_manifest_declarative_source.py @@ -16,6 +16,7 @@ import yaml from jsonschema.exceptions import ValidationError +import unit_tests.sources.declarative.external_component # Needed for dynamic imports to work from airbyte_cdk.models import ( AirbyteLogMessage, AirbyteMessage, @@ -264,6 +265,11 @@ def test_valid_manifest(self): ], "check": {"type": "CheckStream", "stream_names": ["lists"]}, } + assert "unit_tests" in sys.modules + assert "unit_tests.sources" in sys.modules + assert "unit_tests.sources.declarative" in sys.modules + assert "unit_tests.sources.declarative.external_component" in sys.modules + source = ManifestDeclarativeSource(source_config=manifest) check_stream = source.connection_checker From f29f616d69930e31b27f815dd15d7376b39cf1a6 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 21 Jan 2025 18:05:19 -0800 Subject: [PATCH 42/42] mark full sync as slow test (~60s) --- .../test_source_declarative_w_custom_components.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 83f69005b..d608e7620 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -242,7 +242,7 @@ def test_fail_unless_custom_code_enabled_explicitly( @pytest.mark.parametrize( "failing_components", [ - False, + pytest.param(False, marks=pytest.mark.slow), # Slow because we run a full sync True, ], )