diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5b4c214..e406df3 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -8,7 +8,7 @@ jobs:
       - run:
           name: 'Setup virtual env'
           command: |
-            uv venv --python 3.9 /usr/local/share/virtualenvs/tap-iterable
+            uv venv --python 3.12 /usr/local/share/virtualenvs/tap-iterable
             source /usr/local/share/virtualenvs/tap-iterable/bin/activate
             uv pip install -U 'pip<19.2' 'setuptools<51.0.0'
             uv pip install .[dev]
@@ -22,9 +22,8 @@ jobs:
           name: 'Unit Tests'
           command: |
             source /usr/local/share/virtualenvs/tap-iterable/bin/activate
-            uv pip install coverage
-            uv pip install nose coverage parameterized
-            nosetests --with-coverage --cover-erase --cover-package=tap_iterable --cover-html-dir=htmlcov tests/unittests
+            uv pip install pytest coverage parameterized
+            coverage run -m pytest tests/unittests
             coverage html
       - run:
           name: 'Integration Tests'
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b165ffd..b9e408a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,8 @@
 # Changelog

 ## 1.1.0
-  * Adds parent-tap-stream-id field to catalog for child streams [#30](https://github.com/singer-io/tap-iterable/pull/30)
+  * Fixes a bookmarking issue in the templates stream and in integration tests by improving the backoff strategy [#31](https://github.com/singer-io/tap-iterable/pull/31)
+  * Adds parent-tap-stream-id field to catalog for child streams [#30](https://github.com/singer-io/tap-iterable/pull/30)

 ## 1.0.2
   * Dependency upgrades [#28](https://github.com/singer-io/tap-iterable/pull/28)
diff --git a/setup.py b/setup.py
index 068c41d..d15a040 100755
--- a/setup.py
+++ b/setup.py
@@ -10,14 +10,13 @@
       classifiers=["Programming Language :: Python :: 3 :: Only"],
       py_modules=["tap_iterable"],
       install_requires=[
-          "singer-python==5.13.2",
-          "requests==2.32.4"
+          "singer-python==6.3.0",
+          "requests==2.32.5"
       ],
       extras_require={
           'dev': [
               'pylint',
-              'ipdb',
-              'requests==2.32.4'
+              'ipdb'
           ]
       },
       entry_points="""
diff --git a/tap_iterable/iterable.py b/tap_iterable/iterable.py
index b127330..daa175b 100644
--- a/tap_iterable/iterable.py
+++ b/tap_iterable/iterable.py
@@ -43,11 +43,12 @@ def _daterange(self, start_date, end_date):
     else:
       yield strptime_with_tz(start_date).strftime("%Y-%m-%d %H:%M:%S")

-  @backoff.on_exception(backoff.constant,
+  @backoff.on_exception(backoff.expo,
                         (IterableRateLimitError, IterableNotAvailableError),
+                        max_tries=7,
                         jitter=None,
-                        interval=30,
-                        max_tries=5)
+                        base=2,
+                        factor=2)
   def _get(self, path, stream=True, **kwargs):
     """ The actual `get` request. """
     uri = "{uri}{path}".format(uri=self.uri, path=path)
@@ -141,17 +142,29 @@ def templates(self, column_name, bookmark):
     ]
     # `templates` API bug where it doesn't extract the records
     # where `startDateTime`= 2023-03-01+07%3A31%3A15 though record exists
-    # hence, substracting one second so that we could extract atleast one record
-    bookmark_val = strptime_with_tz(bookmark) - timedelta(seconds=1)
+    # hence, the startDateTime filter is dropped here and records are filtered client-side on the bookmark below
+
+    # ################################ Template API Issue WORKAROUND #################################
+    # We fall back to pseudo-incremental logic here to avoid missing records due to API limitations.
+
+    # Reason for the workaround:
+    # -------------------------
+    # Iterable's Templates API is not designed to support update-based incremental syncs.
+    # While it does have an updatedAt field, the API does not allow filtering on it.
+    # Using updatedAt as a bookmark key can therefore miss records, because the API always filters on createdAt.
+    # Even old records that were updated recently would be missed, since their createdAt stays old and is never passed to the API.
+
+    ########################################################################################
+
+    bookmark_val = strptime_with_tz(bookmark)
     bookmark = strftime(bookmark_val)
     for template_type in template_types:
       for medium in message_mediums:
-        for kwargs in self.get_start_end_date(bookmark):
-          res = self.get("templates", templateTypes=template_type, messageMedium=medium, **kwargs)
-          for t in res["templates"]:
-            rec_date_time = strptime_with_tz(helper.epoch_to_datetime_string(t[column_name]))
-            if rec_date_time >= bookmark_val:
-              yield t
+        res = self.get("templates", templateType=template_type, messageMedium=medium)
+        for t in res["templates"]:
+          rec_date_time = strptime_with_tz(helper.epoch_to_datetime_string(t[column_name]))
+          if rec_date_time >= bookmark_val:
+            yield t


   def metadata(self, column_name=None, bookmark=None):
@@ -182,4 +195,3 @@ def get_data_export_generator(self, data_type_name, bookmark=None):
     def get_data():
       return self._get("export/data.json", dataTypeName=data_type_name, **kwargs), kwargs['endDateTime']
     yield get_data
-
\ No newline at end of file
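With the change above, retries move from a constant 30-second interval (at most 4 waits over 5 tries) to exponential backoff. Assuming backoff's `expo` generator semantics (waits of `factor * base**n`) and `jitter=None`, a minimal illustrative sketch — not part of the diff — of the schedule this implies for `max_tries=7`:

    # Illustrative only: sleep intervals produced by backoff.expo(base=2, factor=2)
    # with jitter=None; max_tries=7 allows at most 6 sleeps between the 7 attempts.
    waits = [2 * 2 ** n for n in range(6)]
    print(waits)       # [2, 4, 8, 16, 32, 64]
    print(sum(waits))  # 126 -> roughly two minutes of total back-off before giving up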
diff --git a/tap_iterable/streams.py b/tap_iterable/streams.py
index 90c351a..5519125 100644
--- a/tap_iterable/streams.py
+++ b/tap_iterable/streams.py
@@ -196,7 +196,7 @@ class Templates(Stream):
   name = "templates"
   replication_method = "INCREMENTAL"
   replication_key = "updatedAt"
-  key_properties = [ "templateId" ]
+  key_properties = ["templateId"]


 class Metadata(Stream):
diff --git a/tests/base.py b/tests/base.py
index d5d2971..4e0bc2c 100644
--- a/tests/base.py
+++ b/tests/base.py
@@ -31,7 +31,7 @@ class IterableBase(unittest.TestCase):
     API_WINDOWS_IN_DAYS = 60

     # Skipping streams from testing because we were unable to generate test data
-    MISSING_DATA_STREAMS = {"metadata", "email_send_skip", "email_complaint", "email_click"}
+    MISSING_DATA_STREAMS = {"metadata", "email_send_skip", "email_complaint", "email_click", "email_bounce"}

     def tap_name(self):
         return "tap-iterable"
@@ -60,7 +60,7 @@ def get_properties(self, original: bool = True):
         """
         Setting required properties as environment variables.
         """
         return_value = {
-            'start_date':'2023-01-25T00:00:00Z',
+            'start_date':'2023-01-24T00:00:00Z',
             'api_key':os.getenv('ITERABLE_API_KEY'),
             "api_window_in_days": 30
         }
@@ -451,7 +451,7 @@ def calculated_states_by_stream(self, current_state):
             days, hours, minutes = timedelta_by_stream[stream]

             calculated_state_as_datetime = state_as_datetime - timedelta(days=days, hours=hours, minutes=minutes)
-            state_format = "%Y-%m-%dT00:00:00Z"
+            state_format = "%Y-%m-%dT%H:%M:%SZ"
             calculated_state_formatted = dt.strftime(calculated_state_as_datetime, state_format)

             stream_to_calculated_state["bookmarks"][stream][replication_key] = calculated_state_formatted
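For context on the `state_format` change in `calculated_states_by_stream` above: the old format string hard-coded midnight, so simulated bookmark states lost their time component, while the new one keeps it. A small illustrative snippet (not part of the diff, values are made up):

    from datetime import datetime as dt

    state = dt(2025, 3, 1, 10, 58, 0)
    dt.strftime(state, "%Y-%m-%dT00:00:00Z")  # '2025-03-01T00:00:00Z' -- time truncated to midnight
    dt.strftime(state, "%Y-%m-%dT%H:%M:%SZ")  # '2025-03-01T10:58:00Z' -- full timestamp preserved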
""" return_value = { - 'start_date':'2023-01-25T00:00:00Z', + 'start_date':'2023-01-24T00:00:00Z', 'api_key':os.getenv('ITERABLE_API_KEY'), "api_window_in_days": 30 } @@ -451,7 +451,7 @@ def calculated_states_by_stream(self, current_state): days, hours, minutes = timedelta_by_stream[stream] calculated_state_as_datetime = state_as_datetime - timedelta(days=days, hours=hours, minutes=minutes) - state_format = "%Y-%m-%dT00:00:00Z" + state_format = "%Y-%m-%dT%H:%M:%SZ" calculated_state_formatted = dt.strftime(calculated_state_as_datetime, state_format) stream_to_calculated_state["bookmarks"][stream][replication_key] = calculated_state_formatted diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py index a351068..2e93e16 100644 --- a/tests/test_all_fields.py +++ b/tests/test_all_fields.py @@ -7,9 +7,6 @@ class AllFieldsTest(IterableBase): # Skipping fields since we were not able to genrate test data for following fields in the streams MISSING_FILEDS = { - "templates": { - "campaignId" - }, "email_subscribe": { "itblInternal", "workflowId" @@ -19,34 +16,40 @@ class AllFieldsTest(IterableBase): "workflowId" }, "email_send": { - "itblInternal" + "itblInternal", + "transactionalData" }, "email_open": { "itblInternal", - "eventName" + "eventName", + "proxySource" }, "email_subscribe": { - "workflowId", + "workflowId", "itblInternal", "eventName", - "campaignId", - "templateId", - "emailListId" + "campaignId", + "templateId", + "emailListId", + "userId", + "profileUpdatedAt" }, "email_unsubscribe": { - "emailListIds", "workflowId", "emailListId", "itblInternal", "channelId", - "eventName" + "eventName", + "bounceMessage", + "status", + "recipientState" }, "email_bounce": { "itblInternal", "eventName" }, "users": { - "browserTokens", + "browserTokens", "devices.applicationName", "devices.endpointEnabled", "devices.platform", @@ -84,7 +87,149 @@ class AllFieldsTest(IterableBase): "vegetarian_menu.featured_item_image_url", "vegetarian_menu.featured_item_menu_availability", "vegetarian_menu.featured_item_menu_date", - "vegetarian_menu" + "vegetarian_menu", + "jobRecommendations.description", + "times_purchased", + "estimatedSizing", + "auctionDigest", + "house Districts", + "passively_seeking", + "accessIp", + "offers.description", + "interested_in_toilet_paper", + "job_categories_interested", + "region", + "recommendedVehicles.estimateDescription", + "wishList", + "jobRecommendations.name", + "paid_user", + "offers.id", + "favoriteCategories.category", + "offers.quantity", + "shoppingCartItems.categories", + "offers.intro APR", + "total_playtime", + "loyalty_program", + "totalSpent", + "lifetime Dontation", + "hasMobileApp", + "type", + "favoriteAnimal", + "offers.name", + "recommendedVehicles.imageUrl", + "shoppingCartItems.sku", + "recommendedVehicles.category", + "experience", + "wishList.name", + "highestBidPrice", + "newListedVehicles.noHagglePrice", + "actively_seeking", + "counties", + "daysSinceLastOrder", + "current_employer_id", + "shoppingCartItems.quantity", + "jobRecommendations.applicationURL", + "times donated", + "last_game_played", + "city", + "jobRecommendations.id", + "promoCode", + "shoppingCartItems.imageUrl", + "swingTrader_subscription", + "badgeCount", + "Industry", + "recommendedVehicles", + "totalOrders", + "readingList.avgRating", + "shoppingCartItems.name", + "auctionDigest.name", + "newListedVehicles.name", + "offers.categories", + "shoppingCartItems.url", + "shoppingCartItems.price", + "should_receive_recommendation", + "offers.imageUrl", + 
"newListedVehicles.imageUrl", + "last_purchased", + "state", + "shoppingCartItems.id", + "favoriteRestaurant", + "interested_in_soap", + "sat", + "newListedVehicles.sku", + "offers", + "auctionDigest.auctionHouse", + "senate Districts", + "congressional Districts", + "booked_activity_before", + "readingList.bookName", + "shoppingCartItems", + "totalPurchases", + "favorite_category", + "bestFriend", + "age", + "timeZone", + "university_interest", + "lastOrderrestaurant", + "jobRecommendations.imageUrl", + "date_last_booked_package", + "shoppingCartItems.description", + "lastKnownLatitude", + "favoriteShowCategories", + "is_available", + "auctionDigest.auctionDateLocation", + "wishList.price", + "acquisition_source", + "location", + "fb_follow", + "user_type", + "zip", + "tagline", + "recommendedVehicles.TrueCar Estimate", + "newListedVehicles.category", + "major", + "auctionDigest.auctionImageUrl", + "lastAccessedAgent", + "favoriteCuisine", + "readingList.bookAuthor", + "is_active", + "offers.Intro APR", + "loyalty_points", + "jobRecommendations", + "recommendedVehicles.name", + "job_title", + "readingList", + "lastOrderlocation", + "recommendedVehicles.sku", + "averageOrderValue", + "phoneNumber", + "marketSmith_subscription", + "interested_in_detergent", + "favoriteCategory", + "vip", + "favoriteItem", + "streetAddress", + "lastKnownLongitude", + "newListedVehicles.miles", + "CCProvider", + "totalOrderCount", + "uploaded_resume", + "lifetime_Spent", + "last_purchased_category", + "loyalty_member", + "offers.sku", + "readingList.imageUrl", + "current_employer", + "offers.url", + "favoriteProduct", + "industry", + "gender", + "favoriteCategories", + "favoritedShows", + "booked_package_before", + "locale", + "auctionDigest.auctionInfo", + "newListedVehicles", }, } @@ -103,7 +248,7 @@ def test_run(self): """ # We need to set older start date to increase stream field coverage # While doing so test performace may impact so setting large window size - self.START_DATE = '2018-09-01T00:00:00Z' + self.START_DATE = '2020-09-01T00:00:00Z' self.API_WINDOWS_IN_DAYS = 365 # instantiate connection diff --git a/tests/test_interrupted_sync.py b/tests/test_interrupted_sync.py index 06c55ab..334da06 100644 --- a/tests/test_interrupted_sync.py +++ b/tests/test_interrupted_sync.py @@ -78,16 +78,13 @@ def test_run(self): "currently_syncing": "email_open", "bookmarks": { "campaigns": { - "updatedAt": "2023-03-02 10:18:55.000000Z" - }, - "templates": { - "updatedAt": "2023-02-22 07:31:15.000000Z" + "updatedAt": "2025-03-02T10:18:55.000000Z" }, "email_bounce": { - "createdAt": "2023-03-01 10:58:00.000000Z" + "createdAt": "2025-03-01T10:58:00.000000Z" }, "email_open": { - "createdAt": "2023-02-22 7:43:31.000000Z" + "createdAt": "2025-02-22T07:43:31.000000Z" } } } diff --git a/tests/test_start_date.py b/tests/test_start_date.py index 4a9c511..909915a 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -26,8 +26,8 @@ def test_run(self): """ self.start_date_1 = self.get_properties() - self.start_date_2 = '2023-02-20T00:00:00Z' - self.START_DATE = self.start_date_1 + self.start_date_2 = '2024-02-20T00:00:00Z' + self.START_DATE = self.start_date_1['start_date'] ########################################################################## ### First Sync @@ -51,7 +51,6 @@ def test_run(self): # run initial sync record_count_by_stream_1 = self.run_and_verify_sync(conn_id_1) synced_records_1 = runner.get_records_from_target_output() - # Verify that you get some records for each stream for stream in 
diff --git a/tests/unittests/test_backoff.py b/tests/unittests/test_backoff.py
index de4a5a1..c76fe0f 100644
--- a/tests/unittests/test_backoff.py
+++ b/tests/unittests/test_backoff.py
@@ -41,9 +41,9 @@ class TestBackoff(unittest.TestCase):
     @parameterized.expand([
         [400, IterableBadRequestError, 1, "A validation exception has occurred."],
         [401, IterableUnauthorizedError, 1, "Invalid authorization credentials."],
-        [429, IterableRateLimitError, 5,
+        [429, IterableRateLimitError, 7,
          "The API rate limit for your organisation/application pairing has been exceeded."],
-        [503, IterableNotAvailableError, 5, "API service is currently unavailable."],
+        [503, IterableNotAvailableError, 7, "API service is currently unavailable."],
     ])
     @mock.patch("time.sleep")
     @mock.patch("requests.get")
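The expected call counts in the parameterized cases above move from 5 to 7 because they track the decorator's `max_tries`: for retryable 429/503 responses the mocked `requests.get` is invoked once per allowed try, while non-retryable 400/401 still see a single call. A rough, illustrative sketch of that relationship (not part of the test suite), reusing the same `time.sleep` patch the test relies on to avoid real waits:

    from unittest import mock
    import backoff

    @backoff.on_exception(backoff.expo, ValueError, max_tries=7, jitter=None, base=2, factor=2)
    def always_fails(counter):
        counter["calls"] += 1
        raise ValueError("simulated 429/503")

    with mock.patch("time.sleep"):  # skip the real exponential waits
        counter = {"calls": 0}
        try:
            always_fails(counter)
        except ValueError:
            pass

    print(counter["calls"])  # 7 -- one call per allowed try, matching the updated expectations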