diff --git a/.trunk/configs/.markdownlint.yaml b/.trunk/configs/.markdownlint.yaml index 325667c..b40ee9d 100644 --- a/.trunk/configs/.markdownlint.yaml +++ b/.trunk/configs/.markdownlint.yaml @@ -1,13 +1,2 @@ -# Autoformatter friendly markdownlint config (all formatting rules disabled) -default: true -blank_lines: false -bullet: false -html: false -indentation: false -line_length: false -no-duplicate-heading: - siblings_only: true -no-trailing-punctuation: false -spaces: false -url: false -whitespace: false +# Prettier friendly markdownlint config (all formatting rules disabled) +extends: markdownlint/style/prettier diff --git a/.trunk/trunk.yaml b/.trunk/trunk.yaml index 4c56246..de41f06 100644 --- a/.trunk/trunk.yaml +++ b/.trunk/trunk.yaml @@ -2,12 +2,12 @@ # To learn more about the format of this file, see https://docs.trunk.io/reference/trunk-yaml version: 0.1 cli: - version: 1.21.0 + version: 1.22.0 # Trunk provides extensibility via plugins. (https://docs.trunk.io/plugins) plugins: sources: - id: trunk - ref: v1.4.5 + ref: v1.5.0 uri: https://github.com/trunk-io/plugins # Many linters and tools depend on runtimes - configure them here. 
(https://docs.trunk.io/runtimes) runtimes: @@ -27,30 +27,25 @@ lint: commands: - name: lint run: bandit --exit-zero -c bandit.yaml --format json --output ${tmpfile} ${target} - - name: trufflehog - commands: - - name: lint - run: trufflehog filesystem --json --fail --exclude-paths=/.gitignore ${target} enabled: - actionlint@1.6.27 - bandit@1.7.8 - - checkov@3.2.53 + - checkov@3.2.74 - git-diff-check - markdownlint@0.39.0 - - osv-scanner@1.7.0 + - osv-scanner@1.7.2 - prettier@3.2.5 - - ruff@0.3.5 - - semgrep@1.67.0 + - ruff@0.4.1 - sourcery@1.16.0 - taplo@0.8.1 - - trivy@0.50.1 - - trufflehog-git@3.72.0 - # - trufflehog@3.71.0 + - trivy@0.50.4 + - trufflehog@3.74.0 + - trufflehog-git@3.74.0 - yamllint@1.35.1 disabled: - - trufflehog - black - isort + - semgrep actions: enabled: - commitizen diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 69e1f5a..8eb9bad 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,12 +1,12 @@ -## Contributing to Diffbot Knowledge Graph Client +# Contributing to Diffbot Knowledge Graph Client First off, thanks for taking the time to contribute! The following is a set of guidelines for contributing to `diffbot-kg`, which is hosted on GitHub. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request. -### How Can I Contribute? +## How Can I Contribute? -#### Reporting Bugs +### Reporting Bugs This section guides you through submitting a bug report for `diffbot-kg`. Following these guidelines helps the maintainer and the community understand your report, reproduce the behavior, and find related reports. @@ -17,7 +17,7 @@ This section guides you through submitting a bug report for `diffbot-kg`. Follow - **Explain which behavior you expected to see instead and why.** - **Include screenshots and/or animated GIFs** which help demonstrate the steps or point out the part of Indeed Job Scraper which the suggestion is related to. 
-#### Pull Requests +### Pull Requests Please follow these steps to have your contribution considered by the maintainer: @@ -25,17 +25,18 @@ Please follow these steps to have your contribution considered by the maintainer - While the maintainer reviews your PR, you can also ask for specific people to review your changes. - Once your pull request is created, it will be reviewed by the maintainer of the project. You may be asked to make changes to your pull request. There's always a chance your pull request won't be accepted. -#### Python Styleguide +### Python Styleguide -- All Python must adhere to [PEP 8](https://www.python.org/dev/peps/pep-0008/). -- Use type annotations according to [PEP 484](https://www.python.org/dev/peps/pep-0484/) and [PEP 526](https://www.python.org/dev/peps/pep-0526/). -- Format your python code with [Black](https://github.com/ambv/black). -- Lint your python code with [Ruff](https://github.com/jendrikseipp/ruff). +- All Python must adhere to [PEP 8][PEP8]. +- Use type annotations according to [PEP 484][PEP484] and [PEP 526][PEP526]. +- Format and lint your Python code with [Ruff](https://github.com/astral-sh/ruff). - Include docstrings and comments where appropriate. - Write tests for new features and bug fixes. -To automatically format and lint your code on commit, run `pre-commit install` in the root of the repository. - -### Attribution +## Attribution This Contributing guide is adapted from the [Contributing to Atom](https://github.com/atom/atom/blob/master/CONTRIBUTING.md) guide. 
+ +[PEP8]: https://www.python.org/dev/peps/pep-0008/ +[PEP484]: https://www.python.org/dev/peps/pep-0484/ +[PEP526]: https://www.python.org/dev/peps/pep-0526/ diff --git a/README.md b/README.md index d4f3aeb..08eba42 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,23 @@ -Diffbot Knowledge Graph Client -============= - -![](https://www.diffbot.com/assets/img/diffbot-logo-darkbg.svg) +# Diffbot Knowledge Graph Client +![Diffbot Logo](https://www.diffbot.com/assets/img/diffbot-logo-darkbg.svg) [![CodeFactor](https://www.codefactor.io/repository/github/brendancsmith/diffbot-kg/badge)](https://www.codefactor.io/repository/github/brendancsmith/diffbot-kg) ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/brendancsmith/diffbot-kg/python-package.yml) ![PyPI - Version](https://img.shields.io/pypi/v/diffbot-kg) ![GitHub License](https://img.shields.io/github/license/brendancsmith/diffbot-kg) - -Description ------------ +## Description Python client for the Diffbot Knowledge Graph API. -Installation ------------- +## Installation ```sh pip install diffbot-kg ``` -Usage ------ +## Usage ```python from diffbot_kg import DiffbotSearchClient, DiffbotEnhanceClient @@ -38,12 +32,10 @@ search_results = search_client.search({query='type:Organization name:Diffbot'}) enhanced_entity = enhance_client.enhance({query='type:Organization name:Diffbot'}) ``` -Contributing ------------- +## Contributing Contributions to this project are welcome. - see the CONTRIBUTING.md file for details. -License -------- +## License This project is licensed under the MIT License - see the LICENSE file for details. diff --git a/pyproject.toml b/pyproject.toml index 86eefc7..0e401b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "diffbot-kg" -version = "0.2.0" +version = "0.2.1" description = "Python client for the Diffbot Knowledge Graph API." authors = ["Brendan C. 
Smith"] license = "MIT" diff --git a/src/diffbot_kg/__init__.py b/src/diffbot_kg/__init__.py index e69de29..c1edc5b 100644 --- a/src/diffbot_kg/__init__.py +++ b/src/diffbot_kg/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- + +from diffbot_kg.clients.enhance import DiffbotEnhanceClient # noqa: F401 +from diffbot_kg.clients.search import DiffbotSearchClient # noqa: F401 diff --git a/src/diffbot_kg/clients/__init__.py b/src/diffbot_kg/clients/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/diffbot_kg/clients/base.py b/src/diffbot_kg/clients/base.py index 3790a28..85ad949 100644 --- a/src/diffbot_kg/clients/base.py +++ b/src/diffbot_kg/clients/base.py @@ -1,8 +1,9 @@ from typing import Any -from diffbot_kg.clients.session import BaseDiffbotResponse, DiffbotSession from yarl import URL +from diffbot_kg.clients.session import BaseDiffbotResponse, DiffbotSession + class BaseDiffbotKGClient: """ @@ -40,6 +41,8 @@ def _merge_params(self, params) -> dict[str, Any]: params = params or {} params = {**self.default_params, **params} + + # sourcery skip: inline-immediately-returned-variable params = {k: v for k, v in params.items() if v is not None} return params @@ -58,9 +61,11 @@ async def _get( BaseDiffbotResponse: The response from the API. 
""" - headers = {"accept": "application/json", **(headers or {})} + headers = headers or {} params = self._merge_params(params) + + # sourcery skip: inline-immediately-returned-variable resp = await self.s.get(url, params=params, headers=headers) return resp @@ -87,10 +92,10 @@ async def _post( headers = { "content-type": "application/json", - "accept": "application/json", **(headers or {}), } + # sourcery skip: inline-immediately-returned-variable resp = await self.s.post(url, params=params, headers=headers, json=json) return resp diff --git a/src/diffbot_kg/clients/enhance.py b/src/diffbot_kg/clients/enhance.py index e103d46..1debead 100644 --- a/src/diffbot_kg/clients/enhance.py +++ b/src/diffbot_kg/clients/enhance.py @@ -125,6 +125,7 @@ async def bulkjob_coverage_report( url = self.bulk_job_coverage_report_url.human_repr().format( bulkjobId=bulkjobId, reportId=reportId ) + resp = await self._get(url) resp.__class__ = DiffbotCoverageReportResponse return cast(DiffbotCoverageReportResponse, resp) diff --git a/src/diffbot_kg/clients/session.py b/src/diffbot_kg/clients/session.py index ac1ce59..411c943 100644 --- a/src/diffbot_kg/clients/session.py +++ b/src/diffbot_kg/clients/session.py @@ -36,16 +36,31 @@ class DiffbotSession: """ def __init__(self) -> None: - headers = {"accept": "application/json"} - timeout = aiohttp.ClientTimeout(total=60, sock_connect=5) - self._session = aiohttp.ClientSession(headers=headers, timeout=timeout) + self._headers = {"accept": "application/json"} + self._timeout = aiohttp.ClientTimeout(total=60, sock_connect=5) + + self.is_open = False + + async def open(self) -> Self: + self._session = aiohttp.ClientSession(headers=self._headers, timeout=self._timeout) self._limiter = aiolimiter.AsyncLimiter(max_rate=5, time_period=1) + self.is_open = True + return self + async def get(self, url, **kwargs) -> BaseDiffbotResponse: + if not self.is_open: + await self.open() + + # sourcery skip: inline-immediately-returned-variable resp = await 
self._request(HTTPMethod.GET, url, **kwargs) return resp async def post(self, url, **kwargs) -> BaseDiffbotResponse: + if not self.is_open: + await self.open() + + # sourcery skip: inline-immediately-returned-variable resp = await self._request(HTTPMethod.POST, url, **kwargs) return resp @@ -53,6 +68,8 @@ async def close(self) -> None: if not self._session.closed: await self._session.close() + self.is_open = False + @retry( retry=retry_if_exception_type(RetryableException), reraise=True, diff --git a/tests/functional/clients/test_enhance_client.py b/tests/functional/clients/test_enhance_client.py index 1542c92..0086a53 100644 --- a/tests/functional/clients/test_enhance_client.py +++ b/tests/functional/clients/test_enhance_client.py @@ -89,15 +89,21 @@ async def test_bulkjob_status(self, request, token: Secret): job_id = _get_job_id(request) - DELAY = 10 + TIMEOUT = 60 + BACKOFF_FACTOR = 1.5 + backoff = 1 start = time.time() # ACT - while time.time() - start <= DELAY: + while True: response = await client.bulkjob_status(job_id) if response.complete: break - time.sleep(1) + elif time.time() - start > TIMEOUT: + pytest.fail("Bulk job status check did not complete in time") + + time.sleep(backoff) + backoff *= BACKOFF_FACTOR # ASSERT assert response.status == 200 @@ -159,45 +165,54 @@ async def test_single_bulkjob_result(self, request, token: Secret): await client.close() @pytest.mark.asyncio - async def test_bulkjob_stop(self, request, token: Secret): + async def test_bulkjob_coverage_report(self, request, token: Secret): # ARRANGE client = DiffbotEnhanceClient(token=token.value) job_id = _get_job_id(request) + report_id = request.config.cache.get("enhanceBulkJobCoverageReportId", None) + if report_id is None: + pytest.fail("Enhance bulk job coverage report ID not found in cache") + + + TIMEOUT = 60 + BACKOFF_FACTOR = 1.5 + backoff = 1 + start = time.time() # ACT - response = await client.stop_bulkjob(job_id) + while True: + try: + response = await 
client.bulkjob_coverage_report(job_id, report_id) + except ClientResponseError as e: + if e.status == 400: + time.sleep(backoff) + backoff *= BACKOFF_FACTOR + else: + if response.status == 200: + break + elif time.time() - start > TIMEOUT: + pytest.fail("Bulk job coverage report did not generate in time") # ASSERT assert response.status == 200 - assert response.content["status"] == "COMPLETE" - assert response.content["message"] == f"Bulkjob [{job_id}] is completed" + assert len(response.content.strip().split("\n")) == 4 - # TEARDOWN - await client.close() @pytest.mark.asyncio - async def test_bulkjob_coverage_report(self, request, token: Secret): + async def test_bulkjob_stop(self, request, token: Secret): # ARRANGE client = DiffbotEnhanceClient(token=token.value) job_id = _get_job_id(request) - report_id = request.config.cache.get("enhanceBulkJobCoverageReportId", None) - if report_id is None: - pytest.fail("Enhance bulk job coverage report ID not found in cache") - - DELAY = 10 - start = time.time() # ACT - while time.time() - start <= DELAY: - try: - response = await client.bulkjob_coverage_report(job_id, report_id) - except ClientResponseError: - time.sleep(1) - else: - break + response = await client.stop_bulkjob(job_id) # ASSERT assert response.status == 200 - assert len(response.content.strip().split("\n")) == 4 + assert response.content["status"] == "COMPLETE" + assert response.content["message"] == f"Bulkjob [{job_id}] is completed" + + # TEARDOWN + await client.close() diff --git a/tests/functional/clients/test_search_client.py b/tests/functional/clients/test_search_client.py index 3ddd38a..9693664 100644 --- a/tests/functional/clients/test_search_client.py +++ b/tests/functional/clients/test_search_client.py @@ -1,6 +1,6 @@ import pytest -from diffbot_kg.clients.search import DiffbotSearchClient +from diffbot_kg.clients.search import DiffbotSearchClient from tests.functional.conftest import ORG_ENTITY_ID, ORG_NAME, Secret diff --git 
a/tests/unit/clients/test_enhance_client.py b/tests/unit/clients/test_enhance_client.py index 6d385fe..1db34c4 100644 --- a/tests/unit/clients/test_enhance_client.py +++ b/tests/unit/clients/test_enhance_client.py @@ -30,10 +30,10 @@ async def test_mocked_enhance(self, mocker, client): response = await client.enhance(params) # ASSERT - assert DiffbotSession.get.is_called_with( + DiffbotSession.get.assert_called_with( DiffbotEnhanceClient.enhance_url, - params=params, - headers={"accept": "application/json"}, + params={**params, "token": "valid_token"}, + headers={} ) assert isinstance(response, DiffbotEntitiesResponse) assert response.status == 200 @@ -56,10 +56,11 @@ async def test_mocked_create_bulkjob(self, mocker, client): response = await client.create_bulkjob(params) # ASSERT - assert DiffbotSession.post.is_called_with( + DiffbotSession.post.assert_called_with( DiffbotEnhanceClient.bulk_job_url, - params=params, - headers={"accept": "application/json"}, + params={"token": "valid_token"}, + headers={"content-type": "application/json"}, + json=params ) assert isinstance(response, DiffbotBulkJobCreateResponse) assert response.status == 202 diff --git a/tests/unit/clients/test_search_client.py b/tests/unit/clients/test_search_client.py index 7941d42..aa055ae 100644 --- a/tests/unit/clients/test_search_client.py +++ b/tests/unit/clients/test_search_client.py @@ -37,7 +37,7 @@ async def test_mocked_search(self, mocker, client): DiffbotSession.get.assert_called_with( DiffbotSearchClient.search_url, params=params, - headers={"accept": "application/json"}, + headers={}, ) assert isinstance(response, DiffbotEntitiesResponse) assert response.status == 200