From 1736a130d5442d91c07f4f50ae528cedcb2953f1 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Mon, 6 May 2024 22:36:33 +0400 Subject: [PATCH] Update `openverse-attribution` with new features and improvements (#4250) --- api/api/admin/media_report.py | 7 +- api/api/models/media.py | 18 +- api/api/serializers/media_serializers.py | 4 +- api/api/utils/watermark.py | 7 +- .../python/openverse-attribution/pdm.lock | 298 +++++++++++++++- .../openverse-attribution/pyproject.toml | 8 +- .../src/openverse_attribution/attribution.py | 65 ---- .../openverse_attribution/data/__init__.py | 0 .../data/all_licenses.json | 161 +++++++++ .../data/all_licenses.py | 6 + .../src/openverse_attribution/license.py | 324 +++++++++++++----- .../src/openverse_attribution/license_name.py | 126 +++++++ .../tests/test_attribution.py | 63 ++-- .../tests/test_license.py | 185 +++++++--- .../tests/test_license_name.py | 82 +++++ 15 files changed, 1097 insertions(+), 257 deletions(-) delete mode 100644 packages/python/openverse-attribution/src/openverse_attribution/attribution.py create mode 100644 packages/python/openverse-attribution/src/openverse_attribution/data/__init__.py create mode 100644 packages/python/openverse-attribution/src/openverse_attribution/data/all_licenses.json create mode 100644 packages/python/openverse-attribution/src/openverse_attribution/data/all_licenses.py create mode 100644 packages/python/openverse-attribution/src/openverse_attribution/license_name.py create mode 100644 packages/python/openverse-attribution/tests/test_license_name.py diff --git a/api/api/admin/media_report.py b/api/api/admin/media_report.py index a596ee494a0..cf5abc618a6 100644 --- a/api/api/admin/media_report.py +++ b/api/api/admin/media_report.py @@ -111,9 +111,10 @@ def _get_media_obj_data(self, obj): additional_data = { "other_reports": self.get_other_reports(obj), "media_obj": obj.media_obj, - "license": License(obj.media_obj.license).name( - obj.media_obj.license_version - ), + "license": License( + obj.media_obj.license, + obj.media_obj.license_version, + ).full_name, "tags": tags_by_provider, "description": obj.media_obj.meta_data.get("description", ""), } diff --git a/api/api/models/media.py b/api/api/models/media.py index 953c817a5ec..b2fd53e0cf1 100644 --- a/api/api/models/media.py +++ b/api/api/models/media.py @@ -96,25 +96,25 @@ def license_url(self) -> str | None: if self.meta_data and (url := self.meta_data.get("license_url")): return url try: - lic = License(self.license.lower()) + return License(self.license.lower(), self.license_version).url except ValueError: return None - return lic.url(self.license_version) @property def attribution(self) -> str | None: """Legally valid attribution for the media item in plain-text English.""" try: - lic = License(self.license) + return License( + self.license.lower(), + self.license_version, + ).get_attribution_text( + self.title, + self.creator, + self.license_url, + ) except ValueError: return None - return lic.attribution( - self.title, - self.creator, - self.license_version, - self.license_url, - ) class Meta: """ diff --git a/api/api/serializers/media_serializers.py b/api/api/serializers/media_serializers.py index a4f7df4d009..9ff56a29949 100644 --- a/api/api/serializers/media_serializers.py +++ b/api/api/serializers/media_serializers.py @@ -742,8 +742,8 @@ def to_representation(self, *args, **kwargs): if output.get("license_url") is None: try: - lic = License(output["license"]) - output["license_url"] = lic.url(output["license_version"]) + lic = License(output["license"], output["license_version"]) + output["license_url"] = lic.url except ValueError: pass diff --git a/api/api/utils/watermark.py b/api/api/utils/watermark.py index a2249bf430f..764a0a69ada 100644 --- a/api/api/utils/watermark.py +++ b/api/api/utils/watermark.py @@ -175,7 +175,7 @@ def _print_attribution_on_image(img: Image.Image, image_info): """ try: - lic = License(image_info["license"]) + lic = License(image_info["license"], image_info["license_version"]) except ValueError: return img @@ -195,11 +195,10 @@ def _print_attribution_on_image(img: Image.Image, image_info): font = ImageFont.truetype(_get_font_path(), size=font_size) - text = lic.attribution( + text = lic.get_attribution_text( image_info["title"], image_info["creator"], - image_info["license_version"], - False, + url=False, ) text = _fit_in_width(text, font, new_width) attribution_height = _get_attribution_height(text, font) diff --git a/packages/python/openverse-attribution/pdm.lock b/packages/python/openverse-attribution/pdm.lock index 7a02ea70c78..d7e099365d2 100644 --- a/packages/python/openverse-attribution/pdm.lock +++ b/packages/python/openverse-attribution/pdm.lock @@ -2,23 +2,106 @@ # It is not intended for manual editing. [metadata] -groups = ["default", "test"] +groups = ["default", "test", "dev"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:927a125627d86a84db83dbccc5db86d1b25c3df2fbef6bef0ccb6b2fa42ca0b1" +content_hash = "sha256:9e8592470696f7b0f5fa00e4871b8290dbc12e9b91afa771a5406db1393f6a1f" + +[[package]] +name = "asttokens" +version = "2.4.1" +summary = "Annotate AST trees with source code positions" +groups = ["dev"] +dependencies = [ + "six>=1.12.0", +] +files = [ + {file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"}, + {file = "asttokens-2.4.1.tar.gz", hash = "sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0"}, +] + +[[package]] +name = "certifi" +version = "2024.2.2" +requires_python = ">=3.6" +summary = "Python package for providing Mozilla's CA Bundle." +groups = ["test"] +files = [ + {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, + {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +requires_python = ">=3.7.0" +summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +groups = ["test"] +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] [[package]] name = "colorama" version = "0.4.6" requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" summary = "Cross-platform colored terminal text." -groups = ["test"] +groups = ["dev", "test"] marker = "sys_platform == \"win32\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "decorator" +version = "5.1.1" +requires_python = ">=3.5" +summary = "Decorators for Humans" +groups = ["dev"] +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] + +[[package]] +name = "executing" +version = "2.0.1" +requires_python = ">=3.5" +summary = "Get the currently executing AST node of a frame, and other information" +groups = ["dev"] +files = [ + {file = "executing-2.0.1-py2.py3-none-any.whl", hash = "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc"}, + {file = "executing-2.0.1.tar.gz", hash = "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147"}, +] + +[[package]] +name = "idna" +version = "3.7" +requires_python = ">=3.5" +summary = "Internationalized Domain Names in Applications (IDNA)" +groups = ["test"] +files = [ + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, +] + [[package]] name = "iniconfig" version = "2.0.0" @@ -30,6 +113,57 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "ipython" +version = "8.24.0" +requires_python = ">=3.10" +summary = "IPython: Productive Interactive Computing" +groups = ["dev"] +dependencies = [ + "colorama; sys_platform == \"win32\"", + "decorator", + "jedi>=0.16", + "matplotlib-inline", + "pexpect>4.3; sys_platform != \"win32\" and sys_platform != \"emscripten\"", + "prompt-toolkit<3.1.0,>=3.0.41", + "pygments>=2.4.0", + "stack-data", + "traitlets>=5.13.0", + "typing-extensions>=4.6; python_version < \"3.12\"", +] +files = [ + {file = "ipython-8.24.0-py3-none-any.whl", hash = "sha256:d7bf2f6c4314984e3e02393213bab8703cf163ede39672ce5918c51fe253a2a3"}, + {file = "ipython-8.24.0.tar.gz", hash = "sha256:010db3f8a728a578bb641fdd06c063b9fb8e96a9464c63aec6310fbcb5e80501"}, +] + +[[package]] +name = "jedi" +version = "0.19.1" +requires_python = ">=3.6" +summary = "An autocompletion tool for Python that can be used for text editors." +groups = ["dev"] +dependencies = [ + "parso<0.9.0,>=0.8.3", +] +files = [ + {file = "jedi-0.19.1-py2.py3-none-any.whl", hash = "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0"}, + {file = "jedi-0.19.1.tar.gz", hash = "sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd"}, +] + +[[package]] +name = "matplotlib-inline" +version = "0.1.7" +requires_python = ">=3.8" +summary = "Inline Matplotlib backend for Jupyter" +groups = ["dev"] +dependencies = [ + "traitlets", +] +files = [ + {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"}, + {file = "matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"}, +] + [[package]] name = "packaging" version = "24.0" @@ -41,6 +175,31 @@ files = [ {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, ] +[[package]] +name = "parso" +version = "0.8.4" +requires_python = ">=3.6" +summary = "A Python Parser" +groups = ["dev"] +files = [ + {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, + {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"}, +] + +[[package]] +name = "pexpect" +version = "4.9.0" +summary = "Pexpect allows easy control of interactive console applications." +groups = ["dev"] +marker = "sys_platform != \"win32\" and sys_platform != \"emscripten\"" +dependencies = [ + "ptyprocess>=0.5", +] +files = [ + {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"}, + {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"}, +] + [[package]] name = "pluggy" version = "1.4.0" @@ -52,6 +211,52 @@ files = [ {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, ] +[[package]] +name = "prompt-toolkit" +version = "3.0.43" +requires_python = ">=3.7.0" +summary = "Library for building powerful interactive command lines in Python" +groups = ["dev"] +dependencies = [ + "wcwidth", +] +files = [ + {file = "prompt_toolkit-3.0.43-py3-none-any.whl", hash = "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6"}, + {file = "prompt_toolkit-3.0.43.tar.gz", hash = "sha256:3527b7af26106cbc65a040bcc84839a3566ec1b051bb0bfe953631e704b0ff7d"}, +] + +[[package]] +name = "ptyprocess" +version = "0.7.0" +summary = "Run a subprocess in a pseudo terminal" +groups = ["dev"] +marker = "sys_platform != \"win32\" and sys_platform != \"emscripten\"" +files = [ + {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, + {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, +] + +[[package]] +name = "pure-eval" +version = "0.2.2" +summary = "Safely evaluate AST nodes without side effects" +groups = ["dev"] +files = [ + {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, + {file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"}, +] + +[[package]] +name = "pygments" +version = "2.17.2" +requires_python = ">=3.7" +summary = "Pygments is a syntax highlighting package written in Python." +groups = ["dev"] +files = [ + {file = "pygments-2.17.2-py3-none-any.whl", hash = "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c"}, + {file = "pygments-2.17.2.tar.gz", hash = "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367"}, +] + [[package]] name = "pytest" version = "8.1.1" @@ -84,6 +289,49 @@ files = [ {file = "pytest_sugar-1.0.0-py3-none-any.whl", hash = "sha256:70ebcd8fc5795dc457ff8b69d266a4e2e8a74ae0c3edc749381c64b5246c8dfd"}, ] +[[package]] +name = "requests" +version = "2.31.0" +requires_python = ">=3.7" +summary = "Python HTTP for Humans." +groups = ["test"] +dependencies = [ + "certifi>=2017.4.17", + "charset-normalizer<4,>=2", + "idna<4,>=2.5", + "urllib3<3,>=1.21.1", +] +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[[package]] +name = "six" +version = "1.16.0" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +summary = "Python 2 and 3 compatibility utilities" +groups = ["dev"] +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "stack-data" +version = "0.6.3" +summary = "Extract data from python stack frames and tracebacks for informative displays" +groups = ["dev"] +dependencies = [ + "asttokens>=2.1.0", + "executing>=1.2.0", + "pure-eval", +] +files = [ + {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"}, + {file = "stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9"}, +] + [[package]] name = "termcolor" version = "2.4.0" @@ -94,3 +342,47 @@ files = [ {file = "termcolor-2.4.0-py3-none-any.whl", hash = "sha256:9297c0df9c99445c2412e832e882a7884038a25617c60cea2ad69488d4040d63"}, {file = "termcolor-2.4.0.tar.gz", hash = "sha256:aab9e56047c8ac41ed798fa36d892a37aca6b3e9159f3e0c24bc64a9b3ac7b7a"}, ] + +[[package]] +name = "traitlets" +version = "5.14.3" +requires_python = ">=3.8" +summary = "Traitlets Python configuration system" +groups = ["dev"] +files = [ + {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, + {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, +] + +[[package]] +name = "typing-extensions" +version = "4.11.0" +requires_python = ">=3.8" +summary = "Backported and Experimental Type Hints for Python 3.8+" +groups = ["dev"] +marker = "python_version < \"3.12\"" +files = [ + {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, + {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, +] + +[[package]] +name = "urllib3" +version = "2.2.1" +requires_python = ">=3.8" +summary = "HTTP library with thread-safe connection pooling, file post, and more." +groups = ["test"] +files = [ + {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, + {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, +] + +[[package]] +name = "wcwidth" +version = "0.2.13" +summary = "Measures the displayed width of unicode strings in a terminal" +groups = ["dev"] +files = [ + {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, + {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, +] diff --git a/packages/python/openverse-attribution/pyproject.toml b/packages/python/openverse-attribution/pyproject.toml index 946af4f0415..ee1933cd6b4 100644 --- a/packages/python/openverse-attribution/pyproject.toml +++ b/packages/python/openverse-attribution/pyproject.toml @@ -16,6 +16,10 @@ build-backend = "pdm.backend" [tool.pdm.dev-dependencies] test = [ - "pytest >=8.1.1, <9", - "pytest-sugar >=1.0.0, <2", + "pytest >=8.1.1, <9", + "pytest-sugar >=1.0.0, <2", + "requests>=2.31.0", +] +dev = [ + "ipython >=8.24.0, <9", ] diff --git a/packages/python/openverse-attribution/src/openverse_attribution/attribution.py b/packages/python/openverse-attribution/src/openverse_attribution/attribution.py deleted file mode 100644 index 5d65fef6d21..00000000000 --- a/packages/python/openverse-attribution/src/openverse_attribution/attribution.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import annotations - -import re -from typing import TYPE_CHECKING - - -if TYPE_CHECKING: - from openverse_attribution.license import License - - -def get_attribution_text( - lic: License, - title: str | None = None, - creator: str | None = None, - license_version: str | None = None, - license_url: str | bool | None = None, -): - """ - Get the attribution text for a media item. This function only renders the - attribution in plain-text format for the English language. - Note that this is not a perfect attribution as it does not include - hyperlinks for the work or the creator. Also see the CC `wiki`_ to learn - best practices for attribution. - - .. _wiki: https://wiki.creativecommons.org/wiki/Best_practices_for_attribution - - To remove the sentence for viewing the legal text, set the ``license_url`` - parameter to ``False``. - - :param lic: the ``License`` enum instance for the work - :param title: the name of the work, if known - :param creator: the name of the work's creator, if known - :param license_version: the version of the license, if known - :param license_url: the URL to the license, to override the default - :return: the plain-text English language attribution - """ - - title = f'"{title}"' if title else "This work" - - attribution_template = "{title} {creator} {marked-licensed} {license}. {view-legal}" - attribution_parts = { - "title": title, - "marked-licensed": "is marked with" if lic.is_pd else "is licensed under", - "license": lic.name(license_version), - "view-legal": "", - "creator": "", - } - - if license_url is not False: - license_url = license_url or lic.url(license_version) - view_legal_template = "To view {terms-copy}, visit {url}." - view_legal_parts = { - "terms-copy": "the terms" if lic.is_pd else "a copy of this license", - "url": license_url, - } - attribution_parts["view-legal"] = view_legal_template.format(**view_legal_parts) - - if creator: - creator_template = "by {creator-name}" - creator_parts = {"creator-name": creator} - attribution_parts["creator"] = creator_template.format(**creator_parts) - - attribution = attribution_template.format(**attribution_parts) - - return re.sub(r"\s{2,}", " ", attribution).strip() diff --git a/packages/python/openverse-attribution/src/openverse_attribution/data/__init__.py b/packages/python/openverse-attribution/src/openverse_attribution/data/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/packages/python/openverse-attribution/src/openverse_attribution/data/all_licenses.json b/packages/python/openverse-attribution/src/openverse_attribution/data/all_licenses.json new file mode 100644 index 00000000000..40fc7c8aab6 --- /dev/null +++ b/packages/python/openverse-attribution/src/openverse_attribution/data/all_licenses.json @@ -0,0 +1,161 @@ +{ + "4.0": { + "": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"] + }, + "3.0": { + "": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "am": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "au": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "at": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "az": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ca": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "cl": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "cr": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "hr": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "cz": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ec": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "eg": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ee": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "fr": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ge": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "de": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "gr": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "gt": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "hk": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "igo": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ie": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "it": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "lu": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "nl": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "nz": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "no": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ph": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "pl": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "pt": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "pr": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ro": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "sg": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "za": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "es": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ch": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "th": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ug": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "us": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ve": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "vn": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"] + }, + "2.5": { + "": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ar": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "au": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "bg": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ca": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "co": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "hr": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "dk": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "hu": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "in": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "il": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "it": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "mk": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "my": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "mt": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "mx": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "nl": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "pe": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "pl": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "pt": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "si": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "za": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "es": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "se": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ch": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "scotland": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"] + }, + "2.1": { + "au": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ca": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "jp": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "es": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"] + }, + "2.0": { + "": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa", "devnations"], + "au": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "at": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "be": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "ca": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "cl": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "hr": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "fr": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "de": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "it": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "jp": [ + "by", + "by-nc", + "by-nc-nd", + "by-nc-sa", + "by-nd", + "by-sa", + "nc", + "nc-sa", + "nd", + "nd-nc", + "sa" + ], + "kr": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "nl": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "pl": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "za": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "es": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"], + "uk": ["by", "by-nc", "by-nc-nd", "by-nc-sa", "by-nd", "by-sa"] + }, + "1.0": { + "": [ + "by", + "by-nc", + "by-nc-sa", + "by-nd", + "by-nd-nc", + "by-sa", + "nc", + "nc-sa", + "nc-sampling+", + "nd", + "nd-nc", + "sa", + "sampling", + "sampling+", + "cc0", + "pdm" + ], + "fi": [ + "by", + "by-nc", + "by-nc-sa", + "by-nd", + "by-nd-nc", + "by-sa", + "nc", + "nc-sa", + "nd", + "nd-nc", + "sa" + ], + "de": ["sampling+"], + "il": ["by", "by-nc", "by-nc-sa", "by-nd", "by-nd-nc", "by-sa"], + "nl": [ + "by", + "by-nc", + "by-nc-sa", + "by-nd", + "by-nd-nc", + "by-sa", + "nc", + "nc-sa", + "nd", + "nd-nc", + "sa" + ], + "us": ["certification"] + } +} diff --git a/packages/python/openverse-attribution/src/openverse_attribution/data/all_licenses.py b/packages/python/openverse-attribution/src/openverse_attribution/data/all_licenses.py new file mode 100644 index 00000000000..f9f8fddffc1 --- /dev/null +++ b/packages/python/openverse-attribution/src/openverse_attribution/data/all_licenses.py @@ -0,0 +1,6 @@ +import json +from pathlib import Path + + +all_licenses_data = Path(__file__).parent / "all_licenses.json" +all_licenses = json.loads(all_licenses_data.read_bytes()) diff --git a/packages/python/openverse-attribution/src/openverse_attribution/license.py b/packages/python/openverse-attribution/src/openverse_attribution/license.py index f83fc51b377..e45587575fa 100644 --- a/packages/python/openverse-attribution/src/openverse_attribution/license.py +++ b/packages/python/openverse-attribution/src/openverse_attribution/license.py @@ -1,128 +1,276 @@ -from enum import StrEnum +import re +from dataclasses import dataclass -from openverse_attribution.attribution import get_attribution_text +from openverse_attribution.data.all_licenses import all_licenses +from openverse_attribution.license_name import LicenseName -class License(StrEnum): - """ - Represent all licenses that are handled by Openverse. This uses a very loose - interpretation of the term "license" as it includes licenses (both active - and deprecated), dedications and marks. - """ +KNOWN_ALIASES = { + "zero": "cc0", + "mark": "pdm", +} - # CC licenses - BY = "by" - BY_SA = "by-sa" - BY_NC = "by-nc" - BY_ND = "by-nd" - BY_NC_SA = "by-nc-sa" - BY_NC_ND = "by-nc-nd" - # Deprecated CC licenses - SA = "sa" - SAMPLING = "sampling+" - NC_SAMPLING = "nc-sampling+" +@dataclass +class License: + name: LicenseName + ver: str | None + jur: str | None - # Public domain dedication - CC0 = "cc0" + def __init__( + self, + slug: str, + version: str | None = None, + jurisdiction: str | None = None, + ): + """ + Create an instance of ``License``. - # Public domain mark - PDM = "pdm" - PUBLIC_DOMAIN = "publicdomain" + This function validates the license, version and jurisdiction. If some + fields are not provided, it makes an attempt to deduce them. - def name(self, version: str | None = None) -> str: + The only exception is 'publicdomain' for which the version and + jurisdiction fields are ignored. + + Use an empty string as ``jurisdiction`` to specify a generic, universal + or unported form of the license. + + :param slug: the slug for the license, from the ``LicenseName`` enum + :param version: the version of the license + :param jurisdiction: the jurisdiction of the license """ - Get the full name of the license. - The ``version`` parameter is disregarded and the version is always 1.0 - for CC0, PDM and deprecated licenses. If not provided, the version is - omitted for all other licenses. - :param version: the version number of the license - :return: the full name of the license + # Shorten long variable names + ver = version + jur = jurisdiction + + # Handle known aliases. + slug = KNOWN_ALIASES.get(slug, slug) + + self.slug = slug + + # Encapsulates internal enum validation. + self.name = LicenseName(slug) + + self.fallback_ver = None + self.fallback_jur = None + + if self.name is LicenseName.PUBLICDOMAIN: + self.ver = None + self.jur = None + return + + # Validate version against known versions. + if ver and ver not in all_licenses.keys(): + raise ValueError(f"Version `{ver}` does not exist.") + + # Validate jurisdiction against known jurisdictions. + if jur is not None: + if all(jur not in item for item in all_licenses.values()): + raise ValueError(f"Jurisdiction `{jur}` does not exist.") + + if ver and jur not in all_licenses[ver].keys(): + raise ValueError( + f"Jurisdiction `{jur}` does not exist for version `{ver}`." + ) + + # Validation (with autocompletion) + if not ver and jur is None: + self.ver, self.jur = self._deduce_ver_jur() or (None, None) + elif not ver and jur is not None: + self.jur = jur + self.ver = self._deduce_ver() + elif ver and jur is None: + self.ver = ver + self.jur = self._deduce_jur() + else: # ver and jur is not None + self.ver = ver + self.jur = jur + if (ver, jur) not in self.name.allowed_versions_jurisdictions: + raise ValueError( + f"License `{slug}` does not accept version `{ver}` and jurisdiction `{jur}`." + ) + + def _deduce_ver(self) -> str | None: + """ + Deduce version from slug and jurisdiction. + + This function sets ``fallback_ver`` to latest allowed version if it + cannot be determined for certain. + + :return: the certain value of the version + :raise ValueError: if no version matches slug and jurisdiction """ - if self in {License.PDM, License.PUBLIC_DOMAIN}: - name = "Public Domain Mark" + allowed_ver_jur = self.name.allowed_versions_jurisdictions + allowed_vers = [v for v, j in allowed_ver_jur if j == self.jur] + if len(allowed_vers) > 1: + self.fallback_ver = allowed_vers[0] # latest + elif len(allowed_vers) == 1: + return allowed_vers.pop() else: - name = self.value.upper().replace("SAMPLING", "Sampling") - if self.is_cc and self is not License.CC0: - name = f"CC {name}" - if self.is_pd or self.is_deprecated: - version = "1.0" - if version: - name = f"{name} {version}" - return name.strip() - - def url(self, version: str | None = None) -> str: + raise ValueError( + f"No version matches slug `{self.slug}` and jurisdiction `{self.jur}`." + ) + + def _deduce_jur(self) -> str | None: """ - Get the URL to the legal deed of this license. - The ``version`` parameter is disregarded and the version is always 1.0 - for CC0, PDM and deprecated licenses. If not provided, the version is - assumed to be 4.0 for all other licenses. + Deduce jurisdiction from slug and version. + + This function sets ``fallback_jur`` to generic jurisdiction if it cannot + be determined for certain and generic is an option. - :param version: the version number of the license - :return: the URL to the legal text of this license + :return: the certain value of the jurisdiction + :raise ValueError: if jurisdiction is required or no jurisdiction + matches slug and version """ - if self is License.CC0: - fragment = "publicdomain/zero/1.0" - elif self in {License.PUBLIC_DOMAIN, License.PDM}: - fragment = "publicdomain/mark/1.0" - elif self.is_deprecated: - fragment = f"licenses/{self}/1.0" + allowed_ver_jur = self.name.allowed_versions_jurisdictions + allowed_jurs = {j for v, j in allowed_ver_jur if v == self.ver} + if len(allowed_jurs) > 1: + if "" in allowed_jurs: + # We can only assume generic jurisdiction as fallback. + self.fallback_jur = "" + else: + raise ValueError( + f"Jurisdiction is required for slug `{self.slug}` and version `{self.ver}`." + ) + elif len(allowed_jurs) == 1: + return allowed_jurs.pop() else: - fragment = f"licenses/{self}/{version or '4.0'}" - return f"https://creativecommons.org/{fragment}/" + raise ValueError( + f"No jurisdiction matches slug `{self.slug}` and version `{self.ver}`." + ) - def attribution( - self, - title: str | None = None, - creator: str | None = None, - version: str | None = None, - url: str | bool | None = None, - ): + def _deduce_ver_jur(self) -> tuple[str, str] | None: """ - Get the attribution text for a media item released under this license. + Deduce version and jurisdiction from slug. - :param title: the name of the work, if known - :param creator: the name of the work's creator, if known - :param version: the version number of the license - :param url: the URL to the legal text of this license - :return: the plain-text English language attribution + This function sets ``fallback_ver`` and ``fallback_jur`` to the latest + allowed version and generic jurisdiction respectively if they cannot be + determined for certain. + + :return: the certain values of the version and jurisdiction """ - return get_attribution_text(self, title, creator, version, url) + allowed_ver_jur = self.name.allowed_versions_jurisdictions + if len(allowed_ver_jur) == 1: + return allowed_ver_jur[0] + + allowed_ver_jur = [(v, j) for (v, j) in allowed_ver_jur if j == ""] + if len(allowed_ver_jur) >= 1: + # We can only assume generic jurisdiction as fallback. + self.fallback_ver, self.fallback_jur = allowed_ver_jur[0] + else: + raise ValueError(f"No version and jurisdiction match slug `{self.slug}`.") @property - def is_deprecated(self) -> bool: + def full_name(self) -> str: """ - Determine if this license has been deprecated. These licenses are no - longer maintained as only have a version 1.0. + Get the full name of the license. + + This function does not use the fallback version and jurisdiction because + the license name is valid without them. - :return: whether this license has been deprecated + :return: the full name of the license """ - return self in {License.SAMPLING, License.NC_SAMPLING, License.SA} + name = self.name.display_name + if self.ver: + name = f"{name} {self.ver}" + if self.jur: + name = f"{name} {self.jur.upper()}" + return name @property - def is_pd(self) -> bool: + def url(self) -> str: """ - Determine whether a work with this license is in the public domain. This - function also differentiates a license from a mark or dedication. + Get the URL to the deed of this license. + + This function uses the fallback version and jurisdiction as they are + part of the URL and URL cannot be generated without them. - :return: whether a work with this license is in the public domain + :return: the URL to the deed of the license """ - return self in {License.PUBLIC_DOMAIN, License.PDM, License.CC0} + ver = self.ver if self.ver is not None else self.fallback_ver + jur = self.jur if self.jur is not None else self.fallback_jur - @property - def is_cc(self) -> bool: + if self.name is LicenseName.PUBLICDOMAIN: + return "https://en.wikipedia.org/wiki/Public_domain" + + if self.name is LicenseName.CC0: + fragment = f"publicdomain/zero/{ver}/{jur}" + elif self.name is LicenseName.PDM: + fragment = f"publicdomain/mark/{ver}/{jur}" + elif self.name is LicenseName.CERTIFICATION: + fragment = f"publicdomain/certification/{ver}/{jur}" + else: + fragment = f"licenses/{self.name}/{ver}/{jur}" + + if not fragment.endswith("/"): + fragment = f"{fragment}/" + + return f"https://creativecommons.org/{fragment}" + + def get_attribution_text( + self, + title: str | None = None, + creator: str | None = None, + url: str | bool | None = None, + ): """ - Determine whether this license was created by Creative Commons. Note - that this includes CC0 which was created by CC. + Get the attribution text for a media item. This function only renders + the attribution in plain-text format for the English language. + + Note that this is not a perfect attribution as it does not include + hyperlinks for the work or the creator. Also see the CC `wiki`_ to learn + best practices for attribution. + + .. _wiki: https://wiki.creativecommons.org/wiki/Best_practices_for_attribution + + To remove the sentence for viewing the legal text, set the ``url`` + parameter to ``False``. - :return: whether this license was created by Creative Commons + :param title: the name of the work, if known + :param creator: the name of the work's creator, if known + :param url: the URL to the license, to override the default + :return: the plain-text English language attribution """ - # Works because other than PDM, we only have CC licenses. - return self not in {License.PUBLIC_DOMAIN, License.PDM} + title = f'"{title}"' if title else "This work" + + attribution_template = ( + "{title} {creator} {marked-licensed} {license}. {view-legal}" + ) + attribution_parts = { + "title": title, + "marked-licensed": "is marked with" + if self.name.is_pd + else "is licensed under", + "license": self.full_name, + "view-legal": "", + "creator": "", + } + + if url is not False: + license_url = url or self.url + view_legal_template = "To view {terms-copy}, visit {url}." + view_legal_parts = { + "terms-copy": "the terms" + if self.name.is_pd + else "a copy of this license", + "url": license_url, + } + attribution_parts["view-legal"] = view_legal_template.format( + **view_legal_parts + ) + + if creator: + creator_template = "by {creator-name}" + creator_parts = {"creator-name": creator} + attribution_parts["creator"] = creator_template.format(**creator_parts) + + attribution = attribution_template.format(**attribution_parts) + + return re.sub(r"\s{2,}", " ", attribution).strip() diff --git a/packages/python/openverse-attribution/src/openverse_attribution/license_name.py b/packages/python/openverse-attribution/src/openverse_attribution/license_name.py new file mode 100644 index 00000000000..cd5fe2267ae --- /dev/null +++ b/packages/python/openverse-attribution/src/openverse_attribution/license_name.py @@ -0,0 +1,126 @@ +from enum import StrEnum + +from openverse_attribution.data.all_licenses import all_licenses + + +NON_CC_SLUGS = {"pdm", "publicdomain", "certification"} +DEPRECATED_SLUGS = { + "sa", + "nc", + "nd", + "nc-sa", + "nd-nc", + "sampling", + "sampling+", + "nc-sampling+", + "devnations", + "certification", +} +PUBLIC_DOMAIN_SLUGS = NON_CC_SLUGS | {"cc0"} + + +class LicenseName(StrEnum): + """ + Represents all existing CC "licenses". + + This uses a very loose interpretation of the term "license" as it includes + licenses (both active and deprecated), dedications and marks. + """ + + # CC licenses + BY = "by" + BY_SA = "by-sa" + BY_NC = "by-nc" + BY_ND = "by-nd" + BY_NC_SA = "by-nc-sa" + BY_NC_ND = "by-nc-nd" + + # Retired CC licenses + SA = "sa" + NC = "nc" + ND = "nd" + NC_SA = "nc-sa" + ND_NC = "nd-nc" + BY_ND_NC = "by-nd-nc" # later renamed to BY_NC_ND + + SAMPLING = "sampling" + SAMPLING_PLUS = "sampling+" + NC_SAMPLING_PLUS = "nc-sampling+" + DEVNATIONS = "devnations" + + # Public domain + CC0 = "cc0" + CERTIFICATION = "certification" + PDM = "pdm" + PUBLICDOMAIN = "publicdomain" + + @property + def display_name(self) -> str: + """ + Get the name of the license as supposed to be displayed to a reader. + + :return: the display name of the license + """ + + if self is LicenseName.PDM: + return "Public Domain Mark" + if self is LicenseName.CERTIFICATION: + return "Public Domain Certification" + if self is LicenseName.PUBLICDOMAIN: + return "Public Domain" + + name = self.value.upper() + if self is LicenseName.CC0: + return name + + name = name.replace("SAMPLING", "Sampling").replace("DEVNATIONS", "DevNations") + return f"CC {name}" + + @property + def is_cc(self) -> bool: + """ + Determine whether this license was created by Creative Commons. Note + that this includes CC0 which was created by CC. + + :return: whether this license was created by Creative Commons + """ + + return self.value not in NON_CC_SLUGS + + @property + def is_deprecated(self) -> bool: + """ + Determine if this license has been deprecated. These licenses are no + longer maintained and have a disclaimer on their legal page recommending + against their usage. + + :return: whether this license has been deprecated + """ + + return self in DEPRECATED_SLUGS + + @property + def is_pd(self) -> bool: + """ + Determine whether a work with this license is in the public domain. This + function also differentiates a license from a mark or dedication. + + :return: whether a work with this license is in the public domain + """ + + return self in PUBLIC_DOMAIN_SLUGS + + @property + def allowed_versions_jurisdictions(self) -> list[tuple[str, str]]: + """ + Get a list of versions and jurisdictions where this license is valid. + + :return: a list of allowed versions and jurisdictions + """ + + return [ + (ver, jur) + for ver in all_licenses.keys() + for jur in all_licenses[ver].keys() + if self.value in all_licenses[ver][jur] + ] diff --git a/packages/python/openverse-attribution/tests/test_attribution.py b/packages/python/openverse-attribution/tests/test_attribution.py index 5123ffd3d51..56f0e17bb31 100644 --- a/packages/python/openverse-attribution/tests/test_attribution.py +++ b/packages/python/openverse-attribution/tests/test_attribution.py @@ -5,57 +5,65 @@ BLANK = object() +# Test blank arguments against both ``None`` and empty string. @pytest.mark.parametrize( "blank_val", - ["", None], # Test blank arguments against both ``None`` and empty string. + [pytest.param("", id="blank"), pytest.param(None, id="none")], ) @pytest.mark.parametrize( "args, attribution", [ - ( - ("Title", "Creator", "0.0", "https://license/url"), # All known - '"Title" by Creator is licensed under CC BY 0.0. ' + pytest.param( + ("4.0", "Title", "Creator", "https://license/url"), + '"Title" by Creator is licensed under CC BY 4.0. ' "To view a copy of this license, visit https://license/url.", + id="all_known", ), - ( - (BLANK, "Creator", "0.0", "https://license/url"), # Unknown title - "This work by Creator is licensed under CC BY 0.0. " + pytest.param( + (None, "Title", "Creator", "https://license/url"), + '"Title" by Creator is licensed under CC BY. ' "To view a copy of this license, visit https://license/url.", + id="unknown_version", ), - ( - ("Title", BLANK, "0.0", "https://license/url"), # Unknown creator - '"Title" is licensed under CC BY 0.0. ' + pytest.param( + ("4.0", BLANK, "Creator", "https://license/url"), + "This work by Creator is licensed under CC BY 4.0. " "To view a copy of this license, visit https://license/url.", + id="unknown_title", ), - ( - ("Title", "Creator", BLANK, "https://license/url"), # Unknown version - '"Title" by Creator is licensed under CC BY. ' + pytest.param( + ("4.0", "Title", BLANK, "https://license/url"), + '"Title" is licensed under CC BY 4.0. ' "To view a copy of this license, visit https://license/url.", + id="unknown_creator", ), - ( - ("Title", "Creator", "0.0", BLANK), # Unknown license URL - '"Title" by Creator is licensed under CC BY 0.0. ' - "To view a copy of this license, visit https://creativecommons.org/licenses/by/0.0/.", + pytest.param( + ("4.0", "Title", "Creator", BLANK), + '"Title" by Creator is licensed under CC BY 4.0. ' + "To view a copy of this license, visit https://creativecommons.org/licenses/by/4.0/.", + id="unknown_license_url", ), - ( - ("Title", "Creator", "0.0", False), # Removed license URL - '"Title" by Creator is licensed under CC BY 0.0.', + pytest.param( + ("4.0", "Title", "Creator", False), + '"Title" by Creator is licensed under CC BY 4.0.', + id="removed_license_url", ), - ( - (BLANK, BLANK, BLANK, BLANK), # Almost all unknown + pytest.param( + (None, BLANK, BLANK, BLANK), "This work is licensed under CC BY. " "To view a copy of this license, visit https://creativecommons.org/licenses/by/4.0/.", + id="almost_all_unknown", ), ], ) def test_attribution_text( blank_val: str | None, - args: tuple[str, str, str, str], + args: tuple[str, str, str, str, str], attribution: str, ): - lic = License("by") - args = (blank_val if arg is BLANK else arg for arg in args) - assert lic.attribution(*args) == attribution + args = [blank_val if arg is BLANK else arg for arg in args] + lic = License("by", args[0]) + assert lic.get_attribution_text(*args[1:]) == attribution @pytest.mark.parametrize( @@ -77,5 +85,4 @@ def test_attribution_text_differentiates_license_and_other_tools( slug: str, attribution: str, ): - lic = License(slug) - assert lic.attribution() == attribution + assert License(slug).get_attribution_text() == attribution diff --git a/packages/python/openverse-attribution/tests/test_license.py b/packages/python/openverse-attribution/tests/test_license.py index ebf25e26ca3..41929c25d4c 100644 --- a/packages/python/openverse-attribution/tests/test_license.py +++ b/packages/python/openverse-attribution/tests/test_license.py @@ -1,87 +1,166 @@ import pytest +import requests from openverse_attribution.license import License +from openverse_attribution.license_name import LicenseName -def test_raises_value_error_on_invalid_license(): - with pytest.raises(ValueError): - License("invalid") +@pytest.mark.parametrize( + "slug, expected", + [ + ("zero", "cc0"), + ("mark", "pdm"), + ], +) +def test_license_handles_aliases(slug: str, expected: str): + assert License(slug).slug == expected @pytest.mark.parametrize( - "slug, version, name", + "slug, version, jurisdiction, attr, val", [ - ("cc0", None, "CC0 1.0"), - ("cc0", "2.0", "CC0 1.0"), - ("pdm", None, "Public Domain Mark 1.0"), - ("pdm", "2.0", "Public Domain Mark 1.0"), - ("sa", None, "CC SA 1.0"), - ("sa", "2.0", "CC SA 1.0"), - ("sampling+", None, "CC Sampling+ 1.0"), - ("sampling+", "2.0", "CC Sampling+ 1.0"), - ("by", None, "CC BY"), - ("by", "2.0", "CC BY 2.0"), + ("certification", None, None, "ver", "1.0"), # infers version with surety + ("certification", None, None, "jur", "us"), # infers jurisdiction with surety + ( + "by", + None, + None, + "fallback_ver", + "4.0", + ), # cannot infer version, falls back to latest + ( + "by", + None, + None, + "fallback_jur", + "", + ), # cannot infer jurisdiction, falls back to generic + ("nc", "2.0", None, "jur", "jp"), # infers jurisdiction with surety + ("by-nc", "4.0", None, "jur", ""), # infers jurisdiction with surety + ("by", None, "pe", "ver", "2.5"), # infers version with surety ], ) -def test_can_get_name_for_license(slug: str, version: str, name: str): - lic = License(slug) - assert lic.name(version) == name +def test_license_validation_autocompletes_missing_info( + slug: str, + version: str | None, + jurisdiction: str | None, + attr: str, + val: str, +): + lic = License(slug, version, jurisdiction) + assert getattr(lic, attr) == val @pytest.mark.parametrize( - "slug, version, path", + "slug, version, jurisdiction, msg", [ - ("cc0", None, "publicdomain/zero/1.0/"), - ("cc0", "2.0", "publicdomain/zero/1.0/"), - ("pdm", None, "publicdomain/mark/1.0/"), - ("pdm", "2.0", "publicdomain/mark/1.0/"), - ("sa", None, "licenses/sa/1.0/"), - ("sa", "2.0", "licenses/sa/1.0/"), - ("sampling+", None, "licenses/sampling+/1.0/"), - ("sampling+", "2.0", "licenses/sampling+/1.0/"), - ("by", None, "licenses/by/4.0/"), - ("by", "2.0", "licenses/by/2.0/"), + # raised in ``__init__`` + ("by", "5.0", None, "Version `5.0` does not exist."), + ("by", None, "done", "Jurisdiction `done` does not exist."), + ("by", "1.0", "jp", "Jurisdiction `jp` does not exist for version `1.0`."), + # raised in ``_deduce_ver`` + ("nd", None, "in", "No version matches slug `nd` and jurisdiction `in`."), + # raised in ``_deduce_jur`` + ( + "by", + "2.1", + None, + "Jurisdiction is required for slug `by` and version `2.1`.", + ), + ( + "sampling+", + "4.0", + None, + r"No jurisdiction matches slug `sampling\+` and version `4.0`.", + ), + # raised in ``_deduce_ver_jur`` + ( + "sampling", + "1.0", + "fi", + "License `sampling` does not accept version `1.0` and jurisdiction `fi`.", + ), ], ) -def test_can_get_url_for_license(slug: str, version: str, path: str): - lic = License(slug) - assert lic.url(version).endswith(path) +def test_license_validation_fails_if_contradictory_info( + slug: str, + version: str | None, + jurisdiction: str | None, + msg: str, +): + with pytest.raises(ValueError, match=msg): + License(slug, version, jurisdiction) + + +@pytest.mark.parametrize( + "slug", + [lic.value for lic in LicenseName], +) +def test_license_validation_never_fails_for_just_name(slug: str): + assert License(slug) @pytest.mark.parametrize( - "slug, is_dep", + "slug, version, jurisdiction, full_name", [ - ("sa", True), - ("sampling+", True), - ("nc-sampling+", True), - ("by", False), + ("cc0", None, None, "CC0 1.0"), + ("pdm", None, None, "Public Domain Mark 1.0"), + ("certification", None, None, "Public Domain Certification 1.0 US"), + ("publicdomain", None, None, "Public Domain"), + ("sa", None, None, "CC SA"), + ("sa", "2.0", None, "CC SA 2.0 JP"), + ("sa", "2.0", "jp", "CC SA 2.0 JP"), + ("sampling+", None, None, "CC Sampling+"), + ("by", None, None, "CC BY"), + ("by", "2.0", None, "CC BY 2.0"), + ("by", "2.5", "scotland", "CC BY 2.5 SCOTLAND"), + ("devnations", None, None, "CC DevNations 2.0"), ], ) -def test_can_identify_licenses_as_deprecated(slug: str, is_dep: bool): - lic = License(slug) - assert lic.is_deprecated == is_dep +def test_license_generates_name( + slug: str, + version: str | None, + jurisdiction: str | None, + full_name: str, +): + lic = License(slug, version, jurisdiction) + assert lic.full_name == full_name @pytest.mark.parametrize( - "slug, is_pd", + "slug, version, jurisdiction, path", [ - ("cc0", True), - ("pdm", True), - ("by", False), + ("cc0", None, None, "publicdomain/zero/1.0/"), + ("pdm", None, None, "publicdomain/mark/1.0/"), + ("certification", None, None, "publicdomain/certification/1.0/us/"), + ("publicdomain", None, None, "wiki/Public_domain"), + ("sa", None, None, "licenses/sa/1.0/"), + ("sa", "2.0", None, "licenses/sa/2.0/jp/"), + ("sa", "2.0", "jp", "licenses/sa/2.0/jp/"), + ("sampling+", None, None, "licenses/sampling+/1.0/"), + ("by", None, None, "licenses/by/4.0/"), + ("by", "2.0", None, "licenses/by/2.0/"), + ("by", "2.5", "scotland", "licenses/by/2.5/scotland/"), + ("devnations", None, None, "licenses/devnations/2.0/"), ], ) -def test_can_identify_licenses_as_pd(slug: str, is_pd: bool): - lic = License(slug) - assert lic.is_pd == is_pd +def test_license_generates_url( + slug: str, + version: str | None, + jurisdiction: str | None, + path: str, +): + lic = License(slug, version, jurisdiction) + assert lic.url.endswith(path) @pytest.mark.parametrize( - "slug, is_cc", + "lic", [ - ("cc0", True), - ("by", True), - ("pdm", False), + pytest.param(lic := License(name.value, ver, jur), id=lic.url) + for name in LicenseName + for (ver, jur) in name.allowed_versions_jurisdictions ], ) -def test_can_identify_licenses_as_cc(slug: str, is_cc: bool): - lic = License(slug) - assert lic.is_cc == is_cc +def test_all_urls_are_valid(lic: License): + res = requests.head(lic.url) + assert res.status_code == 200 diff --git a/packages/python/openverse-attribution/tests/test_license_name.py b/packages/python/openverse-attribution/tests/test_license_name.py new file mode 100644 index 00000000000..f3144f3129e --- /dev/null +++ b/packages/python/openverse-attribution/tests/test_license_name.py @@ -0,0 +1,82 @@ +import pytest +from openverse_attribution.license_name import LicenseName + + +@pytest.mark.parametrize( + "slug, display_name", + [ + ("by", "CC BY"), + ("sampling", "CC Sampling"), + ("devnations", "CC DevNations"), + ("cc0", "CC0"), + ("pdm", "Public Domain Mark"), + ("certification", "Public Domain Certification"), + ("publicdomain", "Public Domain"), + ], +) +def test_gets_display_name(slug: str, display_name: str): + lic = LicenseName(slug) + assert lic.display_name == display_name + + +@pytest.mark.parametrize( + "slug, is_cc", + [ + ("cc0", True), + ("by", True), + ("certification", False), + ("pdm", False), + ("publicdomain", False), + ], +) +def test_identifies_licenses_as_cc(slug: str, is_cc: bool): + lic = LicenseName(slug) + assert lic.is_cc == is_cc + + +@pytest.mark.parametrize( + "slug, is_dep", + [ + ("sa", True), + ("sampling+", True), + ("nc-sampling+", True), + ("by", False), + ], +) +def test_identifies_licenses_as_deprecated(slug: str, is_dep: bool): + lic = LicenseName(slug) + assert lic.is_deprecated == is_dep + + +@pytest.mark.parametrize( + "slug, is_pd", + [ + ("cc0", True), + ("certification", True), + ("pdm", True), + ("publicdomain", True), + ("by", False), + ], +) +def test_identifies_licenses_as_pd(slug: str, is_pd: bool): + lic = LicenseName(slug) + assert lic.is_pd == is_pd + + +@pytest.mark.parametrize( + "slug, ver_jur", + [ + ("sampling", [("1.0", "")]), + ("sampling+", [("1.0", ""), ("1.0", "de")]), + ("cc0", [("1.0", "")]), + ("certification", [("1.0", "us")]), + ("pdm", [("1.0", "")]), + ("publicdomain", []), + ], +) +def test_identifies_allowed_versions_and_jurisdictions( + slug: str, + ver_jur: list[tuple[str, str]], +): + lic = LicenseName(slug) + assert lic.allowed_versions_jurisdictions == ver_jur