From a675f46ab79139b4c7f4af94a6480b3a699ce5c8 Mon Sep 17 00:00:00 2001 From: Olga Bulat Date: Wed, 27 Mar 2024 17:07:53 +0300 Subject: [PATCH] Use search query parameters for additional search views in the API (#3887) * Use query parameters for API collections Signed-off-by: Olga Bulat * Fix source validation Signed-off-by: Olga Bulat * Update documentation Signed-off-by: Olga Bulat * Update api/api/serializers/media_serializers.py Co-authored-by: Madison Swain-Bowden * Update api/api/serializers/docs.py Co-authored-by: Madison Swain-Bowden * Update api/api/docs/base_docs.py Co-authored-by: Madison Swain-Bowden * Update api/api/serializers/docs.py Co-authored-by: Madison Swain-Bowden * Rename env variable to SHOW_COLLECTION_DOCS Signed-off-by: Olga Bulat * Combine search request source serializer with search request serializer Signed-off-by: Olga Bulat * Move index validation to media_serializer Signed-off-by: Olga Bulat * Remove MediaListRequestSerializer Signed-off-by: Olga Bulat * Update tests Signed-off-by: Olga Bulat * Use .format for doc strings Co-authored-by: FelixSjogren FEAT - Add lot for the content instead of a prop (#2) Closes #2 Co-authored-by: Stagge TEST - Add tests for VTag (#7) Added tests for Vtag, tests include: All props are sent to VButton VTag renders slot content Renders anchor tag. Co-authored-by: Stagge FEAT - Ensure inner VButton emits and handles events in VTag #4 Closes #4 Added accessibility label (#10) - Added aria-label to indicate that that the link is a tag Improvements from review lint Signed-off-by: Olga Bulat * Add comments on when fields need to be updated Signed-off-by: Olga Bulat --------- Signed-off-by: Olga Bulat Co-authored-by: Madison Swain-Bowden --- api/Pipfile | 2 +- api/Pipfile.lock | 201 ++++++++-------- api/api/constants/parameters.py | 2 + api/api/controllers/search_controller.py | 69 ++---- api/api/docs/audio_docs.py | 44 ++-- api/api/docs/base_docs.py | 176 +++++--------- api/api/docs/image_docs.py | 46 ++-- api/api/serializers/audio_serializers.py | 32 +-- api/api/serializers/docs.py | 84 +++++++ api/api/serializers/image_serializers.py | 19 +- api/api/serializers/media_serializers.py | 227 +++++++++++------- api/api/views/audio_views.py | 8 - api/api/views/image_views.py | 8 - api/api/views/media_views.py | 49 +--- api/conf/settings/misc.py | 2 + api/env.template | 1 + .../integration/test_media_integration.py | 15 +- .../controllers/test_search_controller.py | 33 ++- .../test_search_controller_search_query.py | 43 ++-- .../serializers/test_media_serializers.py | 24 +- api/test/unit/views/test_media_views.py | 32 --- 21 files changed, 520 insertions(+), 597 deletions(-) create mode 100644 api/api/constants/parameters.py create mode 100644 api/api/serializers/docs.py diff --git a/api/Pipfile b/api/Pipfile index f7086fb9378..b5ae2aabd22 100644 --- a/api/Pipfile +++ b/api/Pipfile @@ -16,7 +16,7 @@ pytest-django = "~=4.6" pytest-raises = "~=0.11" pytest-sugar = "~=0.9" remote-pdb = "~=2.1" -schemathesis = "~=3.23" +schemathesis = "~=3.25" [packages] adrf = "~=0.1.2" diff --git a/api/Pipfile.lock b/api/Pipfile.lock index 0c839e116c0..104e038208d 100644 --- a/api/Pipfile.lock +++ b/api/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "a90b59ed055b1041770e38fa7bcd43f9e6d6a2ff368be8d3a1e1e277a3568ee3" + "sha256": "25a3302a4894e9604bbb6f9d9cdcb8766aa268fd45b4da6762cd3e2aa1c6ec28" }, "pipfile-spec": 6, "requires": { @@ -22,7 +22,6 @@ "sha256:cc0283131f1da4ce81b3334a6f6dee121f9ce438502e4dc532da3f322cad3c85" ], "index": "pypi", - "markers": "python_version >= '3.8'", "version": "==0.1.4" }, "aiohttp": { @@ -105,7 +104,6 @@ "sha256:ff30218887e62209942f91ac1be902cc80cddb86bf00fbc6783b7a43b2bea26f" ], "index": "pypi", - "markers": "python_version >= '3.8'", "version": "==3.9.3" }, "aiosignal": { @@ -144,7 +142,7 @@ "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f", "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028" ], - "markers": "python_version >= '3.7'", + "markers": "python_version < '3.11'", "version": "==4.0.3" }, "attrs": { @@ -377,17 +375,15 @@ "sha256:b367e6fa6caac1c9f500adc79ada1b5b1242c50d5f716a1a4362030197847d30" ], "index": "pypi", - "markers": "python_version >= '3.7'", "version": "==6.7.1" }, "django": { "hashes": [ - "sha256:a2d4c4d4ea0b6f0895acde632071aff6400bfc331228fc978b05452a0ff3e9f1", - "sha256:b1260ed381b10a11753c73444408e19869f3241fc45c985cd55a30177c789d13" + "sha256:6e6ff3db2d8dd0c986b4eec8554c8e4f919b5c1ff62a5b4390c17aff2ed6e5c4", + "sha256:ddc24a0a8280a0430baa37aff11f28574720af05888c62b7cfe71d219f4599d3" ], "index": "pypi", - "markers": "python_version >= '3.8'", - "version": "==4.2.10" + "version": "==4.2.11" }, "django-asgi-lifespan": { "hashes": [ @@ -395,7 +391,6 @@ "sha256:f62f201eaa2a9d064eb24e3dd227a66a009f2b8cd79ea71ba7edc03cfbbbbd18" ], "index": "pypi", - "markers": "python_full_version >= '3.10.0' and python_full_version < '4.0.0'", "version": "==0.2.0" }, "django-cors-headers": { @@ -404,7 +399,6 @@ "sha256:0bf65ef45e606aff1994d35503e6b677c0b26cafff6506f8fd7187f3be840207" ], "index": "pypi", - "markers": "python_version >= '3.8'", "version": "==4.3.1" }, "django-log-request-id": { @@ -429,7 +423,6 @@ "sha256:ebc88df7da810732e2af9987f7f426c96204bf89319df4c6da6ca9a2942edd5b" ], "index": "pypi", - "markers": "python_version >= '3.6'", "version": "==5.4.0" }, "django-split-settings": { @@ -438,7 +431,6 @@ "sha256:4b3be146776d49c61bd9dcf89fad40edb1544f13ab27a87a0b1aecf5a0d636f4" ], "index": "pypi", - "markers": "python_version >= '3.7' and python_version < '4.0'", "version": "==1.2.0" }, "django-tqdm": { @@ -446,7 +438,6 @@ "sha256:571a68d50050667d6b8e0c1f284542d372801a0ac3e3e9f817f1b854e043c3f4" ], "index": "pypi", - "markers": "python_version >= '3.7'", "version": "==1.3.1" }, "django-uuslug": { @@ -455,7 +446,6 @@ "sha256:5029077e9682db81a9f847cec9dc33c07f2e455e31f98931869e6220ca65a3e9" ], "index": "pypi", - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", "version": "==2.0.0" }, "djangorestframework": { @@ -464,7 +454,6 @@ "sha256:eb63f58c9f218e1a7d064d17a70751f528ed4e1d35547fdade9aaf4cd103fd08" ], "index": "pypi", - "markers": "python_version >= '3.6'", "version": "==3.14.0" }, "drf-spectacular": { @@ -473,7 +462,6 @@ "sha256:452e0cff3c12ee057b897508a077562967b9e62717992eeec10e62dbbc7b5a33" ], "index": "pypi", - "markers": "python_version >= '3.6'", "version": "==0.27.1" }, "elastic-transport": { @@ -490,7 +478,6 @@ "sha256:cc459b7e0fb88dc85b43b9d7d254cffad552b0063a3e0a12290c8fa5f138c038" ], "index": "pypi", - "markers": "python_version >= '3.7'", "version": "==8.12.1" }, "elasticsearch-dsl": { @@ -499,9 +486,16 @@ "sha256:ce32b8529888a97be911531e7590816cf3b1f608263eff6fb75aa7106e233c88" ], "index": "pypi", - "markers": "python_version >= '3.7'", "version": "==8.12.0" }, + "exceptiongroup": { + "hashes": [ + "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14", + "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68" + ], + "markers": "python_version < '3.11'", + "version": "==1.2.0" + }, "frozenlist": { "hashes": [ "sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7", @@ -590,7 +584,6 @@ "sha256:34a17436ed1e96697a86f9de3d15a3b0be01d8bc8de9c1dffd59fb8234ed5307" ], "index": "pypi", - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==0.18.3" }, "h11": { @@ -874,7 +867,6 @@ "sha256:fe4c15f6c9285dc54ce6553a3ce908ed37c8f3825b5a51a15c91442bb955b868" ], "index": "pypi", - "markers": "python_version >= '3.8'", "version": "==10.2.0" }, "psycopg": { @@ -883,7 +875,6 @@ "sha256:4d5a0a5a8590906daa58ebd5f3cfc34091377354a1acced269dd10faf55da60e" ], "index": "pypi", - "markers": "python_version >= '3.7'", "version": "==3.1.18" }, "pycparser": { @@ -895,11 +886,11 @@ }, "python-dateutil": { "hashes": [ - "sha256:78e73e19c63f5b20ffa567001531680d939dc042bf7850431877645523c66709", - "sha256:cbf2f1da5e6083ac2fbfd4da39a25f34312230110440f424a14c7558bb85d82e" + "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", + "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.9.0" + "version": "==2.9.0.post0" }, "python-decouple": { "hashes": [ @@ -1204,7 +1195,7 @@ "sha256:3d9a267296243532db80c83a959a3400502165ade2c1338dea4e67915fd4745a", "sha256:5c89da2f3895767472a35556e539fd59f7edbe9b1e9c0e1c99eebeadc61838e4" ], - "markers": "python_version >= '3.8'", + "index": "pypi", "version": "==0.27.1" }, "uvloop": { @@ -1519,7 +1510,7 @@ "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f", "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028" ], - "markers": "python_version >= '3.7'", + "markers": "python_version < '3.11'", "version": "==4.0.3" }, "attrs": { @@ -1686,6 +1677,14 @@ "markers": "python_version >= '3.5'", "version": "==5.1.1" }, + "exceptiongroup": { + "hashes": [ + "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14", + "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68" + ], + "markers": "python_version < '3.11'", + "version": "==1.2.0" + }, "executing": { "hashes": [ "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147", @@ -1700,7 +1699,6 @@ "sha256:bc76d97d1a65bbd9842a6d722882098eb549ec8ee1081f9fb2e8ff29f0c300f1" ], "index": "pypi", - "markers": "python_version >= '3.7'", "version": "==3.3.0" }, "faker": { @@ -1717,7 +1715,6 @@ "sha256:773bd03c38fe745c0c03c5b4ebb92521a25d3306f903c0ca65706bf65cf19e2a" ], "index": "pypi", - "markers": "python_version >= '3.7' and python_version < '4.0'", "version": "==2.21.1" }, "freezegun": { @@ -1726,7 +1723,6 @@ "sha256:55e0fc3c84ebf0a96a5aa23ff8b53d70246479e9a68863f1fcac5a3e52f19dd6" ], "index": "pypi", - "markers": "python_version >= '3.7'", "version": "==1.4.0" }, "furl": { @@ -1770,11 +1766,11 @@ }, "hypothesis": { "hashes": [ - "sha256:1e31210951511b24ce8b3b6e04d791c466385a30ac3af571bf2223954b025d77", - "sha256:5b40fd81fce9e0b35f0a47e10eb41f375a6b9e8551d0e1084c83b8b0d0d1bb6b" + "sha256:313f64b9f9f95e12c8b5342466bef7f352d2608afeeb434817c039602b45f0c4", + "sha256:bbd227000cc21a9686a00867f031479c3812d8ab076e4af1c813f6b3a50c98f5" ], "markers": "python_version >= '3.8'", - "version": "==6.98.15" + "version": "==6.98.17" }, "hypothesis-graphql": { "hashes": [ @@ -1814,7 +1810,6 @@ "sha256:869335e8cded62ffb6fac8928e5287a05433d6462e3ebaac25f4216474dd6bc4" ], "index": "pypi", - "markers": "python_version >= '3.10'", "version": "==8.22.1" }, "jedi": { @@ -2144,7 +2139,6 @@ "sha256:cc448d95159fc0903d36182992778a096eda5752d660d47671383c8e2bf633f1" ], "index": "pypi", - "markers": "python_version >= '3.7'", "version": "==3.5.0" }, "pgspecial": { @@ -2163,8 +2157,7 @@ "version": "==1.4.0" }, "pook": { - "git": "git+https://github.com/h2non/pook.git", - "markers": "python_version >= '3.8'", + "git": "https://github.com/h2non/pook.git", "ref": "ef5bb1ade60aed66aaccb776a0fc9eb16d58bb5a" }, "prompt-toolkit": { @@ -2181,7 +2174,6 @@ "sha256:4d5a0a5a8590906daa58ebd5f3cfc34091377354a1acced269dd10faf55da60e" ], "index": "pypi", - "markers": "python_version >= '3.7'", "version": "==3.1.18" }, "ptyprocess": { @@ -2204,7 +2196,6 @@ "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67" ], "index": "pypi", - "markers": "python_version >= '3.8'", "version": "==2.11.1" }, "pygments": { @@ -2229,7 +2220,6 @@ "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8" ], "index": "pypi", - "markers": "python_version >= '3.7'", "version": "==7.4.4" }, "pytest-django": { @@ -2238,7 +2228,6 @@ "sha256:ca1ddd1e0e4c227cf9e3e40a6afc6d106b3e70868fd2ac5798a22501271cd0c7" ], "index": "pypi", - "markers": "python_version >= '3.8'", "version": "==4.8.0" }, "pytest-raises": { @@ -2267,11 +2256,11 @@ }, "python-dateutil": { "hashes": [ - "sha256:78e73e19c63f5b20ffa567001531680d939dc042bf7850431877645523c66709", - "sha256:cbf2f1da5e6083ac2fbfd4da39a25f34312230110440f424a14c7558bb85d82e" + "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", + "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.9.0" + "version": "==2.9.0.post0" }, "pyyaml": { "hashes": [ @@ -2352,7 +2341,6 @@ "sha256:94f73a92ac1248cf16189211011f97096bdada8a7baac8c79372663bbb57b5d0" ], "index": "pypi", - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==2.1.0" }, "requests": { @@ -2470,12 +2458,11 @@ }, "schemathesis": { "hashes": [ - "sha256:124ea082756acf4bda2258a90bfd0efd8ac436fa4428d4dbc2ddaa53275698ac", - "sha256:dc9e19129736c05d9a75bf8b3db2224fe50c81edc0ca3a14a5f0deb80f07c4ec" + "sha256:64738ecb8c8b5abc82a991a9b537daadcb81aefcf06105cf10fd7d26a0b98303", + "sha256:9d2e24c4d105af7c147565acff3ebddbf79500bd8afabe915c231b7e319d1e66" ], "index": "pypi", - "markers": "python_version >= '3.8'", - "version": "==3.25.5" + "version": "==3.25.6" }, "setproctitle": { "hashes": [ @@ -2646,72 +2633,72 @@ }, "time-machine": { "hashes": [ - "sha256:02b33a8c19768c94f7ffd6aa6f9f64818e88afce23250016b28583929d20fb12", - "sha256:0c9829b2edfcf6b5d72a6ff330d4380f36a937088314c675531b43d3423dd8af", - "sha256:0cc116056a8a2a917a4eec85661dfadd411e0d8faae604ef6a0e19fe5cd57ef1", - "sha256:0db97f92be3efe0ac62fd3f933c91a78438cef13f283b6dfc2ee11123bfd7d8a", - "sha256:12eed2e9171c85b703d75c985dab2ecad4fe7025b7d2f842596fce1576238ece", - "sha256:1812e48c6c58707db9988445a219a908a710ea065b2cc808d9a50636291f27d4", - "sha256:19a3b10161c91ca8e0fd79348665cca711fd2eac6ce336ff9e6b447783817f93", - "sha256:1a22be4df364f49a507af4ac9ea38108a0105f39da3f9c60dce62d6c6ea4ccdc", - "sha256:1ac8ff145c63cd0dcfd9590fe694b5269aacbc130298dc7209b095d101f8cdde", - "sha256:20205422fcf2caf9a7488394587df86e5b54fdb315c1152094fbb63eec4e9304", - "sha256:21bef5854d49b62e2c33848b5c3e8acf22a3b46af803ef6ff19529949cb7cf9f", - "sha256:2bd4169b808745d219a69094b3cb86006938d45e7293249694e6b7366225a186", - "sha256:2dc76ee55a7d915a55960a726ceaca7b9097f67e4b4e681ef89871bcf98f00be", - "sha256:32b71e50b07f86916ac04bd1eefc2bd2c93706b81393748b08394509ee6585dc", - "sha256:34dcdbbd25c1e124e17fe58050452960fd16a11f9d3476aaa87260e28ecca0fd", - "sha256:3a7a0a49ce50d9c306c4343a7d6a3baa11092d4399a4af4355c615ccc321a9d3", - "sha256:3c87856105dcb25b5bbff031d99f06ef4d1c8380d096222e1bc63b496b5258e6", - "sha256:42ef5349135626ad6cd889a0a81400137e5c6928502b0817ea9e90bb10702000", - "sha256:4ca20f85a973a4ca8b00cf466cd72c27ccc72372549b138fd48d7e70e5a190ab", - "sha256:4e3a2611f8788608ebbcb060a5e36b45911bc3b8adc421b1dc29d2c81786ce4d", - "sha256:4f2ae8d0e359b216b695f1e7e7256f208c390db0480601a439c5dd1e1e4e16ce", - "sha256:5aee23cd046abf9caeddc982113e81ba9097a01f3972e9560f5ed64e3495f66d", - "sha256:5c6245db573863b335d9ca64b3230f623caf0988594ae554c0c794e7f80e3e66", - "sha256:5f87787d562e42bf1006a87eb689814105b98c4d5545874a281280d0f8b9a2d9", - "sha256:5fe3fda5fa73fec74278912e438fce1612a79c36fd0cc323ea3dc2d5ce629f31", - "sha256:62fd14a80b8b71726e07018628daaee0a2e00937625083f96f69ed6b8e3304c0", - "sha256:66fb3877014dca0b9286b0f06fa74062357bd23f2d9d102d10e31e0f8fa9b324", - "sha256:679cbf9b15bfde1654cf48124128d3fbe52f821fa158a98fcee5fe7e05db1917", - "sha256:67fa45cd813821e4f5bec0ac0820869e8e37430b15509d3f5fad74ba34b53852", - "sha256:685d98593f13649ad5e7ce3e58efe689feca1badcf618ba397d3ab877ee59326", - "sha256:6c16d90a597a8c2d3ce22d6be2eb3e3f14786974c11b01886e51b3cf0d5edaf7", - "sha256:71acbc1febbe87532c7355eca3308c073d6e502ee4ce272b5028967847c8e063", - "sha256:7558622a62243be866a7e7c41da48eacd82c874b015ecf67d18ebf65ca3f7436", - "sha256:7693704c0f2f6b9beed912ff609781edf5fcf5d63aff30c92be4093e09d94b8e", - "sha256:88601de1da06c7cab3d5ed3d5c3801ef683366e769e829e96383fdab6ae2fe42", - "sha256:8d526cdcaca06a496877cfe61cc6608df2c3a6fce210e076761964ebac7f77cc", - "sha256:918f8389de29b4f41317d121f1150176fae2cdb5fa41f68b2aee0b9dc88df5c3", - "sha256:924377d398b1c48e519ad86a71903f9f36117f69e68242c99fb762a2465f5ad2", - "sha256:9f128db8997c3339f04f7f3946dd9bb2a83d15e0a40d35529774da1e9e501511", - "sha256:9fad549521c4c13bdb1e889b2855a86ec835780d534ffd8f091c2647863243be", - "sha256:a26bdf3462d5f12a4c1009fdbe54366c6ef22c7b6f6808705b51dedaaeba8296", - "sha256:ab04cf4e56e1ee65bee2adaa26a04695e92eb1ed1ccc65fbdafd0d114399595a", - "sha256:b0c8f24ae611a58782773af34dd356f1f26756272c04be2be7ea73b47e5da37d", - "sha256:bdfe4a7f033e6783c3e9a7f8d8fc0b115367330762e00a03ff35fedf663994f3", - "sha256:c23b2408e3adcedec84ea1131e238f0124a5bc0e491f60d1137ad7239b37c01a", - "sha256:ccbce292380ebf63fb9a52e6b03d91677f6a003e0c11f77473efe3913a75f289", - "sha256:cfef4ebfb4f055ce3ebc7b6c1c4d0dbfcffdca0e783ad8c6986c992915a57ed3", - "sha256:d4a2d3db2c3b8e519d5ef436cd405abd33542a7b7761fb05ef5a5f782a8ce0b1", - "sha256:dabb3b155819811b4602f7e9be936e2024e20dc99a90f103e36b45768badf9c3", - "sha256:de01f33aa53da37530ad97dcd17e9affa25a8df4ab822506bb08101bab0c2673", - "sha256:dec0ec2135a4e2a59623e40c31d6e8a8ae73305ade2634380e4263d815855750", - "sha256:e433827eccd6700a34a2ab28fd9361ff6e4d4923f718d2d1dac6d1dcd9d54da6", - "sha256:e58d82fe0e59d6e096ada3281d647a2e7420f7da5453b433b43880e1c2e8e0c5", - "sha256:e9935aff447f5400a2665ab10ed2da972591713080e1befe1bb8954e7c0c7806", - "sha256:e9a9d150e098be3daee5c9f10859ab1bd14a61abebaed86e6d71f7f18c05b9d7", - "sha256:f5fa9610f7e73fff42806a2ed8b06d862aa59ce4d178a52181771d6939c3e237" + "sha256:029cd697f9cd13b4701e256eb79d995f6728e80da0c825028c22035a2c222720", + "sha256:094e4149091f8f12691f71ecae8c8830e1cd23e5e22448a74c4e5a05310fd1cd", + "sha256:10e30c8e9b5ef1e4b10e588d3e789888ff2a94bcc9120d300954116a5d83556b", + "sha256:14a82de9b00ed8427e4b9136a6d8e10a8c330b5cea62b5813fbedde978701c4a", + "sha256:17f0c84329af5eb24544ac9f7097c20df3777cfce2cce8c1c4595055bef78102", + "sha256:26bf274f6b591ddc0f41e54b4b3a74d83748177dd96c5cfb8496adae1ada00ab", + "sha256:2c3e83e6976a3e0521fce8fd4a6d38d9385ea129cc433fb7a66c0918a499b18c", + "sha256:2f5666edb45201679786611b2f016ad5d655acc675e6f62f6d4e62891dbcdfe4", + "sha256:380d0a0ebda70637629ec18e1ca0ee098c04268a71d18852a3c4317fca7d7393", + "sha256:3c784c6bcc82856ca69f8cf26ce56f2cf06a113d340d929c41921d03f6b17b38", + "sha256:3e1b006d483d11f0dfe64b2a7f17d5fa16c3fd2940042731f5b3bd1533c7d827", + "sha256:412ace2c9053a7f4c513d8723f78bec3a5c2b4721e6bbf60f33de94abc88503a", + "sha256:451583aecfc6b41805a6685b72cefd65c068313bcb39a1a6e246cbcccfda71d2", + "sha256:4a2670120780ad67c327f065eed03be917209cecd6fb0e9ada29720dbc1411e9", + "sha256:4df6ee8f1ed9d9ca4aa7750e5cfc0d8bc0143c2cac068258af5bad5f50e3b3e8", + "sha256:669ae68799cbce72b09fb896a4a2c4314255f64dd5d68845b0aea71f32c082f5", + "sha256:6a04eee7c5832efc57203bbd0d1d7b11ce52dbd35ae592edfdd4c25808471d06", + "sha256:6aadb4bd780c5f89e55ac27d92192daff9cf7f307686798755f660a1f4ed3665", + "sha256:6b6559d8fac58d99a90c518f0a559de62b6ceff2fe9c3410eb78acdc3e16cfe4", + "sha256:6c02dac22ed1669045bd39d214a5c52e097fee82fdb8d665700ff9f6cb499cfe", + "sha256:6ce0f17783620fab245a7695e854cd7ecfb3c2cc6ccd5542d43ac3ecdb0100a3", + "sha256:6ed812603f0233770faba6f7e60f5ed04bae1a5290c8159f19cb8c6888f99fc1", + "sha256:6f5a5b20bde09ec4ab3143c94848b8323190c4aefab129f92da9e50b4f55d173", + "sha256:74be790ced84b063d4c63ec7618d9b2404f3e79c1397750197a046b303829eef", + "sha256:7552d38b1f985feaa3eb3142873881e96ca07be02137b60414daf709bab36a2c", + "sha256:7aab93218e9ad394164d69de164a81a4dce5a8b4528a07b77de806e422032fe2", + "sha256:7d3e37eb8243415a8b6429099f191a8a83483e64aba9e04b21184ce9a1b6b1e6", + "sha256:892ee00cc176c9da6b465cf9d44da408fa3297d72fcb45aec1aac09d8e381f22", + "sha256:8c36e9ecdf9afc729ba5c137f906a13bf24d16255871f3bb623b9d129859f3fa", + "sha256:8d0d56a67b4656ae527b8152dd682642e31735559de653619116e92ab345b86a", + "sha256:8f96ed5e7fe3cae13b23ff1c4e93c7f90165289b477b34f1da3fa1277bb0f5a6", + "sha256:9227c26a8d9e0cb0727917aa6470855320bde85f65deba58b988a8c0cc04bf9a", + "sha256:932cfde6024f9cd8874a0d3b4651db49fe72cbd144edc7b00153d5729ba75379", + "sha256:9ca7c08ded824e6ae138280524d9ebcceaf50623e5263f24e38a28259215fb37", + "sha256:9d2fac0e454c3aa63c10b331f5349fa2c961d58c4d430113f14698aac9565b3c", + "sha256:9e5a9ff08c585b8aac5d3db80a828dc549f5962c07297e1441e04cb0825464ac", + "sha256:a4e1a3c8bca77201dc6684d3c1d65d3ca4249872beb7ee9283c0b6e2df5cb677", + "sha256:a59bee89bf015f3ff1db012436bc7905fd99a4aa827d2feb73f1301afb0cb5c6", + "sha256:ae871acd4121c510e6822a649e0c511ad4301d7cb92431ffc99e662c64f9ba9d", + "sha256:b1076afb7825122a89a7be157d3a02f69f07d6fa0bacfaec463c71ac0488bd58", + "sha256:b604d904dbe5aa36be37df61b47c15d87c359764dadb70f3a8eae7191e382bd4", + "sha256:b6daf1ff062855ae4723fdb0e7d7f47bcd0b3d9b17496d63fbb1ef66907486e2", + "sha256:b99c8da2623dcb6c5cc05bd07138886d21fdab9081295f5783dfd799f9b91065", + "sha256:ba9c2da2cef0b0350beaaa7031acba5296cdc2146e59083f9b1ecd9036ff1cb9", + "sha256:c0124430457b4a5d4c33f739ea858bfbcdacba7cd0c72cc6c607d016a0bcac13", + "sha256:c7a55717b8e3a153e4b7a9b6f551cd89e9d037db7e7732fc909c436d94e79628", + "sha256:cb603f46281c2d7f5c9607dd195107c9642af9bb36806386f66087b2741d0327", + "sha256:cb9f6c62a205f12f6f054a027df221927f8066b2bca2b82477793291460410fa", + "sha256:dace63a21873a11ee2800cd765d35e295b78645477fe824283172e0f5ed87e93", + "sha256:dd12a0be7f8cf5ea5617e7a6fed3800c1cf26976e5932058bcab1ce962e9bb0d", + "sha256:e00a9cff6df58cfe584ab55cbb21acdaa3ecc6d75414d59cf65726b2e3d90a6c", + "sha256:e66796ba8d7adfe23deb03560eeaeb4ca7c11af43ad6cadadc7d3211ee6b696f", + "sha256:f01da787c2ac4c05e3722e94bf70da9698548c13ccfe6ca44ca2633c4b1cc24d", + "sha256:f163cbc66bcc76adcfdc8b649d3de51c3281b2193c4e753786d1af81582660fb", + "sha256:f92693a7ceedde14d507e906a26600ef11b80ca17cccfa91906266510f07b024", + "sha256:fb90ffdbc67fa5a35948f10c1b3e6658e8db474468f6a64f8e8a2ab611eea047" ], "markers": "implementation_name != 'pypy'", - "version": "==2.13.0" + "version": "==2.14.0" }, "tomli": { "hashes": [ "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" ], - "markers": "python_version >= '3.7'", + "markers": "python_version < '3.11'", "version": "==2.0.1" }, "tomli-w": { diff --git a/api/api/constants/parameters.py b/api/api/constants/parameters.py new file mode 100644 index 00000000000..b6dd7ac1b72 --- /dev/null +++ b/api/api/constants/parameters.py @@ -0,0 +1,2 @@ +COLLECTION = "unstable__collection" +TAG = "unstable__tag" diff --git a/api/api/controllers/search_controller.py b/api/api/controllers/search_controller.py index 351712dfc38..2f322e75e96 100644 --- a/api/api/controllers/search_controller.py +++ b/api/api/controllers/search_controller.py @@ -34,17 +34,7 @@ # Using TYPE_CHECKING to avoid circular imports when importing types if TYPE_CHECKING: - from api.serializers.audio_serializers import AudioCollectionRequestSerializer - from api.serializers.media_serializers import ( - MediaSearchRequestSerializer, - PaginatedRequestSerializer, - ) - - MediaListRequestSerializer = ( - AudioCollectionRequestSerializer - | MediaSearchRequestSerializer - | PaginatedRequestSerializer - ) + from api.serializers.media_serializers import MediaSearchRequestSerializer module_logger = logging.getLogger(__name__) @@ -220,7 +210,7 @@ def get_excluded_providers_query() -> Q | None: def get_index( exact_index: bool, origin_index: OriginIndex, - search_params: MediaListRequestSerializer, + search_params: MediaSearchRequestSerializer, ) -> SearchIndex: if exact_index: return origin_index @@ -234,7 +224,7 @@ def get_index( def create_search_filter_queries( - search_params: MediaListRequestSerializer, + search_params: MediaSearchRequestSerializer, ) -> dict[str, list[Q]]: """ Create a list of Elasticsearch queries for filtering search results. @@ -275,7 +265,7 @@ def create_search_filter_queries( def create_ranking_queries( - search_params: MediaListRequestSerializer, + search_params: MediaSearchRequestSerializer, ) -> list[Q]: queries = [Q("rank_feature", field="standardized_popularity", boost=DEFAULT_BOOST)] if search_params.data["unstable__authority"]: @@ -286,7 +276,7 @@ def create_ranking_queries( def build_search_query( - search_params: MediaListRequestSerializer, + search_params: MediaSearchRequestSerializer, ) -> Q: # Apply filters from the url query search parameters. url_queries = create_search_filter_queries(search_params) @@ -383,12 +373,10 @@ def log_query_features(query: str, query_name) -> None: def build_collection_query( - search_params: MediaListRequestSerializer, - collection_params: dict[str, str], + search_params: MediaSearchRequestSerializer, ): """ Build the query to retrieve items in a collection. - :param collection_params: `tag`, `source` and/or `creator` values from the path. :param search_params: the validated search parameters. :return: the search client with the query applied. """ @@ -397,15 +385,12 @@ def build_collection_query( # with its corresponding field in Elasticsearch. "None" means that the # names are identical. filters = [ - # Collection filters allow a single value. ("tag", "tags.name.keyword"), ("source", None), ("creator", "creator.keyword"), ] for serializer_field, es_field in filters: - if serializer_field in collection_params: - if not (argument := collection_params.get(serializer_field)): - continue + if argument := search_params.validated_data.get(serializer_field): parameter = es_field or serializer_field search_query["filter"].append({"term": {parameter: argument}}) @@ -422,20 +407,14 @@ def build_collection_query( return Q("bool", **search_query) -def build_query( - strategy: SearchStrategy, - search_params: MediaListRequestSerializer, - collection_params: dict[str, str] | None, -) -> Q: - if strategy == "collection": - return build_collection_query(search_params, collection_params) - return build_search_query(search_params) +query_builders = { + "search": build_search_query, + "collection": build_collection_query, +} def query_media( - strategy: SearchStrategy, - search_params: MediaListRequestSerializer, - collection_params: dict[str, str] | None, + search_params: MediaSearchRequestSerializer, origin_index: OriginIndex, exact_index: bool, page_size: int, @@ -444,17 +423,15 @@ def query_media( page: int = 1, ) -> tuple[list[Hit], int, int, dict]: """ - If ``strategy`` is ``search``, perform a ranked paginated search + Build the search or collection query, execute it and return + paginated result. + For queries with `collection` parameter, returns media filtered + by the `tag`, `source` or `source`/`creator` combination, ordered + by the time when they were added to Openverse. + For other queries, performs a ranked paginated search from the set of keywords and, optionally, filters. - If `strategy` is `collection`, perform a paginated search - for the `tag`, `source` or `source` and `creator` combination. - - :param collection_params: The path parameters for collection search, if - strategy is `collection`. - :param strategy: Whether to perform a default search or retrieve a collection. - :param search_params: If `strategy` is `collection`, `PaginatedRequestSerializer` - or `AudioCollectionRequestSerializer`. If `strategy` is `search`, search - query params, see :class: `MediaRequestSerializer`. + + :param search_params: Search query params, see :class: `MediaSearchRequestSerializer`. :param origin_index: The Elasticsearch index to search (e.g. 'image') :param exact_index: whether to skip all modifications to the index name :param page_size: The number of results to return per page. @@ -468,7 +445,11 @@ def query_media( """ index = get_index(exact_index, origin_index, search_params) - query = build_query(strategy, search_params, collection_params) + strategy: SearchStrategy = ( + "collection" if search_params.validated_data.get("collection") else "search" + ) + + query = query_builders[strategy](search_params) s = Search(index=index).query(query) diff --git a/api/api/docs/audio_docs.py b/api/api/docs/audio_docs.py index 49adee372c3..06f5d573c3c 100644 --- a/api/api/docs/audio_docs.py +++ b/api/api/docs/audio_docs.py @@ -6,7 +6,11 @@ from drf_spectacular.utils import OpenApiResponse, extend_schema -from api.docs.base_docs import collection_schema, custom_extend_schema, fields_to_md +from api.docs.base_docs import ( + SEARCH_DESCRIPTION, + custom_extend_schema, + fields_to_md, +) from api.examples import ( audio_complain_201_example, audio_complain_curl, @@ -35,24 +39,17 @@ from api.serializers.provider_serializers import ProviderSerializer -search = custom_extend_schema( - desc=f""" - Search audio files using a query string. +serializer = AudioSearchRequestSerializer(context={"media_type": "audio"}) +audio_filter_fields = fields_to_md([f for f in serializer.field_names if f != "q"]) - By using this endpoint, you can obtain search results based on specified - query and optionally filter results by - {fields_to_md(AudioSearchRequestSerializer.field_names)}. - - Results are ranked in order of relevance and paginated on the basis of the - `page` param. The `page_size` param controls the total number of pages. +audio_search_description = SEARCH_DESCRIPTION.format( + filter_fields=audio_filter_fields, + media_type="audio files", +) - Although there may be millions of relevant records, only the most relevant - several thousand records can be viewed. This is by design: the search - endpoint should be used to find the top 10,000 most relevant results, not - for exhaustive search or bulk download of every barely relevant result. As - such, the caller should not try to access pages beyond `page_count`, or else - the server will reject the query.""", - params=AudioSearchRequestSerializer, +search = custom_extend_schema( + desc=audio_search_description, + params=serializer, res={ 200: (AudioSerializer, audio_search_200_example), 400: (ValidationError, audio_search_400_example), @@ -122,16 +119,3 @@ }, eg=[audio_waveform_curl], ) - -source_collection = collection_schema( - media_type="audio", - collection="source", -) -creator_collection = collection_schema( - media_type="audio", - collection="creator", -) -tag_collection = collection_schema( - media_type="audio", - collection="tag", -) diff --git a/api/api/docs/base_docs.py b/api/api/docs/base_docs.py index 483e051af46..96e3f6cee83 100644 --- a/api/api/docs/base_docs.py +++ b/api/api/docs/base_docs.py @@ -1,12 +1,9 @@ from http.client import responses as http_responses from textwrap import dedent -from typing import Literal from django.conf import settings from rest_framework.exceptions import ( - NotAuthenticated, NotFound, - ValidationError, ) from drf_spectacular.openapi import AutoSchema @@ -18,12 +15,6 @@ ) from api.constants.media_types import MediaType -from api.serializers.audio_serializers import ( - AudioCollectionRequestSerializer, - AudioSerializer, -) -from api.serializers.image_serializers import ImageSerializer -from api.serializers.media_serializers import PaginatedRequestSerializer def fields_to_md(field_names): @@ -149,109 +140,64 @@ def build_source_path_parameter(media_type: MediaType): description="The tag of the media. Not case-sensitive, matches exactly.", ) - -def get_collection_description(media_type, collection): - if collection == "tag": - return f""" -Get a collection of {media_type} with a specific tag. - -This endpoint matches a single tag, exactly and entirely. - -Differences that will cause tags to not match are: -- upper and lower case letters -- diacritical marks -- hyphenation -- spacing -- multi-word tags where the query is only one of the words in the tag -- multi-word tags where the words are in a different order - -Examples of tags that **do not** match: -- "Low-Quality" and "low-quality" -- "jalapeño" and "jalapeno" -- "Saint Pierre des Champs" and "Saint-Pierre-des-Champs" -- "dog walking" and "dog walking" (where the latter has two spaces between the -last two words, as in a typographical error) -- "runner" and "marathon runner" -- "exclaiming loudly" and "loudly exclaiming" - -For non-exact or multi-tag matching, using the `search` endpoint's `tags` query -parameter. - -The returned results are ordered based on the time when they were added to Openverse. - """ - elif collection == "source": - return f""" -Get a collection of {media_type} from a specific source. - -This endpoint returns only the exact matches. To search within the source value, -use the `search` endpoint with `source` query parameter. - -The results in the collection will be sorted by the order in which they -were added to Openverse. - """ - elif collection == "creator": - return f""" -Get a collection of {media_type} by a specific creator from the specified source. - -This endpoint returns only the exact matches both on the creator and the source. -Notice that a single creator's media items can be found on several sources, but -this endpoint only returns the items from the specified source. To search within -the creator value, use the `search` endpoint with `source` query parameter -instead of `q`. - -The items will be sorted by the date when they were added to Openverse. - """ - - -COLLECTION_TO_OPERATION_ID = { - ("images", "source"): "images_by_source", - ("images", "creator"): "images_by_source_and_creator", - ("images", "tag"): "images_by_tag", - ("audio", "source"): "audio_by_source", - ("audio", "creator"): "audio_by_source_and_creator", - ("audio", "tag"): "audio_by_tag", -} - - -def collection_schema( - media_type: Literal["images", "audio"], - collection: Literal["source", "creator", "tag"], -): - if media_type == "images": - request_serializer = PaginatedRequestSerializer - serializer = ImageSerializer - else: - request_serializer = AudioCollectionRequestSerializer - serializer = AudioSerializer - - if collection == "tag": - responses = { - 200: serializer(many=True), - 404: NotFound, - 400: ValidationError, - 401: (NotAuthenticated, None), - } - path_parameters = [tag_path_parameter] - else: - responses = { - 200: serializer(many=True), - 404: source_404_response, - 400: ValidationError, - 401: (NotAuthenticated, None), - } - path_parameters = [build_source_path_parameter(media_type)] - if collection == "creator": - path_parameters.append(creator_path_parameter) - operation_id = COLLECTION_TO_OPERATION_ID[(media_type, collection)] - description = get_collection_description(media_type, collection) - return extend_schema( - operation_id=operation_id, - summary=operation_id, - auth=[], - description=description, - responses=responses, - parameters=[ - request_serializer, - *path_parameters, - ], - ) +SEARCH_DESCRIPTION_DEFAULT = """ +Return audio files that match the query. + +This endpoint allows you to search within specific fields, or to retrieve +a collection of all {media_type} from a specific source, creator or tag. +Results are paginated on the basis of the `page` parameter. The `page_size` +parameter controls the total number of pages. + +Although there may be millions of relevant records, only the most relevant +or the most recent several thousand records can be viewed. This is by design: +the search endpoint should be used to find the top 10,000 most relevant +results, not for exhaustive search or bulk download of every barely relevant +result. As such, the caller should not try to access pages beyond `page_count`, +or else the server will reject the query. + +### Default search +The **default search** allows users to find media based on a query string. +It supports a wide range of optional filters to narrow down search results +according to specific needs. + +By default, this endpoint performs a full-text search for the value of `q` parameter. +You can search within the `creator`, `title` or `tags` fields by omitting +the `q` parameter and using one of these field parameters. +These results can be filtered by {filter_fields}. + +The default search results are sorted by relevance. + +### Collection search +The collection search allows to retrieve a collection of media from a specific source, +creator or tag. The `collection` parameter is used to specify the type of collection to retrieve. + +- `collection=tag&tag=tagName` will return the media with tag `tagName`. +- `collection=source&source=sourceName` will return the media from source `sourceName`. +- `collection=creator&creator=creatorName` will return the media by creator `creatorName`. + +Collection results are sorted by the time they were added to Openverse, with the most recent +additions appearing first. The filters such as `license` are not available for collections. +""" + +SEARCH_DESCRIPTION_COLLECTIONS_DISABLED = """ +Search {media_type} using a query string. + +By using this endpoint, you can obtain search results based on specified +query and optionally filter results by +{filter_fields}. + +Results are ranked in order of relevance and paginated on the basis of the +`page` param. The `page_size` param controls the total number of pages. + +Although there may be millions of relevant records, only the most relevant +several thousand records can be viewed. This is by design: the search +endpoint should be used to find the top 10,000 most relevant results, not +for exhaustive search or bulk download of every barely relevant result. As +such, the caller should not try to access pages beyond `page_count`, or else +the server will reject the query.""" + +SEARCH_DESCRIPTION = ( + SEARCH_DESCRIPTION_DEFAULT + if settings.SHOW_COLLECTION_DOCS + else SEARCH_DESCRIPTION_COLLECTIONS_DISABLED +) diff --git a/api/api/docs/image_docs.py b/api/api/docs/image_docs.py index fe63f3bc620..5770f02c794 100644 --- a/api/api/docs/image_docs.py +++ b/api/api/docs/image_docs.py @@ -6,7 +6,11 @@ from drf_spectacular.utils import OpenApiResponse, extend_schema -from api.docs.base_docs import collection_schema, custom_extend_schema, fields_to_md +from api.docs.base_docs import ( + SEARCH_DESCRIPTION, + custom_extend_schema, + fields_to_md, +) from api.examples import ( image_complain_201_example, image_complain_curl, @@ -37,24 +41,17 @@ from api.serializers.provider_serializers import ProviderSerializer +serializer = ImageSearchRequestSerializer(context={"media_type": "image"}) +image_filter_fields = fields_to_md([f for f in serializer.field_names if f != "q"]) + +image_search_description = SEARCH_DESCRIPTION.format( + filter_fields=image_filter_fields, + media_type="images", +) + search = custom_extend_schema( - desc=f""" - Search images using a query string. - - By using this endpoint, you can obtain search results based on specified - query and optionally filter results by - {fields_to_md(ImageSearchRequestSerializer.field_names)}. - - Results are ranked in order of relevance and paginated on the basis of the - `page` param. The `page_size` param controls the total number of pages. - - Although there may be millions of relevant records, only the most relevant - several thousand records can be viewed. This is by design: the search - endpoint should be used to find the top 10,000 most relevant results, not - for exhaustive search or bulk download of every barely relevant result. As - such, the caller should not try to access pages beyond `page_count`, or else - the server will reject the query.""", - params=ImageSearchRequestSerializer, + desc=image_search_description, + params=serializer, res={ 200: (ImageSerializer, image_search_200_example), 400: (ValidationError, image_search_400_example), @@ -128,16 +125,3 @@ ) watermark = extend_schema(deprecated=True, responses={404: NotFound}) - -source_collection = collection_schema( - media_type="images", - collection="source", -) -creator_collection = collection_schema( - media_type="images", - collection="creator", -) -tag_collection = collection_schema( - media_type="images", - collection="tag", -) diff --git a/api/api/serializers/audio_serializers.py b/api/api/serializers/audio_serializers.py index 69cf614a5df..b65f56c89ce 100644 --- a/api/api/serializers/audio_serializers.py +++ b/api/api/serializers/audio_serializers.py @@ -4,16 +4,13 @@ from api.constants.field_order import field_position_map from api.constants.field_values import AUDIO_CATEGORIES, LENGTHS -from api.constants.media_types import AUDIO_TYPE from api.models import Audio, AudioReport, AudioSet from api.serializers.fields import EnumCharField, SchemableHyperlinkedIdentityField from api.serializers.media_serializers import ( MediaReportRequestSerializer, MediaSearchRequestSerializer, MediaSerializer, - PaginatedRequestSerializer, get_hyperlinks_serializer, - get_search_request_source_serializer, ) @@ -22,31 +19,11 @@ ####################### -AudioSearchRequestSourceSerializer = get_search_request_source_serializer("audio") - - -class AudioCollectionRequestSerializer(PaginatedRequestSerializer): - field_names = [ - *PaginatedRequestSerializer.field_names, - "peaks", - ] - - peaks = serializers.BooleanField( - help_text="Whether to include the waveform peaks or not", - required=False, - default=False, - ) - - -class AudioSearchRequestSerializer( - AudioSearchRequestSourceSerializer, - MediaSearchRequestSerializer, -): +class AudioSearchRequestSerializer(MediaSearchRequestSerializer): """Parse and validate search query string parameters.""" field_names = [ *MediaSearchRequestSerializer.field_names, - *AudioSearchRequestSourceSerializer.field_names, "category", "length", ] @@ -71,13 +48,6 @@ class AudioSearchRequestSerializer( default=False, ) - def validate_internal__index(self, value): - if not (index := super().validate_internal__index(value)): - return None - if not index.startswith(AUDIO_TYPE): - raise serializers.ValidationError(f"Invalid index name `{value}`.") - return index - class AudioReportRequestSerializer(MediaReportRequestSerializer): identifier = serializers.SlugRelatedField( diff --git a/api/api/serializers/docs.py b/api/api/serializers/docs.py new file mode 100644 index 00000000000..5c333f2b6cd --- /dev/null +++ b/api/api/serializers/docs.py @@ -0,0 +1,84 @@ +from django.conf import settings + +from api.constants.parameters import TAG + + +CREATOR_COLLECTIONS_DISABLED = """ +Search by creator only. Cannot be used with `q`. The search +is fuzzy, so `creator=john` will match any value that includes the +word `john`. If the value contains space, items that contain any of +the words in the value will match. To search for several values, +join them with a comma.""" + +CREATOR = """ +_When `q` parameter is present, `creator` parameter is ignored._ + +**Creator collection** +When used with `collection=creator&source=sourceName`, returns the collection of media +by the specified creator. Notice that a single creator's media items +can be found on several sources, but this collection only returns the +items from the specified source. +This is why for this collection, both the creator and the source +parameters are required, and matched exactly. For a fuzzy creator search, +use the default search without the `collection` parameter. + +**Creator search** +When used without the `collection` parameter, will search in the creator field only. +The search is fuzzy, so `creator=john` will match any value that includes the +word `john`. If the value contains space, items that contain any of +the words in the value will match. To search for several values, +join them with a comma. +""" + +CREATOR_HELP_TEXT = ( + CREATOR if settings.SHOW_COLLECTION_DOCS else CREATOR_COLLECTIONS_DISABLED +) +COLLECTION_HELP_TEXT = f""" +The kind of media collection to return. + +Should be used with `{TAG}`, `source` or `creator`+`source`""" + +EXCLUDED_SOURCE_HELP_TEXT = """ +A comma separated list of data sources to exclude from the search. +Valid values are `source_name`s from the stats endpoint: {origin}/v1/{media_path}/stats/. +""" +SOURCE_HELP_TEXT_COLLECTIONS_DISABLED = """ +A comma separated list of data sources; valid values are +`source_name`s from the stats endpoint: {origin}/v1/{media_path}/stats/.""" + +SOURCE = """ +For default search, a comma separated list of data sources. +When the `collection` parameter is used, this parameter only accepts a single source. + +Valid values are `source_name`s from the stats endpoint: {origin}/v1/{media_path}/stats/. +""" + +SOURCE_HELP_TEXT = ( + SOURCE if settings.SHOW_COLLECTION_DOCS else SOURCE_HELP_TEXT_COLLECTIONS_DISABLED +) + +TAG_HELP_TEXT = """ +_Must be used with `collection=tag`_ + +Get the collection of media with a specific tag. Returns the collection of media +that has the specified tag, matching exactly and entirely. + +Differences that will cause tags to not match are: +- upper and lower case letters +- diacritical marks +- hyphenation +- spacing +- multi-word tags where the query is only one of the words in the tag +- multi-word tags where the words are in a different order. + +Examples of tags that **do not** match: +- "Low-Quality" and "low-quality" +- "jalapeño" and "jalapeno" +- "Saint Pierre des Champs" and "Saint-Pierre-des-Champs" +- "dog walking" and "dog walking" (where the latter has two spaces between the +last two words, as in a typographical error) +- "runner" and "marathon runner" +- "exclaiming loudly" and "loudly exclaiming" + +For non-exact or multi-tag matching, using the `tags` query parameter. +""" diff --git a/api/api/serializers/image_serializers.py b/api/api/serializers/image_serializers.py index 9238ce0b625..f4a7c05ddfe 100644 --- a/api/api/serializers/image_serializers.py +++ b/api/api/serializers/image_serializers.py @@ -5,7 +5,6 @@ from api.constants.field_order import field_position_map from api.constants.field_values import ASPECT_RATIOS, IMAGE_CATEGORIES, IMAGE_SIZES -from api.constants.media_types import IMAGE_TYPE from api.models import Image, ImageReport from api.serializers.base import BaseModelSerializer from api.serializers.fields import EnumCharField @@ -14,7 +13,6 @@ MediaSearchRequestSerializer, MediaSerializer, get_hyperlinks_serializer, - get_search_request_source_serializer, ) @@ -23,18 +21,11 @@ ####################### -ImageSearchRequestSourceSerializer = get_search_request_source_serializer("image") - - -class ImageSearchRequestSerializer( - ImageSearchRequestSourceSerializer, - MediaSearchRequestSerializer, -): +class ImageSearchRequestSerializer(MediaSearchRequestSerializer): """Parse and validate search query string parameters.""" field_names = [ *MediaSearchRequestSerializer.field_names, - *ImageSearchRequestSourceSerializer.field_names, "category", "aspect_ratio", "size", @@ -61,14 +52,6 @@ class ImageSearchRequestSerializer( required=False, ) - def validate_internal__index(self, value): - index = super().validate_internal__index(value) - if index is None: - return None - if not index.startswith(IMAGE_TYPE): - raise serializers.ValidationError(f"Invalid index name `{value}`.") - return index - class ImageReportRequestSerializer(MediaReportRequestSerializer): identifier = serializers.SlugRelatedField( diff --git a/api/api/serializers/media_serializers.py b/api/api/serializers/media_serializers.py index 26fdd521e52..e574b738cca 100644 --- a/api/api/serializers/media_serializers.py +++ b/api/api/serializers/media_serializers.py @@ -11,10 +11,18 @@ from api.constants import sensitivity from api.constants.licenses import LICENSE_GROUPS +from api.constants.parameters import COLLECTION, TAG from api.constants.sorting import DESCENDING, RELEVANCE, SORT_DIRECTIONS, SORT_FIELDS from api.controllers import search_controller from api.models.media import AbstractMedia from api.serializers.base import BaseModelSerializer +from api.serializers.docs import ( + COLLECTION_HELP_TEXT, + CREATOR_HELP_TEXT, + EXCLUDED_SOURCE_HELP_TEXT, + SOURCE_HELP_TEXT, + TAG_HELP_TEXT, +) from api.serializers.fields import SchemableHyperlinkedIdentityField from api.utils.help_text import make_comma_separated_help_text from api.utils.licenses import get_license_url @@ -82,6 +90,11 @@ def validate_page_size(self, value): return value +EXCLUDED_COLLECTION_REQUEST_FIELDS = ( + [] if settings.SHOW_COLLECTION_DOCS else [COLLECTION, TAG] +) + + @extend_schema_serializer( # Hide unstable and internal fields from documentation. # Also see `field_names` below. @@ -92,6 +105,7 @@ def validate_page_size(self, value): "unstable__authority_boost", "unstable__include_sensitive_results", "internal__index", + *EXCLUDED_COLLECTION_REQUEST_FIELDS, ], ) class MediaSearchRequestSerializer(PaginatedRequestSerializer): @@ -106,10 +120,15 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer): ] field_names = [ "q", + "source", + "excluded_source", "license", "license_type", "creator", "tags", + # TODO: Uncomment after https://github.com/WordPress/openverse/issues/3919 + # "collection", + # "tag", "title", "filter_dead", "extension", @@ -120,8 +139,16 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer): # "unstable__authority", # "unstable__authority_boost", # "unstable__include_sensitive_results", - *PaginatedRequestSerializer.field_names, ] + # TODO: Remove after https://github.com/WordPress/openverse/issues/3919 + if settings.SHOW_COLLECTION_DOCS: + field_names.extend( + [ + TAG, + COLLECTION, + ] + ) + field_names.extend(PaginatedRequestSerializer.field_names) """ Keep the fields names in sync with the actual fields below as this list is used to generate Swagger documentation. @@ -132,28 +159,14 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer): help_text="A query string that should not exceed 200 characters in length", required=False, ) - license = serializers.CharField( - label="licenses", - help_text=make_comma_separated_help_text(LICENSE_GROUPS["all"], "licenses"), + source = serializers.CharField( + label="provider", required=False, ) - license_type = serializers.CharField( - label="license type", - help_text=make_comma_separated_help_text( - LICENSE_GROUPS.keys(), "license types" - ), + excluded_source = serializers.CharField( + label="excluded_provider", required=False, ) - creator = serializers.CharField( - label="creator", - help_text="Search by creator only. Cannot be used with `q`. The search " - "is fuzzy, so `creator=john` will match any value that includes the " - "word `john`. If the value contains space, items that contain any of " - "the words in the value will match. To search for several values, " - "join them with a comma.", - required=False, - max_length=200, - ) tags = serializers.CharField( label="tags", help_text="Search by tag only. Cannot be used with `q`. The search " @@ -173,6 +186,24 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer): required=False, max_length=200, ) + creator = serializers.CharField( + label="creator", + help_text=CREATOR_HELP_TEXT, + required=False, + max_length=200, + ) + license = serializers.CharField( + label="licenses", + help_text=make_comma_separated_help_text(LICENSE_GROUPS["all"], "licenses"), + required=False, + ) + license_type = serializers.CharField( + label="license type", + help_text=make_comma_separated_help_text( + LICENSE_GROUPS.keys(), "license types" + ), + required=False, + ) filter_dead = serializers.BooleanField( label="filter_dead", help_text="Control whether 404 links are filtered out.", @@ -235,6 +266,21 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer): default=False, ) + unstable__tag = serializers.CharField( + label="tag", + source="tag", + help_text=TAG_HELP_TEXT, + required=False, + max_length=200, + ) + unstable__collection = serializers.ChoiceField( + source="collection", + label="collection", + choices=["tag", "source", "creator"], + help_text=COLLECTION_HELP_TEXT, + required=False, + ) + # The ``internal__`` prefix is used in the query params. # If you rename these fields, update the following references: # - ``field_names`` in ``MediaSearchRequestSerializer`` @@ -245,6 +291,24 @@ class MediaSearchRequestSerializer(PaginatedRequestSerializer): required=False, ) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.media_type = self.context.get("media_type") + if not self.media_type: + raise ValueError( + "The media request serializer's `media_type` context variable must be set." + ) + media_path = {"image": "images", "audio": "audio"}[self.media_type] + variables = { + "origin": settings.CANONICAL_ORIGIN, + "media_path": media_path, + } + + self.fields["source"].help_text = SOURCE_HELP_TEXT.format(**variables) + self.fields["excluded_source"].help_text = EXCLUDED_SOURCE_HELP_TEXT.format( + **variables + ) + def is_request_anonymous(self): request = self.context.get("request") return getattr(request, "auth", None) is None @@ -285,6 +349,64 @@ def validate_license_type(value): intersected = set.intersection(*license_groups) return ",".join(intersected) + def validate_unstable__collection(self, value): + if self.initial_data.get("q", None) is not None: + raise serializers.ValidationError( + "The `collection` parameter cannot be used with the `q` parameter." + ) + if value == "tag" and not self.initial_data.get(TAG): + raise serializers.ValidationError( + f"The `{TAG}` parameter is required when `{COLLECTION}` is set to `tag`." + ) + if value == "source" and not self.initial_data.get("source"): + raise serializers.ValidationError( + f"The `source` parameter is required when `{COLLECTION}` is set to `source`." + ) + if value == "creator" and not ( + self.initial_data.get("creator") and self.initial_data.get("source") + ): + raise serializers.ValidationError( + f"The `creator` and `source` parameters are required when `{COLLECTION}` is set to `creator`." + ) + return value + + def validate_source(self, value): + """ + For the regular searches, split the value.lower() by comma and only return the + source names that are in the search controller's sources list for the media type. + For the collection=tag, return the value as is. It is ignored in the query builder. + For source and creator collections, accept the value as is, without lower-casing + or splitting, and check if it's in the valid source name list. + This function validates the source and excluded_source fields, but `excluded_source` + is ignored for collection requests. + """ + allowed_sources = list(search_controller.get_sources(self.media_type).keys()) + sources_list = ", ".join([f"'{s}'" for s in allowed_sources]) + collection = self.initial_data.get(COLLECTION) + + # For collection=tag, return the value as is. It is ignored in the query builder. + if collection == "tag": + return value + + if collection: + if value not in allowed_sources: + raise serializers.ValidationError( + f"Invalid source parameter '{value}'. Use one of the valid sources: {sources_list}" + ) + return value + else: + sources = value.lower().split(",") + valid_sources = [source for source in sources if source in allowed_sources] + return ",".join(valid_sources) + + def validate_excluded_source(self, input_sources): + if "source" in self.initial_data: + raise serializers.ValidationError( + "Cannot set both 'source' and 'excluded_source'. " + "Use exactly one of these." + ) + return self.validate_source(input_sources) + def validate_creator(self, value): return self._truncate(value) @@ -331,6 +453,9 @@ def validate_internal__index(self, value): return None if not settings.ES.indices.exists(value): # ``exists`` includes aliases. raise serializers.ValidationError(f"Invalid index name `{value}`.") + + if not value.startswith(self.media_type): + raise serializers.ValidationError(f"Invalid index name `{value}`.") return value @staticmethod @@ -611,70 +736,6 @@ def to_representation(self, *args, **kwargs): ####################### -def get_search_request_source_serializer(media_type): - media_path = { - "image": "images", - "audio": "audio", - }[media_type] - - class MediaSearchRequestSourceSerializer(serializers.Serializer): - """Parses and validates the source/not_source fields from the query params.""" - - field_names = [ - "source", - "excluded_source", - ] - """ - Keep the fields names in sync with the actual fields below as this list is - used to generate Swagger documentation. - """ - - _field_attrs = { - "help_text": ( - "A comma separated list of data sources; valid values are " - "``source_name``s from the stats endpoint: " - f"{settings.CANONICAL_ORIGIN}/v1/{media_path}/stats/." - ), - "required": False, - } - - source = serializers.CharField( - label="provider", - **_field_attrs, - ) - excluded_source = serializers.CharField( - label="excluded_provider", - **_field_attrs, - ) - - @staticmethod - def validate_source_field(value): - """Check whether source is a valid source.""" - - allowed_sources = list(search_controller.get_sources(media_type).keys()) - sources = value.lower().split(",") - sources = [source for source in sources if source in allowed_sources] - value = ",".join(sources) - return value - - def validate_source(self, input_sources): - return self.validate_source_field(input_sources) - - def validate_excluded_source(self, input_sources): - return self.validate_source(input_sources) - - def validate(self, data): - data = super().validate(data) - if "source" in self.initial_data and "excluded_source" in self.initial_data: - raise serializers.ValidationError( - "Cannot set both 'source' and 'excluded_source'. " - "Use exactly one of these." - ) - return data - - return MediaSearchRequestSourceSerializer - - def get_hyperlinks_serializer(media_type): class MediaHyperlinksSerializer(serializers.Serializer): """ diff --git a/api/api/views/audio_views.py b/api/api/views/audio_views.py index 383697d55cb..c87162611db 100644 --- a/api/api/views/audio_views.py +++ b/api/api/views/audio_views.py @@ -7,20 +7,16 @@ from api.constants.media_types import AUDIO_TYPE from api.docs.audio_docs import ( - creator_collection, detail, related, report, search, - source_collection, stats, - tag_collection, waveform, ) from api.docs.audio_docs import thumbnail as thumbnail_docs from api.models import Audio from api.serializers.audio_serializers import ( - AudioCollectionRequestSerializer, AudioReportRequestSerializer, AudioSearchRequestSerializer, AudioSerializer, @@ -37,9 +33,6 @@ stats=stats, retrieve=detail, related=related, - tag_collection=tag_collection, - creator_collection=creator_collection, - source_collection=source_collection, ) class AudioViewSet(MediaViewSet): """Viewset for all endpoints pertaining to audio.""" @@ -50,7 +43,6 @@ class AudioViewSet(MediaViewSet): default_index = settings.MEDIA_INDEX_MAPPING[AUDIO_TYPE] serializer_class = AudioSerializer - collection_serializer_class = AudioCollectionRequestSerializer def get_queryset(self): return super().get_queryset().select_related("sensitive_audio", "audioset") diff --git a/api/api/views/image_views.py b/api/api/views/image_views.py index 9afa1538a73..c99b189ed6d 100644 --- a/api/api/views/image_views.py +++ b/api/api/views/image_views.py @@ -11,15 +11,12 @@ from api.constants.media_types import IMAGE_TYPE from api.docs.image_docs import ( - creator_collection, detail, oembed, related, report, search, - source_collection, stats, - tag_collection, ) from api.docs.image_docs import thumbnail as thumbnail_docs from api.docs.image_docs import watermark as watermark_doc @@ -32,7 +29,6 @@ OembedSerializer, WatermarkRequestSerializer, ) -from api.serializers.media_serializers import PaginatedRequestSerializer from api.utils import image_proxy from api.utils.aiohttp import get_aiohttp_session from api.utils.asyncio import aget_object_or_404 @@ -46,9 +42,6 @@ stats=stats, retrieve=detail, related=related, - tag_collection=tag_collection, - creator_collection=creator_collection, - source_collection=source_collection, ) class ImageViewSet(MediaViewSet): """Viewset for all endpoints pertaining to images.""" @@ -59,7 +52,6 @@ class ImageViewSet(MediaViewSet): default_index = settings.MEDIA_INDEX_MAPPING[IMAGE_TYPE] serializer_class = ImageSerializer - collection_serializer_class = PaginatedRequestSerializer OEMBED_HEADERS = { "User-Agent": settings.OUTBOUND_USER_AGENT_TEMPLATE.format(purpose="OEmbed"), diff --git a/api/api/views/media_views.py b/api/api/views/media_views.py index 5fa05d8609f..b242eceb833 100644 --- a/api/api/views/media_views.py +++ b/api/api/views/media_views.py @@ -12,12 +12,11 @@ from asgiref.sync import sync_to_async from api.constants.media_types import MediaType -from api.constants.search import SearchStrategy from api.controllers import search_controller from api.controllers.elasticsearch.related import related_media from api.models import ContentProvider from api.models.media import AbstractMedia -from api.serializers import audio_serializers, media_serializers +from api.serializers import media_serializers from api.serializers.provider_serializers import ProviderSerializer from api.utils import image_proxy from api.utils.pagination import StandardPagination @@ -32,7 +31,6 @@ logger = logging.getLogger(__name__) MediaListRequestSerializer = Union[ - audio_serializers.AudioCollectionRequestSerializer, media_serializers.PaginatedRequestSerializer, media_serializers.MediaSearchRequestSerializer, ] @@ -59,7 +57,6 @@ class MediaViewSet(AsyncViewSetMixin, AsyncAPIView, ReadOnlyModelViewSet): model_class: type[AbstractMedia] = None media_type: MediaType | None = None query_serializer_class = None - collection_serializer_class = None default_index = None def __init__(self, *args, **kwargs): @@ -100,7 +97,8 @@ def get_serializer_context(self): def _get_request_serializer(self, request): req_serializer = self.query_serializer_class( - data=request.query_params, context={"request": request} + data=request.query_params, + context={"request": request, "media_type": self.media_type}, ) req_serializer.is_valid(raise_exception=True) return req_serializer @@ -145,7 +143,7 @@ def retrieve(self, request, *_, **__): def list(self, request, *_, **__): params = self._get_request_serializer(request) - return self.get_media_results(request, "search", params) + return self.get_media_results(request, params) def _validate_source(self, source): valid_sources = search_controller.get_sources(self.media_type) @@ -155,47 +153,10 @@ def _validate_source(self, source): detail=f"Invalid source '{source}'. Valid sources are: {valid_string}.", ) - def collection(self, request, tag, source, creator, *_, **__): - if tag: - collection_params = {"tag": tag} - elif creator: - collection_params = {"creator": creator, "source": source} - else: - collection_params = {"source": source} - if source: - self._validate_source(source) - - params = self.collection_serializer_class( - data=request.query_params, context={"request": request} - ) - params.is_valid(raise_exception=True) - - return self.get_media_results(request, "collection", params, collection_params) - - @action(detail=False, methods=["get"], url_path=r"tag/(?P[^/.]+)") - def tag_collection(self, request, tag, *_, **__): - return self.collection(request, tag, None, None) - - @action(detail=False, methods=["get"], url_path=r"source/(?P[^/.]+)") - def source_collection(self, request, source, *_, **__): - return self.collection(request, None, source, None) - - @action( - detail=False, - methods=["get"], - url_path=r"source/(?P[^/.]+)/creator/(?P.+)", - ) - def creator_collection(self, request, source, creator): - return self.collection(request, None, source, creator) - - # Common functionality for search and collection views - def get_media_results( self, request, - strategy: SearchStrategy, params: MediaListRequestSerializer, - collection_params: dict[str, str] | None = None, ): page_size = self.paginator.page_size = params.data["page_size"] page = self.paginator.page = params.data["page"] @@ -219,9 +180,7 @@ def get_media_results( num_results, search_context, ) = search_controller.query_media( - strategy, params, - collection_params, search_index, exact_index, page_size, diff --git a/api/conf/settings/misc.py b/api/conf/settings/misc.py index 4b470a70941..fa494fda8b0 100644 --- a/api/conf/settings/misc.py +++ b/api/conf/settings/misc.py @@ -3,6 +3,8 @@ from decouple import config +SHOW_COLLECTION_DOCS = config("SHOW_COLLECTION_DOCS", cast=bool, default=False) + FILTER_DEAD_LINKS_BY_DEFAULT = config( "FILTER_DEAD_LINKS_BY_DEFAULT", cast=bool, default=True ) diff --git a/api/env.template b/api/env.template index 5b04b6aecee..e8f0164fbba 100644 --- a/api/env.template +++ b/api/env.template @@ -56,3 +56,4 @@ IS_PROXIED=False FILTER_DEAD_LINKS_BY_DEFAULT=False ENABLE_FILTERED_INDEX_QUERIES=True +SHOW_COLLECTION_DOCS=False diff --git a/api/test/integration/test_media_integration.py b/api/test/integration/test_media_integration.py index a4f3ca968f5..c933e57cc5e 100644 --- a/api/test/integration/test_media_integration.py +++ b/api/test/integration/test_media_integration.py @@ -6,6 +6,7 @@ import pytest from api.constants.licenses import LICENSE_GROUPS +from api.constants.parameters import COLLECTION, TAG pytestmark = pytest.mark.django_db @@ -401,14 +402,14 @@ def test_report_is_created(single_result, api_client): #################### -# Collection views # +# Collection results # #################### def test_collection_by_tag(media_type: MediaType, api_client): tags = media_type.tags for tag in tags: - res = api_client.get(f"/v1/{media_type.path}/tag/{tag}/") + res = api_client.get(f"/v1/{media_type.path}/?{COLLECTION}=tag&{TAG}={tag}") assert res.status_code == 200 data = res.json() @@ -421,7 +422,7 @@ def test_collection_by_tag(media_type: MediaType, api_client): def test_collection_by_source(media_type: MediaType, api_client): source = api_client.get(f"/v1/{media_type.path}/stats/").json()[0]["source_name"] - res = api_client.get(f"/v1/{media_type.path}/source/{source}/") + res = api_client.get(f"/v1/{media_type.path}/?{COLLECTION}=source&source={source}") assert res.status_code == 200 data = res.json() @@ -433,11 +434,15 @@ def test_collection_by_creator(media_type: MediaType, api_client): source_res = api_client.get(f"/v1/{media_type.path}/stats/") source = source_res.json()[0]["source_name"] - first_res = api_client.get(f"/v1/{media_type.path}/source/{source}/") + first_res = api_client.get( + f"/v1/{media_type.path}/?{COLLECTION}=source&source={source}" + ) first = first_res.json()["results"][0] assert (creator := first.get("creator")) - res = api_client.get(f"/v1/{media_type.path}/source/{source}/creator/{creator}/") + res = api_client.get( + f"/v1/{media_type.path}/?{COLLECTION}=creator&source={source}&creator={creator}" + ) assert res.status_code == 200 data = res.json() diff --git a/api/test/unit/controllers/test_search_controller.py b/api/test/unit/controllers/test_search_controller.py index f64d3fa3919..66bf55caf78 100644 --- a/api/test/unit/controllers/test_search_controller.py +++ b/api/test/unit/controllers/test_search_controller.py @@ -481,14 +481,15 @@ def test_search_tallies_pages_less_than_5( data={ "q": "dogs", "unstable__include_sensitive_results": include_sensitive_results, - } + }, + context={ + "media_type": media_type_config.media_type, + }, ) serializer.is_valid() search_controller.query_media( - strategy="search", search_params=serializer, - collection_params=None, ip=0, origin_index=media_type_config.origin_index, exact_index=False, @@ -523,13 +524,13 @@ def test_search_tallies_handles_empty_page( ): mock_post_process_results.return_value = None - serializer = media_type_config.search_request_serializer(data={"q": "dogs"}) + serializer = media_type_config.search_request_serializer( + data={"q": "dogs"}, context={"media_type": media_type_config.media_type} + ) serializer.is_valid() search_controller.query_media( - strategy="search", search_params=serializer, - collection_params=None, ip=0, origin_index=media_type_config.origin_index, exact_index=False, @@ -567,14 +568,13 @@ def test_resolves_index( settings.ENABLE_FILTERED_INDEX_QUERIES = feature_enabled serializer = media_type_config.search_request_serializer( - data={"unstable__include_sensitive_results": include_sensitive_results} + data={"unstable__include_sensitive_results": include_sensitive_results}, + context={"media_type": media_type_config.media_type}, ) serializer.is_valid() search_controller.query_media( - strategy="search", search_params=serializer, - collection_params=None, ip=0, origin_index=origin_index, exact_index=False, @@ -635,13 +635,12 @@ def test_no_post_process_results_recursion( serializer = image_media_type_config.search_request_serializer( # This query string does not matter, ultimately, as pook is mocking # the ES response regardless of the input - data={"q": "bird perched"} + data={"q": "bird perched"}, + context={"media_type": image_media_type_config.media_type}, ) serializer.is_valid() results, _, _, _ = search_controller.query_media( - strategy="search", search_params=serializer, - collection_params=None, ip=0, origin_index=image_media_type_config.origin_index, exact_index=True, @@ -775,13 +774,12 @@ def test_post_process_results_recurses_as_needed( serializer = image_media_type_config.search_request_serializer( # This query string does not matter, ultimately, as pook is mocking # the ES response regardless of the input - data={"q": "bird perched"} + data={"q": "bird perched"}, + context={"media_type": image_media_type_config.media_type}, ) serializer.is_valid() results, _, _, _ = search_controller.query_media( - strategy="search", search_params=serializer, - collection_params=None, ip=0, origin_index=image_media_type_config.origin_index, exact_index=True, @@ -817,15 +815,14 @@ def _delete_all_results_but_first(_, __, results, ___): serializer = image_media_type_config.search_request_serializer( # This query string does not matter, ultimately, as pook is mocking # the ES response regardless of the input - data={"q": "bird perched"} + data={"q": "bird perched"}, + context={"media_type": image_media_type_config.media_type}, ) serializer.is_valid() with caplog.at_level(logging.INFO): results, _, _, _ = search_controller.query_media( - strategy="search", search_params=serializer, - collection_params=None, ip=0, origin_index=image_media_type_config.origin_index, exact_index=True, diff --git a/api/test/unit/controllers/test_search_controller_search_query.py b/api/test/unit/controllers/test_search_controller_search_query.py index 081bd356c27..70b4c117321 100644 --- a/api/test/unit/controllers/test_search_controller_search_query.py +++ b/api/test/unit/controllers/test_search_controller_search_query.py @@ -1,6 +1,7 @@ import pytest from elasticsearch_dsl import Q +from api.constants.parameters import COLLECTION, TAG from api.controllers import search_controller from api.controllers.search_controller import ( DEFAULT_SQS_FLAGS, @@ -32,7 +33,9 @@ def excluded_providers_cache(django_cache, monkeypatch): def test_create_search_query_empty(media_type_config): - serializer = media_type_config.search_request_serializer(data={}) + serializer = media_type_config.search_request_serializer( + data={}, context={"media_type": "image"} + ) serializer.is_valid(raise_exception=True) search_query = search_controller.build_search_query(serializer) actual_query_clauses = search_query.to_dict()["bool"] @@ -48,7 +51,9 @@ def test_create_search_query_empty(media_type_config): def test_create_search_query_empty_no_ranking(media_type_config, settings): settings.USE_RANK_FEATURES = False - serializer = media_type_config.search_request_serializer(data={}) + serializer = media_type_config.search_request_serializer( + data={}, context={"media_type": media_type_config.media_type} + ) serializer.is_valid(raise_exception=True) search_query = search_controller.build_search_query(serializer) actual_query_clauses = search_query.to_dict()["bool"] @@ -60,7 +65,9 @@ def test_create_search_query_empty_no_ranking(media_type_config, settings): def test_create_search_query_q_search_no_filters(media_type_config): - serializer = media_type_config.search_request_serializer(data={"q": "cat"}) + serializer = media_type_config.search_request_serializer( + data={"q": "cat"}, context={"media_type": media_type_config.media_type} + ) serializer.is_valid(raise_exception=True) search_query = search_controller.build_search_query(serializer) actual_query_clauses = search_query.to_dict()["bool"] @@ -93,7 +100,8 @@ def test_create_search_query_q_search_no_filters(media_type_config): def test_create_search_query_q_search_with_quotes_adds_raw_suffix(media_type_config): serializer = media_type_config.search_request_serializer( - data={"q": '"The cutest cat"'} + data={"q": '"The cutest cat"'}, + context={"media_type": media_type_config.media_type}, ) serializer.is_valid(raise_exception=True) search_query = search_controller.build_search_query(serializer) @@ -139,7 +147,8 @@ def test_create_search_query_q_search_with_filters(image_media_type_config): "unstable__authority": True, "unstable__authority_boost": "2.5", "unstable__include_sensitive_results": True, - } + }, + context={"media_type": image_media_type_config.media_type}, ) serializer.is_valid(raise_exception=True) search_query = search_controller.build_search_query(serializer) @@ -182,7 +191,8 @@ def test_create_search_query_non_q_query(image_media_type_config): "creator": "Artist From Openverse", "title": "kitten🐱", "tags": "cute", - } + }, + context={"media_type": image_media_type_config.media_type}, ) serializer.is_valid(raise_exception=True) search_query = search_controller.build_search_query(serializer) @@ -226,7 +236,8 @@ def test_create_search_query_q_search_license_license_type_creates_2_terms_filte data={ "license": "by-nc", "license_type": "commercial", - } + }, + context={"media_type": image_media_type_config.media_type}, ) serializer.is_valid(raise_exception=True) search_query = search_controller.build_search_query(serializer) @@ -261,7 +272,9 @@ def test_create_search_query_empty_with_dynamically_excluded_providers( image_media_type_config, excluded_providers_cache, ): - serializer = image_media_type_config.search_request_serializer(data={}) + serializer = image_media_type_config.search_request_serializer( + data={}, context={"media_type": image_media_type_config.media_type} + ) serializer.is_valid(raise_exception=True) search_query = search_controller.build_search_query(serializer) @@ -283,22 +296,22 @@ def test_create_search_query_empty_with_dynamically_excluded_providers( ("data", "expected_query_filter"), [ pytest.param( - {"tag": "art"}, + {COLLECTION: "tag", TAG: "art"}, [{"term": {"tags.name.keyword": "art"}}], id="filter_by_tag", ), pytest.param( - {"tag": "art, photography"}, + {COLLECTION: "tag", TAG: "art, photography"}, [{"term": {"tags.name.keyword": "art, photography"}}], id="filter_by_tag_treats_punctuation_as_part_of_tag", ), pytest.param( - {"source": "flickr"}, + {COLLECTION: "source", "source": "flickr"}, [{"term": {"source": "flickr"}}], id="filter_by_source", ), pytest.param( - {"source": "flickr", "creator": "nasa"}, + {COLLECTION: "creator", "source": "flickr", "creator": "nasa"}, [ {"term": {"source": "flickr"}}, {"term": {"creator.keyword": "nasa"}}, @@ -308,9 +321,11 @@ def test_create_search_query_empty_with_dynamically_excluded_providers( ], ) def test_build_collection_query(image_media_type_config, data, expected_query_filter): - serializer = image_media_type_config.search_request_serializer(data={}) + serializer = image_media_type_config.search_request_serializer( + data=data, context={"media_type": image_media_type_config.media_type} + ) serializer.is_valid(raise_exception=True) - actual_query = search_controller.build_collection_query(serializer, data) + actual_query = search_controller.build_collection_query(serializer) expected_query = Q( "bool", filter=expected_query_filter, diff --git a/api/test/unit/serializers/test_media_serializers.py b/api/test/unit/serializers/test_media_serializers.py index b6819e3c43d..3b854980c73 100644 --- a/api/test/unit/serializers/test_media_serializers.py +++ b/api/test/unit/serializers/test_media_serializers.py @@ -3,7 +3,8 @@ from unittest.mock import MagicMock, patch from django.conf import settings -from rest_framework.exceptions import NotAuthenticated, ValidationError +from rest_framework.exceptions import NotAuthenticated +from rest_framework.serializers import ValidationError from rest_framework.test import force_authenticate from rest_framework.views import APIView @@ -81,7 +82,8 @@ def anon_request(request_factory): def test_page_size_validation(page_size, authenticated, anon_request, authed_request): request = authed_request if authenticated else anon_request serializer = MediaSearchRequestSerializer( - context={"request": request}, data={"page_size": page_size} + context={"request": request, "media_type": "image"}, + data={"page_size": page_size}, ) assert serializer.is_valid(raise_exception=True) @@ -159,7 +161,9 @@ def test_media_serializer_sensitivity( def test_search_request_serializer_include_sensitive_results_validation_well_formed_request( data: dict, result ): - serializer = MediaSearchRequestSerializer(data=data) + serializer = MediaSearchRequestSerializer( + data=data, context={"media_type": "image"} + ) assert serializer.is_valid() # The expected value should be mapped from the field actually # passed in data @@ -176,7 +180,9 @@ def test_search_request_serializer_include_sensitive_results_validation_well_for ), ) def test_search_request_serializer_include_sensitive_results_malformed_request(data): - serializer = MediaSearchRequestSerializer(data=data) + serializer = MediaSearchRequestSerializer( + data=data, context={"media_type": "image"} + ) assert not serializer.is_valid() @@ -196,7 +202,8 @@ def test_index_is_only_set_if_authenticated( request = authed_request if authenticated else anon_request serializer = ImageSearchRequestSerializer( - data={"internal__index": "image-some-index"}, context={"request": request} + data={"internal__index": "image-some-index"}, + context={"request": request, "media_type": "image"}, ) assert serializer.is_valid() assert serializer.validated_data.get("index") == ( @@ -221,7 +228,8 @@ def test_index_is_only_set_if_valid(mock_es, index, is_valid, authed_request): mock_es.indices.exists = lambda index: "exists" in index serializer = ImageSearchRequestSerializer( - data={"internal__index": index}, context={"request": authed_request} + data={"internal__index": index}, + context={"request": authed_request, "media_type": "image"}, ) assert serializer.is_valid() == is_valid assert serializer.validated_data.get("index") == (index if is_valid else None) @@ -242,9 +250,11 @@ def test_index_is_only_set_if_matches_media_type( mock_es, serializer_class, index, is_valid, authed_request ): mock_es.indices.exists.return_value = True + media_type = "audio" if serializer_class.__name__.startswith("Audio") else "image" serializer = serializer_class( - data={"internal__index": index}, context={"request": authed_request} + data={"internal__index": index}, + context={"request": authed_request, "media_type": media_type}, ) assert serializer.is_valid() == is_valid assert serializer.validated_data.get("index") == (index if is_valid else None) diff --git a/api/test/unit/views/test_media_views.py b/api/test/unit/views/test_media_views.py index 4042ce0f9c3..15c211cadf4 100644 --- a/api/test/unit/views/test_media_views.py +++ b/api/test/unit/views/test_media_views.py @@ -2,8 +2,6 @@ from unittest.mock import MagicMock, patch from uuid import uuid4 -from rest_framework.response import Response - import pytest import pytest_django.asserts @@ -49,36 +47,6 @@ def test_retrieve_query_count(api_client, media_type_config): assert res.status_code == 200 -@pytest.mark.django_db -@pytest.mark.parametrize( - "path, expected_params", - [ - pytest.param("tag/cat/", {"tag": "cat"}, id="tag"), - pytest.param("source/flickr/", {"source": "flickr"}, id="source"), - pytest.param( - "source/flickr/creator/cat/", - {"source": "flickr", "creator": "cat"}, - id="source_creator", - ), - ], -) -def test_collection_parameters(path, expected_params, api_client): - mock_get_media_results = MagicMock(return_value=Response()) - - with patch( - "api.views.media_views.MediaViewSet.get_media_results", - new_callable=lambda: mock_get_media_results, - ) as mock_get_media_results: - api_client.get(f"/v1/images/{path}") - - actual_params = mock_get_media_results.call_args[0][3] - request_kind = mock_get_media_results.call_args[0][1] - - assert mock_get_media_results.called - assert actual_params == expected_params - assert request_kind == "collection" - - @pytest.mark.parametrize( "filter_content", (True, False), ids=lambda x: "filtered" if x else "not_filtered" )