Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Fix dynamically excluded providers caching" #3399

Merged
merged 1 commit into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 8 additions & 14 deletions api/api/controllers/search_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
NESTING_THRESHOLD = config("POST_PROCESS_NESTING_THRESHOLD", cast=int, default=5)
SOURCE_CACHE_TIMEOUT = 60 * 60 * 4 # 4 hours
FILTER_CACHE_TIMEOUT = 30
FILTERED_PROVIDERS_CACHE_KEY = "filtered_providers"
THUMBNAIL = "thumbnail"
URL = "url"
PROVIDER = "provider"
Expand Down Expand Up @@ -171,24 +170,19 @@ def get_excluded_providers_query() -> Q | None:
Hide data sources from the catalog dynamically.
To exclude a provider, set ``filter_content`` to ``True`` in the
``ContentProvider`` model in Django admin.
The list of ``provider_identifier``s is cached in Redis with
`:1:FILTER_CACHE_KEY` key.
"""

filtered_providers = cache.get(key=FILTERED_PROVIDERS_CACHE_KEY)
filter_cache_key = "filtered_providers"
filtered_providers = cache.get(key=filter_cache_key)
if not filtered_providers:
filtered_providers = list(
models.ContentProvider.objects.filter(filter_content=True).values_list(
"provider_identifier", flat=True
)
)
filtered_providers = models.ContentProvider.objects.filter(
filter_content=True
).values("provider_identifier")
cache.set(
key=FILTERED_PROVIDERS_CACHE_KEY,
timeout=FILTER_CACHE_TIMEOUT,
value=filtered_providers,
key=filter_cache_key, timeout=FILTER_CACHE_TIMEOUT, value=filtered_providers
)
if filtered_providers:
return Q("terms", provider=filtered_providers)
if provider_list := [f["provider_identifier"] for f in filtered_providers]:
return Q("terms", provider=provider_list)
return None


Expand Down
8 changes: 4 additions & 4 deletions api/test/unit/controllers/elasticsearch/test_related.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,21 @@
import pytest

from api.controllers.elasticsearch import related
from api.controllers.search_controller import FILTERED_PROVIDERS_CACHE_KEY


pytestmark = pytest.mark.django_db


@pytest.fixture
def excluded_providers_cache():
cache_key = "filtered_providers"
excluded_provider = "excluded_provider"
cache_value = [excluded_provider]
cache.set(FILTERED_PROVIDERS_CACHE_KEY, cache_value, timeout=1)
cache_value = [{"provider_identifier": excluded_provider}]
cache.set(cache_key, cache_value, timeout=1)

yield excluded_provider

cache.delete(FILTERED_PROVIDERS_CACHE_KEY)
cache.delete(cache_key)


@mock.patch(
Expand Down
28 changes: 0 additions & 28 deletions api/test/unit/controllers/test_search_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,13 @@
from unittest import mock
from uuid import uuid4

from django.core.cache import cache

import pook
import pytest
from django_redis import get_redis_connection
from elasticsearch_dsl import Search
from elasticsearch_dsl.query import Terms

from api.controllers import search_controller
from api.controllers.elasticsearch import helpers as es_helpers
from api.controllers.search_controller import FILTERED_PROVIDERS_CACHE_KEY
from api.utils import tallies
from api.utils.dead_link_mask import get_query_hash, save_query_mask
from api.utils.search_context import SearchContext
Expand All @@ -30,19 +26,6 @@
pytestmark = pytest.mark.django_db


@pytest.fixture()
def cache_setter():
keys = []

def _cache_setter(key, value):
keys.append(key)
cache.set(key, value, timeout=1)

yield _cache_setter
for key in keys:
cache.delete(key)


@pytest.mark.parametrize(
"total_hits, real_result_count, page_size, page, expected",
[
Expand Down Expand Up @@ -824,14 +807,3 @@ def _delete_all_results_but_first(_, __, results, ___):
filter_dead=True,
)
assert "Nesting threshold breached" in caplog.text


def test_get_excluded_providers_query_returns_None_when_no_provider_is_excluded():
assert search_controller.get_excluded_providers_query() is None


def test_get_excluded_providers_query_returns_when_cache_is_set(cache_setter):
cache_setter(FILTERED_PROVIDERS_CACHE_KEY, ["provider1", "provider2"])
assert search_controller.get_excluded_providers_query() == Terms(
provider=["provider1", "provider2"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,21 @@
from elasticsearch_dsl import Q

from api.controllers import search_controller
from api.controllers.search_controller import FILTERED_PROVIDERS_CACHE_KEY


pytestmark = pytest.mark.django_db


@pytest.fixture
def excluded_providers_cache():
cache_key = "filtered_providers"
excluded_provider = "excluded_provider"
cache_value = [excluded_provider]
cache.set(FILTERED_PROVIDERS_CACHE_KEY, cache_value, timeout=1)
cache_value = [{"provider_identifier": excluded_provider}]
cache.set(cache_key, cache_value, timeout=1)

yield excluded_provider

cache.delete(FILTERED_PROVIDERS_CACHE_KEY)
cache.delete(cache_key)


def test_create_search_query_empty(media_type_config):
Expand Down