diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 89d4b1dc7a..8cac4f50ba 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -14,7 +14,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Login to Docker Hub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} @@ -33,7 +33,7 @@ jobs: run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v3 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/flp-dependencies-pr.yml b/.github/workflows/flp-dependencies-pr.yml index 710082cdd2..a3d4fdf4ee 100644 --- a/.github/workflows/flp-dependencies-pr.yml +++ b/.github/workflows/flp-dependencies-pr.yml @@ -24,7 +24,7 @@ jobs: poetry update courts-db eyecite juriscraper reporters-db - name: Create Pull Request - uses: peter-evans/create-pull-request@v3 + uses: peter-evans/create-pull-request@v7 with: commit-message: Update freelawproject dependencies title: Update freelawproject dependencies diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 92d2426e66..38d8297458 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -53,7 +53,7 @@ jobs: # Build and cache docker images so tests are always run on the latest # dependencies - name: Set up docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 with: driver-opts: network=host - name: Prebuild docker images diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 1b9ca92683..dc1d9bcb71 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -9,16 +9,14 @@ from functools import reduce, wraps from typing import Any, Callable, Dict, List, Literal -from asgiref.sync import sync_to_async +from asgiref.sync import async_to_sync from django.conf import settings -from django.core.cache import caches -from django.core.paginator import EmptyPage, Page -from django.db.models import Case, CharField +from django.core.paginator import Page +from django.db.models import Case from django.db.models import Q as QObject -from django.db.models import QuerySet, TextField, Value, When +from django.db.models import QuerySet, TextField, When from django.db.models.functions import Substr from django.forms.boundfield import BoundField -from django.http import HttpRequest from django.http.request import QueryDict from django.utils.html import strip_tags from django_elasticsearch_dsl.search import Search @@ -31,9 +29,7 @@ from cl.audio.models import Audio from cl.custom_filters.templatetags.text_filters import html_decode -from cl.lib.bot_detector import is_bot from cl.lib.date_time import midnight_pt -from cl.lib.paginators import ESPaginator from cl.lib.string_utils import trunc from cl.lib.types import ( ApiPositionMapping, @@ -59,6 +55,7 @@ RELATED_PATTERN, SEARCH_ALERTS_ORAL_ARGUMENT_ES_HL_FIELDS, SEARCH_HL_TAG, + SEARCH_MLT_OPINION_QUERY_FIELDS, SEARCH_OPINION_HL_FIELDS, SEARCH_OPINION_QUERY_FIELDS, SEARCH_ORAL_ARGUMENT_ES_HL_FIELDS, @@ -81,7 +78,6 @@ ) from cl.search.forms import SearchForm from cl.search.models import ( - PRECEDENTIAL_STATUS, SEARCH_TYPES, Court, Opinion, @@ -169,24 +165,33 @@ def build_daterange_query( return [] -def build_more_like_this_query(related_id: list[str]): - document_list = [{"_id": f"o_{id}"} for id in related_id] - more_like_this_fields = SEARCH_OPINION_QUERY_FIELDS.copy() - more_like_this_fields.extend( - [ - "type", - "text", - "caseName", - "docketNumber", - ] - ) - return Q( +async def build_more_like_this_query(related_ids: list[str]) -> Query: + """Build an ES "more like this" query based on related Opinion IDs. + + :param related_ids: A list of related Opinion IDs to build the query on. + :return: An ES query object with "more like this" query and + exclusions for specific opinion clusters. + """ + + document_list = [{"_id": f"o_{id}"} for id in related_ids] + more_like_this_fields = SEARCH_MLT_OPINION_QUERY_FIELDS.copy() + mlt_query = Q( "more_like_this", fields=more_like_this_fields, like=document_list, min_term_freq=1, max_query_terms=12, ) + # Exclude opinion clusters to which the related IDs to query belong. + cluster_ids_to_exclude = ( + OpinionCluster.objects.filter(sub_opinions__pk__in=related_ids) + .distinct("pk") + .values_list("pk", flat=True) + ) + cluster_ids_list = [pk async for pk in cluster_ids_to_exclude.aiterator()] + exclude_cluster_ids = [Q("terms", cluster_id=cluster_ids_list)] + bool_query = Q("bool", must=[mlt_query], must_not=exclude_cluster_ids) + return bool_query def make_es_boost_list(fields: Dict[str, float]) -> list[str]: @@ -1177,7 +1182,19 @@ def build_es_base_query( mlt_query = None if related_match: cluster_pks = related_match.group("pks").split(",") - mlt_query = build_more_like_this_query(cluster_pks) + mlt_query = async_to_sync(build_more_like_this_query)( + cluster_pks + ) + main_query, join_query = build_full_join_es_queries( + cd, + {"opinion": []}, + [], + mlt_query, + child_highlighting=False, + api_version=api_version, + ) + return search_query.query(main_query), join_query + opinion_search_fields = SEARCH_OPINION_QUERY_FIELDS child_fields = opinion_search_fields.copy() child_fields.extend( @@ -1441,6 +1458,12 @@ def add_es_highlighting( :param highlighting: Whether highlighting should be enabled in docs. :return: The modified Elasticsearch search query object with highlights set """ + + # Avoid highlighting for the related cluster query. + related_match = RELATED_PATTERN.search(cd.get("q", "")) + if related_match: + return search_query + highlighting_fields = {} highlighting_keyword_fields = [] hl_tag = ALERTS_HL_TAG if alerts else SEARCH_HL_TAG @@ -2035,7 +2058,6 @@ def fetch_es_results( main_doc_count_query = build_cardinality_count( main_doc_count_query, parent_unique_field ) - if child_docs_count_query: child_unique_field = cardinality_query_unique_ids[ SEARCH_TYPES.RECAP_DOCUMENT @@ -2461,12 +2483,13 @@ def build_full_join_es_queries( child_filters_original = deepcopy(child_filters) # Build child text query. child_fields = child_query_fields[child_type] - child_text_query = build_fulltext_query( - child_fields, cd.get("q", ""), only_queries=True - ) if mlt_query: - child_text_query.append(mlt_query) + child_text_query = [mlt_query] + else: + child_text_query = build_fulltext_query( + child_fields, cd.get("q", ""), only_queries=True + ) # Build parent filters. parent_filters = build_join_es_filters(cd) @@ -2602,7 +2625,7 @@ def build_full_join_es_queries( should=string_query, minimum_should_match=1, ) - if parent_query: + if parent_query and not mlt_query: q_should.append(parent_query) if not q_should: @@ -2758,91 +2781,6 @@ def merge_opinion_and_cluster(results: Page | dict) -> None: result["status_exact"] = result["status"] -async def get_related_clusters_with_cache_and_es( - search: Search, - cluster: OpinionCluster, - request: HttpRequest, -) -> tuple[Page | list, list[int], dict[str, str]]: - """Retrieve related opinion clusters from ES or cache. - - :param search: The ES Search object. - :param cluster: The current OpinionCluster. - :param request: The HttpRequest object. - :return: A three tuple containing a Page containing opinion clusters or an - empty list. A list containing the cluster sub opinions ids. A dic containing - the url_search_params. - """ - - # By default, all statuses are included. Retrieve the PRECEDENTIAL_STATUS - # attributes (since they're indexed in ES) instead of the NAMES values. - available_statuses = [status[0] for status in PRECEDENTIAL_STATUS.NAMES] - url_search_params = {f"stat_{v}": "on" for v in available_statuses} - search_params: CleanData = {} - # Opinions that belong to the targeted cluster - sub_opinion_ids = cluster.sub_opinions.values_list("pk", flat=True) - sub_opinion_pks = [pk async for pk in sub_opinion_ids] - if is_bot(request) or not sub_opinion_pks: - # If it is a bot or lacks sub-opinion IDs, return empty results - return [], [], url_search_params - - # Use cache if enabled - cache = caches["db_cache"] - mlt_cache_key = f"mlt-cluster-es:{cluster.pk}" - related_clusters = ( - await cache.aget(mlt_cache_key) if settings.RELATED_USE_CACHE else None - ) - - if settings.RELATED_FILTER_BY_STATUS: - # Filter results by status (e.g., Precedential) - # Update URL parameters accordingly - search_params[ - f"stat_{PRECEDENTIAL_STATUS.get_status_value(settings.RELATED_FILTER_BY_STATUS)}" - ] = True - url_search_params = { - f"stat_{PRECEDENTIAL_STATUS.get_status_value(settings.RELATED_FILTER_BY_STATUS)}": "on" - } - - if related_clusters is None: - sub_opinion_queries = ",".join(str(pk) for pk in sub_opinion_pks) - search_params["q"] = f"related:{sub_opinion_queries}" - search_params["type"] = SEARCH_TYPES.OPINION - query_dict = QueryDict("", mutable=True) - query_dict.update(search_params) - search_query, child_docs_count_query, _ = await sync_to_async( - build_es_main_query - )(search, search_params) - hits, _, error, total_query_results, _ = await sync_to_async( - fetch_es_results - )( - query_dict, - search_query, - child_docs_count_query, - 1, - settings.RELATED_COUNT, - ) - if error: - return [], [], url_search_params - - @sync_to_async - def paginate_related_clusters(total_results: int, results: Response): - paginator = ESPaginator( - total_results, results, settings.RELATED_COUNT - ) - try: - return paginator.page(1) - except EmptyPage: - return paginator.page(paginator.num_pages) - - related_clusters = await paginate_related_clusters( - total_query_results, hits - ) - - await cache.aset( - mlt_cache_key, related_clusters, settings.RELATED_CACHE_TIMEOUT - ) - return related_clusters, sub_opinion_pks, url_search_params - - def make_es_stats_variable( search_form: SearchForm, results: Page | Response, diff --git a/cl/opinion_page/templates/includes/opinions_sidebar.html b/cl/opinion_page/templates/includes/opinions_sidebar.html index 8ca4a0881c..24c114f90f 100644 --- a/cl/opinion_page/templates/includes/opinions_sidebar.html +++ b/cl/opinion_page/templates/includes/opinions_sidebar.html @@ -1,27 +1,15 @@ {% load text_filters %} {% load waffle_tags %}
This case has not yet been cited in our system.
+ {% if queries_timeout %} +Unable to retrieve citing clusters. Please try by clicking the button below:
+ + {% else %} +This case has not yet been cited in our system.
+ {% endif %} {% endif %}