fix(api): Added HL for Docket "d" and RECAPDocument "rd" search types
- Refactored tests to reuse code.
albertisfu committed Apr 26, 2024
1 parent 5745716 commit 9d57c24
Showing 5 changed files with 422 additions and 183 deletions.
20 changes: 10 additions & 10 deletions cl/api/pagination.py
@@ -6,9 +6,11 @@
from elasticsearch_dsl.response import Response as ESResponse
from rest_framework.exceptions import NotFound
from rest_framework.pagination import BasePagination, PageNumberPagination
from rest_framework.request import Request
from rest_framework.response import Response
from rest_framework.utils.urls import replace_query_param

from cl.search.api_utils import CursorESList
from cl.search.types import ESCursor


@@ -90,15 +92,13 @@ def __init__(self):
self.invalid_cursor_message = "Invalid cursor"

def paginate_queryset(
self, es_list_instance, request, view=None
self, es_list_instance: CursorESList, request: Request, view=None
) -> ESResponse:
"""Paginate the Elasticsearch query and retrieve the results."""

self.base_url = request.build_absolute_uri()

self.request = request
self.cursor = self.decode_cursor(request)

self.es_list_instance = es_list_instance
self.es_list_instance.set_pagination(self.cursor, self.page_size)
results = self.es_list_instance.get_paginated_results()
@@ -118,7 +118,7 @@ def get_paginated_response(self, data):
}
)

def get_next_link(self):
def get_next_link(self) -> str | None:
"""Constructs the URL for the next page based on the current page's
last item.
"""
@@ -131,7 +131,7 @@ def get_next_link(self):
cursor = ESCursor(search_after=search_after_sort_key, reverse=False)
return self.encode_cursor(cursor)

def get_previous_link(self):
def get_previous_link(self) -> str | None:
"""Constructs the URL for the next page based on the current page's
last item.
"""
@@ -146,7 +146,7 @@
)
return self.encode_cursor(cursor)

def decode_cursor(self, request):
def decode_cursor(self, request: Request) -> ESCursor | None:
"""Given a request with a cursor, return a `ESCursor` instance."""
encoded = request.query_params.get(self.cursor_query_param)
if encoded is None:
@@ -162,7 +162,7 @@ def decode_cursor(self, request):
raise NotFound(self.invalid_cursor_message)
return ESCursor(search_after=search_after, reverse=reverse)

def encode_cursor(self, cursor):
def encode_cursor(self, cursor: ESCursor) -> str:
"""Given a ESCursor instance, return an url with encoded cursor."""
tokens = {}
if cursor.search_after != 0:
@@ -176,7 +176,7 @@ def encode_cursor(self, cursor):
self.base_url, self.cursor_query_param, encoded
)

def get_results_count(self):
def get_results_count(self) -> dict[str, bool | int]:
"""Provides a structured count of results based on settings.
:return: A dictionary containing "exact" count and whether there are
@@ -193,7 +193,7 @@ def get_results_count(self):
> settings.ELASTICSEARCH_MAX_RESULT_COUNT,
}

def has_next(self):
def has_next(self) -> bool:
"""Determines if there is a next page based on the search_after key
and results count.
"""
@@ -206,7 +206,7 @@ def has_next(self):
# If going backward, it indicates that there was a next page.
return True

def has_prev(self):
def has_prev(self) -> bool:
"""Determines if there is a next page based on the search_after key
and results count.
"""
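The cursor pagination above serializes an `ESCursor` (the `search_after` sort key plus a `reverse` flag) into the opaque `cursor` query parameter, and `decode_cursor` raises `NotFound` for anything it cannot parse. A minimal sketch of that round trip, assuming a urlencoded-then-base64 token layout (the real token format lives in the collapsed bodies of `encode_cursor` and `decode_cursor` and may differ):

```python
import base64
from urllib.parse import parse_qs, urlencode


def encode_cursor_token(search_after: list | None, reverse: bool) -> str:
    """Pack the ES sort key and paging direction into a URL-safe token."""
    tokens: dict[str, str] = {}
    if search_after:
        # search_after is a list of sort values; join them for transport.
        tokens["s"] = "|".join(str(value) for value in search_after)
    if reverse:
        tokens["r"] = "1"
    return base64.urlsafe_b64encode(urlencode(tokens).encode()).decode()


def decode_cursor_token(encoded: str) -> tuple[list[str] | None, bool]:
    """Inverse of encode_cursor_token; bad tokens should surface as NotFound."""
    querystring = base64.urlsafe_b64decode(encoded.encode()).decode()
    tokens = parse_qs(querystring)
    search_after = tokens["s"][0].split("|") if "s" in tokens else None
    reverse = tokens.get("r", ["0"])[0] == "1"
    return search_after, reverse


token = encode_cursor_token(["2024-04-26", 12345], reverse=False)
print(decode_cursor_token(token))  # (['2024-04-26', '12345'], False)
```

Because clients only see the opaque token and follow the `next`/`previous` links, the sort-key layout can change without breaking the API contract.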
127 changes: 72 additions & 55 deletions cl/lib/elasticsearch_utils.py
@@ -805,7 +805,7 @@ def build_has_child_query(
highlighting_fields: dict[str, int] | None = None,
order_by: tuple[str, str] | None = None,
child_highlighting: bool = True,
api_version: Literal["v3", "v4"] = None,
api_version: Literal["v3", "v4"] | None = None,
) -> QueryString:
"""Build a 'has_child' query.
@@ -1548,7 +1548,7 @@ def fill_position_mapping(


def merge_unavailable_fields_on_parent_document(
results: Page | dict,
results: Page | dict | Response,
search_type: str,
request_type: Literal["frontend", "api"] = "frontend",
highlight: bool = True,
@@ -1589,14 +1589,21 @@
value = position_dict.get(person_id)
cleaned_name = re.sub("_dict", "", field.name)
result[cleaned_name] = value
case SEARCH_TYPES.RECAP if request_type == "api" and not highlight:
case (
SEARCH_TYPES.RECAP | SEARCH_TYPES.RECAP_DOCUMENT
) if request_type == "api" and not highlight:
# Retrieves the plain_text from the DB to fill the snippet when
# highlighting is disabled.
rd_ids = {
doc["_source"]["id"]
for entry in results
for doc in entry["child_docs"]
}

if search_type == SEARCH_TYPES.RECAP:
rd_ids = {
doc["_source"]["id"]
for entry in results
for doc in entry["child_docs"]
}
else:
rd_ids = {entry["id"] for entry in results}

recap_docs = (
RECAPDocument.objects.filter(pk__in=rd_ids)
.annotate(
@@ -1610,10 +1617,13 @@
doc["id"]: doc["plain_text_short"] for doc in recap_docs
}
for result in results:
for rd in result["child_docs"]:
rd["_source"]["plain_text"] = recap_docs_dict[
rd["_source"]["id"]
]
if search_type == SEARCH_TYPES.RECAP:
for rd in result["child_docs"]:
rd["_source"]["plain_text"] = recap_docs_dict[
rd["_source"]["id"]
]
else:
result["plain_text"] = recap_docs_dict[result["id"]]

case _:
return
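The widened `RECAP | RECAP_DOCUMENT` case backfills `plain_text` from the database whenever API highlighting is disabled, since the field is excluded from the ES source. Stripped of the ORM details, the merge handles two result shapes, nested `child_docs` for docket-style results and flat hits for the `rd` type, roughly as below (`fetch_texts` stands in for the single `RECAPDocument` query):

```python
def backfill_plain_text(results, nested: bool, fetch_texts) -> None:
    """Fill the snippet field from the DB when highlighting is disabled."""
    if nested:
        # Docket-style results ("r"/"d"): documents hang off each docket hit.
        rd_ids = {
            doc["_source"]["id"]
            for entry in results
            for doc in entry["child_docs"]
        }
    else:
        # RECAPDocument results ("rd"): each hit is itself a document.
        rd_ids = {entry["id"] for entry in results}

    texts = fetch_texts(rd_ids)  # one DB round trip for the whole page
    for entry in results:
        if nested:
            for doc in entry["child_docs"]:
                doc["_source"]["plain_text"] = texts[doc["_source"]["id"]]
        else:
            entry["plain_text"] = texts[entry["id"]]
```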
@@ -2114,42 +2124,34 @@ def apply_custom_score_to_parent_query(
child_order is used.
"""
child_order_by = get_child_sorting_key(cd, api_version)
if (
child_order_by
and all(child_order_by)
and cd["type"] in [SEARCH_TYPES.RECAP, SEARCH_TYPES.DOCKETS]
):
sort_field, order = child_order_by
if sort_field == "entry_date_filed":
# It applies a function score to the parent query to nullify the
# parent score (sets it to 0) to prioritize child documents sorting
# criteria. This will ensure that dockets without documents come
# last on results.
query = nullify_query_score(query)
elif sort_field == "dateFiled" and api_version:
# Applies a custom function score to sort dockets based on their
# dateFiled field. This serves as a workaround to enable the use of
# the search_after cursor for pagination on documents with a None
# dateFiled.
query = build_custom_function_score_for_date(
query, child_order_by, default_score=0
)

if (
child_order_by
and all(child_order_by)
and cd["type"] == SEARCH_TYPES.RECAP_DOCUMENT
):
sort_field, order = child_order_by
if sort_field == "dateFiled" and api_version:
# Applies a custom function score to sort dockets based on their
# dateFiled field. This serves as a workaround to enable the use of
# the search_after cursor for pagination on documents with a None
# dateFiled.
query = build_custom_function_score_for_date(
query, child_order_by, default_score=0
)

valid_child_order_by = child_order_by and all(child_order_by)
match cd["type"]:
case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS if valid_child_order_by:
sort_field, order = child_order_by
if sort_field == "entry_date_filed":
# It applies a function score to the parent query to nullify
# the parent score (sets it to 0) to prioritize child documents
# sorting criteria. This will ensure that dockets without
# documents come last on results.
query = nullify_query_score(query)
elif sort_field == "dateFiled" and api_version:
# Applies a custom function score to sort Dockets based on
# their dateFiled field. This serves as a workaround to enable
# the use of the search_after cursor for pagination on
# documents with a None dateFiled.
query = build_custom_function_score_for_date(
query, child_order_by, default_score=0
)
case SEARCH_TYPES.RECAP_DOCUMENT if valid_child_order_by:
sort_field, order = child_order_by
if sort_field in ["dateFiled", "entry_date_filed"] and api_version:
# Applies a custom function score to sort RECAPDocuments based
# on their docket dateFiled or entry_date_filed field. This
# serves as a workaround to enable the use of the search_after
# cursor for pagination on documents with a None dateFiled.
query = build_custom_function_score_for_date(
query, child_order_by, default_score=0
)
return query
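`build_custom_function_score_for_date` is not part of this diff, but the comments describe the technique: replace the relevance score with the sort date so that documents missing the date collapse to a deterministic default of 0, which keeps `search_after` cursors stable. A hedged sketch of the idea with `elasticsearch_dsl`; the field name, script, and signature are assumptions rather than the project's implementation:

```python
from elasticsearch_dsl import Q, SF


def score_by_date(query, date_field: str = "dateFiled", default_score: int = 0):
    """Swap the query score for the date's epoch millis, or a default when absent."""
    script = (
        f"doc['{date_field}'].size() == 0 ? {default_score} "
        f": doc['{date_field}'].value.toInstant().toEpochMilli()"
    )
    return Q(
        "function_score",
        query=query,
        functions=[SF("script_score", script={"source": script})],
        boost_mode="replace",  # discard the original relevance score entirely
    )
```

Sorting on `_score` then behaves like sorting on the date itself, with undated documents pinned to one end of the result set.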


@@ -2667,19 +2669,22 @@ def do_es_api_query(
s, join_query = build_es_base_query(
search_query, cd, cd["highlight"], api_version
)
extra_options: dict[str, dict[str, Any]] = {}
if api_version == "v3":
# Build query parameters for the ES V3 Search API endpoints.
# V3 endpoints display child documents. Here, the child documents query
# is retrieved, and extra parameters like highlighting, field exclusion,
# and sorting are set.
s = build_child_docs_query(
join_query,
cd=cd,
)
s = search_query.query(s)
main_query = search_query.query(s)
highlight_options, fields_to_exclude = build_highlights_dict(
highlighting_fields, hl_tag
)
s = s.source(excludes=fields_to_exclude)
extra_options: dict[str, dict[str, Any]] = {
"highlight": highlight_options
}
main_query = main_query.source(excludes=fields_to_exclude)
extra_options["highlight"] = highlight_options
if cd["type"] == SEARCH_TYPES.OPINION:
extra_options.update(
{
@@ -2688,24 +2693,36 @@
}
}
)
main_query = s.extra(**extra_options)
main_query = main_query.extra(**extra_options)
main_query = main_query.sort(
build_sort_results(cd, api_version=api_version)
)
else:
# Build query params for the ES V4 Search API endpoints.
if cd["type"] == SEARCH_TYPES.RECAP_DOCUMENT:
# The RECAP_DOCUMENT search type returns only child documents.
# Here, the child documents query is retrieved, highlighting and
# field exclusion are set.
s = build_child_docs_query(
join_query,
cd=cd,
)
s = apply_custom_score_to_parent_query(cd, s, api_version)
main_query = search_query.query(s)
highlight_options, fields_to_exclude = build_highlights_dict(
SEARCH_RECAP_CHILD_HL_FIELDS, hl_tag
)
main_query = main_query.source(excludes=fields_to_exclude)
if cd["highlight"]:
extra_options["highlight"] = highlight_options
main_query = main_query.extra(**extra_options)
else:
# DOCKETS and RECAP search types. Use the same query parameters as
# in the frontend. Only switch highlighting according to the user
# request.
main_query = s
if cd["highlight"]:
main_query = add_es_highlighting(s, cd)

return main_query


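In the V4 branch, the `rd` search type now gets the RECAP child highlighting fields, and the `highlight` options are only attached when the request enables them. On an `elasticsearch_dsl` `Search`, that toggle reduces to something like the following sketch (index and field names are placeholders):

```python
from elasticsearch_dsl import Search


def build_rd_search(child_query, highlight: bool) -> Search:
    """Query child documents, excluding the bulky text field and
    highlighting it only when the caller asked for highlights."""
    s = Search(index="recap_documents").query(child_query)
    s = s.source(excludes=["plain_text"])  # snippet comes from HL or the DB backfill
    if highlight:
        s = s.highlight(
            "plain_text",
            number_of_fragments=0,  # return the whole field, tagged
            pre_tags=["<mark>"],
            post_tags=["</mark>"],
        )
    return s
```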
12 changes: 9 additions & 3 deletions cl/lib/test_helpers.py
@@ -158,7 +158,7 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime:
}


docket_v4_api_keys = {
docket_v4_api_keys_base = {
"assignedTo": lambda x: (
x["assignedTo"]
if x.get("assignedTo")
@@ -261,7 +261,6 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime:
"party_id"
]
),
"recap_documents": [],
"referredTo": lambda x: (
x["referredTo"]
if x.get("referredTo")
@@ -289,9 +288,16 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime:
if hasattr(x["result"].docket_entry.docket, "bankruptcy_information")
else None
),
"more_docs": lambda x: False,
}

docket_v4_api_keys = docket_v4_api_keys_base.copy()
docket_v4_api_keys.update(
{
"more_docs": lambda x: False,
"recap_documents": [], # type: ignore
}
)

recap_document_v4_api_keys = {
"id": lambda x: x["result"].pk,
"docket_entry_id": lambda x: x["result"].docket_entry.pk,
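The `test_helpers` change extracts the shared docket field expectations into `docket_v4_api_keys_base`, so the docket (`d`) and RECAPDocument (`rd`) assertions maintain a single field list. The pattern, in miniature (keys abbreviated for illustration):

```python
# Shared expectations: each API key maps to a callable that derives the
# expected value from the test fixture passed in as `x`.
docket_keys_base = {
    "docket_id": lambda x: x["result"].docket_entry.docket_id,
    "caseName": lambda x: x["result"].docket_entry.docket.case_name,
}

# The docket ("d") response layers its extra keys on top of the base.
docket_keys = docket_keys_base.copy()
docket_keys.update({"recap_documents": [], "more_docs": lambda x: False})
```

Any other expectation mapping that needs the docket fields can start from the same base instead of repeating them.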