fix(api): Added HL for Docket "d" and RECAPDocument "rd" search types
- Refactored tests to reuse code.
albertisfu committed Apr 26, 2024
1 parent 5745716 commit 9d57c24
Showing 5 changed files with 422 additions and 183 deletions.
20 changes: 10 additions & 10 deletions cl/api/pagination.py
@@ -6,9 +6,11 @@
from elasticsearch_dsl.response import Response as ESResponse
from rest_framework.exceptions import NotFound
from rest_framework.pagination import BasePagination, PageNumberPagination
from rest_framework.request import Request
from rest_framework.response import Response
from rest_framework.utils.urls import replace_query_param

from cl.search.api_utils import CursorESList
from cl.search.types import ESCursor


@@ -90,15 +92,13 @@ def __init__(self):
self.invalid_cursor_message = "Invalid cursor"

def paginate_queryset(
self, es_list_instance, request, view=None
self, es_list_instance: CursorESList, request: Request, view=None
) -> ESResponse:
"""Paginate the Elasticsearch query and retrieve the results."""

self.base_url = request.build_absolute_uri()

self.request = request
self.cursor = self.decode_cursor(request)

self.es_list_instance = es_list_instance
self.es_list_instance.set_pagination(self.cursor, self.page_size)
results = self.es_list_instance.get_paginated_results()
@@ -118,7 +118,7 @@ def get_paginated_response(self, data):
}
)

def get_next_link(self):
def get_next_link(self) -> str | None:
"""Constructs the URL for the next page based on the current page's
last item.
"""
@@ -131,7 +131,7 @@ def get_next_link(self):
cursor = ESCursor(search_after=search_after_sort_key, reverse=False)
return self.encode_cursor(cursor)

def get_previous_link(self):
def get_previous_link(self) -> str | None:
"""Constructs the URL for the next page based on the current page's
last item.
"""
@@ -146,7 +146,7 @@
)
return self.encode_cursor(cursor)

def decode_cursor(self, request):
def decode_cursor(self, request: Request) -> ESCursor | None:
"""Given a request with a cursor, return a `ESCursor` instance."""
encoded = request.query_params.get(self.cursor_query_param)
if encoded is None:
@@ -162,7 +162,7 @@ def decode_cursor(self, request):
raise NotFound(self.invalid_cursor_message)
return ESCursor(search_after=search_after, reverse=reverse)

def encode_cursor(self, cursor):
def encode_cursor(self, cursor: ESCursor) -> str:
"""Given a ESCursor instance, return an url with encoded cursor."""
tokens = {}
if cursor.search_after != 0:
@@ -176,7 +176,7 @@ def encode_cursor(self, cursor):
self.base_url, self.cursor_query_param, encoded
)

def get_results_count(self):
def get_results_count(self) -> dict[str, bool | int]:
"""Provides a structured count of results based on settings.
:return: A dictionary containing "exact" count and whether there are
@@ -193,7 +193,7 @@ def get_results_count(self):
> settings.ELASTICSEARCH_MAX_RESULT_COUNT,
}

def has_next(self):
def has_next(self) -> bool:
"""Determines if there is a next page based on the search_after key
and results count.
"""
@@ -206,7 +206,7 @@ def has_next(self):
# If going backward, it indicates that there was a next page.
return True

def has_prev(self):
def has_prev(self) -> bool:
"""Determines if there is a next page based on the search_after key
and results count.
"""
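The cursor pagination above serializes an `ESCursor` (the `search_after` sort key plus a `reverse` flag) into the opaque `cursor` query parameter, and `decode_cursor` raises `NotFound` for anything it cannot parse. A minimal sketch of that round trip, assuming a urlencoded-then-base64 token layout (the real token format lives in the collapsed bodies of `encode_cursor` and `decode_cursor` and may differ):

```python
import base64
from urllib.parse import parse_qs, urlencode


def encode_cursor_token(search_after: list | None, reverse: bool) -> str:
    """Pack the ES sort key and paging direction into a URL-safe token."""
    tokens: dict[str, str] = {}
    if search_after:
        # search_after is a list of sort values; join them for transport.
        tokens["s"] = "|".join(str(value) for value in search_after)
    if reverse:
        tokens["r"] = "1"
    return base64.urlsafe_b64encode(urlencode(tokens).encode()).decode()


def decode_cursor_token(encoded: str) -> tuple[list[str] | None, bool]:
    """Inverse of encode_cursor_token; bad tokens should surface as NotFound."""
    querystring = base64.urlsafe_b64decode(encoded.encode()).decode()
    tokens = parse_qs(querystring)
    search_after = tokens["s"][0].split("|") if "s" in tokens else None
    reverse = tokens.get("r", ["0"])[0] == "1"
    return search_after, reverse


token = encode_cursor_token(["2024-04-26", 12345], reverse=False)
print(decode_cursor_token(token))  # (['2024-04-26', '12345'], False)
```

Because clients only see the opaque token and follow the `next`/`previous` links, the sort-key layout can change without breaking the API contract.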
127 changes: 72 additions & 55 deletions cl/lib/elasticsearch_utils.py
@@ -805,7 +805,7 @@ def build_has_child_query(
highlighting_fields: dict[str, int] | None = None,
order_by: tuple[str, str] | None = None,
child_highlighting: bool = True,
api_version: Literal["v3", "v4"] = None,
api_version: Literal["v3", "v4"] | None = None,
) -> QueryString:
"""Build a 'has_child' query.
@@ -1548,7 +1548,7 @@ def fill_position_mapping(


def merge_unavailable_fields_on_parent_document(
results: Page | dict,
results: Page | dict | Response,
search_type: str,
request_type: Literal["frontend", "api"] = "frontend",
highlight: bool = True,
@@ -1589,14 +1589,21 @@
value = position_dict.get(person_id)
cleaned_name = re.sub("_dict", "", field.name)
result[cleaned_name] = value
case SEARCH_TYPES.RECAP if request_type == "api" and not highlight:
case (
SEARCH_TYPES.RECAP | SEARCH_TYPES.RECAP_DOCUMENT
) if request_type == "api" and not highlight:
# Retrieves the plain_text from the DB to fill the snippet when
# highlighting is disabled.
rd_ids = {
doc["_source"]["id"]
for entry in results
for doc in entry["child_docs"]
}

if search_type == SEARCH_TYPES.RECAP:
rd_ids = {
doc["_source"]["id"]
for entry in results
for doc in entry["child_docs"]
}
else:
rd_ids = {entry["id"] for entry in results}

recap_docs = (
RECAPDocument.objects.filter(pk__in=rd_ids)
.annotate(
@@ -1610,10 +1617,13 @@
doc["id"]: doc["plain_text_short"] for doc in recap_docs
}
for result in results:
for rd in result["child_docs"]:
rd["_source"]["plain_text"] = recap_docs_dict[
rd["_source"]["id"]
]
if search_type == SEARCH_TYPES.RECAP:
for rd in result["child_docs"]:
rd["_source"]["plain_text"] = recap_docs_dict[
rd["_source"]["id"]
]
else:
result["plain_text"] = recap_docs_dict[result["id"]]

case _:
return
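The widened `RECAP | RECAP_DOCUMENT` case backfills `plain_text` from the database whenever API highlighting is disabled, since the field is excluded from the ES source. Stripped of the ORM details, the merge handles two result shapes, nested `child_docs` for docket-style results and flat hits for the `rd` type, roughly as below (`fetch_texts` stands in for the single `RECAPDocument` query):

```python
def backfill_plain_text(results, nested: bool, fetch_texts) -> None:
    """Fill the snippet field from the DB when highlighting is disabled."""
    if nested:
        # Docket-style results ("r"/"d"): documents hang off each docket hit.
        rd_ids = {
            doc["_source"]["id"]
            for entry in results
            for doc in entry["child_docs"]
        }
    else:
        # RECAPDocument results ("rd"): each hit is itself a document.
        rd_ids = {entry["id"] for entry in results}

    texts = fetch_texts(rd_ids)  # one DB round trip for the whole page
    for entry in results:
        if nested:
            for doc in entry["child_docs"]:
                doc["_source"]["plain_text"] = texts[doc["_source"]["id"]]
        else:
            entry["plain_text"] = texts[entry["id"]]
```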
@@ -2114,42 +2124,34 @@ def apply_custom_score_to_parent_query(
child_order is used.
"""
child_order_by = get_child_sorting_key(cd, api_version)
if (
child_order_by
and all(child_order_by)
and cd["type"] in [SEARCH_TYPES.RECAP, SEARCH_TYPES.DOCKETS]
):
sort_field, order = child_order_by
if sort_field == "entry_date_filed":
# It applies a function score to the parent query to nullify the
# parent score (sets it to 0) to prioritize child documents sorting
# criteria. This will ensure that dockets without documents come
# last on results.
query = nullify_query_score(query)
elif sort_field == "dateFiled" and api_version:
# Applies a custom function score to sort dockets based on their
# dateFiled field. This serves as a workaround to enable the use of
# the search_after cursor for pagination on documents with a None
# dateFiled.
query = build_custom_function_score_for_date(
query, child_order_by, default_score=0
)

if (
child_order_by
and all(child_order_by)
and cd["type"] == SEARCH_TYPES.RECAP_DOCUMENT
):
sort_field, order = child_order_by
if sort_field == "dateFiled" and api_version:
# Applies a custom function score to sort dockets based on their
# dateFiled field. This serves as a workaround to enable the use of
# the search_after cursor for pagination on documents with a None
# dateFiled.
query = build_custom_function_score_for_date(
query, child_order_by, default_score=0
)

valid_child_order_by = child_order_by and all(child_order_by)
match cd["type"]:
case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS if valid_child_order_by:
sort_field, order = child_order_by
if sort_field == "entry_date_filed":
# It applies a function score to the parent query to nullify
# the parent score (sets it to 0) to prioritize child documents
# sorting criteria. This will ensure that dockets without
# documents come last on results.
query = nullify_query_score(query)
elif sort_field == "dateFiled" and api_version:
# Applies a custom function score to sort Dockets based on
# their dateFiled field. This serves as a workaround to enable
# the use of the search_after cursor for pagination on
# documents with a None dateFiled.
query = build_custom_function_score_for_date(
query, child_order_by, default_score=0
)
case SEARCH_TYPES.RECAP_DOCUMENT if valid_child_order_by:
sort_field, order = child_order_by
if sort_field in ["dateFiled", "entry_date_filed"] and api_version:
# Applies a custom function score to sort RECAPDocuments based
# on their docket dateFiled or entry_date_filed field. This
# serves as a workaround to enable the use of the search_after
# cursor for pagination on documents with a None dateFiled.
query = build_custom_function_score_for_date(
query, child_order_by, default_score=0
)
return query
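`build_custom_function_score_for_date` is not part of this diff, but the comments describe the technique: replace the relevance score with the sort date so that documents missing the date collapse to a deterministic default of 0, which keeps `search_after` cursors stable. A hedged sketch of the idea with `elasticsearch_dsl`; the field name, script, and signature are assumptions rather than the project's implementation:

```python
from elasticsearch_dsl import Q, SF


def score_by_date(query, date_field: str = "dateFiled", default_score: int = 0):
    """Swap the query score for the date's epoch millis, or a default when absent."""
    script = (
        f"doc['{date_field}'].size() == 0 ? {default_score} "
        f": doc['{date_field}'].value.toInstant().toEpochMilli()"
    )
    return Q(
        "function_score",
        query=query,
        functions=[SF("script_score", script={"source": script})],
        boost_mode="replace",  # discard the original relevance score entirely
    )
```

Sorting on `_score` then behaves like sorting on the date itself, with undated documents pinned to one end of the result set.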


@@ -2667,19 +2669,22 @@ def do_es_api_query(
s, join_query = build_es_base_query(
search_query, cd, cd["highlight"], api_version
)
extra_options: dict[str, dict[str, Any]] = {}
if api_version == "v3":
# Build query parameters for the ES V3 Search API endpoints.
# V3 endpoints display child documents. Here, the child documents query
# is retrieved, and extra parameters like highlighting, field exclusion,
# and sorting are set.
s = build_child_docs_query(
join_query,
cd=cd,
)
s = search_query.query(s)
main_query = search_query.query(s)
highlight_options, fields_to_exclude = build_highlights_dict(
highlighting_fields, hl_tag
)
s = s.source(excludes=fields_to_exclude)
extra_options: dict[str, dict[str, Any]] = {
"highlight": highlight_options
}
main_query = main_query.source(excludes=fields_to_exclude)
extra_options["highlight"] = highlight_options
if cd["type"] == SEARCH_TYPES.OPINION:
extra_options.update(
{
@@ -2688,24 +2693,36 @@
}
}
)
main_query = s.extra(**extra_options)
main_query = main_query.extra(**extra_options)
main_query = main_query.sort(
build_sort_results(cd, api_version=api_version)
)
else:
# Build query params for the ES V4 Search API endpoints.
if cd["type"] == SEARCH_TYPES.RECAP_DOCUMENT:
# The RECAP_DOCUMENT search type returns only child documents.
# Here, the child documents query is retrieved, highlighting and
# field exclusion are set.
s = build_child_docs_query(
join_query,
cd=cd,
)
s = apply_custom_score_to_parent_query(cd, s, api_version)
main_query = search_query.query(s)
highlight_options, fields_to_exclude = build_highlights_dict(
SEARCH_RECAP_CHILD_HL_FIELDS, hl_tag
)
main_query = main_query.source(excludes=fields_to_exclude)
if cd["highlight"]:
extra_options["highlight"] = highlight_options
main_query = main_query.extra(**extra_options)
else:
# DOCKETS and RECAP search types. Use the same query parameters as
# in the frontend. Only switch highlighting according to the user
# request.
main_query = s
if cd["highlight"]:
main_query = add_es_highlighting(s, cd)

return main_query


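In the V4 branch, the `rd` search type now gets the RECAP child highlighting fields, and the `highlight` options are only attached when the request enables them. On an `elasticsearch_dsl` `Search`, that toggle reduces to something like the following sketch (index and field names are placeholders):

```python
from elasticsearch_dsl import Search


def build_rd_search(child_query, highlight: bool) -> Search:
    """Query child documents, excluding the bulky text field and
    highlighting it only when the caller asked for highlights."""
    s = Search(index="recap_documents").query(child_query)
    s = s.source(excludes=["plain_text"])  # snippet comes from HL or the DB backfill
    if highlight:
        s = s.highlight(
            "plain_text",
            number_of_fragments=0,  # return the whole field, tagged
            pre_tags=["<mark>"],
            post_tags=["</mark>"],
        )
    return s
```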
12 changes: 9 additions & 3 deletions cl/lib/test_helpers.py
@@ -158,7 +158,7 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime:
}


docket_v4_api_keys = {
docket_v4_api_keys_base = {
"assignedTo": lambda x: (
x["assignedTo"]
if x.get("assignedTo")
@@ -261,7 +261,6 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime:
"party_id"
]
),
"recap_documents": [],
"referredTo": lambda x: (
x["referredTo"]
if x.get("referredTo")
@@ -289,9 +288,16 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime:
if hasattr(x["result"].docket_entry.docket, "bankruptcy_information")
else None
),
"more_docs": lambda x: False,
}

docket_v4_api_keys = docket_v4_api_keys_base.copy()
docket_v4_api_keys.update(
{
"more_docs": lambda x: False,
"recap_documents": [], # type: ignore
}
)

recap_document_v4_api_keys = {
"id": lambda x: x["result"].pk,
"docket_entry_id": lambda x: x["result"].docket_entry.pk,
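The `test_helpers` change extracts the shared docket field expectations into `docket_v4_api_keys_base`, so the docket (`d`) and RECAPDocument (`rd`) assertions maintain a single field list. The pattern, in miniature (keys abbreviated for illustration):

```python
# Shared expectations: each API key maps to a callable that derives the
# expected value from the test fixture passed in as `x`.
docket_keys_base = {
    "docket_id": lambda x: x["result"].docket_entry.docket_id,
    "caseName": lambda x: x["result"].docket_entry.docket.case_name,
}

# The docket ("d") response layers its extra keys on top of the base.
docket_keys = docket_keys_base.copy()
docket_keys.update({"recap_documents": [], "more_docs": lambda x: False})
```

Any other expectation mapping that needs the docket fields can start from the same base instead of repeating them.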