Merge branch 'main' into scrape_citations_command
grossir committed Aug 20, 2024
2 parents 5a738b3 + fb89240 commit 9d034de
Showing 8 changed files with 349 additions and 16 deletions.
19 changes: 15 additions & 4 deletions cl/citations/match_citations_queries.py
@@ -131,11 +131,13 @@ def es_case_name_query(


def es_search_db_for_full_citation(
full_citation: FullCaseCitation,
full_citation: FullCaseCitation, query_citation: bool = False
) -> list[Hit]:
"""For a citation object, try to match it to an item in the database using
a variety of heuristics.
:param full_citation: A FullCaseCitation instance.
:param query_citation: Whether this is related to es_get_query_citation
resolution
:return: An Elasticsearch Result object with the results, or an empty list if
there are no hits
"""
@@ -147,12 +149,20 @@ def es_search_db_for_full_citation(
Q(
"term", **{"status.raw": "Published"}
), # Non-precedential documents aren't cited
Q("match", cluster_child="opinion"),
]

if query_citation:
# If this is related to query citation resolution, look for
# opinion_cluster to determine if a citation matched a single cluster.
filters.append(Q("match", cluster_child="opinion_cluster"))
else:
filters.append(Q("match", cluster_child="opinion"))

must_not = []
if full_citation.citing_opinion is not None:
# Eliminate self-cites.
must_not.append(Q("match", id=full_citation.citing_opinion.pk))

# Set up filter parameters
if full_citation.year:
start_year = end_year = full_citation.year
@@ -204,7 +214,6 @@ def es_search_db_for_full_citation(
full_citation.citing_opinion,
)
return results

# Give up.
return []

@@ -225,7 +234,9 @@ def es_get_query_citation(cd: CleanData) -> Hit | None:
matches = None
if len(citations) == 1:
# If it's not exactly one match, user doesn't get special help.
matches = es_search_db_for_full_citation(citations[0])
matches = es_search_db_for_full_citation(
citations[0], query_citation=True
)
if len(matches) == 1:
# If more than one match, don't show the tip
return matches[0]
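For context, here is a minimal sketch of how the new `query_citation` flag switches the `cluster_child` filter, using `elasticsearch_dsl.Q` as the diff does. The year/court filters and the actual search execution that the full function performs are omitted, and the helper name is made up for illustration.

```python
from elasticsearch_dsl import Q

def citation_filters(query_citation: bool = False) -> list:
    """Build the base filters used when matching a full citation.

    Simplified sketch of es_search_db_for_full_citation above; only the
    status and cluster_child filters are shown.
    """
    filters = [
        # Non-precedential documents aren't cited.
        Q("term", **{"status.raw": "Published"}),
    ]
    if query_citation:
        # Query-citation resolution matches at the cluster level, so a
        # single hit means the citation maps to exactly one cluster.
        filters.append(Q("match", cluster_child="opinion_cluster"))
    else:
        # Regular citation matching targets individual opinions.
        filters.append(Q("match", cluster_child="opinion"))
    return filters
```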
77 changes: 77 additions & 0 deletions cl/lib/elasticsearch_utils.py
@@ -1789,6 +1789,83 @@ def merge_unavailable_fields_on_parent_document(
result["id"], ""
)

case (
SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS
) if request_type == "frontend":
# Merge initial complaint button to the frontend search results.
docket_ids = {doc["docket_id"] for doc in results}
# This query retrieves initial complaint documents considering two
# possibilities:
# 1. For district, bankruptcy, and appellate entries where we don't know
# if the entry contains attachments, it considers:
# document_number=1 and attachment_number=None and document_type=PACER_DOCUMENT
# This represents the main document with document_number 1.
# 2. For appellate entries where the attachment page has already been
# merged, it considers:
# document_number=1 and attachment_number=1 and document_type=ATTACHMENT
# This represents document_number 1 that has been converted to an attachment.

appellate_court_ids = (
Court.federal_courts.appellate_pacer_courts().values_list(
"pk", flat=True
)
)
initial_complaints = (
RECAPDocument.objects.filter(
QObject(
QObject(
attachment_number=None,
document_type=RECAPDocument.PACER_DOCUMENT,
)
| QObject(
attachment_number=1,
document_type=RECAPDocument.ATTACHMENT,
docket_entry__docket__court_id__in=appellate_court_ids,
)
),
docket_entry__docket_id__in=docket_ids,
document_number="1",
)
.select_related(
"docket_entry",
"docket_entry__docket",
"docket_entry__docket__court",
)
.only(
"pk",
"document_type",
"document_number",
"attachment_number",
"pacer_doc_id",
"is_available",
"filepath_local",
"docket_entry__docket_id",
"docket_entry__docket__slug",
"docket_entry__docket__pacer_case_id",
"docket_entry__docket__court__jurisdiction",
"docket_entry__docket__court_id",
)
)
initial_complaints_in_page = {}
for initial_complaint in initial_complaints:
if initial_complaint.has_valid_pdf:
initial_complaints_in_page[
initial_complaint.docket_entry.docket_id
] = (initial_complaint.get_absolute_url(), None)
else:
initial_complaints_in_page[
initial_complaint.docket_entry.docket_id
] = (None, initial_complaint.pacer_url)

for result in results:
complaint_url, buy_complaint_url = (
initial_complaints_in_page.get(
result.docket_id, (None, None)
)
)
result["initial_complaint_url"] = complaint_url
result["buy_initial_complaint_url"] = buy_complaint_url

case SEARCH_TYPES.OPINION if request_type == "v4" and not highlight:
# Retrieves the Opinion plain_text from the DB to fill the snippet
# when highlighting is disabled. Considering the same prioritization
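As a rough guide to the merge step above, the sketch below keeps only the link-selection logic; it assumes `results` behaves like a list of dicts keyed by `docket_id` and that `initial_complaints` is the queryset built in the diff. `attach_initial_complaint_links` is a hypothetical name.

```python
def attach_initial_complaint_links(results, initial_complaints):
    """Merge initial-complaint links into a page of frontend search results.

    Condensed from the diff above: one bulk lookup per results page, keyed
    by docket_id, instead of one query per result.
    """
    links_by_docket = {}
    for doc in initial_complaints:
        if doc.has_valid_pdf:
            # A local PDF exists: link straight to the document page.
            links_by_docket[doc.docket_entry.docket_id] = (
                doc.get_absolute_url(),
                None,
            )
        else:
            # No local PDF yet: fall back to the PACER purchase URL.
            links_by_docket[doc.docket_entry.docket_id] = (None, doc.pacer_url)

    for result in results:
        view_url, buy_url = links_by_docket.get(result["docket_id"], (None, None))
        result["initial_complaint_url"] = view_url
        result["buy_initial_complaint_url"] = buy_url
```

The template then renders the free "Initial Complaint" button when the first URL is set and the "Buy Initial Complaint" button when only the second is available, as shown in the search_result.html changes below.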
5 changes: 3 additions & 2 deletions cl/search/signals.py
@@ -329,11 +329,11 @@
RECAPDocument: {
"self": {
"description": ["short_description"],
"document_type": ["document_type"],
"document_type": ["document_type", "absolute_url"],
"document_number": ["document_number", "absolute_url"],
"pacer_doc_id": ["pacer_doc_id"],
"plain_text": ["plain_text"],
"attachment_number": ["attachment_number"],
"attachment_number": ["attachment_number", "absolute_url"],
"is_available": ["is_available"],
"page_count": ["page_count"],
"filepath_local": ["filepath_local"],
@@ -364,6 +364,7 @@
"assigned_to_str": ["assignedTo"],
"referred_to_str": ["referredTo"],
"pacer_case_id": ["pacer_case_id"],
"slug": ["absolute_url"],
}
},
Person: {
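The mapping above drives the indexing signals: when a tracked model field changes, the listed Elasticsearch fields are refreshed, which is why `absolute_url` is now re-computed whenever `document_type`, `attachment_number`, or the docket `slug` change. The helper below is only an illustration of how such a map can be read; it is not the project's actual signal processor, and `fields_to_update` is a made-up name.

```python
def fields_to_update(field_map: dict[str, list[str]], changed: set[str]) -> set[str]:
    """Return the ES fields to refresh for a given set of changed model fields.

    Illustrative only; the real signal handlers also deal with related
    documents, creations, and deletions.
    """
    es_fields: set[str] = set()
    for model_field, es_targets in field_map.items():
        if model_field in changed:
            es_fields.update(es_targets)
    return es_fields

# With the updated RECAPDocument "self" mapping from this diff, changing
# attachment_number now refreshes absolute_url as well:
#   fields_to_update(recap_self_map, {"attachment_number"})
#   -> {"attachment_number", "absolute_url"}
```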
9 changes: 9 additions & 0 deletions cl/search/templates/includes/search_result.html
@@ -207,6 +207,15 @@ <h4>
{% endwith %}
{% endfor %}
<div class="col-md-offset-half">
{% if result.initial_complaint_url %}
<a href="{{ result.initial_complaint_url }}" class="initial-complaint btn-primary btn">
Initial Complaint
</a>
{% elif result.buy_initial_complaint_url %}
<a href="{{ result.buy_initial_complaint_url }}" rel="nofollow" target="_blank" class="initial-complaint btn-primary btn">
Buy Initial Complaint
</a>
{% endif %}
{% if result.child_remaining %}
<a href="{% url "show_results" %}?type={{ type|urlencode }}&q={% if request.GET.q %}({{ request.GET.q|urlencode }})%20AND%20{% endif %}docket_id%3A{{ result.docket_id|urlencode }}" class="btn-default btn">
View Additional Results for this Case
35 changes: 35 additions & 0 deletions cl/search/tests/tests_es_opinion.py
@@ -1982,13 +1982,15 @@ def test_display_query_citation_frontend(self) -> None:
"""Confirm if the query citation alert is shown on the frontend when
querying a single citation, and it's found into ES."""

# Cluster with citation and multiple sibling opinions is properly matched.
with self.captureOnCommitCallbacks(execute=True):
cluster = OpinionClusterFactory.create(
precedential_status=PRECEDENTIAL_STATUS.PUBLISHED,
docket=self.docket_1,
date_filed=datetime.date(2024, 8, 23),
)
OpinionFactory.create(cluster=cluster, plain_text="")
OpinionFactory.create(cluster=cluster, plain_text="")
CitationWithParentsFactory.create(
volume=31,
reporter="Pa. D. & C.",
@@ -2009,6 +2011,39 @@ def test_display_query_citation_frontend(self) -> None:
self.assertIn(
"It looks like you're trying to search for", r.content.decode()
)

# Add a new cluster for the same citation. This time, it is not
# possible to identify a unique case for the citation.
with self.captureOnCommitCallbacks(execute=True):
cluster_2 = OpinionClusterFactory.create(
case_name="Test case",
precedential_status=PRECEDENTIAL_STATUS.PUBLISHED,
docket=self.docket_1,
date_filed=datetime.date(2024, 8, 23),
)
OpinionFactory.create(cluster=cluster_2, plain_text="")
CitationWithParentsFactory.create(
volume=31,
reporter="Pa. D. & C.",
page="445",
type=2,
cluster=cluster_2,
)

search_params = {
"type": SEARCH_TYPES.OPINION,
"q": "31 Pa. D. & C. 445",
"order_by": "score desc",
}
r = self.client.get(
reverse("show_results"),
search_params,
)
self.assertNotIn(
"It looks like you're trying to search for", r.content.decode()
)

cluster_2.delete()
cluster.delete()

